# extraction fix: images in text nodes (#757) #1501
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# This workflow installs Python dependencies, then runs the tests and the linter with a variety of Python versions.
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
# Workflow name as shown in the Actions tab.
name: Tests

# Run on pushes to master and on pull requests that target master.
on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
jobs:
  # Single job: install the package, lint, type-check and run the test suite
  # once per matrix combination.
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      # Let the remaining combinations finish when one of them fails.
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        # https://github.com/actions/python-versions/blob/main/versions-manifest.json
        python-version: ["3.9", "3.11", "3.13"]  # "3.14-dev"
        # Two configurations per os/python pair; steps read them as
        # matrix.env.MINIMAL and matrix.env.PROXY_TEST.
        env:
          - MINIMAL: "true"
            PROXY_TEST: "false"
          - MINIMAL: "false"
            PROXY_TEST: "true"
        # Extra combinations outside the cross product above. They define no
        # `env`, so every step gated on matrix.env.MINIMAL == 'false' is
        # skipped for them and PROXY_TEST is passed to pytest as empty.
        include:
          # custom python versions
          # NOTE(review): confirm the ubuntu-20.04 and macos-13 runner labels
          # are still offered by GitHub-hosted runners.
          - os: ubuntu-20.04
            python-version: "3.8"
          - os: macos-13
            python-version: "3.8"
          - os: macos-latest
            python-version: "3.10"
          - os: windows-latest
            python-version: "3.10"
          - os: ubuntu-latest
            python-version: "3.12"
    # SOCKS5 proxies for the proxy tests. The image expression yields an empty
    # string on every OS other than ubuntu-latest, which keeps the service
    # container from being started there.
    services:
      # Proxy without authentication, published on host port 1080.
      socks_proxy:
        image: ${{ matrix.os == 'ubuntu-latest' && 'serjs/go-socks5-proxy' || '' }}
        ports:
          - "1080:1080"
      # Proxy with user/password credentials, published on host port 1081.
      socks_proxy_auth:
        image: ${{ matrix.os == 'ubuntu-latest' && 'serjs/go-socks5-proxy' || '' }}
        env:
          PROXY_USER: user
          PROXY_PASSWORD: pass
        ports:
          - "1081:1080"
    steps:
      # Check out first: hashFiles() in the cache key below can only hash
      # files that already exist in the workspace.
      - uses: actions/checkout@v4
      # Python and pip setup
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Upgrade pip
        run: python -m pip install --upgrade pip
      - name: Get pip cache dir
        id: pip-cache
        # Force bash so the $GITHUB_OUTPUT redirection also works on the
        # Windows runner, whose default shell is PowerShell.
        shell: bash
        # Writes the step output `dir` (replaces the deprecated ::set-output).
        run: echo "dir=$(pip cache dir)" >> "$GITHUB_OUTPUT"
      - name: pip cache
        uses: actions/cache@v4
        with:
          path: ${{ steps.pip-cache.outputs.dir }}
          # Hash the dependency manifests so the cache is refreshed when they
          # change; patterns that match no file are ignored.
          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt', 'pyproject.toml', 'setup.py') }}
          # Fall back to the most recent pip cache for this OS.
          restore-keys: |
            ${{ runner.os }}-pip-
      # package setup
      - name: Install dependencies
        run: python -m pip install -e ".[dev]"
      - name: Lint with flake8
        run: |
          # stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
      # pycurl installation fix
      - name: Install packages required by pycurl
        if: ${{ matrix.env.MINIMAL == 'false' }}
        run: |
          sudo apt-get update
          # -y keeps the install from stopping at a confirmation prompt
          sudo apt-get install -y libcurl4-gnutls-dev libgnutls28-dev
          # alternatively: sudo apt-get install libcurl4-openssl-dev libssl-dev
      - name: Install full dependencies
        if: ${{ matrix.env.MINIMAL == 'false' }}
        run: python -m pip install -e ".[all]"
      # Type-check only once: full install on Python 3.13.
      - name: Type checking
        if: ${{ matrix.env.MINIMAL == 'false' && matrix.python-version == '3.13' }}
        run: |
          mypy -p trafilatura
      - name: Test with pytest
        run: |
          python -m pytest --cov=./ --cov-report=xml
        env:
          # Forwarded to the test run from the matrix configuration.
          PROXY_TEST: ${{ matrix.env.PROXY_TEST }}
      # coverage
      # Upload only from the full-install Python 3.13 run.
      - name: Upload coverage to Codecov
        if: ${{ matrix.env.MINIMAL == 'false' && matrix.python-version == '3.13' }}
        uses: codecov/codecov-action@v4
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
        with:
          fail_ci_if_error: true
          files: ./coverage.xml
          verbose: true