-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fixed an error that occurred after the BNA website change.
Created a GitHub workflow to check data sources for validity daily (currently for DE/BNA only)
- Loading branch information
1 parent
2597ef7
commit ae5c176
Showing
15 changed files
with
328 additions
and
44 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# This workflow checks the data sources for validity | ||
|
||
name: Data source validity checks | ||
|
||
on: | ||
schedule: | ||
# Runs the workflow every day at midnight (UTC) | ||
- cron: '0 0 * * *' | ||
|
||
jobs: | ||
test: | ||
runs-on: ubuntu-latest | ||
|
||
steps: | ||
- name: Check out code | ||
uses: actions/checkout@v4 | ||
|
||
- name: Set up Python | ||
uses: actions/setup-python@v5 | ||
with: | ||
python-version: 3.9.18 | ||
|
||
- name: Install dependencies | ||
run: | | ||
python -m pip install --upgrade pip | ||
pip install -r requirements.txt | ||
# - name: Run integration tests (only) | ||
# run: | | ||
# pip install -r test/requirements.txt | ||
# pytest -m "integration_test" | ||
|
||
- name: "[DE/BNA] Real data validity checks" | ||
run: | | ||
pip install -r test/requirements.txt | ||
pytest tests/integration/test_int_de_bna.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,104 @@ | ||
"""Integration tests for the crawler of the BNA pipeline.""" | ||
|
||
"""Unit tests for the crawler of the BNA pipeline.""" | ||
import logging | ||
import os | ||
import tempfile | ||
from unittest.mock import Mock, patch | ||
|
||
import pytest | ||
import requests | ||
|
||
import charging_stations_pipelines.pipelines.de.bna_crawler | ||
from charging_stations_pipelines.pipelines.de import bna_crawler | ||
from charging_stations_pipelines.pipelines.de.bna_crawler import __name__ as test_module_name | ||
# NOTE: "local_caplog" is a pytest fixture from test.shared.local_caplog | ||
from test.shared import local_caplog, LogLocalCaptureFixture # noqa: F401 | ||
|
||
|
||
@patch.object(charging_stations_pipelines.pipelines.de.bna_crawler, 'BeautifulSoup') | ||
@patch.object(charging_stations_pipelines.pipelines.de.bna_crawler, 'download_file') | ||
@patch.object(requests, 'get') | ||
@patch.object(os.path, 'getsize') | ||
def test_get_bna_data_downloads_file_with_correct_url(mock_getsize, mock_requests_get, mock_download_file, | ||
mock_beautiful_soup): | ||
# Mock the requests.get response | ||
mock_response = Mock() | ||
mock_response.content = b'something, something...' | ||
mock_response.status_code = 200 | ||
mock_requests_get.return_value = mock_response | ||
|
||
# Mock the BeautifulSoup find_all method | ||
mock_beautiful_soup.return_value.find_all.return_value = [{'href': 'https://some_ladesaeulenregister_url.xlsx'}] | ||
|
||
# Mock the os.path.getsize method | ||
mock_getsize.return_value = 4321 | ||
|
||
# Call the method under test | ||
bna_crawler.get_bna_data('./tmp_data_path/some_ladesaeulenregister_url.xlsx') | ||
|
||
# Ensure requests.get was called with the expected arguments. | ||
mock_requests_get.assert_called_with( | ||
"https://www.bundesnetzagentur.de/DE/Fachthemen/ElektrizitaetundGas/E-Mobilitaet/start.html", | ||
headers={"User-Agent": "Mozilla/5.0"}) | ||
|
||
# Assert that the download_file method was called with the correct parameters | ||
mock_download_file.assert_called_once_with( | ||
'https://some_ladesaeulenregister_url.xlsx', './tmp_data_path/some_ladesaeulenregister_url.xlsx') | ||
|
||
# Assert that the os.path.getsize method was called with the correct parameters | ||
mock_getsize.assert_called_once_with('./tmp_data_path/some_ladesaeulenregister_url.xlsx') | ||
|
||
|
||
@patch.object(requests, 'get') | ||
@patch.object(charging_stations_pipelines.pipelines.de.bna_crawler, 'BeautifulSoup') | ||
def test_get_bna_data_logs_error_when_no_download_link_found(mock_beautiful_soup, mock_requests_get, caplog): | ||
# Mock the requests.get response | ||
mock_requests_get.return_value = Mock(content=b'some content', status_code=200) | ||
|
||
# Mock the BeautifulSoup find_all method to return an empty list (no download links found) | ||
mock_beautiful_soup.return_value.find_all.return_value = [] | ||
|
||
with pytest.raises(bna_crawler.ExtractURLException, match='Failed to extract the download url from the website.'): | ||
# Call the function under test | ||
bna_crawler.get_bna_data('tmp_data_path') | ||
|
||
|
||
@patch.object(requests, 'get') | ||
@patch.object(charging_stations_pipelines.pipelines.de.bna_crawler, 'BeautifulSoup') | ||
@patch.object(charging_stations_pipelines.pipelines.de.bna_crawler, 'download_file') | ||
@patch.object(os.path, 'getsize') | ||
def test_get_bna_data_logs_file_size_after_download(mock_getsize, mock_download_file, mock_beautiful_soup, | ||
mock_requests_get, local_caplog: LogLocalCaptureFixture): | ||
# Mock the requests.get response | ||
mock_requests_get.return_value = Mock(content=b'some content') | ||
mock_requests_get.return_value.status_code = 200 | ||
|
||
# Mock the BeautifulSoup find_all method | ||
mock_beautiful_soup.return_value.find_all.return_value = [ | ||
{'href': 'some_url_without_search_term.xlsx'}, | ||
{'href': 'tmp_data_path/ladesaeulenregister.xlsx'} | ||
] | ||
|
||
# Mock the os.path.getsize method | ||
mock_getsize.return_value = 1234 | ||
|
||
logger = logging.getLogger(test_module_name) | ||
with local_caplog(level=logging.DEBUG, logger=logger): | ||
# Call the function under test while capturing its log output | ||
bna_crawler.get_bna_data('tmp_data_path/some_url1_with_search_term.xlsx') | ||
|
||
# Assert that the file size was logged | ||
assert "Downloaded file size: 1234 bytes" in local_caplog.logs | ||
|
||
# Assert that requests.get was called correctly | ||
mock_requests_get.assert_called_once_with( | ||
'https://www.bundesnetzagentur.de/DE/Fachthemen/ElektrizitaetundGas/E-Mobilitaet/start.html', | ||
headers={'User-Agent': 'Mozilla/5.0'}) | ||
|
||
from charging_stations_pipelines.pipelines.de.bna_crawler import get_bna_data | ||
from test.shared import skip_if_github | ||
# Assert that BeautifulSoup was called correctly | ||
mock_beautiful_soup.assert_called_once_with(b'some content', 'html.parser') | ||
|
||
# Assert that download_file was called correctly | ||
mock_download_file.assert_called_once_with('tmp_data_path/ladesaeulenregister.xlsx', | ||
'tmp_data_path/some_url1_with_search_term.xlsx') | ||
|
||
@pytest.mark.integration_test | ||
@pytest.mark.skipif(skip_if_github(), reason="Skip the test when running on Github") | ||
def test_get_bna_data(): | ||
"""Test the get_bna_data function.""" | ||
with tempfile.NamedTemporaryFile() as temp_file: | ||
get_bna_data(temp_file.name) | ||
assert os.path.getsize(temp_file.name) > 6 * 1_000_000, "File size is less than 6MB" | ||
# Assert that os.path.getsize was called correctly | ||
mock_getsize.assert_called_once_with('tmp_data_path/some_url1_with_search_term.xlsx') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
Oops, something went wrong.