Skip to content

Commit

Permalink
Browse files (browse the repository at this point in the history)
Co-authored-by: Dieter Kling <[email protected]>
  • Loading branch information
cs-dieter-kling and dkling-reply authored Nov 13, 2023
1 parent 048472f commit 52fad14
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 9 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
__pycache__/
venv/
.venv/
.idea/
.env
data/
Expand All @@ -8,4 +9,4 @@ stations_*.csv
stations_*.geo.json
testdata_merge.csv
token_deepatlas.json
alembic/versions/*.py
alembic/versions/*.py
31 changes: 24 additions & 7 deletions charging_stations_pipelines/pipelines/de/bna_crawler.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,38 @@
"""Module to download the BNA (Bundesnetzagentur) data from a specified URL."""

import logging
import os

import requests as requests
from bs4 import BeautifulSoup

from charging_stations_pipelines.shared import download_file

logger = logging.getLogger(__name__)


def get_bna_data(tmp_data_path: str, timeout: float = 30.0) -> None:
    """Download the BNA (Bundesnetzagentur) data into a temporary file.

    Fetches the BNA e-mobility start page, looks up the anchor element with
    the CSS class ``FTxlsx`` (Excel sheet 'ladesaeulenregister.xlsx') and
    passes its link target together with ``tmp_data_path`` to
    ``download_file``.

    :param tmp_data_path: The path to save the downloaded data file.
    :type tmp_data_path: str
    :param timeout: Seconds to wait for the start page before giving up.
    :type timeout: float
    :return: None
    :raises requests.HTTPError: If the start page answers with an error status.
    :raises RuntimeError: If no usable download link is found on the start page.
    """
    # Imported locally so this function does not depend on the module's
    # top-level import block being changed.
    from urllib.parse import urljoin

    # Base url & header
    headers = {"User-Agent": "Mozilla/5.0"}
    base_url = "https://www.bundesnetzagentur.de/DE/Fachthemen/ElektrizitaetundGas/E-Mobilitaet/start.html"

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(base_url, headers=headers, timeout=timeout)
    # Fail here on 4xx/5xx instead of parsing an error page below.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    # Example URL from "href" attribute:
    # https://data.bundesnetzagentur.de/Bundesnetzagentur/SharedDocs/Downloads/DE/Sachgebiete/Energie/Unternehmen_Institutionen/E_Mobilitaet/ladesaeulenregister.xlsx
    download_link_elem = soup.find("a", class_="FTxlsx")
    href = download_link_elem.get("href") if download_link_elem is not None else None
    if not href:
        # find() yields None when the page layout changes; report that
        # explicitly rather than failing with an opaque AttributeError.
        raise RuntimeError(
            f"No download link (<a class='FTxlsx'>) found on {base_url}"
        )

    # urljoin leaves absolute URLs untouched and resolves site-relative ones
    # against the start page, so both href styles work.
    download_link_url = urljoin(base_url, href)

    logger.info("Downloading BNA data from %s...", download_link_url)
    download_file(download_link_url, tmp_data_path)
    logger.info("Downloaded BNA data to %s", tmp_data_path)
    logger.info("Downloaded file size: %s bytes", os.path.getsize(tmp_data_path))
3 changes: 2 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@ services:
- POSTGRES_USER=${DB_USER}
- POSTGRES_PASSWORD=${DB_PASSWORD}
- POSTGRES_DB=${DB_NAME}
- SCHEMA_NAME=${DB_SCHEMA}
ports:
- ${DB_PORT}:5432
volumes:
- 'postgis-data:/var/lib/postgresql'

volumes:
postgis-data:
postgis-data:

0 comments on commit 52fad14

Please sign in to comment.