Skip to content

Commit

Permalink
fix: minor fixes to e2e runnability (#203)
Browse files: browse the repository at this point in the history
* chore(deps): pin dependencies

* chore(deps): bumped deps and don't pin images from same repo

* fix: smaller dep updates and fixes to make sure it runs end-to-end

---------

Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com>

[skip ci]
  • Loading branch information
chgl authored Sep 27, 2024
1 parent ad31a0e commit 354c0b0
Show file tree
Hide file tree
Showing 12 changed files with 23 additions and 22 deletions.
4 changes: 3 additions & 1 deletion .renovaterc.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
"docker.io/bitnami/spark",
"docker.io/library/python",
"pathling",
"pyspark"
"pyspark",
"ghcr.io/bzkf/onco-analytics-on-fhir/decompose-xmls",
"ghcr.io/bzkf/onco-analytics-on-fhir/obds-fhir-to-opal"
]
}
4 changes: 2 additions & 2 deletions docker-compose/compose.decompose-xmls.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ services:
# Set to 'true' to enable removing of leading zeros in patient IDs
REMOVE_LEADING_PATIENTID_ZEROS: "false"
volumes:
- ./input-obds-reports:/app/input-obds-reports
- ./output-obds-reports:/app/output-obds-reports
- ./input-obds-reports:/app/input-obds-reports:ro
- ./output-obds-reports:/app/output-obds-reports:rw
2 changes: 1 addition & 1 deletion docker-compose/compose.fhir-server.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ services:
- "fhir_db_data:/var/lib/postgresql/data:rw"

fhir-to-server:
image: ghcr.io/miracum/kafka-fhir-to-server:v1.2.7
image: ghcr.io/miracum/kafka-fhir-to-server:v1.2.7@sha256:cabd9e0b233e11b17ddf4651fe0479f9c20381f51a26812864860df34b69cd3c
restart: unless-stopped
cap_drop:
- ALL
Expand Down
4 changes: 2 additions & 2 deletions docker-compose/compose.full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ services:
- "kafka_data:/bitnami:rw"

kafka-connect:
image: ghcr.io/miracum/kafka-connect-images/cricketeerone-kafka-connect:v1.1.0
image: ghcr.io/miracum/util-images/cricketeerone-kafka-connect:v1.6.0@sha256:5d4c79d5b9d64562d8906aec14c66ed5f48ddb0f9e7f12949547ab89b69e3f0f
restart: unless-stopped
cap_drop:
- ALL
Expand All @@ -52,7 +52,7 @@ services:
- 8083:8083

akhq:
image: docker.io/tchiotludo/akhq:0.24.0
image: docker.io/tchiotludo/akhq:0.24.0@sha256:6ccf8323ae6e93a893107f857cd9f7210add3569743b2c8528c6567967cc636f
restart: unless-stopped
cap_drop:
- ALL
Expand Down
4 changes: 2 additions & 2 deletions docker-compose/compose.obds-fhir-to-opal.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ services:
- "no-new-privileges:true"
user: "1001:1001"
environment:
OUTPUT_FOLDER: "/opt/bitnami/spark/opal-output"
OUTPUT_FOLDER: "/home/spark/opal-output"
KAFKA_TOPIC_YEAR_SUFFIX: "" # e.g. ".2023"
KAFKA_BOOTSTRAP_SERVER: "kafka:9092"
KAFKA_PATIENT_TOPIC: "fhir.obds.Patient"
Expand All @@ -19,4 +19,4 @@ services:
KAFKA_PROCEDURE_TOPIC: "fhir.obds.Procedure"
KAFKA_MEDICATIONSTATEMENT_TOPIC: "fhir.obds.MedicationStatement"
volumes:
- ${PWD}/opal-output:/home/spark/opal-output
- ./opal-output:/home/spark/opal-output:rw
2 changes: 1 addition & 1 deletion docker-compose/compose.obds-to-fhir.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
services:
obds-to-fhir:
image: ghcr.io/miracum/obds-to-fhir:v2.0.15@sha256:c359ef5449068ff0c1231c3ee237801f2fb76932ab9a16386813d02c07ac9f7d
image: ghcr.io/bzkf/obds-to-fhir:v2.2.0@sha256:b5d264c849fb5a19d8af1677b6f9adc26d70108c22e4f2753d0b2472da39f038
restart: unless-stopped
cap_drop:
- ALL
Expand Down
Empty file.
5 changes: 1 addition & 4 deletions src/decompose_xmls/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
# syntax=docker/dockerfile:1

# Comments are provided throughout this file to help you get started.
FROM docker.io/library/python:3.11.4-slim@sha256:36b544be6e796eb5caa0bf1ab75a17d2e20211cad7f66f04f6f5c9eeda930ef5 AS base
FROM docker.io/library/python:3.12.5-slim@sha256:c24c34b502635f1f7c4e99dc09a2cbd85d480b7dcfd077198c6b5af138906390 AS base
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1
WORKDIR /app
Expand Down
8 changes: 5 additions & 3 deletions src/decompose_xmls/compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ services:
KAFKA_BOOTSTRAP_SERVERS: "kafka:9092"
KAFKA_OUTPUT_TOPIC: "obds.einzelmeldungen"
volumes:
- ./input-obds-reports:/app/input-obds-reports
- ./output-obds-reports:/app/output-obds-reports
- ./input-obds-reports:/app/input-obds-reports:ro
- ./output-obds-reports:/app/output-obds-reports:rw

kafka:
image: docker.io/bitnami/kafka:3.6.1@sha256:b5254050e61d1912ed9cf3f7107adc6a21ff29d85047b46d7b6df57cf25fa2f7
Expand All @@ -33,8 +33,10 @@ services:
# KAFKA_CFG_ADVERTISED_LISTENERS: LISTENER_DOCKER_INTERNAL://kafka:19092,LISTENER_DOCKER_EXTERNAL://${DOCKER_HOST_IP:-127.0.0.1}:9092

akhq:
image: docker.io/tchiotludo/akhq:0.24.0
image: docker.io/tchiotludo/akhq:0.24.0@sha256:6ccf8323ae6e93a893107f857cd9f7210add3569743b2c8528c6567967cc636f
restart: unless-stopped
profiles:
- kafka
cap_drop:
- ALL
privileged: false
Expand Down
4 changes: 2 additions & 2 deletions src/obds_fhir_to_opal/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM docker.io/bitnami/spark:3.3.2@sha256:11ccd03367cadc0da48432e7636746e98a842324f590630f6d14299a40ff2ee4
FROM docker.io/bitnami/spark:3.3.4@sha256:0b8dfa8bf3593be450af51d1da1e04cc568ef92deb5dea0f834c0be0c912ce6c
ENV SPARK_JARS_IVY="/home/spark/.ivy"
WORKDIR /opt/bitnami/spark
USER 0
Expand All @@ -11,7 +11,7 @@ RUN pip install --no-cache-dir -r requirements.txt

USER 1001:1001
RUN spark-shell -v --conf spark.jars.ivy=${SPARK_JARS_IVY}\
--packages "org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.2,au.csiro.pathling:library-api:6.2.1,ch.cern.sparkmeasure:spark-measure_2.13:0.21,io.delta:delta-core_2.12:2.3.0"
--packages "org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.4,au.csiro.pathling:library-api:6.2.1,ch.cern.sparkmeasure:spark-measure_2.13:0.21,io.delta:delta-core_2.12:2.3.0"

WORKDIR /home/spark

Expand Down
6 changes: 3 additions & 3 deletions src/obds_fhir_to_opal/obds_fhir_to_opal.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class Settings(BaseSettings):
kafka_medicationstatement_topic: str = "fhir.obds.MedicationStatement"
# ⚠️ make sure these are consistent with the ones downloaded inside the Dockerfile
jar_list: list = [
"org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.2",
"org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.4",
"au.csiro.pathling:library-api:6.2.1",
"ch.cern.sparkmeasure:spark-measure_2.13:0.21",
"io.delta:delta-core_2.12:2.3.0",
Expand Down Expand Up @@ -244,9 +244,9 @@ def save_final_df(df):
# to have only one single csv
df_with_id = df_with_id.coalesce(1)
# write DataFrame to CSV, rename it
output_dir = "output_dir"
output_dir = os.path.join(settings.output_folder, "csv-dir")
df_with_id.write.mode("overwrite").csv(output_dir, header=True)
output_file = "df.csv"
output_file = os.path.join(settings.output_folder, settings.output_filename)
part_file = [file for file in os.listdir(output_dir) if file.startswith("part-")][0]
shutil.move(os.path.join(output_dir, part_file), output_file)
shutil.rmtree(output_dir)
Expand Down
2 changes: 1 addition & 1 deletion src/obds_fhir_to_opal/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
pathling==6.2.1
pydantic==1.10.14
pyspark==3.3.2
pyspark==3.3.4
pandas==2.2.0
python-dateutil==2.9.0.post0
xlsxwriter==3.2.0

0 comments on commit 354c0b0

Please sign in to comment.