diff --git a/.renovaterc.json b/.renovaterc.json index 2f6a1ecc..5d2d7e00 100644 --- a/.renovaterc.json +++ b/.renovaterc.json @@ -5,6 +5,8 @@ "docker.io/bitnami/spark", "docker.io/library/python", "pathling", - "pyspark" + "pyspark", + "ghcr.io/bzkf/onco-analytics-on-fhir/decompose-xmls", + "ghcr.io/bzkf/onco-analytics-on-fhir/obds-fhir-to-opal" ] } diff --git a/docker-compose/compose.decompose-xmls.yaml b/docker-compose/compose.decompose-xmls.yaml index 693c6856..401e87fe 100644 --- a/docker-compose/compose.decompose-xmls.yaml +++ b/docker-compose/compose.decompose-xmls.yaml @@ -10,5 +10,5 @@ services: # Set to 'true' to enable removing of leading zeros in patient IDs REMOVE_LEADING_PATIENTID_ZEROS: "false" volumes: - - ./input-obds-reports:/app/input-obds-reports - - ./output-obds-reports:/app/output-obds-reports + - ./input-obds-reports:/app/input-obds-reports:ro + - ./output-obds-reports:/app/output-obds-reports:rw diff --git a/docker-compose/compose.fhir-server.yaml b/docker-compose/compose.fhir-server.yaml index 8e6376e3..bbb300ee 100644 --- a/docker-compose/compose.fhir-server.yaml +++ b/docker-compose/compose.fhir-server.yaml @@ -53,7 +53,7 @@ services: - "fhir_db_data:/var/lib/postgresql/data:rw" fhir-to-server: - image: ghcr.io/miracum/kafka-fhir-to-server:v1.2.7 + image: ghcr.io/miracum/kafka-fhir-to-server:v1.2.7@sha256:cabd9e0b233e11b17ddf4651fe0479f9c20381f51a26812864860df34b69cd3c restart: unless-stopped cap_drop: - ALL diff --git a/docker-compose/compose.full.yaml b/docker-compose/compose.full.yaml index ecf6f7d6..9c398852 100644 --- a/docker-compose/compose.full.yaml +++ b/docker-compose/compose.full.yaml @@ -25,7 +25,7 @@ services: - "kafka_data:/bitnami:rw" kafka-connect: - image: ghcr.io/miracum/kafka-connect-images/cricketeerone-kafka-connect:v1.1.0 + image: ghcr.io/miracum/util-images/cricketeerone-kafka-connect:v1.6.0@sha256:5d4c79d5b9d64562d8906aec14c66ed5f48ddb0f9e7f12949547ab89b69e3f0f restart: unless-stopped cap_drop: - ALL @@ -52,7 +52,7 @@ services: - 8083:8083 akhq: - image: docker.io/tchiotludo/akhq:0.24.0 + image: docker.io/tchiotludo/akhq:0.24.0@sha256:6ccf8323ae6e93a893107f857cd9f7210add3569743b2c8528c6567967cc636f restart: unless-stopped cap_drop: - ALL diff --git a/docker-compose/compose.obds-fhir-to-opal.yaml b/docker-compose/compose.obds-fhir-to-opal.yaml index 75394a00..354a76e3 100644 --- a/docker-compose/compose.obds-fhir-to-opal.yaml +++ b/docker-compose/compose.obds-fhir-to-opal.yaml @@ -10,7 +10,7 @@ services: - "no-new-privileges:true" user: "1001:1001" environment: - OUTPUT_FOLDER: "/opt/bitnami/spark/opal-output" + OUTPUT_FOLDER: "/home/spark/opal-output" KAFKA_TOPIC_YEAR_SUFFIX: "" # e.g. ".2023" KAFKA_BOOTSTRAP_SERVER: "kafka:9092" KAFKA_PATIENT_TOPIC: "fhir.obds.Patient" @@ -19,4 +19,4 @@ services: KAFKA_PROCEDURE_TOPIC: "fhir.obds.Procedure" KAFKA_MEDICATIONSTATEMENT_TOPIC: "fhir.obds.MedicationStatement" volumes: - - ${PWD}/opal-output:/home/spark/opal-output + - ./opal-output:/home/spark/opal-output:rw diff --git a/docker-compose/compose.obds-to-fhir.yaml b/docker-compose/compose.obds-to-fhir.yaml index 5917ae94..61bf0105 100644 --- a/docker-compose/compose.obds-to-fhir.yaml +++ b/docker-compose/compose.obds-to-fhir.yaml @@ -1,6 +1,6 @@ services: obds-to-fhir: - image: ghcr.io/miracum/obds-to-fhir:v2.0.15@sha256:c359ef5449068ff0c1231c3ee237801f2fb76932ab9a16386813d02c07ac9f7d + image: ghcr.io/bzkf/obds-to-fhir:v2.2.0@sha256:b5d264c849fb5a19d8af1677b6f9adc26d70108c22e4f2753d0b2472da39f038 restart: unless-stopped cap_drop: - ALL diff --git a/docker-compose/output-obds-reports/.gitkeep b/docker-compose/output-obds-reports/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/src/decompose_xmls/Dockerfile b/src/decompose_xmls/Dockerfile index 2d3c515b..5c6fa036 100644 --- a/src/decompose_xmls/Dockerfile +++ b/src/decompose_xmls/Dockerfile @@ -1,7 +1,4 @@ -# syntax=docker/dockerfile:1 - -# Comments are provided throughout this file to help you get started. -FROM docker.io/library/python:3.11.4-slim@sha256:36b544be6e796eb5caa0bf1ab75a17d2e20211cad7f66f04f6f5c9eeda930ef5 AS base +FROM docker.io/library/python:3.12.5-slim@sha256:c24c34b502635f1f7c4e99dc09a2cbd85d480b7dcfd077198c6b5af138906390 AS base ENV PYTHONDONTWRITEBYTECODE=1 \ PYTHONUNBUFFERED=1 WORKDIR /app diff --git a/src/decompose_xmls/compose.yaml b/src/decompose_xmls/compose.yaml index 3f3be7b8..398a25ee 100644 --- a/src/decompose_xmls/compose.yaml +++ b/src/decompose_xmls/compose.yaml @@ -8,8 +8,8 @@ services: KAFKA_BOOTSTRAP_SERVERS: "kafka:9092" KAFKA_OUTPUT_TOPIC: "obds.einzelmeldungen" volumes: - - ./input-obds-reports:/app/input-obds-reports - - ./output-obds-reports:/app/output-obds-reports + - ./input-obds-reports:/app/input-obds-reports:ro + - ./output-obds-reports:/app/output-obds-reports:rw kafka: image: docker.io/bitnami/kafka:3.6.1@sha256:b5254050e61d1912ed9cf3f7107adc6a21ff29d85047b46d7b6df57cf25fa2f7 @@ -33,8 +33,10 @@ services: # KAFKA_CFG_ADVERTISED_LISTENERS: LISTENER_DOCKER_INTERNAL://kafka:19092,LISTENER_DOCKER_EXTERNAL://${DOCKER_HOST_IP:-127.0.0.1}:9092 akhq: - image: docker.io/tchiotludo/akhq:0.24.0 + image: docker.io/tchiotludo/akhq:0.24.0@sha256:6ccf8323ae6e93a893107f857cd9f7210add3569743b2c8528c6567967cc636f restart: unless-stopped + profiles: + - kafka cap_drop: - ALL privileged: false diff --git a/src/obds_fhir_to_opal/Dockerfile b/src/obds_fhir_to_opal/Dockerfile index 93b19dc4..b1a2b6d1 100644 --- a/src/obds_fhir_to_opal/Dockerfile +++ b/src/obds_fhir_to_opal/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.io/bitnami/spark:3.3.2@sha256:11ccd03367cadc0da48432e7636746e98a842324f590630f6d14299a40ff2ee4 +FROM docker.io/bitnami/spark:3.3.4@sha256:0b8dfa8bf3593be450af51d1da1e04cc568ef92deb5dea0f834c0be0c912ce6c ENV SPARK_JARS_IVY="/home/spark/.ivy" WORKDIR /opt/bitnami/spark USER 0 @@ -11,7 +11,7 @@ RUN pip install --no-cache-dir -r requirements.txt USER 1001:1001 RUN spark-shell -v --conf spark.jars.ivy=${SPARK_JARS_IVY}\ - --packages "org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.2,au.csiro.pathling:library-api:6.2.1,ch.cern.sparkmeasure:spark-measure_2.13:0.21,io.delta:delta-core_2.12:2.3.0" + --packages "org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.4,au.csiro.pathling:library-api:6.2.1,ch.cern.sparkmeasure:spark-measure_2.13:0.21,io.delta:delta-core_2.12:2.3.0" WORKDIR /home/spark diff --git a/src/obds_fhir_to_opal/obds_fhir_to_opal.py b/src/obds_fhir_to_opal/obds_fhir_to_opal.py index 351aa393..a2d6128f 100644 --- a/src/obds_fhir_to_opal/obds_fhir_to_opal.py +++ b/src/obds_fhir_to_opal/obds_fhir_to_opal.py @@ -30,7 +30,7 @@ class Settings(BaseSettings): kafka_medicationstatement_topic: str = "fhir.obds.MedicationStatement" # ⚠️ make sure these are consistent with the ones downloaded inside the Dockerfile jar_list: list = [ - "org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.2", + "org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.4", "au.csiro.pathling:library-api:6.2.1", "ch.cern.sparkmeasure:spark-measure_2.13:0.21", "io.delta:delta-core_2.12:2.3.0", @@ -244,9 +244,9 @@ def save_final_df(df): # to have only one single csv df_with_id = df_with_id.coalesce(1) # write DataFrame to CSV, rename it - output_dir = "output_dir" + output_dir = os.path.join(settings.output_folder, "csv-dir") df_with_id.write.mode("overwrite").csv(output_dir, header=True) - output_file = "df.csv" + output_file = os.path.join(settings.output_folder, settings.output_filename) part_file = [file for file in os.listdir(output_dir) if file.startswith("part-")][0] shutil.move(os.path.join(output_dir, part_file), output_file) shutil.rmtree(output_dir) diff --git a/src/obds_fhir_to_opal/requirements.txt b/src/obds_fhir_to_opal/requirements.txt index defdf1fa..e3292cd6 100644 --- a/src/obds_fhir_to_opal/requirements.txt +++ b/src/obds_fhir_to_opal/requirements.txt @@ -1,6 +1,6 @@ pathling==6.2.1 pydantic==1.10.14 -pyspark==3.3.2 +pyspark==3.3.4 pandas==2.2.0 python-dateutil==2.9.0.post0 xlsxwriter==3.2.0