diff --git a/docker-compose/README.md b/docker-compose/README.md index f06f6ea6..33bd0379 100644 --- a/docker-compose/README.md +++ b/docker-compose/README.md @@ -20,6 +20,13 @@ Open to view the cluster's topics. docker compose -f compose.decompose-xmls.yaml up ``` +## Convert the FHIR resources to a CSV dataset + +```sh +sudo chown -R 1001:1001 ./opal-output/ +docker compose -f compose.adtfhir-to-opal.yaml up +``` + ## Start the entire stack ```sh diff --git a/docker-compose/compose.adtfhir-to-opal.yaml b/docker-compose/compose.adtfhir-to-opal.yaml index a407d468..89b9e2f0 100644 --- a/docker-compose/compose.adtfhir-to-opal.yaml +++ b/docker-compose/compose.adtfhir-to-opal.yaml @@ -8,9 +8,9 @@ services: ipc: private security_opt: - "no-new-privileges:true" - user: "1000:100" + user: "1001:1001" environment: - OUTPUT_FOLDER: "/home/jovyan/opal-output" + OUTPUT_FOLDER: "/opt/bitnami/spark/opal-output" KAFKA_TOPIC_YEAR_SUFFIX: "" # e.g. ".2023" KAFKA_BOOTSTRAP_SERVER: "kafka:9092" KAFKA_PATIENT_TOPIC: "fhir.onkoadt.Patient" @@ -19,4 +19,4 @@ services: KAFKA_PROCEDURE_TOPIC: "fhir.onkoadt.Procedure" KAFKA_MEDICATIONSTATEMENT_TOPIC: "fhir.onkoadt.MedicationStatement" volumes: - - ${PWD}/opal-output:/home/jovyan/opal-output + - ${PWD}/opal-output:/opt/bitnami/spark/opal-output diff --git a/src/adtfhir_to_opal/Dockerfile b/src/adtfhir_to_opal/Dockerfile index fb3d7fc4..09316325 100644 --- a/src/adtfhir_to_opal/Dockerfile +++ b/src/adtfhir_to_opal/Dockerfile @@ -1,13 +1,17 @@ -FROM docker.io/jupyter/pyspark-notebook:spark-3.3.2@sha256:86f23b36bbd1900e10ce15bb29cf55ce31b10b1406c5afa6e57acf529cb10093 -WORKDIR /home/jovyan -USER 1000:100 +FROM docker.io/bitnami/spark:3.3.2@sha256:11ccd03367cadc0da48432e7636746e98a842324f590630f6d14299a40ff2ee4 +ENV SPARK_JARS_IVY="/home/spark/.ivy" +WORKDIR /opt/bitnami/spark +USER 0 +RUN groupadd -g 1001 spark && \ + useradd spark -u 1001 -g spark -m -s /bin/bash COPY requirements.txt requirements.txt -RUN <