diff --git a/docker-compose/README.md b/docker-compose/README.md
index f06f6ea6..33bd0379 100644
--- a/docker-compose/README.md
+++ b/docker-compose/README.md
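@@ -20,6 +20,15 @@ Open to view the cluster's topics.
docker compose -f compose.decompose-xmls.yaml up
```
+## Convert the FHIR resources to a CSV dataset
+
+The conversion container runs as UID/GID `1001:1001`, so the bind-mounted `./opal-output/` folder must be writable by that user before the job is started:
+
+```sh
+sudo chown -R 1001:1001 ./opal-output/
+docker compose -f compose.adtfhir-to-opal.yaml up
+```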
+
## Start the entire stack
```sh
diff --git a/docker-compose/compose.adtfhir-to-opal.yaml b/docker-compose/compose.adtfhir-to-opal.yaml
index a407d468..89b9e2f0 100644
--- a/docker-compose/compose.adtfhir-to-opal.yaml
+++ b/docker-compose/compose.adtfhir-to-opal.yaml
@@ -8,9 +8,9 @@ services:
ipc: private
security_opt:
- "no-new-privileges:true"
- user: "1000:100"
+ user: "1001:1001"
environment:
- OUTPUT_FOLDER: "/home/jovyan/opal-output"
+ OUTPUT_FOLDER: "/opt/bitnami/spark/opal-output"
KAFKA_TOPIC_YEAR_SUFFIX: "" # e.g. ".2023"
KAFKA_BOOTSTRAP_SERVER: "kafka:9092"
KAFKA_PATIENT_TOPIC: "fhir.onkoadt.Patient"
@@ -19,4 +19,4 @@ services:
KAFKA_PROCEDURE_TOPIC: "fhir.onkoadt.Procedure"
KAFKA_MEDICATIONSTATEMENT_TOPIC: "fhir.onkoadt.MedicationStatement"
volumes:
- - ${PWD}/opal-output:/home/jovyan/opal-output
+ - ${PWD}/opal-output:/opt/bitnami/spark/opal-output
diff --git a/src/adtfhir_to_opal/Dockerfile b/src/adtfhir_to_opal/Dockerfile
index fb3d7fc4..09316325 100644
--- a/src/adtfhir_to_opal/Dockerfile
+++ b/src/adtfhir_to_opal/Dockerfile
@@ -1,13 +1,17 @@
-FROM docker.io/jupyter/pyspark-notebook:spark-3.3.2@sha256:86f23b36bbd1900e10ce15bb29cf55ce31b10b1406c5afa6e57acf529cb10093
-WORKDIR /home/jovyan
-USER 1000:100
+FROM docker.io/bitnami/spark:3.3.2@sha256:11ccd03367cadc0da48432e7636746e98a842324f590630f6d14299a40ff2ee4
+ENV SPARK_JARS_IVY="/home/spark/.ivy"
+WORKDIR /opt/bitnami/spark
+USER 0
+RUN groupadd -g 1001 spark && \
+ useradd spark -u 1001 -g spark -m -s /bin/bash
COPY requirements.txt requirements.txt
-RUN <