diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 43a21bbf..62629170 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -18,11 +18,11 @@ permissions: read-all
env:
DIZBOX_NAMESPACE_NAME: bzkf-dizbox
DECOMPOSE_XML_IMAGE_NAME: ghcr.io/${{ github.repository }}/decompose-xmls
- ADTFHIR_TO_OPAL_IMAGE_NAME: ghcr.io/${{ github.repository }}/adtfhir-to-opal
+ OBDS_FHIR_TO_OPAL_IMAGE_NAME: ghcr.io/${{ github.repository }}/obds-fhir-to-opal
jobs:
build-decompose-xml-image:
- name: build decompose_xmls container image
+ name: build decompose-xmls container image
runs-on: ubuntu-22.04
permissions:
packages: write
@@ -64,15 +64,15 @@ jobs:
labels: ${{ steps.container_meta.outputs.labels }}
load: ${{ github.event_name == 'pull_request' }}
- build-adtfhir-to-opal-image:
- name: build adtfhir_to_opal container image
+ build-obds-fhir-to-opal-image:
+ name: build obds-fhir-to-opal container image
runs-on: ubuntu-22.04
permissions:
packages: write
outputs:
image-tags: ${{ steps.container_meta.outputs.tags }}
image-digest: ${{ steps.build.outputs.digest }}
- image-name: ${{ env.ADTFHIR_TO_OPAL_IMAGE_NAME }}
+ image-name: ${{ env.OBDS_FHIR_TO_OPAL_IMAGE_NAME }}
steps:
- name: Checkout code
uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4
@@ -85,7 +85,7 @@ jobs:
uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934 # v5
with:
images: |
- ${{ env.ADTFHIR_TO_OPAL_IMAGE_NAME }}
+ ${{ env.OBDS_FHIR_TO_OPAL_IMAGE_NAME }}
- name: Login to GitHub Container Registry
uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3
@@ -99,7 +99,7 @@ jobs:
id: build
uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5
with:
- context: src/adtfhir_to_opal
+ context: src/obds_fhir_to_opal
cache-from: type=gha
cache-to: type=gha,mode=max
push: ${{ github.event_name != 'pull_request' }}
@@ -113,7 +113,7 @@ jobs:
if: ${{ github.event_name == 'pull_request' }}
needs:
- build-decompose-xml-image
- - build-adtfhir-to-opal-image
+ - build-obds-fhir-to-opal-image
steps:
- name: install k3s
run: |
@@ -173,7 +173,7 @@ jobs:
run: |
helm test diz-in-a-box
- kubectl wait deployment/diz-in-a-box-stream-processors-onkoadt-to-fhir --for=condition=Available --timeout=300s
+ kubectl wait deployment/diz-in-a-box-stream-processors-obds-to-fhir --for=condition=Available --timeout=300s
kubectl wait deployment/diz-in-a-box-stream-processors-fhir-to-server --for=condition=Available --timeout=300s
- name: Print cluster logs
@@ -207,5 +207,5 @@ jobs:
packages: write
needs:
- build-decompose-xml-image
- - build-adtfhir-to-opal-image
+ - build-obds-fhir-to-opal-image
uses: ./.github/workflows/release.yaml
diff --git a/.github/workflows/release-please.yaml b/.github/workflows/release-please.yaml
index 5b34ba09..bb0a5e90 100644
--- a/.github/workflows/release-please.yaml
+++ b/.github/workflows/release-please.yaml
@@ -24,7 +24,7 @@ jobs:
charts/prerequisites/Chart.yaml
README.md
docker-compose/compose.decompose-xmls.yaml
- docker-compose/compose.adtfhir-to-opal.yaml
+ docker-compose/compose.obds-fhir-to-opal.yaml
changelog-types: |
[
{ "type": "feat", "section": "Features" },
diff --git a/.gitignore b/.gitignore
index 8f10bacf..9ae32231 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,5 @@ __pycache__/
bundles-delta/
*.csv
+
+.vscode
diff --git a/.markdownlint.yaml b/.markdownlint.yaml
new file mode 100644
index 00000000..ff7d7cc8
--- /dev/null
+++ b/.markdownlint.yaml
@@ -0,0 +1 @@
+MD013: false
diff --git a/README.md b/README.md
index e355e459..3c3c951b 100644
--- a/README.md
+++ b/README.md
@@ -99,7 +99,7 @@ helm upgrade --install --wait --timeout=10m --version=1.6.2 diz-in-a-box oci://g
# test the installation
helm test diz-in-a-box
-kubectl wait deployment/diz-in-a-box-stream-processors-onkoadt-to-fhir --for=condition=Available --timeout=300s
+kubectl wait deployment/diz-in-a-box-stream-processors-obds-to-fhir --for=condition=Available --timeout=300s
kubectl wait deployment/diz-in-a-box-stream-processors-fhir-to-server --for=condition=Available --timeout=300s
```
diff --git a/Taskfile.yaml b/Taskfile.yaml
index c70add18..d69eafdb 100644
--- a/Taskfile.yaml
+++ b/Taskfile.yaml
@@ -4,21 +4,21 @@ env:
COMPOSE_PROJECT_NAME: bzkf-diz-in-a-box
tasks:
- adt-to-fhir:
+ obds-to-fhir:
dir: docker-compose/
cmds:
- |
docker compose \
-f compose.full.yaml \
-f compose.decompose-xmls.yaml \
- -f compose.onkoadt-to-fhir.yaml up --detach
+ -f compose.obds-to-fhir.yaml up --detach
fhir-to-opal:
dir: docker-compose/
cmds:
- |
docker compose \
- -f compose.adtfhir-to-opal.yaml up
+ -f compose.obds-fhir-to-opal.yaml up
clean:
dir: docker-compose/
diff --git a/build-air-gapped-installer.sh b/build-air-gapped-installer.sh
index dc83699c..6b41ea2d 100755
--- a/build-air-gapped-installer.sh
+++ b/build-air-gapped-installer.sh
@@ -82,7 +82,7 @@ mkdir -p "$AIR_GAPPED_COMPOSE_INSTALL_DIR"
# generate save-images.sh
docker compose --profile=kafka-connect \
-f docker-compose/compose.full.yaml \
- -f docker-compose/compose.onkoadt-to-fhir.yaml \
+ -f docker-compose/compose.obds-to-fhir.yaml \
-f docker-compose/compose.decompose-xmls.yaml \
-f docker-compose/compose.fhir-server.yaml \
-f docker-compose/compose.pseudonymization.yaml \
diff --git a/charts/diz-in-a-box/README.md b/charts/diz-in-a-box/README.md
index 48592de1..b5cfdaa3 100644
--- a/charts/diz-in-a-box/README.md
+++ b/charts/diz-in-a-box/README.md
@@ -18,10 +18,10 @@ Kubernetes: `>= 1.21.0`
| Repository | Name | Version |
| ------------------------------------------------------ | ------------------- | ------- |
-| https://akhq.io/ | akhq | 0.3.1 |
-| https://hapifhir.github.io/hapi-fhir-jpaserver-starter | hapi-fhir-jpaserver | 0.11.1 |
-| https://miracum.github.io/charts | fhir-gateway | 6.0.3 |
-| https://miracum.github.io/charts | stream-processors | 1.1.10 |
+| | akhq | 0.3.1 |
+| | hapi-fhir-jpaserver | 0.11.1 |
+| | fhir-gateway | 6.0.3 |
+| | stream-processors | 1.1.10 |
## Values
@@ -45,10 +45,10 @@ Kubernetes: `>= 1.21.0`
| fhir-gateway.fhir-pseudonymizer.vfps.enabled | bool | `true` | |
| fhir-gateway.fhir-pseudonymizer.vfps.postgresql.auth.postgresPassword | string | `"vfps-postgres"` | |
| fhir-gateway.kafka.enabled | bool | `true` | |
-| fhir-gateway.kafka.inputTopics[0] | string | `"fhir.onkoadt.MedicationStatement"` | |
-| fhir-gateway.kafka.inputTopics[1] | string | `"fhir.onkoadt.Condition"` | |
-| fhir-gateway.kafka.inputTopics[2] | string | `"fhir.onkoadt.Observation"` | |
-| fhir-gateway.kafka.inputTopics[3] | string | `"fhir.onkoadt.Procedure"` | |
+| fhir-gateway.kafka.inputTopics[0] | string | `"fhir.obds.MedicationStatement"` | |
+| fhir-gateway.kafka.inputTopics[1] | string | `"fhir.obds.Condition"` | |
+| fhir-gateway.kafka.inputTopics[2] | string | `"fhir.obds.Observation"` | |
+| fhir-gateway.kafka.inputTopics[3] | string | `"fhir.obds.Procedure"` | |
| fhir-gateway.kafka.outputTopic | string | `"fhir.post-gateway"` | |
| fhir-gateway.kafka.securityProtocol | string | `"SSL"` | |
| fhir-gateway.kafka.strimziClusterName | string | `"bzkf-dizbox-cluster"` | |
diff --git a/charts/diz-in-a-box/values.yaml b/charts/diz-in-a-box/values.yaml
index 2193835c..c41d783e 100644
--- a/charts/diz-in-a-box/values.yaml
+++ b/charts/diz-in-a-box/values.yaml
@@ -116,11 +116,11 @@ stream-processors:
value: "false"
- name: TOPIC
value: >-
- fhir.pseudonymized.onkoadt.MedicationStatement,
- fhir.pseudonymized.onkoadt.Condition,
- fhir.pseudonymized.onkoadt.Observation,
- fhir.pseudonymized.onkoadt.Procedure,
- fhir.pseudonymized.onkoadt.Patient
+ fhir.pseudonymized.obds.MedicationStatement,
+ fhir.pseudonymized.obds.Condition,
+ fhir.pseudonymized.obds.Observation,
+ fhir.pseudonymized.obds.Procedure,
+ fhir.pseudonymized.obds.Patient
- name: JAVA_TOOL_OPTIONS
value: "-XX:MaxRAMPercentage=70"
livenessProbe:
@@ -131,13 +131,13 @@ stream-processors:
httpGet:
path: /actuator/health/readiness
port: 8080
- onkoadt-to-fhir:
+ obds-to-fhir:
replicaCount: 1
container:
image:
- registry: ghcr.io
- repository: miracum/onkoadt-to-fhir
- tag: v1.11.3@sha256:429fbb4218794af7375561b61878e87cae5f353ce605f5faae925ff7f3a9962b
+ registry: harbor.miracum.org
+ repository: streams-ume/obds-to-fhir
+ tag: v1.16.0@sha256:06b798a757f3669d18504ac6c2848027d33a03465bcec91c5a9b846c240371ec
pullPolicy: IfNotPresent
env:
- name: OPENTRACING_JAEGER_ENABLED
@@ -168,11 +168,11 @@ fhir-gateway:
kafka:
enabled: true
inputTopics:
- - fhir.onkoadt.MedicationStatement
- - fhir.onkoadt.Condition
- - fhir.onkoadt.Observation
- - fhir.onkoadt.Procedure
- - fhir.onkoadt.Patient
+ - fhir.obds.MedicationStatement
+ - fhir.obds.Condition
+ - fhir.obds.Observation
+ - fhir.obds.Procedure
+ - fhir.obds.Patient
# name of the topic to write processed resources to.
# Ignored because we use `SERVICES_KAFKA_GENERATE_OUTPUT_TOPIC_MATCH_EXPRESSION`
outputTopic: fhir.post-gateway
diff --git a/docker-compose/README.md b/docker-compose/README.md
index 33bd0379..4b40845d 100644
--- a/docker-compose/README.md
+++ b/docker-compose/README.md
@@ -1,20 +1,20 @@
-# onkoadt-to-fhir Docker Compose Version
+# obds-to-fhir Docker Compose Version
-## Run only the onkoadt-to-fhir job
+## Run only the obds-to-fhir job
```sh
-docker compose -f compose.onkoadt-to-fhir.yaml up
+docker compose -f compose.obds-to-fhir.yaml up
```
## Run while also starting a Kafka cluster and Kafka connect
```sh
-docker compose -f compose.onkoadt-to-fhir.yaml -f compose.full.yaml up
+docker compose -f compose.obds-to-fhir.yaml -f compose.full.yaml up
```
Open to view the cluster's topics.
-## Load sample data from a ADT Sammelmeldung into the Kafka cluster
+## Load sample data from an oBDS Sammelmeldung into the Kafka cluster
```sh
docker compose -f compose.decompose-xmls.yaml up
@@ -24,19 +24,19 @@ docker compose -f compose.decompose-xmls.yaml up
```sh
sudo chown -R 1001:1001 ./opal-output/
-docker compose -f compose.adtfhir-to-opal.yaml up
+docker compose -f compose.obds-fhir-to-opal.yaml up
```
## Start the entire stack
```sh
-docker compose -f compose.onkoadt-to-fhir.yaml -f compose.full.yaml -f compose.decompose-xmls.yaml -f compose.adtfhir-to-opal.yaml up
+docker compose -f compose.obds-to-fhir.yaml -f compose.full.yaml -f compose.decompose-xmls.yaml -f compose.obds-fhir-to-opal.yaml up
```
## Enable Kafka Connect and the connector
```sh
-docker compose -f compose.onkoadt-to-fhir.yaml -f compose.full.yaml up
+docker compose -f compose.obds-to-fhir.yaml -f compose.full.yaml up
```
```sh
@@ -52,11 +52,11 @@ curl -X POST \
> Requires gPAS to be set-up and the [anonymization.yaml](anonymization.yaml) to be configured
```sh
-docker compose -f compose.onkoadt-to-fhir.yaml -f compose.full.yaml -f compose.pseudonymization.yaml up
+docker compose -f compose.obds-to-fhir.yaml -f compose.full.yaml -f compose.pseudonymization.yaml up
```
## Run with enabled pseudonymization and sending resources to a FHIR server
```sh
-docker compose -f compose.onkoadt-to-fhir.yaml -f compose.full.yaml -f compose.fhir-server.yaml -f compose.pseudonymization.yaml up
+docker compose -f compose.obds-to-fhir.yaml -f compose.full.yaml -f compose.fhir-server.yaml -f compose.pseudonymization.yaml up
```
diff --git a/docker-compose/compose.decompose-xmls.yaml b/docker-compose/compose.decompose-xmls.yaml
index 535a0d74..b9503874 100644
--- a/docker-compose/compose.decompose-xmls.yaml
+++ b/docker-compose/compose.decompose-xmls.yaml
@@ -8,4 +8,4 @@ services:
KAFKA_OUTPUT_TOPIC: "onkostar.MELDUNG_EXPORT"
SAVE_AS_FILES_ENABLED: "false"
volumes:
- - ./input-adt-reports:/app/input-adt-reports
+ - ./input-obds-reports:/app/input-obds-reports
diff --git a/docker-compose/compose.fhir-server.yaml b/docker-compose/compose.fhir-server.yaml
index 4a3525ea..321ac380 100644
--- a/docker-compose/compose.fhir-server.yaml
+++ b/docker-compose/compose.fhir-server.yaml
@@ -67,7 +67,7 @@ services:
JAVA_TOOL_OPTIONS: "-XX:MaxRAMPercentage=75"
FHIR_URL: http://fhir-server:8080/fhir
# TODO: change if pseudonymization is enabled
- TOPIC: fhir.onkoadt.MedicationStatement,fhir.onkoadt.Condition,fhir.onkoadt.Observation,fhir.onkoadt.Procedure,fhir.onkoadt.Patient
+ TOPIC: fhir.obds.MedicationStatement,fhir.obds.Condition,fhir.obds.Observation,fhir.obds.Procedure,fhir.obds.Patient
volumes:
fhir_db_data:
diff --git a/docker-compose/compose.adtfhir-to-opal.yaml b/docker-compose/compose.obds-fhir-to-opal.yaml
similarity index 50%
rename from docker-compose/compose.adtfhir-to-opal.yaml
rename to docker-compose/compose.obds-fhir-to-opal.yaml
index b99d9b77..a1cfba36 100644
--- a/docker-compose/compose.adtfhir-to-opal.yaml
+++ b/docker-compose/compose.obds-fhir-to-opal.yaml
@@ -1,6 +1,6 @@
services:
- adtfhir-to-opal:
- image: ghcr.io/bzkf/diz-in-a-box/adtfhir-to-opal:v1.6.2 # x-release-please-version
+ obds-fhir-to-opal:
+ image: ghcr.io/bzkf/diz-in-a-box/obds-fhir-to-opal:v1.6.2 # x-release-please-version
restart: no
cap_drop:
- ALL
@@ -13,10 +13,10 @@ services:
OUTPUT_FOLDER: "/opt/bitnami/spark/opal-output"
KAFKA_TOPIC_YEAR_SUFFIX: "" # e.g. ".2023"
KAFKA_BOOTSTRAP_SERVER: "kafka:9092"
- KAFKA_PATIENT_TOPIC: "fhir.onkoadt.Patient"
- KAFKA_CONDITION_TOPIC: "fhir.onkoadt.Condition"
- KAFKA_OBSERVATION_TOPIC: "fhir.onkoadt.Observation"
- KAFKA_PROCEDURE_TOPIC: "fhir.onkoadt.Procedure"
- KAFKA_MEDICATIONSTATEMENT_TOPIC: "fhir.onkoadt.MedicationStatement"
+ KAFKA_PATIENT_TOPIC: "fhir.obds.Patient"
+ KAFKA_CONDITION_TOPIC: "fhir.obds.Condition"
+ KAFKA_OBSERVATION_TOPIC: "fhir.obds.Observation"
+ KAFKA_PROCEDURE_TOPIC: "fhir.obds.Procedure"
+ KAFKA_MEDICATIONSTATEMENT_TOPIC: "fhir.obds.MedicationStatement"
volumes:
- ${PWD}/opal-output:/opt/bitnami/spark/opal-output
diff --git a/docker-compose/compose.onkoadt-to-fhir.yaml b/docker-compose/compose.obds-to-fhir.yaml
similarity index 79%
rename from docker-compose/compose.onkoadt-to-fhir.yaml
rename to docker-compose/compose.obds-to-fhir.yaml
index f97d451b..ec128ddf 100644
--- a/docker-compose/compose.onkoadt-to-fhir.yaml
+++ b/docker-compose/compose.obds-to-fhir.yaml
@@ -1,6 +1,6 @@
services:
- onkoadt-to-fhir:
- image: harbor.miracum.org/streams-ume/onkoadt-to-fhir:v1.13.14
+ obds-to-fhir:
+ image: harbor.miracum.org/streams-ume/obds-to-fhir:v1.16.0@sha256:06b798a757f3669d18504ac6c2848027d33a03465bcec91c5a9b846c240371ec
restart: unless-stopped
cap_drop:
- ALL
diff --git a/docker-compose/compose.pseudonymization.yaml b/docker-compose/compose.pseudonymization.yaml
index a79e8f13..fb4ff7bc 100644
--- a/docker-compose/compose.pseudonymization.yaml
+++ b/docker-compose/compose.pseudonymization.yaml
@@ -24,7 +24,7 @@ services:
OPENTRACING_JAEGER_ENABLED: "false"
JAEGER_SERVICE_NAME: "fhir-gateway"
SERVICES_KAFKA_ENABLED: "true"
- INPUT_TOPIC: "fhir.onkoadt.MedicationStatement,fhir.onkoadt.Condition,fhir.onkoadt.Observation,fhir.onkoadt.Procedure,fhir.onkoadt.Patient"
+ INPUT_TOPIC: "fhir.obds.MedicationStatement,fhir.obds.Condition,fhir.obds.Observation,fhir.obds.Procedure,fhir.obds.Patient"
OUTPUT_TOPIC: "fhir.post-gateway"
BOOTSTRAP_SERVERS: "kafka:9092"
SECURITY_PROTOCOL: "PLAINTEXT"
diff --git a/docker-compose/input-adt-reports/test-2patients.xml b/docker-compose/input-obds-reports/test-2patients.xml
similarity index 100%
rename from docker-compose/input-adt-reports/test-2patients.xml
rename to docker-compose/input-obds-reports/test-2patients.xml
diff --git a/src/adtfhir_to_opal/compose.yaml b/src/adtfhir_to_opal/compose.yaml
deleted file mode 100644
index 896949e4..00000000
--- a/src/adtfhir_to_opal/compose.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-services:
- adt-fhir-to-opal:
- build:
- context: .
- dockerfile: Dockerfile
- environment:
- OUTPUT_FOLDER: "/opt/bitnami/spark/opal-output/"
- KAFKA_TOPIC_YEAR_SUFFIX: ""
- KAFKA_BOOTSTRAP_SERVER: "kafka:9092"
- KAFKA_PATIENT_TOPIC: "fhir.onkoadt.Patient"
- KAFKA_CONDITION_TOPIC: "fhir.onkoadt.Condition"
- KAFKA_OBSERVATION_TOPIC: "fhir.onkoadt.Observation"
- KAFKA_PROCEDURE_TOPIC: "fhir.onkoadt.Procedure"
- KAFKA_MEDICATIONSTATEMENT_TOPIC: "fhir.onkoadt.MedicationStatement"
- volumes:
- - ${PWD}/output:/opt/bitnami/spark/opal-output
diff --git a/src/decompose_xmls/__snapshots__/decompose_xmls_test.ambr b/src/decompose_xmls/__snapshots__/decompose_xmls_test.ambr
index b3a9f130..e6238f57 100644
--- a/src/decompose_xmls/__snapshots__/decompose_xmls_test.ambr
+++ b/src/decompose_xmls/__snapshots__/decompose_xmls_test.ambr
@@ -1,5 +1,5 @@
# serializer version: 1
-# name: test_decompose_sammelmeldung[input-adt-reports/test-2patients.xml]
+# name: test_decompose_sammelmeldung[input-obds-reports/test-2patients.xml]
list([
{"xml": "\n\n UKKK\n \n xxxxxxxxxxxxxxxxxxxx\n \n \n \n F00000000\n 9999999\n Hafnero\n B\n E\n W\n 14.01.1900\n \n \n DE\n 91058\n Erlangen\n \n \n \n \n \n 10.03.1900\n I\n behandlungsende\n \n D07.3\n 10 2022 GM\n 08.08.1900\n L\n \n \n \n K\n 01.09.2022\n \n 5-543.42\n 5-683.00\n \n 2022\n \n 01.09.1900\n 12226123\n 8460/2\n 33\n B\n \n \n 01.09.2022\n 8\n p\n 1c3\n c\n 0\n c\n 0\n L0\n V0\n Pn0\n \n \n R0\n R0\n \n \n N\n \n \n jjjjj\n xxxxx\n \n Makroskopischer Tumorrest: R0\n \n \n \n \n \n ", "patient_id": "1004444444", "meldung_id": "FK0037275"},
{"xml": "\n\n UKKK\n \n xxxxxxxxxxxxxxxxxxxx\n \n \n \n F00000000\n 9999999\n Hafnero\n B\n E\n W\n 14.01.1900\n \n \n DE\n 91058\n Erlangen\n \n \n \n \n \n 10.03.1900\n I\n behandlungsende\n \n D07.3\n 10 2022 GM\n 08.08.1900\n L\n \n \n \n K\n 01.09.2022\n \n 5-543.42\n 5-683.00\n \n 2022\n \n 01.09.1900\n 12226123\n 8460/2\n 33\n B\n \n \n 01.09.2022\n 8\n p\n 1c3\n c\n 0\n c\n 0\n L0\n V0\n Pn0\n \n \n R0\n R0\n \n \n N\n \n \n jjjjj\n xxxxx\n \n Makroskopischer Tumorrest: R0\n \n \n \n \n \n ", "patient_id": "1004444444", "meldung_id": "FK0037276"},
diff --git a/src/decompose_xmls/compose.yaml b/src/decompose_xmls/compose.yaml
index 9bdbc545..4edc0eb1 100644
--- a/src/decompose_xmls/compose.yaml
+++ b/src/decompose_xmls/compose.yaml
@@ -6,10 +6,10 @@ services:
environment:
KAFKA_ENABLED: "true"
KAFKA_BOOTSTRAP_SERVERS: "kafka:9092"
- KAFKA_OUTPUT_TOPIC: "adt.einzelmeldungen"
+ KAFKA_OUTPUT_TOPIC: "obds.einzelmeldungen"
volumes:
- - ./input-adt-reports:/app/input-adt-reports
- - ./output-adt-reports:/app/output-files
+ - ./input-obds-reports:/app/input-obds-reports
+ - ./output-obds-reports:/app/output-files
kafka:
image: docker.io/bitnami/kafka:3.5.1
diff --git a/src/decompose_xmls/decompose_xmls.py b/src/decompose_xmls/decompose_xmls.py
index 8b469d90..2dc11ef5 100644
--- a/src/decompose_xmls/decompose_xmls.py
+++ b/src/decompose_xmls/decompose_xmls.py
@@ -10,13 +10,13 @@
class Settings(BaseSettings):
- input_folder: str = "./input-adt-reports"
+ input_folder: str = "./input-obds-reports"
output_folder: str = "./output-files"
output_folder_xml: str = "./output-files/output-xmls"
save_as_files_enabled: bool = True
kafka_enabled: bool = False
kafka_bootstrap_servers: str = "localhost:9092"
- kafka_output_topic: str = "adt.einzelmeldungen"
+ kafka_output_topic: str = "obds.einzelmeldungen"
@dataclass
diff --git a/src/decompose_xmls/decompose_xmls_test.py b/src/decompose_xmls/decompose_xmls_test.py
index bedc1520..28819876 100644
--- a/src/decompose_xmls/decompose_xmls_test.py
+++ b/src/decompose_xmls/decompose_xmls_test.py
@@ -6,11 +6,11 @@
@pytest.mark.parametrize(
- "adt_input_file_path", [("input-adt-reports/test-2patients.xml")]
+ "obds_input_file_path", [("input-obds-reports/test-2patients.xml")]
)
-def test_decompose_sammelmeldung(snapshot, adt_input_file_path):
- tree = ET.parse(adt_input_file_path)
+def test_decompose_sammelmeldung(snapshot, obds_input_file_path):
+ tree = ET.parse(obds_input_file_path)
root = tree.getroot()
- result = decompose_sammelmeldung(root, adt_input_file_path)
+ result = decompose_sammelmeldung(root, obds_input_file_path)
assert result == snapshot
diff --git a/src/decompose_xmls/input-adt-reports/test-2patients.xml b/src/decompose_xmls/input-obds-reports/test-2patients.xml
similarity index 100%
rename from src/decompose_xmls/input-adt-reports/test-2patients.xml
rename to src/decompose_xmls/input-obds-reports/test-2patients.xml
diff --git a/src/adtfhir_to_opal/output/.gitkeep b/src/decompose_xmls/output-obds-reports/.gitkeep
similarity index 100%
rename from src/adtfhir_to_opal/output/.gitkeep
rename to src/decompose_xmls/output-obds-reports/.gitkeep
diff --git a/src/adtfhir_to_opal/Dockerfile b/src/obds_fhir_to_opal/Dockerfile
similarity index 86%
rename from src/adtfhir_to_opal/Dockerfile
rename to src/obds_fhir_to_opal/Dockerfile
index 09316325..f870083a 100644
--- a/src/adtfhir_to_opal/Dockerfile
+++ b/src/obds_fhir_to_opal/Dockerfile
@@ -13,6 +13,6 @@ USER 1001:1001
RUN spark-shell -v --conf spark.jars.ivy=${SPARK_JARS_IVY}\
--packages "org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.2,au.csiro.pathling:library-api:6.2.1,ch.cern.sparkmeasure:spark-measure_2.13:0.21,io.delta:delta-core_2.12:2.3.0"
-COPY adtfhir_to_opal.py adtfhir_to_opal.py
+COPY obds_fhir_to_opal.py obds_fhir_to_opal.py
-ENTRYPOINT [ "python", "adtfhir_to_opal.py" ]
+ENTRYPOINT [ "python", "obds_fhir_to_opal.py" ]
diff --git a/src/adtfhir_to_opal/__init__.py b/src/obds_fhir_to_opal/__init__.py
similarity index 100%
rename from src/adtfhir_to_opal/__init__.py
rename to src/obds_fhir_to_opal/__init__.py
diff --git a/src/obds_fhir_to_opal/compose.yaml b/src/obds_fhir_to_opal/compose.yaml
new file mode 100644
index 00000000..e6ff598d
--- /dev/null
+++ b/src/obds_fhir_to_opal/compose.yaml
@@ -0,0 +1,16 @@
+services:
+ obds-fhir-to-opal:
+ build:
+ context: .
+ dockerfile: Dockerfile
+ environment:
+ OUTPUT_FOLDER: "/opt/bitnami/spark/opal-output/"
+ KAFKA_TOPIC_YEAR_SUFFIX: ""
+ KAFKA_BOOTSTRAP_SERVER: "kafka:9092"
+ KAFKA_PATIENT_TOPIC: "fhir.obds.Patient"
+ KAFKA_CONDITION_TOPIC: "fhir.obds.Condition"
+ KAFKA_OBSERVATION_TOPIC: "fhir.obds.Observation"
+ KAFKA_PROCEDURE_TOPIC: "fhir.obds.Procedure"
+ KAFKA_MEDICATIONSTATEMENT_TOPIC: "fhir.obds.MedicationStatement"
+ volumes:
+ - ${PWD}/output:/opt/bitnami/spark/opal-output
diff --git a/src/adtfhir_to_opal/adtfhir_to_opal.py b/src/obds_fhir_to_opal/obds_fhir_to_opal.py
similarity index 86%
rename from src/adtfhir_to_opal/adtfhir_to_opal.py
rename to src/obds_fhir_to_opal/obds_fhir_to_opal.py
index a276fc2c..02958986 100644
--- a/src/adtfhir_to_opal/adtfhir_to_opal.py
+++ b/src/obds_fhir_to_opal/obds_fhir_to_opal.py
@@ -13,15 +13,14 @@

class Settings(BaseSettings):
- output_folder: str = "/opt/bitnami/spark/opal-output"
- output_filename: str = "oBDS_tabular"
- kafka_topic_year_suffix: str = ".2022"
- kafka_patient_topic: str = "fhir.post-gateway-bzkf-onkoadt.Patient"
- kafka_condition_topic: str = "fhir.post-gateway-bzkf-onkoadt.Condition"
- kafka_observation_topic: str = "fhir.post-gateway-bzkf-onkoadt.Observation"
- kafka_procedure_topic: str = "fhir.post-gateway-bzkf-onkoadt.Procedure"
- kafka_medicationstatement_topic: str = \
- "fhir.post-gateway-bzkf-onkoadt.MedicationStatement"
+ output_folder: str = "~/opal-output"
+ output_filename: str = "oBDS_tabular"
+ kafka_topic_year_suffix: str = ".2023"
+ kafka_patient_topic: str = "fhir.obds.Patient"
+ kafka_condition_topic: str = "fhir.obds.Condition"
+ kafka_observation_topic: str = "fhir.obds.Observation"
+ kafka_procedure_topic: str = "fhir.obds.Procedure"
+ kafka_medicationstatement_topic: str = "fhir.obds.MedicationStatement"
# ⚠️ make sure these are consistent with the ones downloaded inside the Dockerfile
jar_list: list = [
"org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.2",
@@ -29,7 +27,7 @@ class Settings(BaseSettings):
"ch.cern.sparkmeasure:spark-measure_2.13:0.21",
"io.delta:delta-core_2.12:2.3.0",
]
- spark_app_name: str = "ADTFHIR-to-Opal"
+ spark_app_name: str = "oBDS-FHIR-to-Opal"
master: str = "local[*]"
kafka_bootstrap_server: str = "kafka:9092"
@@ -142,18 +140,20 @@ def calculate_age(birthdate):
def calculate_age_at_conditiondate(birthdate, conditiondate):
age_at_conditiondate = conditiondate - birthdate
days_in_year = 365.2425
- age_at_conditiondate = int(age_at_conditiondate.days/days_in_year)
+ age_at_conditiondate = int(age_at_conditiondate.days / days_in_year)
return age_at_conditiondate
def add_age_at_condition(df_pat_cond_joined):
- calculate_age_at_conditiondateUDF = udf(lambda x, y:
- calculate_age_at_conditiondate(x, y),
- StringType())
+ calculate_age_at_conditiondateUDF = udf(
+ lambda x, y: calculate_age_at_conditiondate(x, y), StringType()
+ )
df_pat_cond_joined = df_pat_cond_joined.withColumn(
- "age_at_diagnosis", calculate_age_at_conditiondateUDF(
- to_date(df_pat_cond_joined.birthDate),
- df_pat_cond_joined.conditiondate))
+ "age_at_diagnosis",
+ calculate_age_at_conditiondateUDF(
+ to_date(df_pat_cond_joined.birthDate), df_pat_cond_joined.conditiondate
+ ),
+ )
return df_pat_cond_joined
@@ -172,8 +172,7 @@ def encode_patients(ptl: PathlingContext, df_bundles: pyspark.sql.dataframe.Data
return_yearUDF = udf(lambda x: return_year(x), StringType())
patients = df_patients.selectExpr(
- "id as pat_id", "gender", "birthDate",
- "deceasedBoolean", "deceasedDateTime"
+ "id as pat_id", "gender", "birthDate", "deceasedBoolean", "deceasedDateTime"
)
patients = patients.withColumns(
@@ -313,35 +312,35 @@ def join_dataframes(df_one, partition_col_one: str, df_two, partition_col_two: s
def lookup_obs_value_codingcode_tnm_uicc_mapping(obs_value_codingcode_tnm_UICC):
obs_value_codingcode_tnm_uicc_mapping_dict = {
- "0": "0",
- "0a": "1",
- "0is": "2",
- "I": "3",
- "IA": "4",
- "IA1": "5",
- "IA2": "6",
- "IA3": "7",
- "IB": "8",
- "IB1": "9",
- "IB2": "10",
- "IC": "11",
- "IS": "12",
- "II": "13",
- "IIA": "14",
- "IIA1": "15",
- "IIA2": "16",
- "IIB": "17",
- "IIC": "18",
- "III": "19",
- "IIIA": "20",
- "IIIB": "21",
- "IIIC": "22",
- "IIIC1": "23",
- "IIIC2": "24",
- "IV": "25",
- "IVA": "26",
- "IVB": "27",
- "IVC": "28",
+ "0": "0",
+ "0a": "1",
+ "0is": "2",
+ "I": "3",
+ "IA": "4",
+ "IA1": "5",
+ "IA2": "6",
+ "IA3": "7",
+ "IB": "8",
+ "IB1": "9",
+ "IB2": "10",
+ "IC": "11",
+ "IS": "12",
+ "II": "13",
+ "IIA": "14",
+ "IIA1": "15",
+ "IIA2": "16",
+ "IIB": "17",
+ "IIC": "18",
+ "III": "19",
+ "IIIA": "20",
+ "IIIB": "21",
+ "IIIC": "22",
+ "IIIC1": "23",
+ "IIIC2": "24",
+ "IV": "25",
+ "IVA": "26",
+ "IVB": "27",
+ "IVC": "28",
}
if obs_value_codingcode_tnm_UICC in obs_value_codingcode_tnm_uicc_mapping_dict:
return obs_value_codingcode_tnm_uicc_mapping_dict[obs_value_codingcode_tnm_UICC]
@@ -351,35 +350,35 @@ def lookup_obs_value_codingcode_tnm_uicc_mapping(obs_value_codingcode_tnm_UICC):
def lookup_grouped_uicc(obs_value_codingcode_tnm_UICC):
grouped_uicc_dict = {
- "0": "0",
- "0a": "0",
- "0is": "0",
- "I": "1",
- "IA": "1",
- "IA1": "1",
- "IA2": "1",
- "IA3": "1",
- "IB": "1",
- "IB1": "1",
- "IB2": "1",
- "IC": "1",
- "IS": "1",
- "II": "2",
- "IIA": "2",
- "IIA1": "2",
- "IIA2": "2",
- "IIB": "2",
- "IIC": "2",
- "III": "3",
- "IIIA": "3",
- "IIIB": "3",
- "IIIC": "3",
- "IIIC1": "3",
- "IIIC2": "3",
- "IV": "4",
- "IVA": "4",
- "IVB": "4",
- "IVC": "4",
+ "0": "0",
+ "0a": "0",
+ "0is": "0",
+ "I": "1",
+ "IA": "1",
+ "IA1": "1",
+ "IA2": "1",
+ "IA3": "1",
+ "IB": "1",
+ "IB1": "1",
+ "IB2": "1",
+ "IC": "1",
+ "IS": "1",
+ "II": "2",
+ "IIA": "2",
+ "IIA1": "2",
+ "IIA2": "2",
+ "IIB": "2",
+ "IIC": "2",
+ "III": "3",
+ "IIIA": "3",
+ "IIIB": "3",
+ "IIIC": "3",
+ "IIIC1": "3",
+ "IIIC2": "3",
+ "IV": "4",
+ "IVA": "4",
+ "IVB": "4",
+ "IVC": "4",
}
if obs_value_codingcode_tnm_UICC in grouped_uicc_dict:
return grouped_uicc_dict[obs_value_codingcode_tnm_UICC]
@@ -395,9 +394,7 @@ def transform_tnmp(observations_tnmp):
"tnmp_UICC_mapped",
lookup_obs_value_codingcode_tnm_UICC_mappingUDF(observations_tnmp.obsvaluecode),
)
- lookup_lookup_grouped_uiccUDF = udf(
- lambda x: lookup_grouped_uicc(x), StringType()
- )
+ lookup_lookup_grouped_uiccUDF = udf(lambda x: lookup_grouped_uicc(x), StringType())
observations_tnmp = observations_tnmp.withColumn(
"tnmp_UICC_grouped",
lookup_lookup_grouped_uiccUDF(observations_tnmp.obsvaluecode),
@@ -410,7 +407,7 @@ def transform_tnmp(observations_tnmp):
"obsvaluecode as tnmp_obsvalue_UICC",
"obsdate as tnmp_obsdate",
"tnmp_UICC_mapped",
- "tnmp_UICC_grouped"
+ "tnmp_UICC_grouped",
)
observations_tnmp_cached = observations_tnmp.cache()
@@ -566,11 +563,10 @@ def save_final_df(final_df):
final_df_pandas = final_df.toPandas()
final_df_pandas = final_df_pandas.rename_axis("ID") # required for opal import
- output_filename = str(settings.output_filename + settings.kafka_topic_year_suffix
- + ".csv")
- output_path_filename = os.path.join(
- settings.output_folder,
- output_filename)
+ output_filename = str(
+ settings.output_filename + settings.kafka_topic_year_suffix + ".csv"
+ )
+ output_path_filename = os.path.join(settings.output_folder, output_filename)
print("###### current dir: ", os.getcwd())
print("###### output_path_filename : ", output_path_filename)
@@ -588,8 +584,9 @@ def main():
read_data_from_kafka_save_delta(spark, kafka_topics)
- df_bundles = spark.read.format("delta").load(os.path.join(
- settings.output_folder, "bundles-delta"))
+ df_bundles = spark.read.format("delta").load(
+ os.path.join(settings.output_folder, "bundles-delta")
+ )
df_patients = encode_patients(ptl, df_bundles)
df_conditions = encode_conditions(ptl, df_bundles)
diff --git a/src/decompose_xmls/output-adt-reports/.gitkeep b/src/obds_fhir_to_opal/output/.gitkeep
similarity index 100%
rename from src/decompose_xmls/output-adt-reports/.gitkeep
rename to src/obds_fhir_to_opal/output/.gitkeep
diff --git a/src/adtfhir_to_opal/requirements.txt b/src/obds_fhir_to_opal/requirements.txt
similarity index 100%
rename from src/adtfhir_to_opal/requirements.txt
rename to src/obds_fhir_to_opal/requirements.txt