From 08c6273c49f67d3e820709c878fd1f95f3926f4d Mon Sep 17 00:00:00 2001
From: "jasmin.ziegler"
Date: Mon, 25 Mar 2024 20:46:34 +0100
Subject: [PATCH] fix: exchange pat identifier with actual id, handle empty conditiondate (#164)

* fix: exchange pat identifier with actual id, handle empty conditiondate

* update age at condition in case its empty to age at mid 2022
---
 src/obds_fhir_to_opal/obds_fhir_to_opal.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/obds_fhir_to_opal/obds_fhir_to_opal.py b/src/obds_fhir_to_opal/obds_fhir_to_opal.py
index 91f9e9b8..ef29c91a 100644
--- a/src/obds_fhir_to_opal/obds_fhir_to_opal.py
+++ b/src/obds_fhir_to_opal/obds_fhir_to_opal.py
@@ -140,7 +140,11 @@ def calculate_age(birthdate):
 
 
 def calculate_age_at_conditiondate(birthdate, conditiondate):
-    age_at_conditiondate = conditiondate - birthdate
+    if conditiondate is None:
+        # TODO: missing condition date -> use mid-2022 as reference date
+        age_at_conditiondate = datetime.date(2022, 6, 15) - birthdate
+    else:
+        age_at_conditiondate = conditiondate - birthdate
     days_in_year = 365.2425
     age_at_conditiondate = int(age_at_conditiondate.days / days_in_year)
     return age_at_conditiondate
@@ -199,7 +203,8 @@ def add_age_at_condition_and_groups(df_pat_cond_joined):
     df_pat_cond_joined = df_pat_cond_joined.withColumn(
         "age_at_diagnosis",
         calculate_age_at_conditiondateUDF(
-            to_date(df_pat_cond_joined.birthDate), df_pat_cond_joined.conditiondate
+            to_date(df_pat_cond_joined.birthDate),
+            df_pat_cond_joined.conditiondate
         ),
     )
 
@@ -236,7 +241,8 @@ def encode_patients(ptl: PathlingContext, df_bundles: pyspark.sql.dataframe.Data
     return_yearUDF = udf(lambda x: return_year(x), StringType())
 
     patients = df_patients.selectExpr(
-        "id as pat_id", "gender", "birthDate", "deceasedBoolean", "deceasedDateTime"
+        "EXPLODE_OUTER(identifier.value) as pat_id", "gender", "birthDate",
+        "deceasedBoolean", "deceasedDateTime"
     )
 
     patients = patients.withColumns(