Skip to content

Commit

Permalink
fix: exchange pat identifier with actual id, handle empty conditionda…
Browse files Browse the repository at this point in the history
…te (#164)

* fix: exchange pat identifier with actual id, handle empty conditiondate
* update age at condition to the age at mid-2022 in case the condition date is empty
  • Loading branch information
jasminziegler authored Mar 25, 2024
1 parent b3805f3 commit 08c6273
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions src/obds_fhir_to_opal/obds_fhir_to_opal.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,11 @@ def calculate_age(birthdate):


def calculate_age_at_conditiondate(birthdate, conditiondate):
    """Return the age in whole years at the date of the condition.

    Args:
        birthdate: Date of birth as a ``datetime.date``; may be ``None``.
        conditiondate: Date of the condition as a ``datetime.date``; may be
            ``None`` when no condition date was recorded.

    Returns:
        The number of full years between ``birthdate`` and ``conditiondate``
        as an ``int``, or ``None`` when ``birthdate`` is missing. When
        ``conditiondate`` is missing, the age on 2022-06-15 is returned
        instead.
    """
    if birthdate is None:
        # Without a birth date no age can be derived; return None rather
        # than raising a TypeError on the subtraction below.
        return None

    if conditiondate is None:
        # TODO: replace this placeholder; mid-2022 is only a stand-in for a
        # missing condition date.
        conditiondate = datetime.date(2022, 6, 15)

    # Always subtract the birth date from the later date so that the
    # resulting age is positive.
    days_in_year = 365.2425
    return int((conditiondate - birthdate).days / days_in_year)
Expand Down Expand Up @@ -199,7 +203,8 @@ def add_age_at_condition_and_groups(df_pat_cond_joined):
df_pat_cond_joined = df_pat_cond_joined.withColumn(
"age_at_diagnosis",
calculate_age_at_conditiondateUDF(
to_date(df_pat_cond_joined.birthDate), df_pat_cond_joined.conditiondate
to_date(df_pat_cond_joined.birthDate),
df_pat_cond_joined.conditiondate
),
)

Expand Down Expand Up @@ -236,7 +241,8 @@ def encode_patients(ptl: PathlingContext, df_bundles: pyspark.sql.dataframe.Data
return_yearUDF = udf(lambda x: return_year(x), StringType())

patients = df_patients.selectExpr(
"id as pat_id", "gender", "birthDate", "deceasedBoolean", "deceasedDateTime"
"EXPLODE_OUTER(identifier.value) as pat_id", "gender", "birthDate",
"deceasedBoolean", "deceasedDateTime"
)

patients = patients.withColumns(
Expand Down

0 comments on commit 08c6273

Please sign in to comment.