Skip to content

Commit

Permalink
feat: add conditiondate_year column for filtering in DataSHIELD (#169)
Browse files Browse the repository at this point in the history
* add conditiondate_year column for safety check in DataSHIELD

* fix flake8 warnings and update data dictionary
  • Loading branch information
jasminziegler authored Apr 8, 2024
1 parent 8ceef04 commit a3cc155
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 1 deletion.
Binary file modified src/obds_fhir_to_opal/datadictionary_bzkf_q4_22.xlsx
Binary file not shown.
7 changes: 6 additions & 1 deletion src/obds_fhir_to_opal/obds_fhir_to_opal.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from pathling.etc import find_jar
from pydantic import BaseSettings
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, explode, first, regexp_replace, to_date, udf
from pyspark.sql.functions import col, explode, first, regexp_replace, to_date, udf, \
substring
from pyspark.sql.types import StringType


Expand Down Expand Up @@ -388,6 +389,7 @@ def encode_conditions(ptl: PathlingContext, df_bundles):
"evidencereference": regexp_replace(
"evidencereference", "Observation/", ""
),
"conditiondate_year": substring("conditiondate", 1, 4),
"stagereference": regexp_replace("stagereference", "Observation/", ""),
"conditiondate": regexp_replace("conditiondate", "T", " "),
}
Expand All @@ -403,6 +405,7 @@ def encode_conditions(ptl: PathlingContext, df_bundles):
conditions = conditions.select(
"cond_id",
"conditiondate",
"conditiondate_year",
"subjectreference",
"condcodingcode",
"condcodingcode_mapped",
Expand Down Expand Up @@ -639,6 +642,7 @@ def group_df(joined_dataframe):
first("patID").alias("patID"),
first("gender_mapped").alias("gender_mapped"),
first("conditiondate").alias("conditiondate"),
first("conditiondate_year").alias("conditiondate_year"),
first("condcodingcode").alias("condcodingcode"),
first("condcodingcode_mapped").alias("condcodingcode_mapped"),
first("entity_group").alias("entity_group"),
Expand Down Expand Up @@ -669,6 +673,7 @@ def group_df(joined_dataframe):
"cond_id",
"gender_mapped",
"conditiondate",
"conditiondate_year",
"condcodingcode",
"condcodingcode_mapped",
"entity_group",
Expand Down

0 comments on commit a3cc155

Please sign in to comment.