forked from Carrie-K/ihacru-2017-psy513
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathge3.R
134 lines (122 loc) · 8.43 KB
/
ge3.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# knitr::stitch_rmd(script="./___/___.R", output="./___/___/___.md")
# These first few lines run only when the file is run in RStudio, !!NOT when an Rmd/Rnw file calls it!!
rm(list=ls(all=TRUE)) #Clear the memory of variables from previous run. This is not called by knitr, because it's above the first chunk.
cat("\f") # clear console
# This script reads two files: patient event table + location map.
rm(list=ls(all=TRUE)) #Clear the memory of variables from previous run. This is not called by knitr, because it's above the first chunk.
# ---- load-packages -----------------------------------------------------------
library(ggplot2) #For graphing
library(magrittr) #Pipes
library(dplyr) # for shorter function names. but still prefer dplyr:: stems
library(knitr) # dynamic documents
library(rmarkdown) # dynamic documents
library(kableExtra) # enhanced tables, see http://haozhu233.github.io/kableExtra/awesome_table_in_html.html
# library(TabularManifest) # exploratory data analysis, see https://github.com/Melinae/TabularManifest
requireNamespace("knitr", quietly=TRUE)
requireNamespace("scales", quietly=TRUE) #For formating values in graphs
requireNamespace("RColorBrewer", quietly=TRUE)
requireNamespace("dplyr", quietly=TRUE)
requireNamespace("DT", quietly=TRUE) # for dynamic tables
# requireNamespace("plyr", quietly=TRUE)
# requireNamespace("reshape2", quietly=TRUE) #For converting wide to long
# requireNamespace("mgcv, quietly=TRUE) #For the Generalized Additive Model that smooths the longitudinal graphs.
# ---- load-sources ------------------------------------------------------------
#Load any source files that contain/define functions, but that don't load any other types of variables
# into memory. Avoid side effects and don't pollute the global environment.
source("./manipulation/function-support.R") # assisting functions for data wrangling and testing
source("./manipulation/object-glossary.R") # object definitions
source("./scripts/common-functions.R") # reporting functions and quick views
source("./scripts/graphing/graph-presets.R") # font and color conventions
# ---- declare-globals ---------------------------------------------------------
# dto_location_map.rds is products of `./manipulation/0-ellis-location-map.R`
path_input_location_map <- "./data-unshared/derived/dto_location_map.rds"
path_input_patient_events <- "./data-unshared/derived/dto_patient_events_addictions_4264.rds" # research cohort
# Make sure the files are located where they supposed to be, in `./data-unshared/` folder
testit::assert("File does not exist", base::file.exists(path_input_location_map))
testit::assert("File does not exist", base::file.exists(path_input_patient_events))
# list variables that constitute patient event table
components_patient_events <- c(
"id" # patient unique key
,"gender" # biological sex
,"age_group" # in groups of 5 years
,"encounter_id" # unique identifier for the encounter
,"encounter_class" # value from the D_Location dimension table in the data warehous
,"encounter_type" # value from the D_Location dimension table in the data warehous
,"event_type" # high-level classification of the event (encounter itself, diagnosis, procedure, clinical order, laboratory, pharmacy event, etc.).
,"event_title" # a shorter, more rolled up category describing the event
,"event_detail" # description of the event, can be long and specific
,"event_count" # count of the events in the row- each row has a value of 1, and can be summed
,"event_year" # year in which event occurred
,"event_month" # month in which event occured
,"event_start_date" # might be the date of the event itself (e.g. date of diagnosis) or the start date of the encounter that the event is associated with
,"event_end_date" # might be the end date of the event itself (e.g. end date of prescriptions) or the end date of the encounter that the event is associated with
,"start_day" # number of days between the first day the patient accessed Island Health services and the start date of this event. This is useful to mask dates, but still provides the relative time between events.
,"end_day" # number of days between the first day the patient accessed Island Health services and the end date of this event. This is useful to mask dates, but still provides the relative time between events.
,"duration_days" # number of days between the Start_Day and End_Day (End_Day - Start_Day)
,"addiction_location_count" # patient has accessed services at a location that was used for the selection of the cohort
,"location_map_id" # unique id for VIHA program, connects to location map
,"palette_code" # unique id for colours of this palette
,"palette_colour_name" # labels for clusters of service programs (aka 3T palette colours)
)
# list variables that consitute location map table
components_location_map <- c(
"location_map_id" # unique id for VIHA program, connects to patient events
, "site_name" # CERNER address # EHR address
, "facility_name" # CERNER address # EHR address
, "building_name" # CERNER address # EHR address
, "unit_name" # CERNER address # EHR address
, "location_category" # Data Warehouse address # EHR address
, "location_type" # Data Warehouse address # EHR address
, "location_grouping" # Data Warehouse address # EHR address
, "site_key" # Factual counterparts to CERNER address
, "facility_key" # Factual counterparts to CERNER address
, "building_key" # Factual counterparts to CERNER address
, "unit_key" # Factual counterparts to CERNER address
, "intensity_type" # Classifier, Compressor, Lense
, "intensity_severity_risk" # Classifier, Compressor, Lense
, "clinical_focus" # Classifier, Compressor, Lense
, "service_type" # Classifier, Compressor, Lense
, "service_location" # Classifier, Compressor, Lense
, "population_age" # Classifier, Compressor, Lense
, "provider_mix" # Classifier, Compressor, Lense
, "location_class_code" # Program Class, identifier
, "location_class_description" # Porgram Class, Descriptive label
, "palette_code" # Palette, identifier # cluster specific
, "palette_colour_name" # Palette, descriptive label # cluster specific
)
# define output format for the report
options(
knitr.table.format = "html"
,tibble.width = 110
# ,bootstrap_options = c("striped", "hover", "condensed","responsive")
)
# ---- utility-functions -------------------------------------------------------
# functions local to this script go here.
# ---- load-data ---------------------------------------------------------------
ds_patient_events <- readRDS(path_input_patient_events) # %>% as.data.frame()
ds_location_map <- readRDS(path_input_location_map)
ds_patient_events %>% glimpse()
ds_location_map %>% glimpse()
# ---- tweak-data ------------------------------------------------------------
# augment the event table with additional columns from location map
ds <- dplyr::left_join(
ds_patient_events, # patient event table
ds_location_map %>% # location map
dplyr::select_(.dots = components_location_map)
,by = c("location_map_id","palette_code","palette_colour_name")
) %>%
dplyr::mutate(
location_class_description_display = substr(location_class_description,1,42)
,palette_colour_name_display = substr(palette_colour_name,1,42)
)
ds_location_map %>% glimpse()
# from this point on, ds_location_map is needed only for selective reference
ds %>% glimpse()
# ds now contains full coordinates to events of the cohort down to unit level
# ehr_address + location_classifiers + palette_colours
# ---- inspect-data-1 -----------------------------------------------------------
ds_patient_events %>% glimpse()
ds_location_map %>% glimpse()
ds %>% glimpse()
rm(ds_patient_events, ds_location_map)
# ---- utility-functions -------------------------------------------------------