bringind old data and finishing analysis

Public-Health-Scotland · Jan 9, 2019 · 9a2e604 · 9a2e604
1 parent c5e863f
commit 9a2e604
Showing 1 changed file with 70 additions and 24 deletions.
diff --git a/Maternities with drug use.R b/Maternities with drug use.R
@@ -1,54 +1,100 @@
 # ScotPHO indicators: Maternities with drug use
 
-#   Part 1 - Extract data from open data platform
-#   Part 2 - P7 Child dental raw data 
-#   Part 3 - Run analysis functions
-
-# TODO:
-#   Need to bring older data in
-#I probably don't need the functions, maybe the second one
-#check the spss code
+#   Part 1 - Create basefile
+#   Part 2 - Computing rates and adding labels
 
 ###############################################.
-## Packages/Filepaths/Functions ----
+## Packages/Filepaths ----
 library(dplyr)
 library(readr)
 
 server_desktop <- "server" # change depending if you are using R server or R desktop
 if (server_desktop == "server") {
-  prepared_data <- "/PHI_conf/ScotPHO/Profiles/Data/Prepared Data/"
-  formatted_data <- "/PHI_conf/ScotPHO/Profiles/Data/Temporary/"
+  folder_data <- "/PHI_conf/ScotPHO/Profiles/Data/"
 } else if (server_desktop == "desktop") {
-  prepared_data <- "//stats/ScotPHO/Profiles/Data/Prepared Data/"
-  formatted_data <- "//stats/ScotPHO/Profiles/Data/Temporary/"
+  folder_data <- "//stats/ScotPHO/Profiles/Data/"
 }
 
 ###############################################.
-## Part 1 - Extract data from open data platform ----
+## Part 1 - Create basefile ----
 ###############################################.
-data_drugmat_ca <- read_csv("https://www.opendata.nhs.scot/dataset/df10dbd4-81b3-4bfa-83ac-b14a5ec62296/resource/3e96277a-9029-4390-ab90-ec600f9926a5/download/11.6_ca_drugmisuse.csv") %>%
+#Bringing old data not present in open data platform. CHECK that in open data platform 
+#they keep the whole trend too. If not incorporate the oldest data to this file.
+#Data available at Health Board Data prior 2008/09-2010/11 based on pre 2014 health board boundaries. 
+drugmat_old <- readRDS(file=paste0(folder_data, 'Prepared Data/maternity_drug_old_do_not_delete.rds')) 
+#Now extract data from open data platform
+drugmat_ca <- read_csv("https://www.opendata.nhs.scot/dataset/df10dbd4-81b3-4bfa-83ac-b14a5ec62296/resource/3e96277a-9029-4390-ab90-ec600f9926a5/download/11.6_ca_drugmisuse.csv") %>%
   setNames(tolower(names(.))) %>%   #variables to lower case
   rename(code = ca2011) #to allow merging
 
-data_drugmat_hb <- read_csv("https://www.opendata.nhs.scot/dataset/df10dbd4-81b3-4bfa-83ac-b14a5ec62296/resource/8c8377e1-b1c7-48e7-b313-79eb5ac3c110/download/11.6_hb_drugmisuse.csv") %>% 
+drugmat_hb <- read_csv("https://www.opendata.nhs.scot/dataset/df10dbd4-81b3-4bfa-83ac-b14a5ec62296/resource/8c8377e1-b1c7-48e7-b313-79eb5ac3c110/download/11.6_hb_drugmisuse.csv") %>% 
   setNames(tolower(names(.))) %>%  #variables to lower case
   rename(code = hbr2014) %>%  select(-hbr2014qf) #to allow merging
 
-data_drugmat <- rbind(data_drugmat_ca, data_drugmat_hb) %>% 
+#Merging together ca and hb
+data_drugmat <- rbind(drugmat_ca, drugmat_hb) %>% 
   #selecting only totals and hb, ca and scotland
   filter(simdquintileqf == "d" &
            substr(code, 1, 3) %in% c("S92", "S08", "S12")) %>% 
-  rename(year = financialyears, numerator = drugmisuse, denominator = maternities) %>% 
-  select(year, code, numerator, denominator) %>% 
+  rename(trend_axis = financialyears, numerator = drugmisuse, denominator = maternities) %>% 
+  select(trend_axis, code, numerator, denominator) %>% 
   #reformatting year to style needed
-  mutate(year = as.numeric(paste0(substr(year, 1, 2), substr(year, 6, 7))))
+  mutate(year = as.numeric(paste0(substr(trend_axis, 1, 2), substr(trend_axis, 6, 7))))
+
+data_drugmat <- rbind(data_drugmat, drugmat_old) 
+
+#Now, we need ADP level, so selecting councils and recoding codes.
+# both lanarkshires CA are one ADP and Mid and East lothian are one ADP
+drugmat_adp <- data_drugmat %>% filter(substr(code,1,3) == "S12") %>% 
+  mutate(code = case_when( 
+    code == "S12000005" ~ "S11000005", code == "S12000006" ~ "S11000006", code == "S12000008" ~ "S11000008", 
+    code == "S12000010" ~ "S11000051", code == "S12000011" ~ "S11000011", code == "S12000014" ~ "S11000013", 
+    code == "S12000017" ~ "S11000016", code == "S12000018" ~ "S11000017", code == "S12000019" ~ "S11000051", 
+    code == "S12000020" ~ "S11000019", code == "S12000021" ~ "S11000010", code == "S12000026" ~ "S11000025", 
+    code == "S12000028" ~ "S11000027", code == "S12000029" ~ "S11000052", code == "S12000030" ~ "S11000029", 
+    code == "S12000033" ~ "S11000001", code == "S12000034" ~ "S11000002", code == "S12000035" ~ "S11000004", 
+    code == "S12000036" ~ "S11000012", code == "S12000038" ~ "S11000024", code == "S12000039" ~ "S11000030", 
+    code == "S12000040" ~ "S11000031", code == "S12000041" ~ "S11000003", code == "S12000042" ~ "S11000007", 
+    code == "S12000044" ~ "S11000052", code == "S12000045" ~ "S11000009", code == "S12000046" ~ "S11000015", 
+    code == "S12000047" ~ "S11000014", code == "S12000048" ~ "S11000023", code == "S12000013" ~ "S11000032", 
+    code == "S12000027" ~ "S11000026", code == "S12000023" ~ "S11000022", TRUE ~ "Error")) %>% 
+  group_by(year, code, trend_axis) %>% summarise_all(funs(sum)) %>% ungroup()
 
-saveRDS(data_drugmat, paste0(formatted_data, "maternities_drug_use_formatted.rds"))
+data_drugmat <- rbind(data_drugmat, drugmat_adp) 
 
 ###############################################.
-## Part 3 - Run analysis functions ----
+## Part 2 - Computing rates and adding labels ----
 ###############################################.
-analyze_second(filename = "maternities_drug_use", measure = "crude", crude_rate = 1000, 
-               ind_id = aaaa, year_type = "financial", profile = "HN", min_opt = 1245385)
+data_drugmat <- data_drugmat %>% 
+  #create 3-year average values.
+  mutate(numerator = numerator/3,
+         denominator = denominator/3,
+  # calculate the rate and the confidence intervals (Byars method)
+         rate = numerator/denominator*1000,
+         o_lower = numerator *(1-1/9/numerator-1.96/3/sqrt(numerator))^3,
+         o_upper = (numerator+1) *(1-1/9/(numerator+1)+1.96/3/sqrt(numerator+1))^3,
+         lowci = o_lower/(denominator)*1000,
+         upci = o_upper/(denominator)*1000) %>% 
+  select(-o_upper,- o_lower) %>% 
+  # add in the definition period label.
+mutate(def_period = paste0(substr(trend_axis, 1, 7), " to ", substr(trend_axis, 9, 15),
+                    " ", "financial years; 3-year aggregates"),
+       ind_id = 4129, #indicator number
+       #change number to first opt number
+       uni_id = paste0("DU", (seq_len(nrow(.)) + 160000 - 1)))
+
+#Preparing data for Shiny tool
+data_shiny <- data_drugmat %>% select(-denominator, -uni_id)
+#Including both rds and csv file for now
+saveRDS(data_shiny, file = paste0(folder_data, "Shiny Data/maternity_druguse_shiny.rds"))
+write_csv(data_shiny, path = paste0(folder_data, "Shiny Data/", filename, "_shiny.csv"))
+
+# Reorder by column index: uni_id code ind_id year numerator rate lowci upci def_period trend_axis.
+data_oldopt <- data_drugmat[c("uni_id", "code", "ind_id", "year", "numerator", "rate", "lowci" ,
+                             "upci", "def_period", "trend_axis")] 
+#Saving file for old OPT
+write_csv(data_oldopt, path = paste0(folder_data, "OPT Data/maternity_druguse_OPT.csv"),
+          col_names = FALSE)
+
 
 ##END