Skip to content

Commit

Permalink
bringind old data and finishing analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
jvillacampa committed Jan 9, 2019
1 parent c5e863f commit 9a2e604
Showing 1 changed file with 70 additions and 24 deletions.
94 changes: 70 additions & 24 deletions Maternities with drug use.R
Original file line number Diff line number Diff line change
@@ -1,54 +1,100 @@
# ScotPHO indicators: Maternities with drug use

# Part 1 - Extract data from open data platform
# Part 2 - P7 Child dental raw data
# Part 3 - Run analysis functions

# TODO:
# Need to bring older data in
#I probably don't need the functions, maybe the second one
#check the spss code
# Part 1 - Create basefile
# Part 2 - Computing rates and adding labels

###############################################.
## Packages/Filepaths/Functions ----
## Packages/Filepaths ----
library(dplyr)
library(readr)

server_desktop <- "server" # change depending if you are using R server or R desktop
if (server_desktop == "server") {
prepared_data <- "/PHI_conf/ScotPHO/Profiles/Data/Prepared Data/"
formatted_data <- "/PHI_conf/ScotPHO/Profiles/Data/Temporary/"
folder_data <- "/PHI_conf/ScotPHO/Profiles/Data/"
} else if (server_desktop == "desktop") {
prepared_data <- "//stats/ScotPHO/Profiles/Data/Prepared Data/"
formatted_data <- "//stats/ScotPHO/Profiles/Data/Temporary/"
folder_data <- "//stats/ScotPHO/Profiles/Data/"
}

###############################################.
## Part 1 - Extract data from open data platform ----
## Part 1 - Create basefile ----
###############################################.
data_drugmat_ca <- read_csv("https://www.opendata.nhs.scot/dataset/df10dbd4-81b3-4bfa-83ac-b14a5ec62296/resource/3e96277a-9029-4390-ab90-ec600f9926a5/download/11.6_ca_drugmisuse.csv") %>%
#Bringing old data not present in open data platform. CHECK that in open data platform
#they keep the whole trend too. If not incorporate the oldest data to this file.
#Data available at Health Board Data prior 2008/09-2010/11 based on pre 2014 health board boundaries.
drugmat_old <- readRDS(file=paste0(folder_data, 'Prepared Data/maternity_drug_old_do_not_delete.rds'))
#Now extract data from open data platform
drugmat_ca <- read_csv("https://www.opendata.nhs.scot/dataset/df10dbd4-81b3-4bfa-83ac-b14a5ec62296/resource/3e96277a-9029-4390-ab90-ec600f9926a5/download/11.6_ca_drugmisuse.csv") %>%
setNames(tolower(names(.))) %>% #variables to lower case
rename(code = ca2011) #to allow merging

data_drugmat_hb <- read_csv("https://www.opendata.nhs.scot/dataset/df10dbd4-81b3-4bfa-83ac-b14a5ec62296/resource/8c8377e1-b1c7-48e7-b313-79eb5ac3c110/download/11.6_hb_drugmisuse.csv") %>%
drugmat_hb <- read_csv("https://www.opendata.nhs.scot/dataset/df10dbd4-81b3-4bfa-83ac-b14a5ec62296/resource/8c8377e1-b1c7-48e7-b313-79eb5ac3c110/download/11.6_hb_drugmisuse.csv") %>%
setNames(tolower(names(.))) %>% #variables to lower case
rename(code = hbr2014) %>% select(-hbr2014qf) #to allow merging

data_drugmat <- rbind(data_drugmat_ca, data_drugmat_hb) %>%
#Merging together ca and hb
data_drugmat <- rbind(drugmat_ca, drugmat_hb) %>%
#selecting only totals and hb, ca and scotland
filter(simdquintileqf == "d" &
substr(code, 1, 3) %in% c("S92", "S08", "S12")) %>%
rename(year = financialyears, numerator = drugmisuse, denominator = maternities) %>%
select(year, code, numerator, denominator) %>%
rename(trend_axis = financialyears, numerator = drugmisuse, denominator = maternities) %>%
select(trend_axis, code, numerator, denominator) %>%
#reformatting year to style needed
mutate(year = as.numeric(paste0(substr(year, 1, 2), substr(year, 6, 7))))
mutate(year = as.numeric(paste0(substr(trend_axis, 1, 2), substr(trend_axis, 6, 7))))

data_drugmat <- rbind(data_drugmat, drugmat_old)

#Now, we need ADP level, so selecting councils and recoding codes.
# both lanarkshires CA are one ADP and Mid and East lothian are one ADP
drugmat_adp <- data_drugmat %>% filter(substr(code,1,3) == "S12") %>%
mutate(code = case_when(
code == "S12000005" ~ "S11000005", code == "S12000006" ~ "S11000006", code == "S12000008" ~ "S11000008",
code == "S12000010" ~ "S11000051", code == "S12000011" ~ "S11000011", code == "S12000014" ~ "S11000013",
code == "S12000017" ~ "S11000016", code == "S12000018" ~ "S11000017", code == "S12000019" ~ "S11000051",
code == "S12000020" ~ "S11000019", code == "S12000021" ~ "S11000010", code == "S12000026" ~ "S11000025",
code == "S12000028" ~ "S11000027", code == "S12000029" ~ "S11000052", code == "S12000030" ~ "S11000029",
code == "S12000033" ~ "S11000001", code == "S12000034" ~ "S11000002", code == "S12000035" ~ "S11000004",
code == "S12000036" ~ "S11000012", code == "S12000038" ~ "S11000024", code == "S12000039" ~ "S11000030",
code == "S12000040" ~ "S11000031", code == "S12000041" ~ "S11000003", code == "S12000042" ~ "S11000007",
code == "S12000044" ~ "S11000052", code == "S12000045" ~ "S11000009", code == "S12000046" ~ "S11000015",
code == "S12000047" ~ "S11000014", code == "S12000048" ~ "S11000023", code == "S12000013" ~ "S11000032",
code == "S12000027" ~ "S11000026", code == "S12000023" ~ "S11000022", TRUE ~ "Error")) %>%
group_by(year, code, trend_axis) %>% summarise_all(funs(sum)) %>% ungroup()

saveRDS(data_drugmat, paste0(formatted_data, "maternities_drug_use_formatted.rds"))
data_drugmat <- rbind(data_drugmat, drugmat_adp)

###############################################.
## Part 3 - Run analysis functions ----
## Part 2 - Computing rates and adding labels ----
###############################################.
analyze_second(filename = "maternities_drug_use", measure = "crude", crude_rate = 1000,
ind_id = aaaa, year_type = "financial", profile = "HN", min_opt = 1245385)
data_drugmat <- data_drugmat %>%
#create 3-year average values.
mutate(numerator = numerator/3,
denominator = denominator/3,
# calculate the rate and the confidence intervals (Byars method)
rate = numerator/denominator*1000,
o_lower = numerator *(1-1/9/numerator-1.96/3/sqrt(numerator))^3,
o_upper = (numerator+1) *(1-1/9/(numerator+1)+1.96/3/sqrt(numerator+1))^3,
lowci = o_lower/(denominator)*1000,
upci = o_upper/(denominator)*1000) %>%
select(-o_upper,- o_lower) %>%
# add in the definition period label.
mutate(def_period = paste0(substr(trend_axis, 1, 7), " to ", substr(trend_axis, 9, 15),
" ", "financial years; 3-year aggregates"),
ind_id = 4129, #indicator number
#change number to first opt number
uni_id = paste0("DU", (seq_len(nrow(.)) + 160000 - 1)))

#Preparing data for Shiny tool
data_shiny <- data_drugmat %>% select(-denominator, -uni_id)
#Including both rds and csv file for now
saveRDS(data_shiny, file = paste0(folder_data, "Shiny Data/maternity_druguse_shiny.rds"))
write_csv(data_shiny, path = paste0(folder_data, "Shiny Data/", filename, "_shiny.csv"))

# Reorder by column index: uni_id code ind_id year numerator rate lowci upci def_period trend_axis.
data_oldopt <- data_drugmat[c("uni_id", "code", "ind_id", "year", "numerator", "rate", "lowci" ,
"upci", "def_period", "trend_axis")]
#Saving file for old OPT
write_csv(data_oldopt, path = paste0(folder_data, "OPT Data/maternity_druguse_OPT.csv"),
col_names = FALSE)


##END

0 comments on commit 9a2e604

Please sign in to comment.