-
Notifications
You must be signed in to change notification settings - Fork 0
/
project3_2.R
97 lines (81 loc) · 3.32 KB
/
project3_2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
library(riskCommunicator)
library(tidyverse)
library(tableone)
data("framingham")
# The Framingham data has been used to create models for cardiovascular risk.
# The variable selection and model below are designed to mimic the models used
# in the paper General Cardiovascular Risk Profile for Use in Primary Care
# This paper is available (cvd_risk_profile.pdf) on Canvas.
framingham_df <- framingham %>% select(c(CVD, TIMECVD, SEX, TOTCHOL, AGE,
SYSBP, CURSMOKE, DIABETES, BPMEDS,
HDLC, BMI))
framingham_df <- na.omit(framingham_df)
CreateTableOne(data=framingham_df, strata = c("SEX"))
# Get blood pressure based on whether or not on BPMEDS
framingham_df$SYSBP_UT <- ifelse(framingham_df$BPMEDS == 0,
framingham_df$SYSBP, 0)
framingham_df$SYSBP_T <- ifelse(framingham_df$BPMEDS == 1,
framingham_df$SYSBP, 0)
# Looking at risk within 15 years - remove censored data
dim(framingham_df)
framingham_df <- framingham_df %>%
filter(!(CVD == 0 & TIMECVD <= 365*15)) %>%
select(-c(TIMECVD))
dim(framingham_df)
# Filter to each sex
framingham_df_men <- framingham_df %>% filter(SEX == 1)
framingham_df_women <- framingham_df %>% filter(SEX == 2)
# Fit models with log transforms for all continuous variables
mod_men <- glm(CVD~log(HDLC)+log(TOTCHOL)+log(AGE)+log(SYSBP_UT+1)+
log(SYSBP_T+1)+CURSMOKE+DIABETES,
data= framingham_df_men, family= "binomial")
mod_women <- glm(CVD~log(HDLC)+log(TOTCHOL)+log(AGE)+log(SYSBP_UT+1)+
log(SYSBP_T+1)+CURSMOKE+DIABETES,
data= framingham_df_women, family= "binomial")
# The NHANES data here finds the same covariates among this national survey data
library(nhanesA)
# blood pressure, demographic, bmi, smoking, and hypertension info
bpx_2017 <- nhanes("BPX_J") %>%
select(SEQN, BPXSY1 ) %>%
rename(SYSBP = BPXSY1)
demo_2017 <- nhanes("DEMO_J") %>%
select(SEQN, RIAGENDR, RIDAGEYR) %>%
rename(SEX = RIAGENDR, AGE = RIDAGEYR)
bmx_2017 <- nhanes("BMX_J") %>%
select(SEQN, BMXBMI) %>%
rename(BMI = BMXBMI)
smq_2017 <- nhanes("SMQ_J") %>%
mutate(CURSMOKE = case_when(SMQ040 %in% c(1,2) ~ 1,
SMQ040 == 3 ~ 0,
SMQ020 == 2 ~ 0)) %>%
select(SEQN, CURSMOKE)
bpq_2017 <- nhanes("BPQ_J") %>%
mutate(BPMEDS = case_when(
BPQ020 == 2 ~ 0,
BPQ040A == 2 ~ 0,
BPQ050A == 1 ~ 1,
TRUE ~ NA )) %>%
select(SEQN, BPMEDS)
tchol_2017 <- nhanes("TCHOL_J") %>%
select(SEQN, LBXTC) %>%
rename(TOTCHOL = LBXTC)
hdl_2017 <- nhanes("HDL_J") %>%
select(SEQN, LBDHDD) %>%
rename(HDLC = LBDHDD)
diq_2017 <- nhanes("DIQ_J") %>%
mutate(DIABETES = case_when(DIQ010 == 1 ~ 1,
DIQ010 %in% c(2,3) ~ 0,
TRUE ~ NA)) %>%
select(SEQN, DIABETES)
# Join data from different tables
df_2017 <- bpx_2017 %>%
full_join(demo_2017, by = "SEQN") %>%
full_join(bmx_2017, by = "SEQN") %>%
full_join(hdl_2017, by = "SEQN") %>%
full_join(smq_2017, by = "SEQN") %>%
full_join(bpq_2017, by = "SEQN") %>%
full_join(tchol_2017, by = "SEQN") %>%
full_join(diq_2017, by = "SEQN")
CreateTableOne(data = df_2017, strata = c("SEX"))
write_csv(framingham_df, 'framingham_df.csv')
write_csv(df_2017,'df_2017.csv')