forked from ekarsten/family-cohort
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfamily_cohort_final_paper.R
152 lines (114 loc) · 3.52 KB
/
family_cohort_final_paper.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#====================================
# Final Research Project Analysis
#====================================
#-----------------------------------
# Setting up workspace
#-----------------------------------
# Locate the project root: start at the current working directory and climb
# one directory at a time until the folder named "family-cohort" is reached.
# This lets the script run from any subdirectory of the repository.
root <- getwd()
while (basename(root) != "family-cohort") {
  # dirname() of the filesystem root returns the root itself, so without this
  # check the loop would never end when run outside the repository.
  if (identical(dirname(root), root)) {
    stop(
      "Could not find a 'family-cohort' directory at or above ", getwd(),
      "; run this script from inside the project folder.",
      call. = FALSE
    )
  }
  root <- dirname(root)
}
# Run each person's own data.R script from the project root so that everyone
# can keep their data in a different place on their machine.
# NOTE(review): data.R presumably defines `ddir`, which is used below but is
# not defined anywhere in this script -- confirm.
# (Previously this sourced "df.Rda", which is a saved data file, not R code,
# and so cannot be source()'d.)
source(file.path(root, "data.R"))
# Loading the packages we want
library(tidyverse)
library(haven) # for reading stata data
library(lfe) # for fixed effect regression
library(stargazer) # for pretty regression tables
#-----------------------------------
# Loading In the Data
#-----------------------------------
#df <- read_dta(file.path(ddir, "Joneses", "ipums.dta.gz"))
#save(small_df, file = file.path(ddir, "Joneses", "small_df.Rda"))
# Restore the saved objects from df.Rda in the data directory; the rest of
# the script relies on this providing `df`.
load(file = file.path(ddir, "df.Rda"))
# Quick look at the first few rows
df %>% head()
#-----------------------------------
# Cleaning Our Data
#-----------------------------------
# Data cleaning (Sunday, April 28th)

# New York sample: rows with PWMETRO 5600 and MARST 1, dropping race codes
# 7, 8 and 9 for both the respondent (RACE) and the spouse (RACE_SP).
# NOTE(review): presumably MARST 1 means "married" and codes 7-9 are the
# other/multiracial categories -- confirm against the codebook.
NYmetro_df <- filter(
  df,
  PWMETRO == 5600,
  MARST == 1,
  RACE != 7,
  RACE != 8,
  RACE != 9,
  RACE_SP != 7,
  RACE_SP != 8,
  RACE_SP != 9
)

# Couples in the New York sample whose race codes differ
NYinter_df <- filter(NYmetro_df, RACE != RACE_SP)

summary(NYmetro_df)

# Los Angeles rows (PWMETRO 4480), not yet restricted by MARST or race.
# This object is rebuilt with the full set of filters in the Figures section.
LAmetro_df <- filter(df, PWMETRO == 4480)
#-----------------------------------
# Figures
#-----------------------------------
# May 5th

# Los Angeles sample: rows with PWMETRO 4480 and MARST 1, dropping race codes
# 7, 8 and 9 for both the respondent (RACE) and the spouse (RACE_SP).
# NOTE(review): presumably MARST 1 means "married" and codes 7-9 are the
# other/multiracial categories -- confirm against the codebook.
LAmetro_df <- df %>%
  filter(
    PWMETRO == 4480,
    MARST == 1,
    RACE != 7,
    RACE != 8,
    RACE != 9,
    RACE_SP != 7,
    RACE_SP != 8,
    RACE_SP != 9
  )

# Couples in the LA sample whose race codes differ
LAinter_df <- LAmetro_df %>%
  filter(RACE != RACE_SP)

# The shares below are computed from the data frames themselves rather than
# from hand-typed row counts, so they stay correct if the data or the filters
# above change.

# --- 1980 ---
LA1980_df <- LAmetro_df %>%
  filter(YEAR == 1980)
LA1980_inter_df <- LA1980_df %>%
  filter(RACE != RACE_SP)
# proportion of interracial marriages in LA in 1980
nrow(LA1980_inter_df) / nrow(LA1980_df)
# previously recorded result: 322/8755 = 0.03677898

# --- 1990 ---
LA1990_df <- LAmetro_df %>%
  filter(YEAR == 1990)
LA1990_inter_df <- LA1990_df %>%
  filter(RACE != RACE_SP)
# proportion of interracial marriages in LA in 1990
nrow(LA1990_inter_df) / nrow(LA1990_df)
# previously recorded result: 866/17805 = 0.04863802

# --- 2000 ---
LA2000_df <- LAmetro_df %>%
  filter(YEAR == 2000)
LA2000_inter_df <- LA2000_df %>%
  filter(RACE != RACE_SP)
# proportion of interracial marriages in LA in 2000
nrow(LA2000_inter_df) / nrow(LA2000_df)
# previously recorded result: 958/14440 = 0.06634349
# Figures start here.

# Per-year share of LA couples whose race codes differ: flag each couple
# (1 = different, 0 = same), then average the flag within each YEAR.
LAgroup <- LAmetro_df %>%
  mutate(interracial = as.numeric(RACE != RACE_SP)) %>%
  group_by(YEAR) %>%
  summarise(inter_prop = mean(interracial))

# Points joined by a line: share of interracial couples by year, LA
ggplot(LAgroup, aes(x = YEAR, y = inter_prop)) +
  geom_point() +
  geom_line()
# Per-year share of New York couples whose race codes differ: flag each
# couple (1 = different, 0 = same), then average the flag within each YEAR.
NYgroup <- NYmetro_df %>%
  mutate(interracial2 = as.numeric(RACE != RACE_SP)) %>%
  group_by(YEAR) %>%
  summarise(inter_prop2 = mean(interracial2))

# Points joined by a line: share of interracial couples by year, New York
ggplot(NYgroup, aes(x = YEAR, y = inter_prop2)) +
  geom_point() +
  geom_line()
# Combined sample: rows from either metro area (PWMETRO 4480 or 5600) with
# MARST 1, dropping race codes 7, 8 and 9 for both the respondent (RACE) and
# the spouse (RACE_SP).
LA_NYmetro_df <- df %>%
  filter(
    PWMETRO == 4480 | PWMETRO == 5600,
    MARST == 1,
    RACE != 7,
    RACE != 8,
    RACE != 9,
    RACE_SP != 7,
    RACE_SP != 8,
    RACE_SP != 9
  )

# Share of couples whose race codes differ, by year and metro area.
# After summarise() the result would still be grouped by YEAR, so we
# ungroup() before converting PWMETRO to a factor; the factor gives each
# metro area its own discrete colour in the plot below.
NY_LAgroup <- LA_NYmetro_df %>%
  mutate(interracial3 = as.numeric(RACE != RACE_SP)) %>%
  group_by(YEAR, PWMETRO) %>%
  summarise(inter_prop3 = mean(interracial3)) %>%
  ungroup() %>%
  mutate(PWMETRO = as.factor(PWMETRO))

# One line per metro area: share of interracial couples by year
ggplot(data = NY_LAgroup, aes(x = YEAR, y = inter_prop3, color = PWMETRO)) +
  geom_point() +
  geom_line()