-
Notifications
You must be signed in to change notification settings - Fork 0
/
Chapter_15.Rmd
116 lines (81 loc) · 2.45 KB
/
Chapter_15.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
---
title: "Chapter 15"
author: "Laura"
date: "12/12/2019"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, cache = TRUE)
library(tidyverse); library(skimr); library(nycflights13); library(GGally); library(ggstance); library(lvplot); library(hexbin); library(modelr); library(magrittr); library(maps); library(stringr); library(htmlwidgets); library(forcats)
```
## Notes for Chapter 15: Factors
To work with factors, we’ll use the forcats package, which provides tools for dealing with categorical variables (and it’s an anagram of factors!).
### 15.2 Creating factors
```{r ch1521}
# Levels of `race`, followed by a table of how often each one occurs
# (most frequent first).
levels(gss_cat$race)
count(gss_cat, race, sort = TRUE)

# Bar chart of `race`; drop = FALSE keeps levels without observations
# on the x axis.
gss_cat %>%
  ggplot(aes(x = race)) +
  geom_bar() +
  scale_x_discrete(drop = FALSE)

# Same inspection for `rincome`: list the levels, then draw the per-level
# counts as horizontal bars.
levels(gss_cat$rincome)
count(gss_cat, rincome, sort = TRUE) %>%
  ggplot(aes(x = rincome, y = n)) +
  geom_col() +
  coord_flip()
```
```{r ch1522}
# Frequency tables for `relig` and `partyid`, most common level first.
gss_cat %>%
  count(relig, sort = TRUE)
gss_cat %>%
  count(partyid, sort = TRUE)

# Which `relig` values have a `denom` other than "Not applicable"?
# The rows are already aggregated by count(), so the counts are drawn with
# geom_point(); geom_count() would run stat_sum() over the pre-counted rows
# a second time.
gss_cat %>%
  count(relig, denom, sort = TRUE) %>%
  filter(denom != "Not applicable") %>%
  ggplot(aes(x = relig, y = denom)) +
  geom_point(aes(size = n, color = n))

levels(gss_cat$denom)

# On the raw (un-aggregated) rows geom_count() does the tallying itself.
ggplot(gss_cat) +
  geom_count(aes(relig, denom))
```
### 15.4 Modifying factor order
```{r ch1541}
# Proportion of each `marital` level within every `age` (rows with a missing
# `age` are dropped first). ungroup() at the end so the stored tibble does not
# silently carry the `age` grouping into later code.
by_age <- gss_cat %>%
  filter(!is.na(age)) %>%
  count(age, marital) %>%
  group_by(age) %>%
  mutate(prop = n / sum(n)) %>%
  ungroup()

# Legend in the default level order of `marital`.
ggplot(by_age, aes(age, prop, colour = marital)) +
  geom_line(na.rm = TRUE)

# Legend reordered with fct_reorder2(marital, age, prop); labs() restores the
# plain "marital" legend title.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# switch once the installed ggplot2 version is confirmed.
ggplot(by_age, aes(age, prop, colour = fct_reorder2(marital, age, prop))) +
  geom_smooth(se = FALSE, size = 0.1) +
  labs(colour = "marital")

# Print the summarised table to see exactly what the pipeline above produced
# (previously the whole pipeline was repeated verbatim for this).
by_age
```
```{r ch1542}
# Rows of gss_cat sorted by descending `tvhours`.
arrange(gss_cat, desc(tvhours))

# Density estimate of `tvhours`.
ggplot(gss_cat, aes(x = tvhours)) +
  geom_density()
```
### 15.5 Modifying factor levels
```{r ex1551}
# Overview of the data set and the original `partyid` levels.
glimpse(gss_cat)
levels(gss_cat$partyid)

# Collapse `partyid` into four groups, compute each group's share of the
# responses within every `year`, and plot the shares over time.
gss_cat %>%
  mutate(
    partyid = fct_collapse(
      partyid,
      rep = c("Strong republican", "Not str republican"),
      dem = c("Strong democrat", "Not str democrat"),
      ind = c("Ind,near rep", "Independent", "Ind,near dem"),
      other = c("No answer", "Don't know", "Other party")
    )
  ) %>%
  count(year, partyid) %>%
  # Group by year so that sum(n) below is the per-year total.
  group_by(year) %>%
  mutate(prop = n / sum(n)) %>%
  ggplot(aes(x = year, y = prop, colour = partyid)) +
  geom_line()
```