-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathData Visualisation R code
150 lines (115 loc) · 4.45 KB
/
Data Visualisation R code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
library(ggplot2)
library(tidyverse)
# ggplotly() is a function in the plotly package; "ggplotly" is not a package
# name, so the original install call could not succeed. Install plotly
# instead, and only when it is missing, so that re-running the script does not
# reinstall it every time.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}

# The mpg data frame comes with ggplot2.
# With no geom layer added, this call produces an empty plot.
ggplot(data = mpg)

# Inspect the structure and the dimensions of the data.
str(mpg)
dim(mpg)
# Map vehicle class to the colour of the points.
ggplot(mpg) +
  geom_point(aes(displ, hwy, colour = class))

# Map class to alpha (transparency). The colour is set outside aes(), so it
# does not convey information about a variable; it only changes the
# appearance of the plot.
ggplot(mpg) +
  geom_point(aes(displ, hwy, alpha = class), colour = "blue")

# The same alpha mapping, drawn with violins instead of points.
ggplot(mpg) +
  geom_violin(aes(displ, hwy, alpha = class))

ggplot(mpg) +
  geom_point(aes(displ, hwy, alpha = class), colour = "blue")

# Several aesthetics at once, with one panel per class.
# Note: ggplot2 will only use six shapes at a time.
ggplot(mpg) +
  geom_point(
    aes(displ, class, alpha = hwy, shape = class, colour = displ < 5)
  ) +
  facet_wrap(~ class, nrow = 3)
# Facets ----
# Put the variable with more unique levels in the columns.
?facet_wrap

# One panel per class, wrapped over four rows.
ggplot(mpg) +
  geom_point(aes(displ, hwy)) +
  facet_wrap(vars(class), nrow = 4)

# Facet on two variables: drv in the rows, cyl in the columns
# (formula form: drv ~ cyl).
ggplot(mpg) +
  geom_point(aes(displ, hwy)) +
  facet_grid(rows = vars(drv), cols = vars(cyl))

# Leave one dimension out to facet on the other only
# (formula form: . ~ cyl).
ggplot(mpg) +
  geom_point(aes(displ, hwy)) +
  facet_grid(cols = vars(cyl))

# Faceting on a continuous variable (formula form: . ~ cty).
ggplot(mpg) +
  geom_point(aes(displ, cyl)) +
  facet_grid(cols = vars(cty))

# Rows only (formula form: drv ~ .).
ggplot(mpg) +
  geom_point(aes(displ, hwy)) +
  facet_grid(rows = vars(drv))

# Columns only (formula form: . ~ cyl).
ggplot(mpg) +
  geom_point(aes(displ, hwy)) +
  facet_grid(cols = vars(cyl))
# Geometric objects ----
# Every geom function in ggplot2 takes a mapping argument.
# However, not every aesthetic works with every geom.

# Scatterplot of hwy against displ.
ggplot(mpg) +
  geom_point(aes(displ, hwy))

# The same two variables drawn with a smooth geom.
ggplot(mpg) +
  geom_smooth(aes(displ, hwy))

?geom_smooth
# Use different linetypes to represent data.
# linetype needs a discrete variable: a continuous variable (the original code
# used displ) cannot be mapped to linetype, and ggplot2 raises an error when
# the plot is drawn. Map the categorical drv instead so one line is drawn per
# drive type.
ggplot(data = mpg) +
  geom_smooth(mapping = aes(x = cty, y = hwy, linetype = drv))
# geom_smooth() uses a single geometric object to represent multiple rows of
# data. Set the group aesthetic to a categorical variable to draw multiple
# objects.
ggplot(mpg) +
  geom_smooth(aes(displ, hwy, group = drv))

# Mapping drv to color also splits the data by drv; the legend is
# requested explicitly.
ggplot(mpg) +
  geom_smooth(aes(displ, hwy, color = drv), show.legend = TRUE)

# To display multiple geoms in the same plot, add multiple geom functions.
ggplot(mpg) +
  geom_point(aes(displ, hwy)) +
  geom_smooth(aes(displ, hwy, group = drv))
# Repeating the same mapping in every geom introduces duplication into the
# code. To avoid forgetting to update a variable in two places, pass the
# mappings to ggplot(): it passes them on to each geom in the graph.
#
# The title describes the variables actually plotted; the previous title
# (about COVID cases in Africa) was unrelated to the mpg data.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth() +
  ggtitle(label = "Highway mileage vs engine displacement")
# Mappings given to a geom function are local to that layer: ggplot2 uses them
# to extend or overwrite the global mappings for that layer only. This makes
# it possible to display different aesthetics in different layers.
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth()

# Each layer can also use its own data: here the smooth is drawn for the
# subcompact class only. se = TRUE displays the confidence interval around
# the smooth.
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth(
    data = filter(mpg, class == "subcompact"),
    se = TRUE
  )
# Statistical transformations ----
# Bar chart of the diamonds data by cut.
ggplot(diamonds) +
  geom_bar(aes(cut))

# You can generally use geoms and stats interchangeably: the same chart
# written with stat_count() instead of geom_bar().
ggplot(diamonds) +
  stat_count(aes(cut))
# Adding labels (ggplot2::labs)
# The examples below add labels to a base plot `p`. It was never defined in
# this script, so every statement failed with "object 'p' not found"; define
# it here from the mpg data used throughout the file.
p <- ggplot(data = mpg, mapping = aes(x = displ, y = hwy, colour = class)) +
  geom_point()

# The plot title appears at the top-left, with the subtitle
# displayed in smaller text underneath it.
p + labs(title = "New plot title")
p + labs(title = "New plot title", subtitle = "A subtitle")

# The caption appears in the bottom-right, and is often used for
# sources, notes or copyright.
p + labs(caption = "(based on data from ...)")

# The plot tag appears at the top-left, and is typically used
# for labelling a subplot with a letter.
p + labs(title = "title", tag = "A")

# If you want to remove a label, set it to NULL.
p +
  labs(title = "title") +
  labs(title = NULL)
# Convert several columns of a dataset in one step.
# The original call had no data argument, so it failed with a missing-argument
# error; it is applied to mpg here. across() + where() replaces the superseded
# mutate_if().
# Note: as.numeric() leaves double columns unchanged, so swap the predicate
# and the converter for the conversion you actually need, e.g.
# across(where(is.integer), as.numeric).
mutate(mpg, across(where(is.double), as.numeric))