# maurer.R
# Practice script: R basics followed by dplyr, ggplot2 and tidyr examples.
library(tidyr)
library(dplyr)
library(modelr)
library(lubridate)
library(ggplot2)
library(nycflights13)
library(ggcorrplot)
## Basics: arithmetic, strings and assignment
print(7 + 2)
print("Sepp")

# Bind a value to a name, derive a second value from it, then show it.
a <- 1
b <- a * 3
print(b)
#' Add an increment to a value
#'
#' Vectorised over both arguments because it relies on `+`.
#'
#' @param a Numeric value (or vector) to increase.
#' @param d Amount to add. Defaults to 1, matching the function name
#'   ("addiere eins" = "add one"); callers may still pass any increment.
#' @return `a + d`.
#' @examples
#' addiere_eins(5)     # 6
#' addiere_eins(5, 2)  # 7
addiere_eins <- function(a, d = 1) {
  a + d
}
# Call the helper with both arguments supplied and keep the result.
b <- addiere_eins(5, 2)

## Operations are vectorised
# Arithmetic operators act on every element of a vector at once.
b <- c(1, 2, 3)
b + 1
b * 2 + 1
## Data frames hold data
# Auto-printing and an explicit print() both display the table.
mpg
print(mpg)

# Scatter plot of `displ` against `hwy`, coloured by `class`.
ggplot(mpg, aes(x = displ, y = hwy, colour = class)) +
  geom_point(size = 2)

# Column names and the distinct values of one column.
colnames(mpg)
unique(mpg[["class"]])

# The flights table, referenced through its package namespace.
nycflights13::flights
## Verb filter
# filter() applies logical conditions to the data set and keeps matching rows.
filter(flights, arr_delay > 120 & dep_delay > 120)

## Dimensions, number of columns, number of rows
dim(flights)
ncol(flights)
nrow(flights)
nrow(filter(flights, arr_delay > 120 & dep_delay > 120))

## Verb arrange
# Ascending, descending, and descending with a second sort key.
arrange(flights, dep_delay)
arrange(flights, desc(dep_delay))
arrange(flights, desc(dep_delay), carrier)

## Verb select
select(flights, year, dep_delay, carrier)
dat.sub <- select(flights, distance, air_time)

## Verb mutate
mutate(dat.sub, airspeed = distance / air_time)

## Verb summarise
# The first call omits na.rm; the second drops missing values before averaging.
summarise(flights, delay = mean(dep_delay))
summarise(flights, delay = mean(dep_delay, na.rm = TRUE))

## Pipeline
# The same computation written as a nested call ...
summarise(
  select(flights, year, dep_delay, carrier),
  delay = mean(dep_delay, na.rm = TRUE)
)
# ... and as a pipeline that reads top to bottom.
flights %>%
  select(year, dep_delay, carrier) %>%
  summarise(delay = mean(dep_delay, na.rm = TRUE))

## Grouping
# Mean departure delay per carrier, largest first.
flights %>%
  select(year, dep_delay, carrier) %>%
  group_by(carrier) %>%
  summarise(delay = mean(dep_delay, na.rm = TRUE)) %>%
  arrange(desc(delay))
## Topic list (entries marked "OK" were presumably already covered -- confirm)
## mpg as a function of model year
## mpg by number of cylinders
## Correlation between variables (OK)
## Departure delay by airport
## Top destinations by origin
## NA handling (+ subsetting) (OK)

# First rows of the flights table.
head(flights)

# Distinct destinations and how many there are.
unique(flights[["dest"]])
length(unique(flights[["dest"]]))

# Mean arrival delay per destination, ignoring missing values.
flights %>%
  group_by(dest) %>%
  summarise(delay = mean(arr_delay, na.rm = TRUE))

# Scatter plot of arrival delay against distance.
flights %>%
  select(arr_delay, distance) %>%
  ggplot(aes(x = arr_delay, y = distance)) +
  geom_point()

# Correlation matrix of mtcars, rounded to two decimals, shown as a plot.
corr <- round(cor(mtcars), 2)
ggcorrplot(corr)
# Open the example tables in the data viewer.
View(table1)
View(table4a)

## Verb pivot_longer
# c() builds a vector of the columns to gather; their names go into `year`
# and their values into the column named by `values_to`.
tab1 <- table4a %>%
  pivot_longer(c("1999", "2000"), names_to = "year", values_to = "cases")
# table4a holds the case counts; the population counts live in table4b, so
# the population table must be built from table4b (building it from table4a
# would just relabel the case counts as "population").
tab2 <- table4b %>%
  pivot_longer(c("1999", "2000"), names_to = "year", values_to = "population")

## Join
# Both tables share the keys `country` and `year`; naming them explicitly
# avoids relying on the implicit natural join and its message.
left_join(tab1, tab2, by = c("country", "year"))
table2

## Verb pivot_wider
# pivot_wider() creates more columns: names_from says which column supplies
# the new column names, values_from which column supplies their values.
table2 %>%
  pivot_wider(names_from = type, values_from = count)

## Verb separate
# Split `rate` into two columns.
table3 %>%
  separate(rate, into = c("cases", "population"))

## Verb unite
# Paste `century` and `year` together into `nyear`, then split `rate`.
# The columns to unite are given once as bare names ...
table5 %>%
  unite(nyear, century, year, sep = "") %>%
  separate(rate, into = c("cases", "population"))
# ... and once as a character vector.
table5 %>%
  unite(nyear, c("century", "year"), sep = "") %>%
  separate(rate, into = c("cases", "population"))