-
Notifications
You must be signed in to change notification settings - Fork 14
/
PopularBabyNames.R
58 lines (46 loc) · 1.83 KB
/
PopularBabyNames.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#http://www.ssa.gov/OACT/babynames/limits.html
#http://www.ssa.gov/OACT/babynames/names.zip
#http://www.ssa.gov/OACT/babynames/state/namesbystate.zip
# Download a zip archive and extract it next to itself, unless the archive
# is already present locally.
#
# Args:
#   url:      Base URL (no trailing slash) of the directory hosting the archive.
#   filename: Archive file name; appended to `url` and reused as the local name.
#   dest.dir: Directory to download into and extract to. Defaults to the
#             script-level `dataDir`, which is looked up when the function
#             is called, so it only has to exist by then.
#
# Returns whatever unzip() returns (invisibly) when a download took place,
# and NULL (invisibly) when the archive was already there.
download.data <- function(url, filename, dest.dir = dataDir) {
  dest <- file.path(dest.dir, filename)
  if (!file.exists(dest)) {
    src <- paste(url, filename, sep = '/')
    # mode = 'wb': zip archives are binary; a text-mode transfer can corrupt
    # them on Windows.
    status <- tryCatch(
      download.file(url = src, destfile = dest, mode = 'wb'),
      error = function(e) {
        # Do not leave a partial file behind: it would satisfy file.exists()
        # on the next run and the archive would never be fetched again.
        unlink(dest)
        stop(e)
      }
    )
    # Some download methods report failure through a non-zero status
    # instead of raising an error.
    if (status != 0) {
      unlink(dest)
      stop('Download of ', src, ' failed with status ', status, call. = FALSE)
    }
    unzip(dest, exdir = dest.dir)
  }
}
# Remote location of the SSA baby-name archives and the local directory the
# archives are downloaded to and extracted in.
url <- 'http://www.ssa.gov/OACT/babynames'
dataDir <- file.path('data', 'ssa.gov')
# dataDir has to be assigned before it is tested. It is nested two levels
# deep, so recursive = TRUE is needed to create the parent 'data' directory
# when that does not exist yet either.
if (!dir.exists(dataDir)) {
  dir.create(dataDir, recursive = TRUE)
}
# National data (names.zip) and per-state data (state/namesbystate.zip).
download.data(url, 'names.zip')
download.data(paste(url, 'state', sep = '/'), 'namesbystate.zip')
library(dplyr)
#library(plyr)
#yob <- ldply(flist[grep('yob.*\\.txt', flist)], read.csv, header = F)
# Read every national "yobYYYY.txt" file found in dataDir into one data frame
# `yob` with columns name, sex, occurrence and year (an ordered factor).
# Skipped when an object called `yob` already exists, so re-running the
# script in the same session does not re-read the files.
if (!exists('yob')) {
  flist <- list.files(dataDir, full.names = TRUE)
  # Match on the file name only, so the directory part of the path can
  # neither cause nor prevent a match.
  flist.yob <- flist[grepl('^yob[0-9]{4}\\.txt$', basename(flist))]
  if (length(flist.yob) == 0) {
    stop('No yobYYYY.txt files found in ', dataDir, call. = FALSE)
  }
  col.names <- c('name', 'sex', 'occurrence')
  colClasses <- c('character', 'factor', 'integer')
  # Read all files first and bind once; growing a data frame with rbind()
  # inside a loop copies the accumulated rows on every iteration.
  yob <- do.call(rbind, lapply(flist.yob, function(f) {
    # Characters 4-7 of "yobYYYY.txt" are the year. Working on the base name
    # keeps this correct for any dataDir, unlike a fixed offset into the
    # full path.
    year <- substr(basename(f), 4, 7)
    print(year)  # progress indicator
    temp <- read.csv(f,
                     header = FALSE,
                     col.names = col.names,
                     colClasses = colClasses)
    temp$year <- rep(as.integer(year), nrow(temp))
    temp
  }))
  yob$year <- as.ordered(yob$year)
}
# Plot how many babies of sex `s` were named `n` in each year, marking the
# given birth year with a filled blue dot and the peak year with a red circle.
n <- 'Margaret'; s <- 'F'; birthYear <- 1999
# Exact match on the name: a suffix regex such as 'Ann$' would also pick up
# 'Joann', 'Leeann', ... and give several rows per year.
x <- yob %>%
  filter(name == n, sex == s) %>%
  select(year, occurrence)
if (nrow(x) == 0) {
  stop('No records found for name ', n, ' and sex ', s, call. = FALSE)
}
# year is an ordered factor; go through character to recover the actual
# year numbers rather than the factor's internal level codes.
x$year <- as.integer(as.character(x$year))
with(x, plot(year, occurrence, type = 'l',
             xlab = 'Birth Year',
             ylab = paste('No. of Babies named', n),
             main = paste0('Hello, ', n, '.')))
points(subset(x, year == birthYear), col = 'blue', pch = 19, cex = 2)
points(subset(x, occurrence == max(occurrence)), col = 'red', cex = 2)