-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_analysis.R
64 lines (55 loc) · 2.16 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# download and unzip the raw data file
obtainRawData = function() {
filename = 'getdata-projectfiles-UCI HAR Dataset.zip'
if(!file.exists(filename)) {
download.file('https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip',
filename,
method="curl")
}
unzip(filename)
}
# for a given set_type ('train' or 'test'), prepare a single
# data set containing all readings, plus the subject and activy data
combineDataSets = function(set_type, interestingColumns) {
setwd(set_type)
x_file = paste("X_",set_type,".txt", sep='')
fixedWidths = vector(mode='integer', 561) + 16
accelReadings = read.fwf(x_file, fixedWidths)
accelReadings = accelReadings[,interestingColumns]
y_file = paste("Y_",set_type,".txt", sep='')
activities = read.fwf(y_file, 2)
subject_file = paste("subject_",set_type,".txt", sep='')
subjects = read.fwf(subject_file, 2)
combined = data.table(cbind(subjects, activities, accelReadings))
# some cleanup
rm(accelReadings);rm(activities);rm(subjects);setwd('../')
combined
}
produceTidyData = function() {
setwd("./UCI HAR Dataset/")
features = read.table("./features.txt", sep=' ', strip.white=T)
activities = read.table("./activity_labels.txt", sep=' ', strip.white=T)
interestingColumns = grep("mean|std", features$V2)
# load the training and test data sets and combine them
trainingSet = combineDataSets("train", interestingColumns)
testSet = combineDataSets("test", interestingColumns)
allReadings = rbind(trainingSet, testSet)
setnames(allReadings, append(c("subject_id", "activity"), as.character(features$V2[interestingColumns])))
allReadings$activity = factor(allReadings$activity, levels=activities$V1, labels=activities$V2)
rm(trainingSet);
rm(testSet)
rm(features)
rm(activities)
tidyData = melt(allReadings, id.vars=c('subject_id', 'activity'))
rm(allReadings)
setwd("..")
tidyData
}
library(data.table)
library(reshape2)
library(plyr)
tidy = produceTidyData()
write.csv(tidy, "mean_and_std_obs.csv", row.names=F)
means = ddply(tidy, .(subject_id, activity, variable), summarize, mean=mean(value))
write.csv(means, "averages.csv", row.names=F)
means