-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_analysis.R
138 lines (97 loc) · 5.35 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
## Run_Analysis
####################################
## Data Folder Configuration ##
####################################
library(dplyr)
source("DataFolderConfiguration.R")
# PrjLocalBase <- "C:/Project"
## The current working directory is used as the project base folder.
PrjLocalBase <- getwd()
## The data is read from "<working directory>/Data/UCI HAR Dataset".
## If the files are already in place, the call to DataFolderConfiguration()
## below can be commented out. Otherwise DataFolderConfiguration.R must be in
## the working directory so that the source() call above can find it
## (presumably it downloads/unpacks the dataset -- defined in that file).
DataFolderConfiguration(PrjLocalBase)
PrjDataLocalDir <- paste0(PrjLocalBase, "/Data")
## Root folder of the downloaded dataset
UCIDatasetDir <- paste0(PrjDataLocalDir, "/UCI HAR Dataset")
## To read the data from a different place, uncomment the following line and
## enter the path to the location of the unzipped files.
# UCIDatasetDir <- "Location of unzipped Files"
###################################
##    Script Initialization      ##
###################################
## Put the directory structure into variables.
## Folder and file names are spelled in lower case ("test", "train",
## "features.txt", "X_train.txt", "y_train.txt", ...) to match the layout of
## the UCI HAR Dataset archive; the previous mixed-case spellings only resolve
## on case-insensitive file systems.
TestDataSetDir <- file.path(UCIDatasetDir, "test")
TrainDataSetDir <- file.path(UCIDatasetDir, "train")
## Put the file structure into variables
# Lookup tables: feature (column) names and activity labels
FileColVariableNamesTable <- file.path(UCIDatasetDir, "features.txt")
FileActivityTable <- file.path(UCIDatasetDir, "activity_labels.txt")
# Measurement tables
FilePrimaryTrainData <- file.path(TrainDataSetDir, "X_train.txt")
FilePrimaryTestData <- file.path(TestDataSetDir, "X_test.txt")
# Activity ID recorded for every measurement row
FileTrainActivityID <- file.path(TrainDataSetDir, "y_train.txt")
FileTestActivityID <- file.path(TestDataSetDir, "y_test.txt")
##################################
##       Read Tables in         ##
##################################
## Lookup tables: measurement names and activity labels
tblDataColNames <- read.table(file = FileColVariableNamesTable)
tblActivity <- read.table(file = FileActivityTable)
## Activity ID of each observation in the train and test sets
tblTrainActivityID <- read.table(file = FileTrainActivityID)
tblTestActivityID <- read.table(file = FileTestActivityID)
## The second column of the feature table supplies the column names
DataColNames <- tblDataColNames[[2]]
## Read the actual measurements and label their columns in one step
TrainData <- setNames(read.table(file = FilePrimaryTrainData), DataColNames)
TestData <- setNames(read.table(file = FilePrimaryTestData), DataColNames)
## Alternative (disabled): without fixed = TRUE the pattern is a regular
## expression in which "()" is an empty group, so every name containing
## "mean" or "std" is matched. ** only one set should be used **
# meanCols <- grep("mean()", tblDataColNames$V2)
# stdCols <- grep("std()", tblDataColNames$V2)
## Active version: fixed = TRUE matches the literal text "mean()" and "std()"
## only. ** only one set should be used **
meanCols <- grep(pattern = "mean()", x = tblDataColNames$V2, fixed = TRUE)
stdCols <- grep(pattern = "std()", x = tblDataColNames$V2, fixed = TRUE)
## Combine both index sets, then put them in ascending column order
DesiredCols <- c(as.numeric(meanCols), as.numeric(stdCols))
DesiredColsSorted <- sort(DesiredCols)
## Keep only the selected measurement columns in each data set
DesiredTrainData <- TrainData[, DesiredColsSorted]
DesiredTestData <- TestData[, DesiredColsSorted]
## Add the ActivityID, then the Activity name.
## The name is looked up with match() instead of merge(): merge() sorts its
## result by the key column, which reorders the observations, while the
## subject IDs are bound on by row position further down the script. Keeping
## the original row order is what keeps each subject with its own measurements.
colnames(tblActivity)[1] <- "ActivityID"
TrainWithActivity <- cbind(DesiredTrainData, ActivityID = tblTrainActivityID[[1]])
TestWithActivity <- cbind(DesiredTestData, ActivityID = tblTestActivityID[[1]])

## Returns `observations` with ActivityID as the first column, the measurement
## columns next, and a trailing "Activity" column taken from the second column
## of `activityTable`. IDs missing from `activityTable` yield NA (as with
## all.x = TRUE in merge()); the row order of `observations` is not changed.
LabelActivities <- function(observations, activityTable) {
  ids <- observations[["ActivityID"]]
  measurements <- observations[setdiff(colnames(observations), "ActivityID")]
  cbind(
    ActivityID = ids,
    measurements,
    Activity = activityTable[[2]][match(ids, activityTable[["ActivityID"]])]
  )
}

TrainWithActNames <- LabelActivities(TrainWithActivity, tblActivity)
TestWithActNames <- LabelActivities(TestWithActivity, tblActivity)
# Combine Train and Test Datasets (train rows first, then test rows)
TidyData <- rbind(TrainWithActNames, TestWithActNames)
#############################################################
##  Add Subject ID to Dataframe                            ##
#############################################################
## Subject file names are spelled in lower case, as in the dataset archive,
## so they also resolve on case-sensitive file systems.
FileSubjectIDTrain <- file.path(TrainDataSetDir, "subject_train.txt")
FileSubjectIDTest <- file.path(TestDataSetDir, "subject_test.txt")
## Keep the one-column data frames returned by read.table(). Wrapping them in
## as.vector() yields a plain list in R >= 4.3, which breaks the rbind()/cbind()
## below.
SubjectIDTrain <- read.table(FileSubjectIDTrain)
SubjectIDTest <- read.table(FileSubjectIDTest)
## Stack train then test, the same order used to build TidyData
SubjectIDs <- rbind(SubjectIDTrain, SubjectIDTest)
## The IDs are attached by row position, so the row counts must agree
stopifnot(nrow(SubjectIDs) == nrow(TidyData))
## Connect the subject IDs to the tidy data; the column is named directly
## instead of relying on a hard-coded column position.
TidyDataComplete <- cbind(TidyData, SubjectID = SubjectIDs[[1]])
############################################################
##  Tidy Data processing is complete                      ##
############################################################
## Save the combined data set as a text table without row names
write.table(
  x = TidyDataComplete,
  file = "ClassProject_Tidy.txt",
  row.names = FALSE
)
############################################################
##  Create second Tidy Dataset with summarization         ##
############################################################
## summarise(across(everything(), mean)) replaces the deprecated
## summarize_each(..., funs(mean)); across() never touches the grouping
## column, so each result has one row per group and the mean of every
## remaining column.
ByActivity <- group_by(TidyDataComplete, Activity)
# Drop ActivityID and SubjectID: averaging identifiers is meaningless
MeanByActivity <- summarise(
  select(ByActivity, -c(ActivityID, SubjectID)),
  across(everything(), mean)
)
BySubject <- group_by(TidyDataComplete, SubjectID)
# Drop ActivityID and Activity: they are not needed per subject
MeanBySubject <- summarise(
  select(BySubject, -c(ActivityID, Activity)),
  across(everything(), mean)
)
## `row.names` is spelled out in full rather than relying on partial
## argument matching of `row.name`.
write.table(MeanByActivity, file = "ClassProject_MeanByActivity.txt", row.names = FALSE)
write.table(MeanBySubject, file = "ClassProject_MeanBySubject.txt", row.names = FALSE)