-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_analysis.R
72 lines (54 loc) · 3.4 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Locate the unpacked "UCI HAR Dataset" folder. The original author's path is
# the default; set the UCI_HAR_DATASET_DIR environment variable to run the
# script on another machine without editing this file.
dataset_dir <- Sys.getenv("UCI_HAR_DATASET_DIR", unset = NA)
if (is.na(dataset_dir) || !nzchar(dataset_dir)) {
  dataset_dir <- "C:/Users/Tycho/Documents/Coursera/Data Science Johns Hopkins/Getting and Cleaning Data/Course Project/UCI HAR Dataset"
}
if (!dir.exists(dataset_dir)) {
  stop("UCI HAR Dataset directory not found: ", dataset_dir, call. = FALSE)
}
# All file paths used by this script are relative to the dataset directory
setwd(dataset_dir)

# Loading all the data files into the global environment
# (sep = "" splits fields on any run of whitespace)
train_X_train <- read.table("./train/X_train.txt", sep = "")
train_y_train <- read.table("./train/y_train.txt", sep = "")
train_subject_train <- read.table("./train/subject_train.txt", sep = "")
test_X_test <- read.table("./test/X_test.txt", sep = "")
test_y_test <- read.table("./test/y_test.txt", sep = "")
test_subject_test <- read.table("./test/subject_test.txt", sep = "")
# Feature names and activity labels are kept as character strings (not
# factors) because they are edited with string functions later on
features <- read.table("./features.txt", sep = "", stringsAsFactors = FALSE)
activity_labels <- read.table(
  "./activity_labels.txt",
  sep = "",
  stringsAsFactors = FALSE
)
# Changing the activity labels into lowercase and removing every "_"
activity_labels[, 2] <- tolower(
  gsub("_", "", activity_labels[, 2], fixed = TRUE)
)

# Translate a vector of activity codes into their descriptive labels.
# Codes are looked up in the first column of activity_labels and replaced by
# the text in its second column; a code with no matching label is kept as its
# own text so that no observation is silently turned into NA.
label_activities <- function(codes) {
  label_row <- match(codes, activity_labels[, 1])
  has_label <- !is.na(label_row)
  labelled <- as.character(codes)
  labelled[has_label] <- activity_labels[label_row[has_label], 2]
  labelled
}

# Assigning descriptive names to all the activities each participant performed
train_y_train[, 1] <- label_activities(train_y_train[, 1])
test_y_test[, 1] <- label_activities(test_y_test[, 1])
# Keep only the measurement columns whose feature name (second column of
# features) contains "mean" or "std"; the pattern match is case-sensitive
train_X_subset <- train_X_train[
  ,
  grep(pattern = "mean|std", x = features[[2]])
]
test_X_subset <- test_X_test[
  ,
  grep(pattern = "mean|std", x = features[[2]])
]
# Put the activity and subject columns in front of the selected measurements,
# separately for the train and the test set
train_X <- cbind(
  train_y_train,
  train_subject_train,
  train_X_subset
)
test_X <- cbind(
  test_y_test,
  test_subject_test,
  test_X_subset
)
# Stack the two sets: train rows first, test rows appended below
data <- rbind(train_X, test_X)
# Extracting the names of the variables containing mean and standard deviation
# (same case-sensitive pattern as the one used to select the data columns)
features_subset <- grep("mean|std", features[, 2], value = TRUE)
# Transforming the resulting names into lowercase
features_subset <- tolower(features_subset)
# Removing the literal "()"; fixed = TRUE avoids regex escaping altogether
features_subset <- gsub("()", "", features_subset, fixed = TRUE)
# Removing every "-" and ","
features_subset <- gsub("[-,]", "", features_subset)
# Moving the first character of each name (the "t" or "f" prefix) to the end
features_subset <- paste0(
  substr(features_subset, 2, nchar(features_subset)),
  substr(features_subset, 1, 1)
)
# Expanding the abbreviation "acc" to "accelerometer" (first occurrence only)
features_subset <- sub("acc", "accelerometer", features_subset, fixed = TRUE)
# Expanding the abbreviation "gyro" to "gyroscope" (first occurrence only)
features_subset <- sub("gyro", "gyroscope", features_subset, fixed = TRUE)
# Removing the duplication of "body"
features_subset <- sub("bodybody", "body", features_subset, fixed = TRUE)
# The first 2 columns of data hold the activity and the subject; every
# remaining column must receive exactly one tidy feature name
stopifnot(length(features_subset) == ncol(data) - 2L)
names(data) <- c("activity", "subject", features_subset)
# Aggregating the data by taking the mean of every selected variable for each
# activity and subject; naming the grouping list gives the two group columns
# of the result their final names directly
tidydata <- aggregate(
  data[3:ncol(data)],
  by = list(activity = data$activity, subject = data$subject),
  FUN = mean
)
# Make sure the output folder exists so the export does not fail on a fresh
# copy of the dataset
if (!dir.exists("./Course Project")) {
  dir.create("./Course Project", recursive = TRUE)
}
# Exporting a comma separated file as the end result, i.e. the tidy data
write.table(
  tidydata,
  file = "./Course Project/tidydata.txt",
  sep = ",",
  row.names = FALSE
)
# Exporting a "draft" code book, i.e. all variable names in text format.
# A "*" is added before each variable name; the marker column is sized from
# features_subset itself so both columns always have the same length
features_subset_text <- cbind(
  rep("*", length(features_subset)),
  features_subset
)
write.table(
  features_subset_text,
  file = "./Course Project/CodeBook_draft.txt",
  quote = FALSE,
  col.names = FALSE,
  row.names = FALSE
)