-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreateMerged.R
84 lines (60 loc) · 2.43 KB
/
createMerged.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#' Build a flat table from a repo's toc.yml, optionally merged with metadata
#'
#' Reads `toc.yml` from `myrepo`, expands its nested `items` entries one level
#' at a time with `expandItems()`, renames the resulting columns to
#' `n1, f1, n2, f2, ...`, and collapses the per-level `f*` columns into a
#' single `filename` column.
#'
#' `expandItems()` and `getMetadata()` are loaded with `source()` from
#' "expandItems.R" and "getMetadata.R", so those files must be reachable from
#' the current working directory.
#'
#' @param myrepo Path to the directory that contains `toc.yml`. It is also
#'   passed unchanged to `getMetadata()` when `merge` is `TRUE`.
#' @param merge If `TRUE` (the default), left-join the result of
#'   `getMetadata(myrepo)` onto the table by `filename`; otherwise return the
#'   TOC table alone.
#' @return A data.frame with the `n*` columns and a `filename` column, plus
#'   whatever columns the metadata join contributes when `merge` is `TRUE`.
createMerged <- function(myrepo, merge = TRUE) {
  # Form the TOC path from myrepo and fail early with a clear message when it
  # is missing. URL-like paths are not checked, because file.exists() cannot
  # test them and the reader may still be able to open them.
  toc_path <- file.path(myrepo, "toc.yml")
  is_url <- grepl("^(https?|ftp|file)://", toc_path)
  if (!is_url && !file.exists(toc_path)) {
    stop("Cannot find TOC file: ", toc_path, call. = FALSE)
  }

  # Libraries and helper scripts used
  library(yaml)
  library(data.table)
  library(dplyr)
  source("expandItems.R")
  source("getMetadata.R")

  ## STEP 1: Read the TOC and process it
  toc <- read_yaml(toc_path) # initial data: one list element per heading
  dt <- rbindlist(toc, fill = TRUE) # one row for each first-level item

  # Each call to expandItems() is expected to add one more level of the TOC;
  # keep going as long as the result still has an items.y column.
  # The first call is outside the loop because items.y does not exist yet.
  dt <- expandItems(dt)
  level <- 2 # keep track of how many levels have been expanded
  while ("items.y" %in% names(dt)) {
    # expandItems() works on a column called "items", so rename first
    names(dt)[names(dt) == "items.y"] <- "items"
    level <- level + 1
    dt <- expandItems(dt)
  }
  dt$items <- NULL # get rid of the remaining items column

  # Nicer names based on the level, interleaved as n1, f1, n2, f2, ...
  f <- paste0("f", seq_len(level))
  n <- paste0("n", seq_len(level))
  dtnames <- c(rbind(n, f))
  if (length(names(dt)) != length(dtnames)) {
    # A bare sprintf() inside a function is silently discarded; signal the
    # problem instead of carrying on with the wrong column names.
    stop(
      sprintf("Something's wrong! You should have %i columns in dt, but you have %i. Stop and debug", length(dtnames), length(names(dt))),
      call. = FALSE
    )
  }
  names(dt) <- dtnames

  ## STEP 2: Clean up and consolidate the filename into a single column
  dt[is.na(dt)] <- "" # blanks instead of NA, so pasting does not yield "NA"
  # Change data.table to data.frame so the f* columns can be selected by name
  dt <- data.frame(dt, stringsAsFactors = FALSE)
  # Only one of the f* columns holds the filename on a given row, so
  # concatenating them leaves just that value.
  dt$filename <- do.call(paste0, unname(as.list(dt[f])))
  # Drop the per-level filename columns; drop = FALSE keeps a data.frame
  dt <- dt[, !(names(dt) %in% f), drop = FALSE]

  ## STEP 3: Write the csv file - uncomment the next line and stop here if you
  ## wish (note: `writefile` is not defined in this function; set it first)
  # write.csv(dt, file = writefile, na = "")

  ## CONTINUE ON to add metadata to the table
  if (merge) {
    metadata <- getMetadata(myrepo)
    left_join(dt, metadata, by = "filename")
  } else {
    dt
  }
}