-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathattendance_report.rmd
281 lines (232 loc) · 7.5 KB
/
attendance_report.rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
---
params:
  d: !r Sys.Date()
  input_file: "dummy.pdf"
output:
  pdf_document:
    latex_engine: xelatex
    keep_tex: yes
    keep_md: yes
header-includes:
  - \usepackage{fontspec}
  - \setmainfont{Lato}
---
```{r setup, include=FALSE}
library(pdftools)
library(magrittr)
library(lubridate)
library(tibble)
library(dplyr)
library(ggplot2)
library(tidyr)
library(stringr)
library(stringi)
# Marker substring identifying report lines that hold student records:
# build_df() keeps only lines in which some token contains this string.
a0 <- "A00000"
# Holiday to exclude from the report, because CAMS does not exclude holidays
# from its reports. Written as "%m-%d" to match the Date column build_df()
# produces.
# MLK Day
# holiday <- "2020-01-20"
holiday <- "01-20"
# Extract the raw text of a PDF.
#
# Args:
#   input_file: path to the PDF, passed straight to pdftools::pdf_text().
#
# Returns: a list with one element per string returned by pdf_text(), each
#   element being that string split on newlines.
pre_process <- function(input_file) {
  page_text <- pdf_text(input_file)
  strsplit(page_text, split = "\n")
}
# Get the course name from the raw data.
#
# Args:
#   raw: list of character vectors, as produced by pre_process().
#
# Returns: the second line of the first element of `raw` (NA if that element
#   has fewer than two lines).
get_course_name <- function(raw) {
  first_page <- raw[[1]]
  first_page[2]
}
# Extract the student's name from a tokenized report line.
#
# Args:
#   a_line_list: character vector of whitespace-separated tokens.
#
# Returns: tokens 3 and 2 (in that order) joined by a space, with the first
#   comma removed. Presumably the report prints "Last, First" in positions
#   2 and 3 -- confirm against the report layout.
get_name <- function(a_line_list) {
  name_parts <- rev(a_line_list[2:3])
  joined <- paste(name_parts, collapse = " ")
  str_remove(joined, ",") # Strip out the comma
}
# Extract the presence status from a tokenized report line.
#
# Args:
#   a_line_list: character vector of whitespace-separated tokens.
#
# Returns: one of "Left Early", "Present", "Absent", "Other", "Tardy" or
#   "Late", chosen by the first keyword (in that priority order) that occurs
#   as a substring of any token. Returns NA_character_ when no keyword is
#   found, so callers that combine the result with other fields always get
#   exactly one value.
get_presence <- function(a_line_list) {
  # Keyword to look for -> status to report. Order matters: earlier entries
  # win when a line matches more than one keyword.
  statuses <- c(
    Left = "Left Early",
    Present = "Present",
    Absent = "Absent",
    Other = "Other",
    Tardy = "Tardy",
    Late = "Late"
  )
  for (keyword in names(statuses)) {
    # Literal, case-sensitive substring match. The keywords are plain ASCII,
    # so byte-wise matching is equivalent and tolerates oddly encoded tokens.
    found <- grepl(keyword, a_line_list, fixed = TRUE, useBytes = TRUE)
    if (any(found)) {
      return(statuses[[keyword]])
    }
  }
  NA_character_
}
# Extract the date from a tokenized report line.
#
# Args:
#   a_line_list: character vector of whitespace-separated tokens.
#
# Returns: the first token that lubridate::mdy() can parse, unchanged (still
#   a string), or NA_character_ when no token parses or the input is empty.
get_date <- function(a_line_list) {
  for (token in a_line_list) {
    # Most tokens are not dates, so mdy() warns on nearly every call; silence
    # only the parse itself rather than the whole function body.
    parsed <- suppressWarnings(mdy(token))
    if (!is.na(parsed)) {
      return(token)
    }
  }
  NA_character_
}
# Build the master data frame of attendance records.
#
# Args:
#   input_data: list with one character vector of lines per page, as
#     produced by pre_process().
#   flag: substring that marks a line as a student record. Defaults to the
#     file-level `a0`.
#   holidays: character vector of "%m-%d" dates to exclude. Defaults to the
#     file-level `holiday`.
#
# Returns: a data frame with character columns Student (title case),
#   Presence and Date ("%m-%d"), one row per student record in page/line
#   order. Records on a holiday, and records whose date could not be parsed,
#   are dropped.
build_df <- function(input_data, flag = a0, holidays = holiday) {
  # Flatten the pages into one vector of lines (order preserved). A blank
  # page simply contributes no lines.
  all_lines <- as.character(unlist(input_data, use.names = FALSE))
  tokens_by_line <- strsplit(all_lines, "\\s+")

  # A line holds student data when any of its tokens contains the flag.
  is_record <- vapply(
    tokens_by_line,
    function(tokens) any(stri_detect_fixed(tokens, flag), na.rm = TRUE),
    logical(1)
  )
  record_tokens <- tokens_by_line[is_record]

  # Apply one extractor to every record line. An extractor that finds
  # nothing (NULL or zero-length) is recorded as NA so each record always
  # yields exactly one value per column.
  extract_field <- function(extractor) {
    vapply(
      record_tokens,
      function(tokens) {
        value <- extractor(tokens)
        if (length(value) == 0L) NA_character_ else as.character(value[[1]])
      },
      character(1)
    )
  }

  # Build the whole frame in one step instead of appending row by row.
  df <- data.frame(
    Student = str_to_title(extract_field(get_name)),
    Presence = extract_field(get_presence),
    Date = extract_field(get_date),
    stringsAsFactors = FALSE
  )

  # Normalize dates to month-day strings so they compare against `holidays`.
  if (nrow(df) > 0L) {
    df$Date <- format(mdy(df$Date), format = "%m-%d")
  }

  # Exclude holidays. Rows with an unparseable date are excluded as well,
  # which is what comparing NA against the holiday did implicitly before.
  filter(df, !is.na(Date), !(Date %in% holidays))
}
# Build the table of attendance by date.
#
# Args:
#   input_df: data frame with Date and Presence columns, one row per
#     student per day (as returned by build_df()).
#
# Returns: an ungrouped tibble with one row per Date and columns
#   Date, In_Class (students in class at any time that day), Total (all
#   records that day) and Percent (In_Class / Total, rounded to a whole
#   number).
build_date_df <- function(input_df) {
  # Statuses that count as having been in class; the same set
  # build_student_df() uses.
  in_class_statuses <- c("Present", "Late", "Left Early", "Tardy")
  records <- ungroup(as_tibble(input_df))

  # Total number of enrolled students each day.
  totals <- count(records, Date, name = "Total")

  # Number who were in class at any time each day.
  attended <- records %>%
    filter(Presence %in% in_class_statuses) %>%
    count(Date, name = "In_Class")

  # Join by Date rather than binding columns by position, so a day on which
  # nobody attended shows up as 0 instead of shifting or breaking the table.
  totals %>%
    left_join(attended, by = "Date") %>%
    replace_na(replace = list(In_Class = 0L)) %>%
    mutate(Percent = round(In_Class / Total * 100)) %>%
    select(Date, In_Class, Total, Percent)
}
# Build the line plot of attendance percentage by date.
#
# Args:
#   input_df: data frame with Date and Percent columns (as returned by
#     build_date_df()).
#
# Returns: a ggplot object with Date on a discrete x axis and Percent on a
#   y axis fixed to 0-100.
build_date_plot <- function(input_df) {
  # Refer to columns by bare name inside aes(): ggplot2 discourages
  # `input_df$col` there, and bare names give the y axis the label "Percent".
  # group = 1 joins all dates into one line despite the discrete x axis.
  ggplot(input_df, aes(x = factor(Date), y = Percent, group = 1)) +
    geom_line() +
    theme(axis.text.x = element_text(angle = 60, vjust = 0.5)) +
    xlab("Date") +
    ylim(0, 100)
}
# Build the attendance-by-date plot as an interactive Plotly figure.
#
# Args:
#   input_df: data frame with Date and Percent columns (as returned by
#     build_date_df()).
#
# Returns: a plotly figure (lines + markers) with a 0-100 percent y axis.
#
# The plotly package is not attached by this document, so its functions are
# called with an explicit namespace; an unqualified layout() would resolve
# to graphics::layout().
build_date_plotly <- function(input_df) {
  if (!requireNamespace("plotly", quietly = TRUE)) {
    stop(
      "Package 'plotly' is required for build_date_plotly().",
      call. = FALSE
    )
  }
  fig <- plotly::plot_ly(
    x = input_df$Date,
    y = input_df$Percent,
    type = "scatter",
    mode = "lines+markers"
  )
  plotly::layout(
    fig,
    title = "Percentage of Enrolled Students Attending",
    xaxis = list(title = "Date"),
    yaxis = list(
      title = "Percent In Classroom",
      ticksuffix = "%",
      range = c(0, 100)
    ),
    margin = list(t = 108)
  )
}
# Subset the master data frame to currently enrolled students.
#
# A student counts as currently enrolled when they have a record on the
# latest Date present in `input_df`.
# NOTE(review): build_df() stores Date as "%m-%d" text, so max() here is a
# string comparison -- confirm reports never span a year boundary.
#
# Args:
#   input_df: data frame with Student and Date columns.
#
# Returns: the rows of `input_df` belonging to those students.
get_current_df <- function(input_df) {
  # Students appearing on the most recent date.
  latest_roster <- pull(filter(input_df, Date == max(Date)), Student)
  # Keep every record, on any date, for those students.
  filter(input_df, Student %in% latest_roster)
}
# Build the table of attendance by student.
#
# Args:
#   input_df: data frame with Student and Presence columns.
#
# Returns: a data frame with one row per student and columns Student,
#   In_Class (days Present, Late, Left Early or Tardy), Absent (days
#   Absent) and Percent (In_Class / (In_Class + Absent), rounded to a whole
#   number). Students with no record in either category do not appear.
build_student_df <- function(input_df) {
  by_student <- group_by(input_df, Student)

  # Days each student was in class.
  attended_days <- filter(
    by_student,
    Presence %in% c("Present", "Late", "Left Early", "Tardy")
  )
  inc_counts <- count(attended_days, name = "In_Class", .drop = FALSE)

  # Days each student was absent.
  absent_days <- filter(by_student, Presence == "Absent")
  abs_counts <- count(absent_days, name = "Absent", .drop = FALSE)

  # Full outer merge: a student missing from one table gets 0 for it.
  combined <- merge(inc_counts, abs_counts, by = "Student", all = TRUE)
  combined <- replace_na(combined, replace = list(In_Class = 0, Absent = 0))
  mutate(combined, Percent = round(In_Class / (In_Class + Absent) * 100))
}
# Build the wide table of attendance by student and date.
#
# Args:
#   input_df: data frame with Student, Presence and Date columns.
#
# Returns: `input_df` pivoted wider, with one column per Date whose cells
#   hold the first two characters of the Presence status.
build_date_student_df <- function(input_df) {
  # Abbreviate statuses so the wide table stays narrow.
  input_df$Presence <- substr(input_df$Presence, 1, 2)
  pivot_wider(input_df, names_from = Date, values_from = Presence)
}
library(kableExtra)
```
```{r data_prep, echo=FALSE}
# Read the report PDF and pull out the course heading.
prepped_data <- pre_process(params$input_file)
course_name <- get_course_name(prepped_data)
# Course code (two capital letters followed by three digits) for the title.
title <- str_extract(course_name, "[A-Z]{2}[0-9]{3}")
# Master table of attendance records and the per-date summary.
df <- build_df(prepped_data)
date_df <- build_date_df(df)
# Per-student tables are restricted to currently enrolled students.
current_df <- get_current_df(df)
student_df <- build_student_df(current_df)
student_date_df <- build_date_student_df(current_df)
# Plot of the per-date attendance percentage.
date_plot <- build_date_plot(date_df)
```
---
title: "Attendance Report: `r title`"
---
## `r course_name`
## Attendance by Date
```{r by_date, echo=FALSE, fig.height=3}
# Show the attendance-by-date plot, then the same data as a table.
date_plot
kable(date_df)
```
\pagebreak
## `r course_name`
## Attendance by Student
```{r by_student, echo=FALSE}
# Per-student attendance table (students built from current_df only).
kable(student_df)
```
\pagebreak
## `r course_name`
## Attendance by Student and Date
```{r by_student_and_date, echo=FALSE}
# Wide table built by build_date_student_df(): one column per date.
kable(student_date_df)
```