diff --git a/bin/dump-past-answers.pl b/bin/dump-past-answers.pl
new file mode 100755
index 0000000000..d4dc3082a7
--- /dev/null
+++ b/bin/dump-past-answers.pl
@@ -0,0 +1,322 @@
+#!/usr/bin/env perl
+################################################################################
+# WeBWorK Online Homework Delivery System
+# Copyright © 2000-2023 The WeBWorK Project, https://github.com/openwebwork
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of either: (a) the GNU General Public License as published by the
+# Free Software Foundation; either version 2, or (at your option) any later
+# version, or (b) the "Artistic License" which comes with this package.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See either the GNU General Public License or the
+# Artistic License for more details.
+################################################################################
+
+=head1 NAME
+
+dump-past-answers.pl: This script dumps past answers from courses into a CSV
+file.
+
+=head1 SYNOPSIS
+
+dump-past-answers.pl [options]
+
+ Options:
+   -c|--course         Course from which to dump past answers
+   -f|--output-file    CSV file name to dump past answers to
+   -h|--help           Show this help
+
+The C<-c> option can be repeated multiple times to dump past answers from
+multiple courses into the same file. If no courses are given via this option,
+then past answers from all courses will be dumped.
+
+If the C<-f> option is not given then
+C<past-answers-E<lt>unix timeE<gt>.csv> will be used for the output file name.
+
+=head1 DESCRIPTION
+
+The CSV file that is generated has the following columns:
+
+ID info
+
+    0  - Answer ID
+    1  - Course ID
+    2  - Student ID
+    3  - Set ID
+    4  - Problem ID
+
+User Info
+
+    5  - Permission Level
+    6  - User Course Status
+
+Set Info
+
+    7  - Set type
+    8  - Open Date (unix time)
+    9  - Due Date (unix time)
+    10 - Answer Date (unix time)
+    11 - Final Set Grade (percentage)
+
+Problem Info
+
+    12 - Problem Path
+    13 - Problem Value
+    14 - Problem Max Attempts
+    15 - Problem Seed
+    16 - Attempted
+    17 - Final Incorrect Attempts
+    18 - Final Correct Attempts
+    19 - Final Status
+
+OPL Info
+
+    20 - Subject
+    21 - Chapter
+    22 - Section
+    23 - Keywords
+
+Answer Info
+
+    24 - Answer timestamp (unix time)
+    25 - Attempt Number
+    26 - Raw status of attempt (percentage of correct blanks)
+    27 - Number of Answer Blanks
+    28/29 etc... - The following columns will come in pairs. The first will be
+                   the text of the answer contained in the answer blank
+                   and the second will be the binary 0/1 status of the answer
+                   blank. There will be as many pairs as answer blanks.
+
+=cut
+
+use strict;
+use warnings;
+use feature 'say';
+
+BEGIN {
+    use Mojo::File qw(curfile);
+    use Env qw(WEBWORK_ROOT);
+    $WEBWORK_ROOT = curfile->dirname->dirname;
+}
+
+use lib "$ENV{WEBWORK_ROOT}/lib";
+
+use Getopt::Long qw(:config bundling);
+use Pod::Usage;
+use Text::CSV;
+
+use WeBWorK::CourseEnvironment;
+use WeBWorK::DB;
+use WeBWorK::Utils::CourseManagement qw(listCourses);
+use WeBWorK::Utils::Tags;
+
+# Number of fixed columns (0 - 27) that precede the per-blank answer/score pairs.
+use constant FIXED_COLUMNS => 28;
+
+# Get options. Show the usage message and exit if the command line is invalid.
+my @courses;
+my $output_file = "past-answers-" . time . ".csv";
+my $show_help;
+GetOptions('c|course=s' => \@courses, 'f|output-file=s' => \$output_file, 'h|help' => \$show_help)
+    or pod2usage(2);
+
+pod2usage(2) if $show_help;
+
+# With no explicit course list, dump every course that listCourses reports.
+@courses = listCourses(WeBWorK::CourseEnvironment->new({ webwork_dir => $ENV{WEBWORK_ROOT} })) unless @courses;
+
+# Return the four OPL columns (subject, chapter, section, keywords) for a problem
+# source file given relative to $templates_dir. Tag objects are cached in %$cache
+# by full path so that each file is only parsed once. Four empty strings are
+# returned when there is no source file, the file does not exist, or no tag data
+# was loaded, so values from an earlier problem never linger in the row.
+sub opl_tag_columns {
+    my ($templates_dir, $source_file, $cache) = @_;
+
+    return ('', '', '', '') unless $source_file;
+
+    my $file = "$templates_dir/$source_file";
+    $cache->{$file} = WeBWorK::Utils::Tags->new($file) if !defined $cache->{$file} && -e $file;
+
+    my $tags = $cache->{$file};
+    return ('', '', '', '') unless defined $tags;
+
+    return (
+        $tags->{DBsubject},
+        $tags->{DBchapter},
+        $tags->{DBsection},
+        defined $tags->{keywords} ? join(',', @{ $tags->{keywords} }) : ''
+    );
+}
+
+# Write one CSV row per past answer, for every course in @courses, to the open
+# file handle passed as the only argument. The column layout is described in the
+# DESCRIPTION section of the POD above.
+sub write_past_answers_csv {
+    my $outFH = shift;
+
+    my $csv = Text::CSV->new({ binary => 1, eol => "\n" }) or die "Cannot use CSV: " . Text::CSV->error_diag();
+
+    # Cache OPL tag data when it is looked up instead of looking up each file every time it appears as the source file
+    # for a past answer. This considerably speeds up this script.
+    my %OPL_tag_data;
+
+    for my $courseID (@courses) {
+        next if $courseID eq 'admin' || $courseID eq 'modelCourse';
+
+        # Load the course before anything is dereferenced. The constructors are wrapped in eval so that a course
+        # that fails to load is skipped with a warning instead of ending the whole dump.
+        my $ce = eval {
+            WeBWorK::CourseEnvironment->new({ webwork_dir => $ENV{WEBWORK_ROOT}, courseName => $courseID });
+        };
+        my $db = defined $ce ? eval { WeBWorK::DB->new($ce->{dbLayout}) } : undef;
+
+        unless (defined $ce && defined $db) {
+            warn("Unable to load course environment and database for $courseID" . ($@ ? ": $@" : ''));
+            next;
+        }
+
+        my %permissionLabels = reverse %{ $ce->{userRoles} };
+        my $templates_dir    = $ce->{courseDirs}{templates};
+
+        say "Dumping past answers for $courseID";
+
+        # Get all past answers for this course sorted by answer_id and organize them by user, set, and problem.
+        my %pastAnswers;
+        for ($db->getPastAnswersWhere({}, 'answer_id')) {
+            push(@{ $pastAnswers{ $_->user_id }{ $_->set_id }{ $_->problem_id } }, $_);
+        }
+
+        # The row is reused for every answer in the course. Columns are overwritten as the loops below descend
+        # from user to set to problem to answer.
+        my @row;
+        $row[1] = $courseID;
+
+        # Proctor users have ids of the form "set_id:..." and are excluded.
+        my @users = $db->getUsersWhere({ user_id => { not_like => 'set_id:%' } });
+
+        for my $user (@users) {
+            my $userID = $user->user_id;
+
+            # A user without a permission record gets an empty permission column rather than a fatal error.
+            my $permission = $db->getPermissionLevel($userID);
+            my $level      = defined $permission ? $permission->permission : undef;
+
+            $row[2] = $userID;
+            $row[5] = defined $level ? $permissionLabels{$level} : undef;
+            $row[6] = $ce->status_abbrev_to_name($user->{status});
+
+            # Collect [ set record, assignment type ] pairs. A gateway set contributes one entry per version, and a
+            # version whose own record carries no assignment type inherits the type of the set it is a version of.
+            # NOTE(review): versions are read with getUserSet; confirm that it returns the merged dates.
+            my @sets;
+            for my $merged_set ($db->getMergedSetsWhere({ user_id => $userID }, 'set_id')) {
+                my $type = $merged_set->assignment_type;
+                if (defined $type && $type =~ /gateway/) {
+                    my $setID = $merged_set->set_id;
+                    for my $version ($db->listSetVersions($userID, $setID)) {
+                        my $set_version = $db->getUserSet($userID, "$setID,v$version");
+                        next unless defined $set_version;
+                        push(@sets, [ $set_version, $set_version->assignment_type || $type ]);
+                    }
+                } else {
+                    push(@sets, [ $merged_set, $type ]);
+                }
+            }
+
+            for my $set_entry (@sets) {
+                my ($set, $assignment_type) = @$set_entry;
+                my $setID = $set->set_id;
+
+                $row[3]  = $setID;
+                $row[7]  = $assignment_type;
+                $row[8]  = $set->open_date;
+                $row[9]  = $set->due_date;
+                $row[10] = $set->answer_date;
+
+                my $where    = { user_id => $userID, set_id => $setID };
+                my @problems = ($assignment_type // '') =~ /gateway/
+                    ? $db->getMergedProblemVersionsWhere($where, 'problem_id')
+                    : $db->getMergedProblemsWhere($where, 'problem_id');
+
+                # Compute set score
+                my $total   = 0;
+                my $correct = 0;
+                for my $problem (@problems) {
+                    $total   += $problem->value;
+                    $correct += $problem->value * $problem->status;
+                }
+                $row[11] = $total ? $correct / $total : 0;
+
+                for my $problem (@problems) {
+                    my $problemID = $problem->problem_id;
+
+                    $row[4]  = $problemID;
+                    $row[12] = $problem->source_file;
+                    $row[13] = $problem->value;
+                    $row[14] = $problem->max_attempts;
+                    $row[15] = $problem->problem_seed;
+                    $row[16] = $problem->attempted;
+                    $row[17] = $problem->num_incorrect;
+                    $row[18] = $problem->num_correct;
+                    $row[19] = $problem->status;
+
+                    # Get OPL tag data. This always assigns all four columns, clearing any previous values.
+                    @row[ 20 .. 23 ] = opl_tag_columns($templates_dir, $row[12], \%OPL_tag_data);
+
+                    my $attempt_number = 0;
+                    for my $answer (@{ $pastAnswers{$userID}{$setID}{$problemID} // [] }) {
+                        ++$attempt_number;
+
+                        # If the source file for this answer is different from the one currently in the row,
+                        # then update the row and get the OPL tag data for this file.
+                        my $answer_source = $answer->source_file // '';
+                        if (($row[12] // '') ne $answer_source) {
+                            $row[12] = $answer_source;
+                            @row[ 20 .. 23 ] = opl_tag_columns($templates_dir, $answer_source, \%OPL_tag_data);
+                        }
+
+                        # Input answer specific info. The seed falls back to the problem seed for every answer so
+                        # that a seed recorded with an earlier answer is not carried over to a later one.
+                        my $answer_seed = $answer->problem_seed;
+                        $row[0]  = $answer->answer_id;
+                        $row[15] = defined $answer_seed && $answer_seed ne '' ? $answer_seed : $problem->problem_seed;
+                        $row[24] = $answer->timestamp;
+                        $row[25] = $attempt_number;
+
+                        my @scores  = split('', $answer->scores // '');
+                        my @answers = split("\t", $answer->answer_string // '', -1);
+
+                        # Skip answer processing if the number of scores isn't the same as the number of answers.
+                        next if @scores != @answers;
+
+                        my $num_blanks = scalar(@scores);
+
+                        # Compute the raw status
+                        my $score = 0;
+                        $score += $_ for @scores;
+                        $row[26] = $num_blanks ? $score / $num_blanks : 0;
+                        $row[27] = $num_blanks;
+
+                        # Drop the answer/score pairs left over from the previous answer before adding this
+                        # answer's pairs. Otherwise an answer with fewer blanks would keep stale trailing columns.
+                        splice(@row, FIXED_COLUMNS);
+                        push(@row, map { ($answers[$_], $scores[$_]) } 0 .. $#scores);
+
+                        $csv->print($outFH, \@row) or warn "Couldn't print row";
+                    }
+                }
+            }
+        }
+    }
+
+    return;
+}
+
+say "Dumping answer data to $output_file";
+open(my $outFH, '>:encoding(UTF-8)', $output_file) or die("Couldn't open file $output_file: $!");
+write_past_answers_csv($outFH);
+close($outFH) or die("Couldn't close $output_file: $!");
+say 'Done dumping data';
diff --git a/bin/dump_past_answers b/bin/dump_past_answers
deleted file mode 100755
index 1228da7ff8..0000000000
--- a/bin/dump_past_answers
+++ /dev/null
@@ -1,329 +0,0 @@
-#!/usr/bin/env perl
-
-################################################################################
-# WeBWorK Online Homework Delivery System
-# Copyright © 2000-2023 The WeBWorK Project, https://github.com/openwebwork
-#
-# This program is free software; you can redistribute it and/or modify it under
-# the terms of either: (a) the GNU General Public License as published by the
-# Free Software Foundation; either version 2, or (at your option) any later
-# version, or (b) the "Artistic License" which comes with this package.
-#
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See either the GNU General Public License or the
-# Artistic License for more details.
-################################################################################
-
-# This script dumps the course information from all unarchived courses into
-# a single csv file. The csv file has the following columns.
-# -# ID Info -# 0 - Answer ID hash -# 1 - Course ID hash -# 2 - Student ID hash -# 3 - Set ID hash -# 4 - Problem ID hash -# 5 - Timestamp -# User Info -# 6 - Permission Level -# 7 - Final Status -# Set Info -# 8 - Set type -# 9 - Open Date (unix time) -# 10 - Due Date (unix time) -# 11 - Answer Date (unix time) -# 12 - Final Set Grade (percentage) -# Problem Info -# 13 - Problem Path -# 14 - Problem Value -# 15 - Problem Max Attempts -# 16 - Problem Seed -# 17 - Attempted -# 18 - Final Incorrect Attempts -# 19 - Final Correct Attempts -# 20 - Final Status -# OPL Info -# 21 - Subject -# 22 - Chapter -# 23 - Section -# 24 - Keywords -# Answer Info -# 25 - Answer timestamp (unix time) -# 26 - Attempt Number -# 27 - Raw status of attempt (percentage of correct blanks) -# 28 - Number of Answer Blanks -# 29/30 etc... - The following columns will come in pairs. The first will be -# the text of the answer contained in the answer blank -# and the second will be the binary 0/1 status of the answer -# blank. There will be as many pairs as answer blanks. - -use strict; - -BEGIN { - use Mojo::File qw(curfile); - use Env qw(WEBWORK_ROOT); - - $WEBWORK_ROOT = curfile->dirname->dirname; -} - -use lib "$ENV{WEBWORK_ROOT}/lib"; - -use WeBWorK::CourseEnvironment; - -use WeBWorK::DB; -use WeBWorK::Utils::CourseIntegrityCheck; -use WeBWorK::Utils::CourseManagement qw/listCourses/; -use WeBWorK::Utils::Tags; -use WeBWorK::PG; - -use Text::CSV; -use Digest::SHA qw(sha256_hex); -use Net::Domain; - -# Deal with options -my $output_file; -my $zip_result = 1; -my $upload_result = 0; - -my $domainname = Net::Domain::domainname; -my $time = time(); - -# define and open the output file. -if (!$output_file) { - $output_file = "$domainname-$time.csv"; -} - -my $salt; -my $SALTFILE; -my $saltfilename = $ENV{WEBWORK_ROOT} . 
'/.dump_past_answers_salt'; - -if (-e $saltfilename) { - open($SALTFILE, '<', $saltfilename) || die("Couldn't open salt file."); - $salt = <$SALTFILE>; - close $SALTFILE; -} else { - $salt = ''; - for (my $i = 0; $i < 32; $i++) { - $salt .= ('.', '/', '0' .. '9', 'A' .. 'Z', 'a' .. 'z')[ rand 64 ]; - } - - open($SALTFILE, '>', $saltfilename) || die("Couldn't open salt file."); - print $SALTFILE $salt; - close $SALTFILE; -} - -my $OUT; -open($OUT, '>', $output_file) || die("Couldn't open file $output_file"); - -print "Dumping answer data to $output_file\n"; - -# set up various variables and utilities that we will need -my ($db, @wheres); -my $max_answer_blanks = 0; -my $csv = new Text::CSV->new({ binary => 1 }) - or die "Cannot use CSV: " . Text::CSV->error_diag(); -$csv->eol("\n"); - -my $ce = WeBWorK::CourseEnvironment->new({ - webwork_dir => $ENV{WEBWORK_ROOT}, -}); - -my @courses = listCourses($ce); -my %permissionLabels = reverse %{ $ce->{userRoles} }; - -# this is our row array and is the main structure -my @row; - -# go through courses -foreach my $courseID (@courses) { - next if $courseID eq 'admin' || $courseID eq 'modelCourse'; - - $ce = WeBWorK::CourseEnvironment->new({ - webwork_dir => $ENV{WEBWORK_ROOT}, - courseName => $courseID, - }); - $db = new WeBWorK::DB($ce->{dbLayout}); - - unless (defined($ce) && defined($db)) { - warn("Unable to load up database for $courseID"); - next; - } - - print "Dumping $courseID\n"; - - my $templateDir = $ce->{courseDirs}->{templates}; - - my $sCourseID = sha256_hex($salt . $domainname . $courseID); - - $row[1] = $sCourseID; - $row[5] = $time; - - my @userIDs = $db->listUsers(); - my @users = $db->getUsers(@userIDs); - - # go through users - foreach my $user (@users) { - my $userID = $user->user_id; - - #skip proctor users - next if $user->user_id =~ /^set_id:/; - - my $sUserID = sha256_hex($salt . $domainname . $courseID . 
$userID); - - # get user specific info - $row[2] = $sUserID; - my $permissionLevel = $db->getPermissionLevel($userID); - $row[6] = $permissionLabels{ $permissionLevel->permission }; - $row[7] = $ce->status_abbrev_to_name($user->{status}); - - my @setIDs = $db->listUserSets($userID); - @wheres = map { [ $userID, $_ ] } @setIDs; - my @sets = $db->getMergedSets(@wheres); - - # go through sets - foreach my $set (@sets) { - # skip gateways - if ($set->assignment_type =~ /gateway/ - && $set->set_id !~ /,v\d+$/) - { - next; - } - - my $setID = $set->set_id; - my $sSetID = sha256_hex($salt . $domainname . $courseID . $setID); - - # get set specific info - $row[3] = $sSetID; - $row[8] = $set->assignment_type; - $row[9] = $set->open_date; - $row[10] = $set->due_date; - $row[11] = $set->answer_date; - - my @problemIDs = $db->listUserProblems($userID, $setID); - @wheres = map { [ $userID, $setID, $_ ] } @problemIDs; - my @problems = $db->getMergedProblems(@wheres); - - # compute set score - my $total = 0; - my $correct = 0; - foreach my $problem (@problems) { - $total += $problem->value(); - $correct += $problem->value * $problem->status; - } - $row[12] = $total ? $correct / $total : 0; - - # go through each problem - foreach my $problem (@problems) { - my $problemID = $problem->problem_id; - my $sProblemID = sha256_hex($salt . $domainname . $courseID . $userID . $setID . $problemID); - - # print problem specific info - $row[4] = $sProblemID; - $row[13] = $problem->source_file; - $row[14] = $problem->value; - $row[15] = $problem->max_attempts; - $row[16] = $problem->problem_seed; - $row[17] = $problem->attempted; - $row[18] = $problem->num_incorrect; - $row[19] = $problem->num_correct; - $row[20] = $problem->status; - - # get OPL data - my $file = $templateDir . '/' . 
$problem->source_file(); - if (-e $file) { - my $tags = WeBWorK::Utils::Tags->new($file); - $row[21] = $tags->{DBsubject}; - $row[22] = $tags->{DBchapter}; - $row[23] = $tags->{DBsection}; - $row[24] = defined($tags->{keywords}) ? join(',', @{ $tags->{keywords} }) : ''; - } - - my @answerIDs = $db->listProblemPastAnswers($userID, $setID, $problemID); - my @answers = $db->getPastAnswers(\@answerIDs); - - # go through attempts - my $attempt_number = 0; - foreach my $answer (@answers) { - #reset the row length because it can change; - @row = splice(@row, 0, 28); - my $answerID = $answer->answer_id; - my $sAnswerID = - sha256_hex($salt . $domainname . $courseID . $userID . $setID . $problemID . $answerID); - $attempt_number++; - - # if the source file changed redo that info - if ($row[13] != $answer->source_file) { - $row[13] = $answer->source_file; - $file = $templateDir . '/' . $answer->source_file(); - if (-e $file) { - my $tags = WeBWorK::Utils::Tags->new($file); - $row[21] = $tags->{DBsubject}; - $row[22] = $tags->{DBchapter}; - $row[23] = $tags->{DBsection}; - $row[24] = defined($tags->{keywords}) ? join(',', @{ $tags->{keywords} }) : ''; - } - } - - # input answer specific info - $row[0] = $sAnswerID; - $row[25] = $answer->timestamp; - $row[26] = $attempt_number; - - my @scores = split('', $answer->scores, -1); - my @answers = split("\t", $answer->answer_string, -1); - - # if the number of scores isn't the same as the number of - # answers we should skip - if ($#scores != $#answers) { - next; - } - my $num_blanks = scalar(@scores); - - $max_answer_blanks = $num_blanks - if ($num_blanks > $max_answer_blanks); - - # compute the raw status - my $score = 0; - foreach (@scores) { - $score += $_; - } - - $row[27] = $num_blanks ? $score / $num_blanks : 0; - - # we leave the computed status blank for now. 
- - $row[28] = $num_blanks; - - for (my $i = 0; $i < $num_blanks; $i++) { - $row[ 29 + 2 * $i ] = $answers[$i]; - $row[ 30 + 2 * $i ] = $scores[$i]; - } - - #form the csv string and print - $csv->print($OUT, \@row) || warn "Couldn't print row"; - } - } - } - } -} - -print "Done dumping data\n"; - -close($OUT) or die("Couldn't close $output_file"); - -if ($zip_result) { - print "Zipping file\n"; - - `gzip $output_file`; - - $output_file = $output_file . ".gz"; -} - -if ($upload_result) { - print "Uploading file\n"; - - `echo "put $output_file" | sftp -oStrictHostKeyChecking=no -oUserKnownHostsFile=/dev/null -oPort=57281 wwdata\@52.88.32.79`; -} - -1;