jfrog · espoelstra · Aug 9, 2018 · Aug 10, 2018
diff --git a/requestToUsage/README.md b/requestToUsage/README.md
@@ -1,9 +1,20 @@
 #requestToUsage.sh
-This script is used with the syntax:
-```request.sh request.log```
+
+To quickly grab these scripts without cloning or downloading and unpacking a zip:
+
+```
+curl -L -O https://raw.githubusercontent.com/jfrog/artifactory-scripts/master/requestToUsage/requestToUsage.sh && chmod +x requestToUsage.sh
+# multiLog script requires requestToUsage, but multiLog is only needed if you want to parse all `request.*` logs in a folder in one run
+curl -L -O https://raw.githubusercontent.com/jfrog/artifactory-scripts/master/requestToUsage/multiLogParse.sh && chmod +x multiLogParse.sh
+```
+
+These scripts are used with the syntax:
+`./requestToUsage.sh request.log [optional-output-prefix]`
 (replace request.log with the name of your request log file)
+or `./multiLogParse.sh ./logs [optional-output-prefix]`
+(the `multiLogParse.sh` script currently has `request.*` hardcoded for the file glob to match in the folder path given)
 
-It outputs a file request.csv which you can open in excel.
-The furthest right field is your overall usage in gigabytes over the period of the request log.  You need to figure out the difference in the dates and turn it into a 30-day figure to get a monthly usage figure.
+The script has been updated to provide a day-by-day summary of data transferred, as well as a total for the entire period covered by the log(s). It doesn't currently calculate averages. That could be considered in the future; the challenge is that the script analyzes one log file at a time, each of which only covers a few days, and it doesn't track state across multiple files, so averaging would probably be broken out into a separate script.
 
-TODO: Automate date difference calculation to monthly statistic
+It outputs one or more files of the pattern `[optional-output-prefix-]FIRSTDATEINLOG.csv` which you can open in Excel/OpenOffice Calc.
+The bottom-right field is your overall usage in gigabytes over the period of the request log.  If the summaries printed by the script (described above) aren't sufficient, you need to work out how many days the log covers and scale that figure to 30 days to get a monthly usage figure.
diff --git a/requestToUsage/multiLogParse.sh b/requestToUsage/multiLogParse.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
 LOC=$1
-${LOC:-"."}
+: "${LOC:=.}"  # ':=' assigns the default, so LOC becomes the current directory when no path argument is given
 if [ ! -d $LOC ]; then
-    echo "Usage is: ./multiLogParse.sh /path/to/logs/ <OPTIONAL_PREFIX>"
+    echo "Usage is: ./multiLogParse.sh /path/to/logs/ [optional prefix string for output files]"
 else
     echo "Using directory $LOC as LOC"
 fi

diff --git a/requestToUsage/requestToUsage.sh b/requestToUsage/requestToUsage.sh
@@ -1,26 +1,45 @@
 #!/bin/bash
 FILE=$1
 if [ ! -f $FILE ]; then
-    echo "Correct usage is: requestToUsage.sh request.log [optional prefix string]"
+    echo "Correct usage is: requestToUsage.sh request.log [optional prefix string for output file]" >&2; exit 1  # stop instead of processing a missing file
 fi
 PREFIX=$2
 
+calculate_gigabytes () {
+  # Prints the byte count in $1 scaled to the largest fitting unit (B..PB, 2 decimals, capped at PB); adapted from https://unix.stackexchange.com/a/374877
+  printf '%s\n' "$1" | awk '{ split( "B KB MB GB TB PB" , v ); s=1; while( $1>=1024 && s<6 ){ $1/=1024; s++ } printf "%.2f %s", $1, v[s] }'
+}
+
+# Gets first 8 characters of first timestamp ie YYYYMMDD
 OUTPUT=${PREFIX:+${PREFIX}-}$(head -c 8 $FILE).csv
 
-awk '!/0$/' $FILE > $OUTPUT
-if sed --version 
-then sed "s/[|]/,/g" $OUTPUT -i
-else
-	echo "Please ignore the above error message from sed, switching to gsed."
-	gsed "s/[|]/,/g" $OUTPUT -i
-fi
-echo "Successfully outputted to $OUTPUT"
-if date --version
-then echo 'date'
-else 
-	echo "Please ignore the above error message from date, switching to gdate."
-	echo 'gdate'
-fi
+# Converts the pipe-delimited request log to CSV in one awk pass:
+#   - gsub (available in most awk implementations) replaces every | with ,
+#   - the !/,0$/ pattern drops 0-byte requests (lines whose last field is 0)
+awk '{ gsub(/[|]/, ",") }; !/,0$/' "$FILE" > "$OUTPUT"
+
+# Report the conversion once, including the name of the CSV that was written
+# (file names are quoted above so paths with spaces are not word-split)
+echo "Successfully reformatted $FILE to CSV as $OUTPUT"
+
+# Array of the 8-character date stamps at the start of column one (consecutive repeats collapsed), used by the per-day loop and the summary
+DATES_IN_FILE=($(cut -d',' -f1 $OUTPUT | cut -c1-8 | uniq))
+
+# Sum each day's last column in a single awk pass instead of one shell addition per request
+for eachday in "${DATES_IN_FILE[@]}"; do
+  DAY_TOTAL=$(awk -F',' -v day="$eachday" \
+    'index($0, day) == 1 { sum += $NF } END { printf "%.0f", sum }' "$OUTPUT")
+  # index(...) == 1 is a literal starts-with match; %.0f prints the sum as a plain integer
+  echo "$eachday had $(calculate_gigabytes "$DAY_TOTAL") transferred"
+done
+# Grand total of the last CSV column (bytes) for the whole file, summed in one awk pass
+# rather than one shell addition per request; %.0f prints it as a plain integer.
+TOTAL=$(awk -F',' '{ sum += $NF } END { printf "%.0f", sum }' "$OUTPUT")
+FILE_TOTAL=$(calculate_gigabytes "$TOTAL")
+
+# The first and last array entries are the first and last dates found in the file
+echo "Approximate data transfer total between ${DATES_IN_FILE[0]} and ${DATES_IN_FILE[@]: -1} is $FILE_TOTAL"
+
 echo "0,0,0,0,0,0,0,0,0,0,=SUM(J:J)/(1024^3)" >> $OUTPUT
 echo "Added calculation line."
 echo "Open $OUTPUT in excel or a similar spreadsheet program"