From 985148f342d12114dca6d4610054bfd18c54fc76 Mon Sep 17 00:00:00 2001 From: Andrew Polk Date: Fri, 18 Oct 2024 16:28:29 -0700 Subject: [PATCH] Add other columns and post-review changes (BL-13994) --- cloud/main.js | 172 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 133 insertions(+), 39 deletions(-) diff --git a/cloud/main.js b/cloud/main.js index 58757118f..86b295446 100644 --- a/cloud/main.js +++ b/cloud/main.js @@ -171,7 +171,8 @@ Parse.Cloud.job("updateLanguageRecords", async (request) => { request.message("Completed successfully."); }); -// A background job to populate the analytics_* fields in the books table. +// A background job to populate the analytics_* fields in our books table +// from api.bloomlibrary.org/stats. Data comes from our postgresql analytics database populated from Segment. // // This is scheduled on Azure under bloom-library-maintenance-{prod|dev}-daily. // You can also run it manually via REST: @@ -179,45 +180,97 @@ Parse.Cloud.job("updateLanguageRecords", async (request) => { Parse.Cloud.job("updateBookAnalytics", async (request) => { request.log.info("updateBookAnalytics - Starting."); - function getConnectionInfo() { + // api.bloomlibrary.org/stats looks up analytics based on a parse server query. + // The api needs the appropriate parse server url and key so it can call back to the right parse server + // instance to get the list of books we want data about from the postgresql database. + function getCurrentInstanceInfoForApiQuery() { return { - url: process.env.SERVER_URL + "/", - headers: { - "X-Parse-Application-Id": process.env.APP_ID, - }, + url: process.env.SERVER_URL, + appId: process.env.APP_ID, }; - // When testing locally, you'll need to override using something like + // But when testing locally, you need to explicitly set which environment you want + // to collect analytics data for. 
You'll need to override using something like // return { - // url: "https://dev-server.bloomlibrary.org/parse/", - // headers: { - // "X-Parse-Application-Id": - // "yrXftBF6mbAuVu3fO6LnhCJiHxZPIdE7gl1DUVGR", - // }, + // url: "https://dev-server.bloomlibrary.org/parse", + // appId: "yrXftBF6mbAuVu3fO6LnhCJiHxZPIdE7gl1DUVGR", // }; } - function getNumberOrZero(value) { + function getNumberOrZero(value, isDecimal = false) { if (!value) return 0; + + if (isDecimal) { + const number = parseFloat(value); + return isNaN(number) ? 0 : number; + } + const number = parseInt(value, 10); return isNaN(number) ? 0 : number; } + // key/value pairs of column names to analytics results metadata + const analyticsColumnsMap = { + analytics_startedCount: { + apiResultName: "started", + }, + analytics_finishedCount: { + apiResultName: "finished", + }, + analytics_shellDownloads: { + apiResultName: "shelldownloads", + }, + analytics_pdfDownloads: { + apiResultName: "pdfdownloads", + }, + analytics_epubDownloads: { + apiResultName: "epubdownloads", + }, + analytics_bloompubDownloads: { + apiResultName: "bloompubdownloads", + }, + analytics_questionsInBookCount: { + apiResultName: "numquestionsinbook", + }, + analytics_quizzesTakenCount: { + apiResultName: "numquizzestaken", + }, + analytics_meanQuestionsCorrectPct: { + apiResultName: "meanpctquestionscorrect", + isDecimal: true, + }, + analytics_medianQuestionsCorrectPct: { + apiResultName: "medianpctquestionscorrect", + isDecimal: true, + }, + }; try { const bloomApiUrl = "https://api.bloomlibrary.org/v1"; // "http://127.0.0.1:7071/v1"; // testing with a locally-run api - //Query the api for per-books stats for all books + // Query the api for per-books stats for all books. + // What is going on behind the scenes is actually somewhat convoluted. + // We give the api the query to run to get the parse books. + // It sends that list of books to the postgresql database to get the analytics data + // and returns it to us. 
It would be more efficient to ask the postgresql database + // ourselves, but the api endpoint already exists, and I didn't want to provide + // postgres connection information to the parse server. const axios = require("axios"); - const results = await axios.post( + const analyticsResults = await axios.post( `${bloomApiUrl}/stats/reading/per-book`, { filter: { parseDBQuery: { - url: `${getConnectionInfo().url}classes/books`, + url: `${ + getCurrentInstanceInfoForApiQuery().url + }/classes/books`, method: "GET", options: { - headers: getConnectionInfo().headers, + headers: { + "X-Parse-Application-Id": `${ + getCurrentInstanceInfoForApiQuery().appId + }`, + }, params: { - limit: 1000000, + limit: 1000000, // Default is 100. We want all of them. keys: "objectId,bookInstanceId", }, }, @@ -225,30 +278,54 @@ Parse.Cloud.job("updateBookAnalytics", async (request) => { }, } ); + const analyticsSourceData = analyticsResults.data.stats; + + // Make a map of bookInstanceId to analytics data for efficiency + const bookInstanceIdToAnalyticsMap = {}; + analyticsSourceData.forEach((bookAnalytics) => { + bookInstanceIdToAnalyticsMap[bookAnalytics.bookinstanceid] = + bookAnalytics; + }); - //Loop through all books, updating analytics + // Get all the books in our parse database. + // If the analytics values need to be updated, push it into + // a new array of books to update. + const booksToUpdate = []; const bookQuery = new Parse.Query("books"); bookQuery.limit(1000000); // Default is 100. We want all of them. 
- bookQuery.select("bookInstanceId"); - const books = await bookQuery.find(); - books.forEach((book) => { - const { bookInstanceId } = book.attributes; - const bookStats = results.data.stats.find( - (bookStat) => bookStat.bookinstanceid === bookInstanceId - ); - book.set( - "analytics_finishedCount", - getNumberOrZero(bookStats?.finished) - ); - book.set( - "analytics_shellDownloads", - getNumberOrZero(bookStats?.shelldownloads) - ); - book.set("updateSource", "updateBookAnalytics"); + bookQuery.select("bookInstanceId", ...Object.keys(analyticsColumnsMap)); + + const allBooks = await bookQuery.find(); + allBooks.forEach((book) => { + const bookAnalytics = + bookInstanceIdToAnalyticsMap[book.get("bookInstanceId")]; + + let bookNeedsUpdate = false; + Object.keys(analyticsColumnsMap).forEach((columnName) => { + const newValue = getNumberOrZero( + bookAnalytics?.[ + analyticsColumnsMap[columnName].apiResultName + ], + analyticsColumnsMap[columnName].isDecimal || false + ); + + if (book.get(columnName) !== newValue) { + book.set(columnName, newValue); + bookNeedsUpdate = true; + } + }); + if (bookNeedsUpdate) { + // Important to set updateSource for proper processing in beforeSave (see details there). + book.set("updateSource", "updateBookAnalytics"); + + booksToUpdate.push(book); + } }); - //Save all books - const successfulUpdates = await Parse.Object.saveAll(books, { + request.log.info("booksToUpdate", booksToUpdate); + + //Save any books with updated analytics. + const successfulUpdates = await Parse.Object.saveAll(booksToUpdate, { useMasterKey: true, }); request.log.info( @@ -256,11 +333,20 @@ Parse.Cloud.job("updateBookAnalytics", async (request) => { ); } catch (error) { if (error.code === Parse.Error.AGGREGATE_ERROR) { - error.errors.forEach((iError) => { + const maxErrors = 20; // Don't blow up the log. 
+ for (let i = 0; i < error.errors.length && i < maxErrors; i++) { + const iError = error.errors[i]; request.log.error( `Couldn't process ${iError.object.id} due to ${iError.message}` ); - }); + } + if (error.errors.length > maxErrors) { + request.log.error( + `${ + error.errors.length - maxErrors + } more errors were suppressed.` + ); + } request.log.error( "updateBookAnalytics - Terminated unsuccessfully." ); @@ -735,8 +821,16 @@ Parse.Cloud.define("setupTables", async () => { { name: "bloomPUBVersion", type: "Number" }, // analytics_* fields are populated by the updateBookAnalytics job. + { name: "analytics_startedCount", type: "Number" }, { name: "analytics_finishedCount", type: "Number" }, { name: "analytics_shellDownloads", type: "Number" }, + { name: "analytics_pdfDownloads", type: "Number" }, + { name: "analytics_epubDownloads", type: "Number" }, + { name: "analytics_bloompubDownloads", type: "Number" }, + { name: "analytics_questionsInBookCount", type: "Number" }, + { name: "analytics_quizzesTakenCount", type: "Number" }, + { name: "analytics_meanQuestionsCorrectPct", type: "Number" }, + { name: "analytics_medianQuestionsCorrectPct", type: "Number" }, ], }, {