Skip to content

Commit

Permalink
Fix leaderboard parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
gausie committed Nov 21, 2024
1 parent 4951fab commit 501081a
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 55 deletions.
10 changes: 6 additions & 4 deletions packages/kol.js/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "kol.js",
"version": "0.1.6",
"version": "0.1.7",
"main": "src/index.ts",
"type": "module",
"files": [
Expand All @@ -18,17 +18,19 @@
"vitest": "^2.1.0"
},
"dependencies": {
"@xmldom/xmldom": "^0.9.2",
"async-mutex": "^0.5.0",
"css-select": "^5.1.0",
"date-fns": "^3.6.0",
"domhandler": "^5.0.3",
"domutils": "^3.1.0",
"got": "^14.4.2",
"html-entities": "^2.5.2",
"htmlparser2": "^9.1.0",
"image-size": "^1.1.1",
"node-html-parser": "^6.1.13",
"querystring": "^0.2.1",
"tough-cookie": "^5.0.0",
"ts-dedent": "^2.2.0",
"typed-emitter": "^2.1.0",
"xpath": "^0.0.34"
"typed-emitter": "^2.1.0"
}
}
26 changes: 26 additions & 0 deletions packages/kol.js/src/utils/__fixtures__/leaderboard_wotsf.html

Large diffs are not rendered by default.

44 changes: 44 additions & 0 deletions packages/kol.js/src/utils/leaderboard.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import { describe, expect, it } from "vitest";
import { loadFixture } from "../testUtils.js";
import { parseLeaderboard } from "./leaderboard.js";

// Checks parseLeaderboard against the saved "leaderboard_wotsf.html" fixture.
describe("Leaderboards", () => {
  it("can parse a regular path leaderboard", async () => {
    const html = await loadFixture(__dirname, "leaderboard_wotsf.html");
    const parsed = parseLeaderboard(html);

    // Name of the whole leaderboard group.
    expect(parsed.name).toBe(
      "Way of the Surprising Fist (Frozen) Leaderboards",
    );

    // Exactly two boards are expected: hardcore first, then normal.
    expect(parsed.boards).toHaveLength(2);

    // First board: hardcore. Inspect its first listed run.
    const hardcoreBoard = parsed.boards[0];
    expect(hardcoreBoard.name).toBe(
      "Fastest Hardcore Way of the Surprising Fist Ascensions",
    );
    expect(hardcoreBoard.runs).toHaveLength(35);
    const firstHardcoreRun = hardcoreBoard.runs[0];
    expect(firstHardcoreRun.player).toBe("Iron Bob (AT)");
    expect(firstHardcoreRun.days).toBe("3");
    expect(firstHardcoreRun.turns).toBe("712");
    expect(hardcoreBoard.updated).toStrictEqual(
      new Date("2024-11-21T02:34:30-07:00"),
    );

    // Second board: normal (softcore). Inspect its last listed run.
    const normalBoard = parsed.boards[1];
    expect(normalBoard.name).toBe(
      "Fastest Normal Way of the Surprising Fist Ascensions",
    );
    expect(normalBoard.runs).toHaveLength(35);
    const lastNormalRun = normalBoard.runs[34];
    expect(lastNormalRun.player).toBe("Jesusisagurl (SC)");
    expect(lastNormalRun.days).toBe("3");
    expect(lastNormalRun.turns).toBe("583");
    expect(normalBoard.updated).toStrictEqual(
      new Date("2024-11-21T02:34:31-07:00"),
    );
  });
});
56 changes: 23 additions & 33 deletions packages/kol.js/src/utils/leaderboard.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import xpath, { select } from "xpath";
import { DOMParser, MIME_TYPE } from "@xmldom/xmldom";
import { selectAll, selectOne } from "css-select";
import { parseDocument } from "htmlparser2";
import { isComment, Text } from "domhandler";
import { innerText } from "domutils";

export type LeaderboardInfo = {
name: string;
Expand All @@ -18,47 +20,34 @@ type RunInfo = {
turns: string;
};

// Shared DOMParser instance. Its error hook forwards only "fatalError"
// messages to console.error; every other level is ignored.
const parser = new DOMParser({
  onError: (level, message) => {
    if (level !== "fatalError") return;
    console.error(message);
  },
});

// Evaluates `expression` against `node` with `select` and always returns an
// array: an array result passes through unchanged, a single Node is wrapped,
// and any other result becomes an empty array.
const selectMulti = (expression: string, node: Node) => {
  const result = select(expression, node);
  if (Array.isArray(result)) {
    return result;
  }
  if (result instanceof Node) {
    return [result];
  }
  return [];
};
const blankNode = new Text("");

export function parseLeaderboard(page: string): LeaderboardInfo {
const doc = parser.parseFromString(page, MIME_TYPE.HTML);
// @ts-expect-error see https://github.com/xmldom/xmldom/issues/724
const [board, ...boards] = selectMulti("//table", doc);
const doc = parseDocument(page);
const [container, ...boards] = selectAll("table", doc);

return {
name: selectMulti(".//text()", board.firstChild!)
.map((node) => node.nodeValue)
.join("")
name: innerText(selectOne("tr", container) ?? blankNode)
.replace(/\s+/g, " ")
.trim(),
boards: boards
.slice(1)
.filter(
(board) =>
selectMulti("./tr//text()", board)[0]?.nodeValue?.match(
/^((Fast|Funn|B)est|Most (Goo|Elf))/,
) && selectMulti("./tr", board).length > 1,
)
.filter((board) => {
if (selectAll(":scope > tr, :scope > tbody > tr", board).length <= 1)
return false;
const text = innerText(selectOne("tr", board) ?? blankNode);
return text.match(/^((Fast|Funn|B)est|Most (Goo|Elf))/);
})
.map((subboard) => {
const rows = selectMulti("./tr", subboard);
const rows = selectAll("tr", subboard);

return {
name: (selectMulti(".//text()", rows[0])[0]?.nodeValue || "").trim(),
runs: selectMulti("./td//tr", rows[1])
name: innerText(rows[0]),
runs: selectAll("td tr", rows[1])
.slice(2)
.map((node) => {
const rowText = selectMulti(".//text()", node).map((text) =>
text.toString().replace(/&amp;nbsp;/g, ""),
const rowText = selectAll("td", node).map((col) =>
innerText(col).replace(/&amp;nbsp;/g, ""),
);
const hasTwoNumbers = !!parseInt(rowText[rowText.length - 2]);
return {
Expand All @@ -73,9 +62,10 @@ export function parseLeaderboard(page: string): LeaderboardInfo {
turns: rowText[rowText.length - 1].toString() || "0",
};
}),
updated: xpath.isComment(subboard.nextSibling)
? new Date(subboard.nextSibling.data.slice(9, -1))
: null,
updated:
subboard.nextSibling && isComment(subboard.nextSibling)
? new Date(subboard.nextSibling.data.slice(9, -1))
: null,
};
}),
};
Expand Down
36 changes: 18 additions & 18 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4366,13 +4366,6 @@ __metadata:
languageName: node
linkType: hard

"@xmldom/xmldom@npm:^0.9.2":
version: 0.9.2
resolution: "@xmldom/xmldom@npm:0.9.2"
checksum: 10c0/a449add63f652dfe1fb8d24f793d258ef8ae896eea52b242864d2118b9c659bbadc70bad6175c07f7309920044d48a5544ffce20932cdeb62b44357bbb6eec21
languageName: node
linkType: hard

"@zag-js/dom-query@npm:0.16.0":
version: 0.16.0
resolution: "@zag-js/dom-query@npm:0.16.0"
Expand Down Expand Up @@ -5668,7 +5661,7 @@ __metadata:
languageName: node
linkType: hard

"domutils@npm:^3.0.1":
"domutils@npm:^3.0.1, domutils@npm:^3.1.0":
version: 3.1.0
resolution: "domutils@npm:3.1.0"
dependencies:
Expand Down Expand Up @@ -5790,7 +5783,7 @@ __metadata:
languageName: node
linkType: hard

"entities@npm:^4.2.0, entities@npm:^4.4.0":
"entities@npm:^4.2.0, entities@npm:^4.4.0, entities@npm:^4.5.0":
version: 4.5.0
resolution: "entities@npm:4.5.0"
checksum: 10c0/5b039739f7621f5d1ad996715e53d964035f75ad3b9a4d38c6b3804bb226e282ffeae2443624d8fdd9c47d8e926ae9ac009c54671243f0c3294c26af7cc85250
Expand Down Expand Up @@ -7614,6 +7607,18 @@ __metadata:
languageName: node
linkType: hard

"htmlparser2@npm:^9.1.0":
version: 9.1.0
resolution: "htmlparser2@npm:9.1.0"
dependencies:
domelementtype: "npm:^2.3.0"
domhandler: "npm:^5.0.3"
domutils: "npm:^3.1.0"
entities: "npm:^4.5.0"
checksum: 10c0/394f6323efc265bbc791d8c0d96bfe95984e0407565248521ab92e2dc7668e5ceeca7bc6ed18d408b9ee3b25032c5743368a4280d280332d782821d5d467ad8f
languageName: node
linkType: hard

"http-cache-semantics@npm:^4.1.1":
version: 4.1.1
resolution: "http-cache-semantics@npm:4.1.1"
Expand Down Expand Up @@ -8340,11 +8345,14 @@ __metadata:
version: 0.0.0-use.local
resolution: "kol.js@workspace:packages/kol.js"
dependencies:
"@xmldom/xmldom": "npm:^0.9.2"
async-mutex: "npm:^0.5.0"
css-select: "npm:^5.1.0"
date-fns: "npm:^3.6.0"
domhandler: "npm:^5.0.3"
domutils: "npm:^3.1.0"
got: "npm:^14.4.2"
html-entities: "npm:^2.5.2"
htmlparser2: "npm:^9.1.0"
image-size: "npm:^1.1.1"
node-html-parser: "npm:^6.1.13"
prettier: "npm:^3.3.3"
Expand All @@ -8354,7 +8362,6 @@ __metadata:
typed-emitter: "npm:^2.1.0"
typescript: "npm:^5.6.2"
vitest: "npm:^2.1.0"
xpath: "npm:^0.0.34"
languageName: unknown
linkType: soft

Expand Down Expand Up @@ -12878,13 +12885,6 @@ __metadata:
languageName: node
linkType: hard

"xpath@npm:^0.0.34":
version: 0.0.34
resolution: "xpath@npm:0.0.34"
checksum: 10c0/88335108884ca164421f7fed048ef1a18ab3f7b1ae446b627fd3f51fc2396dcce798601c5e426de3bbd55d5940b84cf2326c75cd76620c1b49491283b85de17a
languageName: node
linkType: hard

"xtend@npm:~2.1.1":
version: 2.1.2
resolution: "xtend@npm:2.1.2"
Expand Down

0 comments on commit 501081a

Please sign in to comment.