Skip to content

Commit

Permalink
Scrape library and class definition pages
Browse files Browse the repository at this point in the history
Results in nice and correct library/class descriptions, and reduces reliance on hardcoded class inheritance
  • Loading branch information
robotboy655 committed Feb 16, 2024
1 parent 9457527 commit 28fbcb2
Show file tree
Hide file tree
Showing 7 changed files with 122 additions and 50 deletions.
2 changes: 0 additions & 2 deletions custom/class.CSEnt.lua

This file was deleted.

2 changes: 0 additions & 2 deletions custom/class.NPC.lua

This file was deleted.

2 changes: 0 additions & 2 deletions custom/class.Player.lua

This file was deleted.

5 changes: 0 additions & 5 deletions custom/class.WEAPON.lua

This file was deleted.

80 changes: 63 additions & 17 deletions src/api-writer/glua-api-writer.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import { ClassFunction, Enum, Function, HookFunction, LibraryFunction, Panel, PanelFunction, Realm, Struct, WikiPage, isPanel } from '../scrapers/wiki-page-markup-scraper.js';
import { ClassFunction, Enum, Function, HookFunction, LibraryFunction, TypePage, Panel, PanelFunction, Realm, Struct, WikiPage, isPanel } from '../scrapers/wiki-page-markup-scraper.js';
import { escapeSingleQuotes, putCommentBeforeEachLine, removeNewlines, safeFileName, toLowerCamelCase } from '../utils/string.js';
import {
isClassFunction,
isHookFunction,
isLibraryFunction,
isLibrary,
isClass,
isPanelFunction,
isStruct,
isEnum,
} from '../scrapers/wiki-page-markup-scraper.js';
import fs from 'fs';

export const RESERVERD_KEYWORDS = new Set([
'and',
Expand Down Expand Up @@ -40,6 +43,8 @@ export class GluaApiWriter {
private readonly writtenLibraryGlobals: Set<string> = new Set();
private readonly pageOverrides: Map<string, string> = new Map();

private readonly files: Map<string, WikiPage[]> = new Map();

constructor() { }

public static safeName(name: string) {
Expand Down Expand Up @@ -73,7 +78,7 @@ export class GluaApiWriter {
if (isClassFunction(page))
api += this.writeClassStart(page.parent, undefined, page.deprecated);
else if (isLibraryFunction(page))
api += this.writeLibraryGlobal(page);
api += this.writeLibraryGlobalFallback(page);

api += this.pageOverrides.get(fileSafeAddress);

Expand All @@ -92,6 +97,10 @@ export class GluaApiWriter {
return this.writeEnum(page);
else if (isStruct(page))
return this.writeStruct(page);
else if (isLibrary(page))
return this.writeLibraryGlobal(page);
else if (isClass(page))
return this.writeClassGlobal(page);
}

private writeClassStart(className: string, parent?: string, deprecated?: string, description?: string) {
Expand All @@ -113,7 +122,10 @@ export class GluaApiWriter {
api += ` : ${parent}`;

api += '\n';
api += `local ${className} = {}\n\n`;

// for PLAYER, WEAPON, etc. we want to define globals
if (className !== className.toUpperCase()) api += 'local ';
api += `${className} = {}\n\n`;
}

this.writtenClasses.add(className);
Expand All @@ -122,23 +134,44 @@ export class GluaApiWriter {
return api;
}

/**
 * Emits a placeholder `<library> = {}` global the first time a function of
 * that library is written, unless the function sets `dontDefineParent`.
 * Returns an empty string when the global is already recorded in
 * `writtenLibraryGlobals`.
 */
private writeLibraryGlobal(func: LibraryFunction) {
private writeLibraryGlobalFallback(func: LibraryFunction) {
if (!func.dontDefineParent && !this.writtenLibraryGlobals.has(func.parent)) {
let global = '';

if (func.deprecated)
global += `---@deprecated ${removeNewlines(func.deprecated)}\n`;
let api = '';

global += `${func.parent} = {}\n\n`;
// The doc comment must end with a newline; otherwise the assignment below is
// emitted on the same line and becomes part of the Lua comment.
api += `---Missing description.\n`;
api += `${func.parent} = {}\n\n`;

this.writtenLibraryGlobals.add(func.parent);

return global;
return api;
}

return '';
}

/**
 * Writes the global table definition for a library page: its description as
 * comment lines (when present), an optional `---@deprecated` annotation and
 * the `<name> = {}` assignment. Each library name is emitted at most once;
 * later calls for the same name return an empty string.
 */
private writeLibraryGlobal(page: TypePage) {
  // Already emitted for this library: nothing more to write.
  if (this.writtenLibraryGlobals.has(page.name))
    return '';

  const parts: string[] = [];

  if (page.description)
    parts.push(`${putCommentBeforeEachLine(page.description, false)}\n`);

  if (page.deprecated)
    parts.push(`---@deprecated ${removeNewlines(page.deprecated)}\n`);

  parts.push(`${page.name} = {}\n\n`);

  this.writtenLibraryGlobals.add(page.name);

  return parts.join('');
}

/**
 * Writes the declaration for a class page by handing its name, parent,
 * deprecation text and description to `writeClassStart`.
 */
private writeClassGlobal(page: TypePage) {
  const { name, parent, deprecated, description } = page;

  return this.writeClassStart(name, parent, deprecated, description);
}

private writeClassFunction(func: ClassFunction) {
let api: string = this.writeClassStart(func.parent, undefined, func.deprecated);

Expand All @@ -149,7 +182,7 @@ export class GluaApiWriter {
}

private writeLibraryFunction(func: LibraryFunction) {
let api: string = this.writeLibraryGlobal(func);
let api: string = this.writeLibraryGlobalFallback(func);

api += this.writeFunctionLuaDocComment(func, func.realm);
api += this.writeFunctionDeclaration(func, func.realm);
Expand Down Expand Up @@ -241,14 +274,27 @@ export class GluaApiWriter {
return api;
}

public writePages(pages: WikiPage[]) {
let api: string = '';
public writePages(pages: WikiPage[], filePath: string) {
// Queue the pages under their target file path; nothing is written here.
if (!this.files.has(filePath)) this.files.set(filePath, []);
this.files.get(filePath)!.push(...pages);
}

for (const page of pages) {
api += this.writePage(page);
}
public writeToDisk() {
// Build the output for every queued file and append it in a single call.
this.files.forEach((pages: WikiPage[], filePath: string) => {
let api = "";

return api;
// First we write the "header" types (class and library pages) ...
for (const page of pages.filter(x => isClass(x) || isLibrary(x))) {
api += this.writePage(page);
}
// ... then every remaining page, in the order it was queued.
for (const page of pages.filter(x => !isClass(x) && !isLibrary(x))) {
api += this.writePage(page);
}

// Files with no generated output are skipped; otherwise the ---@meta header is prepended.
// NOTE(review): appendFileSync adds to an existing file - confirm the output directory is cleared between runs.
if (api.length > 0) {
fs.appendFileSync(filePath, "---@meta\n\n" + api);
}
});
}

private transformType(type: string) {
Expand Down
34 changes: 23 additions & 11 deletions src/cli-scraper.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { WikiPageMarkupScraper } from './scrapers/wiki-page-markup-scraper.js';
import { WikiPageMarkupScraper, isLibrary, isClass } from './scrapers/wiki-page-markup-scraper.js';
import { WikiPageListScraper } from './scrapers/wiki-page-list-scraper.js';
import packageJson from '../package.json' assert { type: "json" };
import { GluaApiWriter } from './api-writer/glua-api-writer.js';
Expand Down Expand Up @@ -80,8 +80,16 @@ async function startScrape() {
}
}

console.log('Collecting all pages...');
let collect_start = performance.now();

const pageIndexes = await scrapeAndCollect(pageListScraper);

console.log(`Took ${Math.floor((performance.now()-collect_start) / 100) / 10}s!\n`);

console.log('Scraping all pages...');
let scrape_start = performance.now();

let queue: Promise<any>[] = [];
for (const pageIndex of pageIndexes) {
const pageMarkupScraper = new WikiPageMarkupScraper(`${baseUrl}/${pageIndex.address}?format=text`);
Expand All @@ -90,32 +98,32 @@ async function startScrape() {
if (pageMarkups.length === 0)
return;

const api = writer.writePages(pageMarkups);

// Generate file names
let fileName = pageIndex.address;
let moduleName = fileName;

if (fileName.includes('.') || fileName.includes(':') || fileName.includes('/')) {
[moduleName, fileName] = fileName.split(/[:.\/]/, 2);
}

fileName = fileName.replace(/[^a-z0-9]/gi, '_').toLowerCase();

// Make sure modules like Entity and ENTITY are placed in the same file.
moduleName = moduleName.toLowerCase();

// Special cases for library and hook pages
if (moduleName.endsWith("(library)")) moduleName = moduleName.substring(0, moduleName.length - 9);
if (moduleName.endsWith("_hooks")) moduleName = moduleName.substring(0, moduleName.length - 6);

const moduleFile = path.join(baseDirectory, moduleName);

if (!fs.existsSync(`${moduleFile}.lua`))
fs.writeFileSync(`${moduleFile}.lua`, '---@meta\n\n');
// Write Lua API docs
writer.writePages(pageMarkups, path.join(baseDirectory, `${moduleName}.lua`));

// Write JSON data
if (!fs.existsSync(moduleFile))
fs.mkdirSync(moduleFile, { recursive: true });

fileName = fileName.replace(/[^a-z0-9]/gi, '_').toLowerCase();

// Lua API
fs.appendFileSync(path.join(baseDirectory, `${moduleName}.lua`), api);

// JSON data
const json = JSON.stringify(pageMarkups, null, 2);
fs.writeFileSync(path.join(baseDirectory, moduleName, `${fileName}.json`), json);
});
Expand All @@ -129,6 +137,10 @@ async function startScrape() {
}
}

console.log(`Took ${Math.floor((performance.now()-scrape_start) / 100) / 10}s!`);

writer.writeToDisk();

console.log(`Done with scraping! You can find the output in ${baseDirectory}`);
}

Expand Down
47 changes: 36 additions & 11 deletions src/scrapers/wiki-page-markup-scraper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ export type WikiFunctionType = 'panelfunc' | 'classfunc' | 'libraryfunc' | 'hook
export type Realm = 'Menu' | 'Client' | 'Server' | 'Shared' | 'Client and menu';

export type CommonWikiProperties = {
type: WikiFunctionType | 'enum' | 'struct' | 'panel';
type: WikiFunctionType | 'enum' | 'struct' | 'panel' | 'class' | 'library';
address: string;
name: string;
description: string;
Expand Down Expand Up @@ -77,7 +77,12 @@ export type Panel = CommonWikiProperties & {
parent: string;
};

export type WikiPage = ClassFunction | LibraryFunction | HookFunction | PanelFunction | Panel | Enum | Struct;

/**
 * A wiki page that describes a type as a whole rather than a single function.
 * The `isLibrary` and `isClass` guards narrow a `WikiPage` to this type when
 * its `type` is 'library' or 'class'.
 */
export type TypePage = CommonWikiProperties & {
// Name of the parent type.
// NOTE(review): the scraper fills this from the `parent` attribute of the <type> element, which may be absent - confirm whether this should be optional.
parent: string;
};

export type WikiPage = ClassFunction | LibraryFunction | HookFunction | PanelFunction | Panel | Enum | Struct | TypePage

/**
* Guards
Expand Down Expand Up @@ -110,6 +115,14 @@ export function isStruct(page: WikiPage): page is Struct {
return page.type === 'struct';
}

/** Type guard: true when the page's `type` is 'library'. */
export function isLibrary(page: WikiPage): page is TypePage {
  const { type } = page;

  return type === 'library';
}

/** Type guard: true when the page's `type` is 'class'. */
export function isClass(page: WikiPage): page is TypePage {
  const { type } = page;

  return type === 'class';
}

/**
* Scraper
*/
Expand All @@ -126,20 +139,21 @@ export class WikiPageMarkupScraper extends Scraper<WikiPage> {
const isEnum = $('enum').length > 0;
const isStruct = $('structure').length > 0;
const isFunction = $('function').length > 0;
const isTypePage = $('type').length > 0;
const isPanel = $('panel').length > 0;
const mainElement = $(isEnum ? 'enum' : isStruct ? 'struct' : isPanel ? 'panel' : 'function');
const isDeprecated = $('deprecated').length > 0;
const address = response.url.split('/').pop()!.split('?')[0];

let deprecated: string | undefined = undefined;
if (isDeprecated && !isEnum && !isStruct) {
deprecated = $('deprecated').map(function() {
const $el = $(this);
return $el.text().trim();
}).get().join(' - ')
let deprecated: string | undefined = undefined;
if (isDeprecated && !isEnum && !isStruct) {
deprecated = $('deprecated').map(function() {
const $el = $(this);
return $el.text().trim();
}).get().join(' - ');

$('deprecated').remove();
}
$('deprecated').remove();
}

if (isEnum) {
const items = $('items item').map(function () {
Expand Down Expand Up @@ -215,7 +229,7 @@ export class WikiPageMarkupScraper extends Scraper<WikiPage> {
description: $el.text()
};

if ($el.attr('default')!= undefined)
if ($el.attr('default') != undefined)
argument.default = $el.attr('default')!;

return argument;
Expand Down Expand Up @@ -270,6 +284,17 @@ export class WikiPageMarkupScraper extends Scraper<WikiPage> {
isPanelFunction: 'yes'
};
}
} else if (isTypePage) {
const $el = $('type');

return <TypePage>{
type: $el.attr('is'),
name: $el.attr('name'),
parent: $el.attr('parent'),
address: address,
description: $('type summary').text(),
deprecated: deprecated
};
}

return null;
Expand Down

0 comments on commit 28fbcb2

Please sign in to comment.