/*
 * plugins/check_urls.ts
 *
 * Final state of the file produced by the patch series
 * "Add check_urls plugin" + "Run deno fmt", with review fixes applied.
 */
import { merge } from "lume/core/utils/object.ts";
import type Site from "lume/core/site.ts";
import type { Page } from "lume/core/file.ts";

export interface Options {
  /** The list of extensions this plugin applies to */
  extensions?: string[];
}

/** Default options */
export const defaults: Options = {
  extensions: [".html"],
};

/**
 * Selector/attribute pairs whose attribute value is a single URL.
 * The order is the order in which links are checked (and warnings printed).
 */
const URL_ATTRIBUTES = [
  ["[href]", "href"],
  ["[src]", "src"],
  ["video[poster]", "poster"],
] as const;

/** Selector/attribute pairs whose attribute value is a `srcset`-style list. */
const SRCSET_ATTRIBUTES = [
  ["[srcset]", "srcset"],
  ["[imagesrcset]", "imagesrcset"],
] as const;

/**
 * Extracts the URL of every candidate in a `srcset`-style attribute value.
 *
 * Candidates are separated by commas; within a candidate the URL is the first
 * run of non-whitespace characters and anything after it (the descriptor) is
 * ignored. Empty candidates (empty attribute, trailing or doubled commas) are
 * skipped instead of throwing.
 *
 * @param attr Raw attribute value, or `null` when the attribute is absent.
 * @returns The candidate URLs in document order.
 */
function srcsetUrls(attr: string | null): string[] {
  if (attr == null) {
    return [];
  }

  const found: string[] = [];
  for (const candidate of attr.split(",")) {
    const [url] = candidate.trim().split(/\s+/, 1);
    if (url) {
      found.push(url);
    }
  }
  return found;
}

/**
 * This plugin checks broken links in *.html output files.
 *
 * On every build it collects the absolute URL of every page and static file,
 * then scans the configured pages for `href`, `src`, `poster`, `srcset` and
 * `imagesrcset` attributes and prints a warning for each same-origin link
 * that does not match a collected URL. Links to other origins are ignored,
 * as are the hash and query string of a link.
 *
 * @param userOptions Overrides for {@link defaults}.
 * @returns The plugin function to pass to `site.use()`.
 */
export default function (userOptions?: Options) {
  const options = merge(defaults, userOptions);

  return (site: Site) => {
    const location = site.options.location;
    // Absolute URLs of everything the site outputs; rebuilt on every build.
    const knownUrls = new Set<string>();

    /** Prints the broken-link warning for `url` found in `page`. */
    function report(page: Page, url: string, reason = ""): void {
      console.warn(`⛓️‍💥 ${page.data.url} -> ${url}${reason}`);
    }

    /**
     * Checks one link found in `page`, resolving it relative to the page URL.
     * A missing attribute (`null`) is ignored.
     */
    function scan(url: string | null, page: Page): void {
      if (url == null) {
        return;
      }

      let target: URL;
      try {
        target = new URL(url, new URL(page.data.url, location));
      } catch {
        // The URL constructor throws on unparsable input; treat that as a
        // broken link rather than aborting the whole build.
        report(page, url, " (invalid URL)");
        return;
      }

      // Only links pointing at this site can be verified.
      if (target.origin !== location.origin) {
        return;
      }

      target.hash = ""; // doesn't check hash
      target.search = ""; // doesn't check search either

      if (!knownUrls.has(target.toString())) {
        report(page, url);
      }
    }

    /** Checks every candidate URL of a `srcset`-style attribute value. */
    function scanSrcset(attr: string | null, page: Page): void {
      for (const url of srcsetUrls(attr)) {
        scan(url, page);
      }
    }

    // Pass 1: collect the URL of every page and every static file.
    site.process("*", (pages) => {
      knownUrls.clear(); // Clear on rebuild
      for (const page of pages) {
        knownUrls.add(new URL(page.data.url, location).toString());
      }
      for (const file of site.files) {
        // site.url() returns the full url if the second argument is true
        knownUrls.add(site.url(file.outputPath, true));
      }
    });

    // Pass 2: check every link in the configured pages.
    site.process(options.extensions, (pages) => {
      for (const page of pages) {
        const { document } = page;

        if (!document) {
          continue;
        }

        for (const [selector, attribute] of URL_ATTRIBUTES) {
          for (const element of document.querySelectorAll(selector)) {
            scan(element.getAttribute(attribute), page);
          }
        }

        for (const [selector, attribute] of SRCSET_ATTRIBUTES) {
          for (const element of document.querySelectorAll(selector)) {
            scanSrcset(element.getAttribute(attribute), page);
          }
        }
      }
    });
  };
}