From d3142c5ed53a2af6725fcbdb39a004c7957ed963 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Lorber?= Date: Thu, 25 Jan 2024 19:49:45 +0100 Subject: [PATCH] fix(core): broken links optimization behaves differently than non-optimized logic (#9791) --- packages/docusaurus-types/src/routing.d.ts | 5 + .../src/server/__tests__/brokenLinks.test.ts | 134 +++++++++++++++++- packages/docusaurus/src/server/brokenLinks.ts | 39 ++++- 3 files changed, 168 insertions(+), 10 deletions(-) diff --git a/packages/docusaurus-types/src/routing.d.ts b/packages/docusaurus-types/src/routing.d.ts index 7d1129c2a0a6..3e2ae2ab0bbd 100644 --- a/packages/docusaurus-types/src/routing.d.ts +++ b/packages/docusaurus-types/src/routing.d.ts @@ -60,6 +60,11 @@ export type RouteConfig = { routes?: RouteConfig[]; /** React router config option: `exact` routes would not match subroutes. */ exact?: boolean; + /** + * React router config option: `strict` routes are sensitive to the presence + * of a trailing slash. + */ + strict?: boolean; /** Used to sort routes. Higher-priority routes will be placed first. */ priority?: number; /** Extra props; will be copied to routes.js. */ diff --git a/packages/docusaurus/src/server/__tests__/brokenLinks.test.ts b/packages/docusaurus/src/server/__tests__/brokenLinks.test.ts index 5619b1f7ba34..2f962db07c0a 100644 --- a/packages/docusaurus/src/server/__tests__/brokenLinks.test.ts +++ b/packages/docusaurus/src/server/__tests__/brokenLinks.test.ts @@ -13,7 +13,12 @@ import type {RouteConfig} from '@docusaurus/types'; type Params = Parameters[0]; // We don't need all the routes attributes for our tests -type SimpleRoute = {path: string; routes?: SimpleRoute[]}; +type SimpleRoute = { + path: string; + routes?: SimpleRoute[]; + exact?: boolean; + strict?: boolean; +}; // Conveniently apply defaults to function under test async function testBrokenLinks(params: { @@ -43,6 +48,52 @@ describe('handleBrokenLinks', () => { }); }); + it('accepts valid non-exact link', async () => { + await testBrokenLinks({ + routes: [{path: '/page1', exact: false}, {path: '/page2/'}], + collectedLinks: { + '/page1': { + links: [ + '/page1', + '/page1/', + '/page2', + '/page2/', + '/page1/subPath', + '/page2/subPath', + ], + anchors: [], + }, + '/page2/': { + links: [ + '/page1', + '/page1/', + '/page2', + '/page2/', + '/page1/subPath', + '/page2/subPath', + ], + anchors: [], + }, + }, + }); + }); + + it('accepts valid non-strict link', async () => { + await testBrokenLinks({ + routes: [{path: '/page1', strict: false}, {path: '/page2/'}], + collectedLinks: { + '/page1': { + links: ['/page1', '/page1/', '/page2', '/page2/'], + anchors: [], + }, + '/page2/': { + links: ['/page1', '/page1/', '/page2', '/page2/'], + anchors: [], + }, + }, + }); + }); + it('accepts valid link to uncollected page', async () => { await testBrokenLinks({ routes: [{path: '/page1'}, {path: '/page2'}], @@ -292,6 +343,76 @@ describe('handleBrokenLinks', () => { `); }); + it('rejects broken link due to strict matching', async () => { + await expect(() => + testBrokenLinks({ + routes: [ + {path: '/page1', strict: true}, + {path: '/page2/', strict: true}, + ], + + collectedLinks: { + '/page1': { + links: ['/page1', '/page1/', '/page2', '/page2/'], + anchors: [], + }, + '/page2/': { + links: ['/page1', '/page1/', '/page2', '/page2/'], + anchors: [], + }, + }, + }), + ).rejects.toThrowErrorMatchingInlineSnapshot(` + "Docusaurus found broken links! + + Please check the pages of your site in the list below, and make sure you don't reference any path that does not exist. + Note: it's possible to ignore broken links with the 'onBrokenLinks' Docusaurus configuration, and let the build pass. + + Exhaustive list of all broken links found: + - Broken link on source page path = /page1: + -> linking to /page2 + - Broken link on source page path = /page2/: + -> linking to /page2 + " + `); + }); + + it('rejects broken link due to strict exact matching', async () => { + await expect(() => + testBrokenLinks({ + routes: [ + {path: '/page1', exact: true, strict: true}, + {path: '/page2/', exact: true, strict: true}, + ], + + collectedLinks: { + '/page1': { + links: ['/page1', '/page1/', '/page2', '/page2/'], + anchors: [], + }, + '/page2/': { + links: ['/page1', '/page1/', '/page2', '/page2/'], + anchors: [], + }, + }, + }), + ).rejects.toThrowErrorMatchingInlineSnapshot(` + "Docusaurus found broken links! + + Please check the pages of your site in the list below, and make sure you don't reference any path that does not exist. + Note: it's possible to ignore broken links with the 'onBrokenLinks' Docusaurus configuration, and let the build pass. + + Exhaustive list of all broken links found: + - Broken link on source page path = /page1: + -> linking to /page1/ + -> linking to /page2 + - Broken link on source page path = /page2/: + -> linking to /page1/ + -> linking to /page2 + " + `); + }); + it('rejects broken link with anchor', async () => { await expect(() => testBrokenLinks({ @@ -728,6 +849,10 @@ describe('handleBrokenLinks', () => { const routes: SimpleRoute[] = [ ...Array.from({length: scale}).map((_, i) => ({ path: `/page${i}`, + exact: true, + })), + ...Array.from({length: scale}).map((_, i) => ({ + path: `/page/nonExact/${i}`, })), ...Array.from({length: scale}).fill({ path: '/pageDynamic/:subpath1', @@ -741,6 +866,7 @@ describe('handleBrokenLinks', () => { links: [ ...Array.from({length: scale}).flatMap((_2, j) => [ `/page${j}`, + `/page/nonExact/${j}`, `/page${j}?age=42`, `/page${j}#anchor${j}`, `/page${j}?age=42#anchor${j}`, @@ -770,9 +896,9 @@ describe('handleBrokenLinks', () => { // See https://github.com/facebook/docusaurus/issues/9754 // See https://twitter.com/sebastienlorber/status/1749392773415858587 // We expect no more matchRoutes calls than number of dynamic route links - expect(matchRoutesMock).toHaveBeenCalledTimes(scale); + expect(matchRoutesMock).toHaveBeenCalledTimes(scale * 2); // We expect matchRoutes to be called with a reduced number of routes - expect(routes).toHaveLength(scale * 2); - expect(matchRoutesMock.mock.calls[0]![0]).toHaveLength(scale); + expect(routes).toHaveLength(scale * 3); + expect(matchRoutesMock.mock.calls[0]![0]).toHaveLength(scale * 2); }); }); diff --git a/packages/docusaurus/src/server/brokenLinks.ts b/packages/docusaurus/src/server/brokenLinks.ts index 3590b71351b1..c91c2f1faecc 100644 --- a/packages/docusaurus/src/server/brokenLinks.ts +++ b/packages/docusaurus/src/server/brokenLinks.ts @@ -8,7 +8,13 @@ import _ from 'lodash'; import logger from '@docusaurus/logger'; import {matchRoutes as reactRouterMatchRoutes} from 'react-router-config'; -import {parseURLPath, serializeURLPath, type URLPath} from '@docusaurus/utils'; +import { + addTrailingSlash, + parseURLPath, + removeTrailingSlash, + serializeURLPath, + type URLPath, +} from '@docusaurus/utils'; import {getAllFinalRoutes} from './utils'; import type {RouteConfig, ReportingSeverity} from '@docusaurus/types'; @@ -55,16 +61,37 @@ function createBrokenLinksHelper({ }): BrokenLinksHelper { const validPathnames = new Set(collectedLinks.keys()); + // IMPORTANT: this is an optimization + // See https://github.com/facebook/docusaurus/issues/9754 // Matching against the route array can be expensive // If the route is already in the valid pathnames, - // we can avoid matching against it as an optimization - const remainingRoutes = routes.filter( - (route) => !validPathnames.has(route.path), - ); + // we can avoid matching against it + const remainingRoutes = (function filterRoutes() { + // Goal: unit tests should behave the same with this enabled or disabled + const disableOptimization = false; + if (disableOptimization) { + return routes; + } + // We must consider the "exact" and "strict" match attribute + // We can only infer pre-validated pathnames from a route from exact routes + const [validPathnameRoutes, otherRoutes] = _.partition( + routes, + (route) => route.exact && validPathnames.has(route.path), + ); + // If a route is non-strict (non-sensitive to trailing slashes) + // We must pre-validate all possible paths + validPathnameRoutes.forEach((validPathnameRoute) => { + if (!validPathnameRoute.strict) { + validPathnames.add(addTrailingSlash(validPathnameRoute.path)); + validPathnames.add(removeTrailingSlash(validPathnameRoute.path)); + } + }); + return otherRoutes; + })(); function isPathnameMatchingAnyRoute(pathname: string): boolean { if (matchRoutes(remainingRoutes, pathname).length > 0) { - // IMPORTANT: this is an optimization here + // IMPORTANT: this is an optimization // See https://github.com/facebook/docusaurus/issues/9754 // Large Docusaurus sites have many routes! // We try to minimize calls to a possibly expensive matchRoutes function