diff --git a/docs/check-redirects/README.md b/docs/check-redirects/README.md
new file mode 100644
index 0000000000000..3dfe57476dc7c
--- /dev/null
+++ b/docs/check-redirects/README.md
@@ -0,0 +1,33 @@
+# Redirect checker
+
+This is a tool to check for out-of-date URL paths to pages on the Teleport docs
+site. You can use it to identify 404ing links in the Teleport Web UI source,
+`gravitational/blog`, and `gravitational/next` repositories. The tool identifies
+URLs in the target directory that do not correspond to docs page files or
+redirects in the `gravitational/teleport` repository.
+
+## Usage
+
+The following example checks for `https://goteleport.com/docs` URLs in a
+`gravitational/blog` clone. The `--in` flag points to the directory that
+contains blog pages (the clone itself is at `~/Documents/blog`). Our
+`gravitational/teleport` clone is at `~/Documents/docs/content/16.x`:
+
+```bash
+$ node docs/check-redirects/index.js --in ~/Documents/blog/pages --docs ~/Documents/docs/content/16.x --name gravitational/blog --config ~/Documents/docs/content/16.x/docs/config.json
+```
+
+## Command-line flags
+
+```
+      --version  Show version number                                   [boolean]
+      --in       root directory path in which to check for necessary redirects.
+                                                                      [required]
+      --config   path to a docs configuration file with a "redirects" key  [required]
+      --docs     path to the root of a gravitational/teleport repo    [required]
+      --exclude  comma-separated list of file extensions not to check, e.g., ".md"
+                 or ".test.tsx"
+      --name     name of the directory tree we are checking for docs URLs (for
+                 display only)                                        [required]
+      --help     Show help                                             [boolean]
+```
diff --git a/docs/check-redirects/check-redirects.js b/docs/check-redirects/check-redirects.js
new file mode 100644
index 0000000000000..00643fa9bb642
--- /dev/null
+++ b/docs/check-redirects/check-redirects.js
@@ -0,0 +1,111 @@
+const yaml = require('yaml');
+const path = require('path');
+
+const teleportDomain = 'https://goteleport.com';
+
+// docsURLPattern matches Teleport docs URLs, with or without a "ver/<N>.x/"
+// segment. The dots in the domain are escaped so that they only match a
+// literal ".". String.prototype.matchAll requires the "g" flag.
+const docsURLPattern =
+  /https:\/\/goteleport\.com\/docs\/(ver\/[0-9]+\.x\/)?[\w\/_#-]+/g;
+
+// RedirectChecker checks for Teleport docs site domains and paths within a
+// given file tree and determines whether a given docs configuration requires
+// redirects.
+// @param fs - The filesystem to use. Either memfs or the NodeJS fs package.
+// @param {string} otherRepoRoot - directory path in fs in which to check for
+// required redirects.
+// @param {string} docsRoot - directory path in fs in which to check for present
+// or missing docs files based on URL paths found in the directory tree at
+// otherRepoRoot.
+// @param {Array} redirects - array of objects with keys "source",
+// "destination", and "permanent". Only "source" is read.
+// @param {Array} exclude - array of file extensions not to check.
+class RedirectChecker {
+  constructor(fs, otherRepoRoot, docsRoot, redirects, exclude) {
+    this.fs = fs;
+    this.otherRepoRoot = otherRepoRoot;
+    this.docsRoot = docsRoot;
+    this.exclude = exclude || [];
+    this.redirectSet = new Set((redirects || []).map(r => r.source));
+  }
+
+  // check walks the user-configured directory tree and identifies Teleport docs
+  // URLs that do not correspond to an existing docs page or redirect. It
+  // returns a deduplicated array of problematic URLs, which is empty if there
+  // are none.
+  check() {
+    return [...new Set(this.checkDir(this.otherRepoRoot))];
+  }
+
+  // checkDir recursively checks for docs URLs with missing docs paths or
+  // redirects at dirPath. It returns an array of missing URLs, which can
+  // contain duplicates.
+  checkDir(dirPath) {
+    let result = [];
+    this.fs.readdirSync(dirPath, 'utf8').forEach(f => {
+      // Exclusions are suffix matches on the entry name, so they are applied
+      // to directories as well as files.
+      if (this.exclude.some(ext => f.endsWith(ext))) {
+        return;
+      }
+      const fullPath = path.join(dirPath, f);
+      const found = this.fs.statSync(fullPath).isDirectory()
+        ? this.checkDir(fullPath)
+        : this.checkFile(fullPath);
+      result = result.concat(found);
+    });
+    return result;
+  }
+
+  // checkFile determines whether docs URLs found in the file
+  // match either an actual docs file path or a redirect source.
+  // Returns an array of URLs with missing files or redirects. The array is
+  // empty, never undefined, if the file has no docs URLs, because checkDir
+  // concatenates the return value into its own result.
+  checkFile(filePath) {
+    const text = this.fs.readFileSync(filePath, 'utf8');
+    const result = [];
+    for (const match of text.matchAll(docsURLPattern)) {
+      const url = match[0];
+      const entry = this.fs.statSync(this.urlToDocsPath(url), {
+        throwIfNoEntry: false,
+      });
+      if (entry != null) {
+        continue;
+      }
+
+      // There is no docs page, so look for a redirect. The lookup key is the
+      // URL path without its fragment and with a trailing slash, e.g.,
+      // "/docs/page3/".
+      let pathPart = url.split('#')[0].slice(teleportDomain.length);
+      if (!pathPart.endsWith('/')) {
+        pathPart += '/';
+      }
+      if (!this.redirectSet.has(pathPart)) {
+        result.push(url);
+      }
+    }
+    return result;
+  }
+
+  // urlToDocsPath returns the path of the MDX file under docsRoot that is
+  // expected to back the docs URL, ignoring any fragment and one trailing
+  // slash. The docs root URL maps to index.mdx.
+  // NOTE(review): a "ver/<N>.x/" segment is kept in the returned path, so a
+  // versioned URL is looked up under docs/pages/ver/<N>.x/. Confirm that this
+  // is intended.
+  urlToDocsPath(url) {
+    const nofrag = url.split('#')[0]; // Remove the fragment
+    let rest = nofrag.slice((teleportDomain + '/docs/').length);
+    if (rest.length === 0) {
+      return path.join(this.docsRoot, 'docs', 'pages', 'index.mdx');
+    }
+    if (rest.endsWith('/')) {
+      rest = rest.slice(0, -1);
+    }
+    return path.join(this.docsRoot, 'docs', 'pages', rest + '.mdx');
+  }
+}
+
+module.exports.RedirectChecker = RedirectChecker;
diff --git a/docs/check-redirects/check-redirects.test.js b/docs/check-redirects/check-redirects.test.js
new file mode 100644
index 0000000000000..dda5f513f1860
--- /dev/null
+++ b/docs/check-redirects/check-redirects.test.js
@@ -0,0 +1,141 @@
+import { Volume, createFsFromVolume } from 'memfs';
+import { RedirectChecker } from './check-redirects.js';
+
+describe('check files for links to missing Teleport docs', () => {
+  const files = {
+    '/blog/content1.mdx': `---
+title: "Sample Page 1"
+---
+
+This is a link to a [documentation page](https://goteleport.com/docs/page1).
+
+This is a link to the [index page](https://goteleport.com/docs/).
+
+This link has a [trailing slash](https://goteleport.com/docs/desktop-access/getting-started/).
+
+This link has a [fragment](https://goteleport.com/docs/page1#introduction).
+
+`,
+    '/blog/content2.mdx': `---
+title: "Sample Page 2"
+---
+
+This is a link to a [documentation page](https://goteleport.com/docs/subdirectory/page2).
+
+Here is a link to a [missing page](https://goteleport.com/docs/page3).
+
+`,
+    '/docs/content/1.x/docs/pages/page1.mdx': `---
+title: "Sample Page 1"
+---
+`,
+    '/docs/content/1.x/docs/pages/subdirectory/page2.mdx': `---
+title: "Sample Page 2"
+---
+`,
+    '/docs/content/1.x/docs/pages/index.mdx': `---
+title: "Index page"
+---
+`,
+    '/docs/content/1.x/docs/pages/desktop-access/getting-started.mdx': `---
+title: "Desktop Access Getting Started"
+---`,
+  };
+
+  test(`throws an error if there is no redirect for a missing docs page`, () => {
+    const vol = Volume.fromJSON(files);
+    const fs = createFsFromVolume(vol);
+    const checker = new RedirectChecker(fs, '/blog', '/docs/content/1.x', []);
+    const results = checker.check();
+    expect(results).toEqual(['https://goteleport.com/docs/page3']);
+  });
+
+  test(`handles URL fragments`, () => {
+    const vol = Volume.fromJSON(files);
+    const fs = createFsFromVolume(vol);
+    const checker = new RedirectChecker(fs, '/blog', '/docs/content/1.x', []);
+    const results = checker.check();
+    expect(results).toEqual(['https://goteleport.com/docs/page3']);
+  });
+
+  test(`handles trailing slashes in URLs`, () => {
+    const vol = Volume.fromJSON(files);
+    const fs = createFsFromVolume(vol);
+    const checker = new RedirectChecker(fs, '/blog', '/docs/content/1.x', []);
+    const results = checker.check();
+    expect(results).toEqual(['https://goteleport.com/docs/page3']);
+  });
+
+  test(`allows missing docs pages if there is a redirect`, () => {
+    const vol = Volume.fromJSON(files);
+    const fs = createFsFromVolume(vol);
+    const checker = new RedirectChecker(fs, '/blog', '/docs/content/1.x', [
+      {
+        source: '/docs/page3/',
+        destination: '/docs/page1',
+        permanent: true,
+      },
+    ]);
+    const results = checker.check();
+    expect(results).toEqual([]);
+  });
+
+  test(`excluding file extensions`, () => {
+    const vol = Volume.fromJSON({
+      '/web/content1.mdx': `---
+title: "Sample Page 1"
+---
+
+This is a link to a [documentation page](https://goteleport.com/docs/page1).
+
+`,
+      '/web/file.story.test.tsx.snap': `
+
+https://goteleport.com/docs/page2
+
+`,
+      '/docs/content/1.x/docs/pages/page1.mdx': `---
+title: "Sample Page 1"
+---
+`,
+    });
+    const fs = createFsFromVolume(vol);
+    const checker = new RedirectChecker(
+      fs,
+      '/web',
+      '/docs/content/1.x',
+      [],
+      ['.story.test.tsx.snap']
+    );
+    const results = checker.check();
+    expect(results).toEqual([]);
+  });
+
+  test(`URL in sentence`, () => {
+    const vol = Volume.fromJSON({
+      '/web/content1.mdx': `---
+title: "Sample Page 1"
+---
+
+ Learn [how Teleport works](https://goteleport.com/docs/page1/) and get started with Teleport today -https://goteleport.com/docs/.
+`,
+      '/docs/content/1.x/docs/pages/page1.mdx': `---
+title: "Sample Page 1"
+---
+`,
+      '/docs/content/1.x/docs/pages/index.mdx': `---
+title: Docs Home"
+---`,
+    });
+    const fs = createFsFromVolume(vol);
+    const checker = new RedirectChecker(
+      fs,
+      '/web',
+      '/docs/content/1.x',
+      [],
+      []
+    );
+    const results = checker.check();
+    expect(results).toEqual([]);
+  });
+});
diff --git a/docs/check-redirects/index.js b/docs/check-redirects/index.js
new file mode 100644
index 0000000000000..960b2a2be4df4
--- /dev/null
+++ b/docs/check-redirects/index.js
@@ -0,0 +1,55 @@
+#!/usr/bin/env node
+const { RedirectChecker } = require('./check-redirects.js');
+const yargs = require('yargs/yargs');
+const { hideBin } = require('yargs/helpers');
+const process = require('node:process');
+const fs = require('node:fs');
+const path = require('node:path');
+
+const args = yargs(hideBin(process.argv))
+  .option('in', {
+    describe: 'root directory path in which to check for necessary redirects.',
+  })
+  .option('config', {
+    describe: 'path to a docs configuration file with a "redirects" key',
+  })
+  .option('docs', {
+    describe: 'path to the root of a gravitational/teleport repo',
+  })
+  .option('exclude', {
+    describe:
+      'comma-separated list of file extensions not to check, e.g., ".md" or ".test.tsx"',
+  })
+  .option('name', {
+    describe:
+      'name of the directory tree we are checking for docs URLs (for display only)',
+  })
+  .demandOption(['in', 'config', 'docs', 'name'])
+  .help()
+  .parse();
+
+const conf = fs.readFileSync(args.config, 'utf8');
+const redirects = JSON.parse(conf).redirects;
+
+// Drop empty entries (e.g., from a trailing comma or --exclude ""). The checker
+// matches exclusions with endsWith, and every file name ends with the empty
+// string, so an empty entry would silently skip every file.
+let exclude;
+if (args.exclude != null) {
+  exclude = String(args.exclude)
+    .split(',')
+    .map(e => e.trim())
+    .filter(e => e.length > 0);
+}
+const checker = new RedirectChecker(fs, args.in, args.docs, redirects, exclude);
+const results = checker.check();
+
+// Report the problematic URLs on stderr and exit with a non-zero status.
+if (results && results.length > 0) {
+  const message =
+    `Found Teleport docs URLs in ${args.name} that do not correspond to a docs
+page or redirect. Either add a redirect for these or edit ${args.name}.
+ - ` + results.join('\n - ');
+  console.error(message);
+  process.exit(1);
+}