Skip to content

Commit

Permalink
✨ Adds consistent redirects handling for cf workers.
Browse files Browse the repository at this point in the history
This means that both request for the html pages at /grapher/[slug] and
requests for /grapher/[slug].config.json will be redirected according
to the _grapherRedirects.json file. This is done as a catch handler
that checks for 404 pages so that the common, happy path does not
have to fetch the redirects file.
  • Loading branch information
danyx23 authored and sophiamersmann committed Sep 2, 2024
1 parent 7b3ec48 commit f080612
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 96 deletions.
33 changes: 22 additions & 11 deletions functions/_common/grapherRenderer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import {
} from "@ourworldindata/utils"
import { svg2png, initialize as initializeSvg2Png } from "svg2png-wasm"
import { TimeLogger } from "./timeLogger"
import { png } from "itty-router"
import { png, StatusError } from "itty-router"

import svg2png_wasm from "../../node_modules/svg2png-wasm/svg2png_wasm_bg.wasm"

Expand Down Expand Up @@ -152,6 +152,8 @@ async function fetchFromR2(
headers,
}
const primaryResponse = await fetch(url.toString(), init)
// The fallback URL here is used so that on staging or dev we can fallback
// to the production bucket if the file is not found in the branch bucket
if (primaryResponse.status === 404 && fallbackUrl) {
return fetch(fallbackUrl.toString(), init)
}
Expand Down Expand Up @@ -215,6 +217,13 @@ export async function fetchGrapherConfig(
): Promise<FetchGrapherConfigResult> {
const fetchResponse = await fetchUnparsedGrapherConfig(slug, env, etag)

if (fetchResponse.status === 404) {
// we throw 404 errors instad of returning a 404 response so that the router
// catch handler can do a lookup in the redirects file and maybe send
// a 302 redirect response
throw new StatusError(404)
}

if (fetchResponse.status !== 200) {
console.log(
"Status code is not 200, returning empty response with status code",
Expand All @@ -241,13 +250,16 @@ async function fetchAndRenderGrapherToSvg(
options: ImageOptions,
searchParams: URLSearchParams,
env: Env
) {
): Promise<string> {
const grapherLogger = new TimeLogger("grapher")

const grapherConfigResponse = await fetchGrapherConfig(slug, env)

if (grapherConfigResponse.status !== 200) {
return null
if (grapherConfigResponse.status === 404) {
// we throw 404 errors instad of returning a 404 response so that the router
// catch handler can do a lookup in the redirects file and maybe send
// a 302 redirect response
throw new StatusError(grapherConfigResponse.status)
}

const bounds = new Bounds(0, 0, options.svgWidth, options.svgHeight)
Expand Down Expand Up @@ -300,10 +312,6 @@ export const fetchAndRenderGrapher = async (
)
console.log("fetched svg")

if (!svg) {
return new Response("Not found", { status: 404 })
}

switch (outType) {
case "png":
return png(await renderSvgToPng(svg, options))
Expand Down Expand Up @@ -341,7 +349,7 @@ export async function getOptionalRedirectForSlug(
slug: string,
baseUrl: URL,
env: Env
) {
): Promise<string | undefined> {
const redirects: Record<string, string> = await env.ASSETS.fetch(
new URL("/grapher/_grapherRedirects.json", baseUrl),
{ cf: { cacheTtl: 2 * 60 } }
Expand All @@ -354,8 +362,11 @@ export async function getOptionalRedirectForSlug(
return redirects[slug]
}

export function createRedirectResponse(redirSlug: string, currentUrl: URL) {
new Response(null, {
export function createRedirectResponse(
redirSlug: string,
currentUrl: URL
): Response {
return new Response(null, {
status: 302,
headers: { Location: `/grapher/${redirSlug}${currentUrl.search}` },
})
Expand Down
149 changes: 64 additions & 85 deletions functions/grapher/[slug].ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,16 @@ import {
createRedirectResponse,
fetchUnparsedGrapherConfig,
} from "../_common/grapherRenderer.js"
import { IRequestStrict, Router, error, cors } from "itty-router"
import { IRequestStrict, Router, StatusError, error } from "itty-router"

const { preflight, corsify } = cors({
allowMethods: "GET",
})
const extensions = {
configJson: ".config.json",
}

const router = Router<IRequestStrict, [URL, Env, string]>({
before: [preflight],
finally: [corsify],
})
const router = Router<IRequestStrict, [URL, Env, string]>()
router
.get(
"/grapher/:slug.config.json",
`/grapher/:slug${extensions.configJson}`,
async ({ params: { slug } }, { searchParams }, env, etag) =>
handleConfigRequest(slug, searchParams, env, etag)
)
Expand All @@ -27,10 +24,16 @@ router
)
.all("*", () => error(404, "Route not defined"))

export const onRequestOptions: PagesFunction = async (_context) => {
return new Response(null, {
status: 204,
headers: {
"Access-Control-Allow-Origin": "*",
},
})
}

export const onRequestGet: PagesFunction = async (context) => {
// Makes it so that if there's an error, we will just deliver the original page before the HTML rewrite.
// Only caveat is that redirects will not be taken into account for some reason; but on the other hand the worker is so simple that it's unlikely to fail.
context.passThroughOnException()
const { request, env } = context
const url = new URL(request.url)

Expand All @@ -41,7 +44,49 @@ export const onRequestGet: PagesFunction = async (context) => {
{ ...env, url },
request.headers.get("if-none-match")
)
.catch((e) => error(500, e))
.catch(async (e) => {
// Here we do a unified after the fact handling of 404s to check
// if we have a redirect in the _grapherRedirects.json file.
// This is done as a catch handler that checks for 404 pages
// so that the common, happy path does not have to fetch the redirects file.
if (e instanceof StatusError && e.status === 404) {
console.log("Handling 404 for ", url.pathname)
const fullslug = url.pathname.split("/").pop() as string

const allExtensions = Object.values(extensions)
.map((ext) => ext.replace(".", "\\.")) // for the regex make sure we match only a single dot, not any character
.join("|")
const regexForKnownExtensions = new RegExp(
`^(?<slug>.*?)(?<extension>${allExtensions})?$`
)

const matchResult = fullslug.match(regexForKnownExtensions)
const slug = matchResult?.groups?.slug ?? fullslug
const extension = matchResult?.groups?.extension ?? ""

console.log("Looking up slug and extension", {
slug,
extension,
})

const redirectSlug = await getOptionalRedirectForSlug(
slug,
url,
{
...env,
url,
} as Env
)
console.log("Redirect slug", redirectSlug)
if (redirectSlug && redirectSlug !== slug) {
return createRedirectResponse(
`${redirectSlug}${extension}`,
url
)
} else return error(404, "Not found")
}
return error(500, e)
})
}

async function handleHtmlPageRequest(
Expand All @@ -50,30 +95,6 @@ async function handleHtmlPageRequest(
env: Env
) {
const url = env.url
// Redirects handling is performed by the worker, and is done by fetching the (baked) _grapherRedirects.json file.
// That file is a mapping from old slug to new slug.

/**
* REDIRECTS HANDLING:
* We want to optimize for the case where the user visits a page using the correct slug, i.e. there's no redirect.
* That's why:
* 1. We first check if the slug is lowercase. If it's not, we convert it to lowercase _and check for any redirects already_, and send a redirect already.
* 2. If the slug is lowercase, we check if we can find the page at the requested slug. If we can find it, we return it already.
* 3. If we can't find it, we _then_ check if there's a redirect for it. If there is, we redirect to the new page.
*/

// All our grapher slugs are lowercase by convention.
// To allow incoming links that may contain uppercase characters to work, we redirect to the lowercase version.
const lowerCaseSlug = slug.toLowerCase()
if (lowerCaseSlug !== slug) {
const redirectSlug = await getOptionalRedirectForSlug(
lowerCaseSlug,
url,
env
)

return createRedirectResponse(redirectSlug ?? lowerCaseSlug, url)
}

// For local testing
// const grapherPageResp = await fetch(
Expand All @@ -84,25 +105,17 @@ async function handleHtmlPageRequest(
const grapherPageResp = await env.ASSETS.fetch(url, { redirect: "manual" })

if (grapherPageResp.status === 404) {
// If the request is a 404, we check if there's a redirect for it.
// If there is, we redirect to the new page.
const redirectSlug = await getOptionalRedirectForSlug(slug, url, env)
if (redirectSlug && redirectSlug !== slug) {
return createRedirectResponse(redirectSlug, url)
} else {
// Otherwise we just return the 404 page.
return grapherPageResp
}
throw new StatusError(404)
}

// A non-200 status code is most likely a redirect (301 or 302) or a 404, all of which we want to pass through as-is.
// A non-200 status code is most likely a redirect (301 or 302), all of which we want to pass through as-is.
// In the case of the redirect, the browser will then request the new URL which will again be handled by this worker.
if (grapherPageResp.status !== 200) return grapherPageResp

const openGraphThumbnailUrl = `/grapher/thumbnail/${lowerCaseSlug}.png?imType=og${
const openGraphThumbnailUrl = `/grapher/thumbnail/${slug}.png?imType=og${
url.search ? "&" + url.search.slice(1) : ""
}`
const twitterThumbnailUrl = `/grapher/thumbnail/${lowerCaseSlug}.png?imType=twitter${
const twitterThumbnailUrl = `/grapher/thumbnail/${slug}.png?imType=twitter${
url.search ? "&" + url.search.slice(1) : ""
}`

Expand Down Expand Up @@ -146,21 +159,6 @@ async function handleConfigRequest(
) {
const shouldCache = searchParams.get("nocache") === null
console.log("Preparing json response for ", slug)
// All our grapher slugs are lowercase by convention.
// To allow incoming links that may contain uppercase characters to work, we redirect to the lowercase version.
const lowerCaseSlug = slug.toLowerCase()
if (lowerCaseSlug !== slug) {
const redirectSlug = await getOptionalRedirectForSlug(
lowerCaseSlug,
env.url,
env
)

return createRedirectResponse(
`${redirectSlug ?? lowerCaseSlug}.config.json`,
env.url
)
}

const grapherPageResp = await fetchUnparsedGrapherConfig(slug, env, etag)

Expand All @@ -169,29 +167,10 @@ async function handleConfigRequest(
return new Response(null, { status: 304 })
}

if (grapherPageResp.status !== 200) {
// If the request is a 404, we check if there's a redirect for it.
// If there is, we redirect to the new page.
const redirectSlug = await getOptionalRedirectForSlug(
slug,
env.url,
env
)
if (redirectSlug && redirectSlug !== slug) {
console.log("Redirecting to ", redirectSlug)
return createRedirectResponse(
`${redirectSlug}.config.json`,
env.url
)
} else {
console.log("Returning 404 for ", slug)
// Otherwise we just return the status code.
return new Response(null, { status: grapherPageResp.status })
}
if (grapherPageResp.status === 404) {
throw new StatusError(404)
}

console.log("Returning 200 for ", slug)

const cacheControl = shouldCache
? "public, s-maxage=3600, max-age=0, must-revalidate"
: "public, s-maxage=0, max-age=0, must-revalidate"
Expand Down

0 comments on commit f080612

Please sign in to comment.