From d470713181d3df5793d4653e471d8f3d2fe6a15b Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Thu, 16 May 2024 19:05:09 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20render=20csv=20and=20metadata.json?= =?UTF-8?q?=20in=20CF=20worker=20for=20grapher=20charts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .vscode/launch.json | 2 +- functions/_common/grapherRenderer.ts | 334 +++++++++++++++++- functions/_common/readmeTools.ts | 297 ++++++++++++++++ functions/grapher/[slug].ts | 36 +- functions/package.json | 1 + .../src/IndicatorKeyData/IndicatorKeyData.tsx | 44 +-- .../core-table/src/CoreTable.ts | 11 +- .../core-table/src/OwidTable.ts | 4 +- .../grapher/src/core/LegacyToOwidTable.ts | 2 + .../grapher/src/modal/DownloadModal.scss | 56 ++- .../grapher/src/modal/DownloadModal.tsx | 184 +++++++++- .../types/src/domainTypes/CoreTableTypes.ts | 1 + packages/@ourworldindata/utils/src/index.ts | 1 + .../utils/src/metadataHelpers.ts | 43 +++ yarn.lock | 83 ++++- 15 files changed, 1026 insertions(+), 73 deletions(-) create mode 100644 functions/_common/readmeTools.ts diff --git a/.vscode/launch.json b/.vscode/launch.json index 4afec7a04eb..48b3df00747 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -140,4 +140,4 @@ "restart": true }, ] -} \ No newline at end of file +} diff --git a/functions/_common/grapherRenderer.ts b/functions/_common/grapherRenderer.ts index 8115fb15177..3db0ba75d63 100644 --- a/functions/_common/grapherRenderer.ts +++ b/functions/_common/grapherRenderer.ts @@ -4,7 +4,16 @@ import { excludeUndefined, GrapherInterface, R2GrapherConfigDirectory, + OwidColumnDef, + getCitationShort, + getAttributionFragmentsFromVariable, + getCitationLong, + getLastUpdatedFromVariable, + OwidTableSlugs, + getNextUpdateFromVariable, } from "@ourworldindata/utils" +import { OwidOrigin } from "@ourworldindata/types" +import { constructReadme } from "./readmeTools" import { svg2png, initialize as initializeSvg2Png 
} from "svg2png-wasm" import { TimeLogger } from "./timeLogger" import { png, StatusError } from "itty-router" @@ -17,6 +26,7 @@ import LatoMedium from "../_common/fonts/LatoLatin-Medium.ttf.bin" import LatoBold from "../_common/fonts/LatoLatin-Bold.ttf.bin" import PlayfairSemiBold from "../_common/fonts/PlayfairDisplayLatin-SemiBold.ttf.bin" import { Env } from "./env.js" +import { fromPairs } from "lodash" declare global { // eslint-disable-next-line no-var @@ -267,16 +277,13 @@ export async function fetchGrapherConfig( etag: fetchResponse.headers.get("etag"), } } - -async function fetchAndRenderGrapherToSvg( - id: GrapherIdentifier, +async function initGrapher( + identifier: GrapherIdentifier, options: ImageOptions, searchParams: URLSearchParams, env: Env -): Promise { - const grapherLogger = new TimeLogger("grapher") - - const grapherConfigResponse = await fetchGrapherConfig(id, env) +): Promise { + const grapherConfigResponse = await fetchGrapherConfig(identifier, env) if (grapherConfigResponse.status === 404) { // we throw 404 errors instad of returning a 404 response so that the router @@ -296,10 +303,319 @@ async function fetchAndRenderGrapherToSvg( }) grapher.shouldIncludeDetailsInStaticExport = options.details - grapherLogger.log("grapherInit") + return grapher +} + +export async function fetchMetadataForGrapher( + identifier: GrapherIdentifier, + env: Env, + searchParams?: URLSearchParams +) { + console.log("Initializing grapher") + const grapher = await initGrapher( + identifier, + TWITTER_OPTIONS, + searchParams ?? 
new URLSearchParams(""), + env + ) + + await grapher.downloadLegacyDataFromOwidVariableIds() + + /* searchParams is optional, so it is read with ?. below; a missing value means "false" */ + + const columnsToIgnore = new Set( + [ + OwidTableSlugs.entityId, + OwidTableSlugs.time, + OwidTableSlugs.entityColor, + OwidTableSlugs.entityName, + OwidTableSlugs.entityCode, + OwidTableSlugs.year, + OwidTableSlugs.day, + ].map((slug) => slug.toString()) + ) + + const columnsToGet = grapher.inputTable.columnSlugs.filter( + (col) => !columnsToIgnore.has(col) + ) + const useShortNames = searchParams?.get("useColumnShortNames") === "true" + console.log("useShortNames", useShortNames) + + const columns: [ + string, + { + title: string + titleProducer: string + titleVariant: string + descriptionShort: string + descriptionFromProducer: string + descriptionKey: string[] + descriptionProcessing: string + shortUnit: string + unit: string + timespan: string + tolerance: number + type: string + conversionFactor: number + owidVariableId: number + catalogPath: string + sources: Partial< + Pick< + OwidOrigin, + | "attribution" + | "attributionShort" + | "description" + | "urlDownload" + | "urlMain" + > + >[] + shortName: string + }, + ][] = grapher.inputTable.getColumns(columnsToGet).map((col) => { + console.log("mapping col", col.name) + const { + descriptionShort, + descriptionFromProducer, + descriptionKey, + descriptionProcessing, + additionalInfo, + shortUnit, + unit, + timespan, + tolerance, + type, + display, + presentation, + origins, + sourceLink, + sourceName, + catalogPath, + owidVariableId, + shortName, + } = col.def as OwidColumnDef + const lastUpdated = getLastUpdatedFromVariable(col.def) + const nextUpdate = getNextUpdateFromVariable(col.def) + + let condensedOrigins: + | Partial< + Pick< + OwidOrigin, + | "attribution" + | "attributionShort" + | "description" + | "urlDownload" + | "urlMain" + > + >[] + | undefined = origins?.map((origin) => { + const { + attribution, + attributionShort, 
description, + citationFull, + urlDownload, + urlMain, + dateAccessed, + } = origin + return { + attribution, + attributionShort, + description, + urlDownload, + urlMain, + dateAccessed, + citationFull, + } + }) + + if (!condensedOrigins || condensedOrigins.length === 0) { + condensedOrigins = [ + { + attribution: sourceName, + urlMain: sourceLink, + }, + ] + } + + const def = col.def as OwidColumnDef + + const citationShort = getCitationShort( + def.origins ?? [], /* origins may be absent; same fallback as getCitationLong below */ + getAttributionFragmentsFromVariable(def), + def.owidProcessingLevel + ) + + const citationLong = getCitationLong( + col.titlePublicOrDisplayName, + def.origins ?? [], + col.source ?? {}, + getAttributionFragmentsFromVariable(def), + def.presentation?.attributionShort, + def.presentation?.titleVariant, + def.owidProcessingLevel, + undefined + ) + + const titleShort = col.titlePublicOrDisplayName.title + const attributionShort = col.titlePublicOrDisplayName.attributionShort + const titleVariant = col.titlePublicOrDisplayName.titleVariant + const attributionString = + attributionShort && titleVariant + ? `${attributionShort} – ${titleVariant}` + : attributionShort || titleVariant + const titleModifier = attributionString ? ` - ${attributionString}` : "" + const titleLong = `${col.titlePublicOrDisplayName.title}${titleModifier}` + + return [ + useShortNames ? 
shortName : col.name, + { + titleShort, + titleLong, + descriptionShort, + descriptionKey, + descriptionProcessing, + shortUnit, + unit, + timespan, + tolerance, + type, + conversionFactor: col.display?.conversionFactor, + owidVariableId, + shortName, + additionalInfo, + lastUpdated, + nextUpdate, + citationShort, + citationLong, + fullMetadata: `https://api.ourworldindata.org/v1/indicators/${owidVariableId}.metadata.json`, + }, + ] + }) + + const fullMetadata = { + chart: { + title: grapher.title, + subtitle: grapher.subtitle, + note: grapher.note, + xAxisLabel: grapher.xAxis.label, + yAxisLabel: grapher.yAxis.label, + citation: grapher.sourcesLine, + originalChartUrl: grapher.canonicalUrl, + selection: grapher.selectedEntityNames, + }, + columns: fromPairs(columns), + } + + return Response.json(fullMetadata) +} + +export async function fetchZipForGrapher( + identifier: GrapherIdentifier, + env: Env, + searchParams?: URLSearchParams +) { + const grapher = await initGrapher( + identifier, + TWITTER_OPTIONS, + searchParams ?? new URLSearchParams(""), + env + ) + await grapher.downloadLegacyDataFromOwidVariableIds() + const defs = grapher.inputTable + .getColumns(grapher.inputTable.columnSlugs) /* look columns up by slug, as the sibling handlers do */ + .map((col) => col.def) + const table = + searchParams?.get("csvType") === "filtered" /* searchParams is optional; absent means the full table */ + ? grapher.transformedTable + : grapher.inputTable + const json = JSON.stringify(defs) + const zip = new JSZip() /* NOTE(review): no JSZip import is visible in this patch's import hunks - confirm it is imported */ + zip.file("metadata.json", json) + zip.file("data.csv", table.toPrettyCsv()) + const content = await zip.generateAsync({ type: "blob" }) + return new Response(content, { + headers: { + "Content-Type": "application/zip", + }, + }) +} + +export async function fetchCsvForGrapher( + identifier: GrapherIdentifier, + env: Env, + searchParams?: URLSearchParams +) { + const grapher = await initGrapher( + identifier, + TWITTER_OPTIONS, + searchParams ?? 
new URLSearchParams(""), + env + ) + await grapher.downloadLegacyDataFromOwidVariableIds() + const useShortNames = searchParams?.get("useColumnShortNames") === "true" /* searchParams is optional; absent means false */ + const table = + searchParams?.get("csvType") === "filtered" + ? grapher.transformedTable + : grapher.inputTable + return new Response(table.toPrettyCsv(useShortNames), { + headers: { + "Content-Type": "text/csv", + }, + }) +} + +export async function fetchReadmeForGrapher( + identifier: GrapherIdentifier, + env: Env, + searchParams?: URLSearchParams +) { + console.log("Initializing grapher") + const grapher = await initGrapher( + identifier, + TWITTER_OPTIONS, + searchParams ?? new URLSearchParams(""), + env + ) + + await grapher.downloadLegacyDataFromOwidVariableIds() + + const columnsToIgnore = new Set( + [ + OwidTableSlugs.entityId, + OwidTableSlugs.time, + OwidTableSlugs.entityColor, + OwidTableSlugs.entityName, + OwidTableSlugs.entityCode, + OwidTableSlugs.year, + OwidTableSlugs.day, + ].map((slug) => slug.toString()) + ) + + const columnsToGet = grapher.inputTable.columnSlugs.filter( + (col) => !columnsToIgnore.has(col) + ) + + const columns = grapher.inputTable.getColumns(columnsToGet) + + const readme = constructReadme(grapher, columns) + return new Response(readme, { + headers: { + "Content-Type": "text/markdown", + }, + }) +} +async function fetchAndRenderGrapherToSvg( + identifier: GrapherIdentifier, + options: ImageOptions, + searchParams: URLSearchParams, + env: Env +) { + const grapherLogger = new TimeLogger("grapher") + const grapher = await initGrapher(identifier, options, searchParams, env) + + grapherLogger.log("initGrapher") const promises = [] promises.push(grapher.downloadLegacyDataFromOwidVariableIds()) - if (options.details && grapher.detailsOrderedByReference.length) { promises.push( await fetch("https://ourworldindata.org/dods.json") diff --git a/functions/_common/readmeTools.ts b/functions/_common/readmeTools.ts new file mode 100644 index 00000000000..dbad7773824 --- 
/dev/null +++ b/functions/_common/readmeTools.ts @@ -0,0 +1,297 @@ +import { + Bounds, + deserializeJSONFromHTML, + excludeUndefined, + formatSourceDate, + getAttributionFragmentsFromVariable, + getLastUpdatedFromVariable, + getNextUpdateFromVariable, + getPhraseForProcessingLevel, + OwidColumnDef, + OwidTableSlugs, + getDateRange, + uniq, + getCitationShort, + getCitationLong, + prepareSourcesForDisplay, + uniqBy, +} from "@ourworldindata/utils" +import { CoreColumn } from "@ourworldindata/core-table" +import { Grapher } from "@ourworldindata/grapher" + +const markdownNewlineEnding = " " + +export function* getCitationLines( + def: OwidColumnDef, + col: CoreColumn +): Generator { + yield "" + yield "### How to cite this data" + yield "" + yield "#### In-line citation" + yield `If you have limited space (e.g. in data visualizations), you can use this abbreviated in-line citation:` + + markdownNewlineEnding + const citationShort = getCitationShort( + def.origins ?? [], + getAttributionFragmentsFromVariable(def), + def.owidProcessingLevel + ) + yield citationShort + + yield "" + + yield "#### Full citation" + const citationLong = getCitationLong( + col.titlePublicOrDisplayName, + def.origins ?? [], + col.source ?? {}, + getAttributionFragmentsFromVariable(def), + def.presentation?.attributionShort, + def.presentation?.titleVariant, + def.owidProcessingLevel, + undefined + ) + yield citationLong +} + +export function* getDataProcessingLines( + def: OwidColumnDef +): Generator { + yield "" + yield "### How we process data at Our World In Data" + yield "" + yield `All data and visualizations on Our World in Data rely on data sourced from one or several original data providers. Preparing this original data involves several processing steps. 
Depending on the data, this can include standardizing country names and world region definitions, converting units, calculating derived indicators such as per capita measures, as well as adding or adapting metadata such as the name or the description given to an indicator.` + yield `At the link below you can find a detailed description of the structure of our data pipeline, including links to all the code used to prepare data across Our World in Data.` + yield `[Read about our data pipeline](https://docs.owid.io/projects/etl/)` + if (def.descriptionProcessing) { + yield "" + yield `#### Notes on our processing step for this indicator` + yield def.descriptionProcessing + } +} + +export function* getDescriptionLines( + def: OwidColumnDef, + attribution: string +): Generator { + const descriptionKey = def.descriptionKey + if (descriptionKey) { + yield "" + yield `### What you should know about this data` + for (const desc of descriptionKey) yield `* ${desc.trim()}` + } + + if (def.descriptionFromProducer) { + yield "" + yield `### How is this data described by its producer - ${attribution}?` + yield def.descriptionFromProducer.trim() + } + + if (def.additionalInfo) { + yield "" + yield `### Additional information about this data` + yield def.additionalInfo.trim() + } +} + +export function* getKeyDataLines( + def: OwidColumnDef, + col: CoreColumn +): Generator { + const lastUpdated = getLastUpdatedFromVariable(def) + if (lastUpdated) + yield `Last updated: ${formatSourceDate(lastUpdated, "MMMM D, YYYY")}` + + markdownNewlineEnding + + const nextUpdate = getNextUpdateFromVariable(def) + if (nextUpdate) + yield `Next update: ${formatSourceDate(nextUpdate, "MMMM YYYY")}` + + markdownNewlineEnding + + const dateRange = def.timespan ? 
getDateRange(def.timespan) : undefined + if (dateRange) yield `Date range: ${dateRange}` + markdownNewlineEnding + + const unit = def.unit + if (unit) yield `Unit: ${unit}` + markdownNewlineEnding + + const unitConversionFactor = + col.unitConversionFactor && col.unitConversionFactor !== 1 + ? col.unitConversionFactor + : undefined + if (unitConversionFactor) + yield `Unit conversion factor: ${unitConversionFactor}` + + markdownNewlineEnding +} + +export function yieldMultilineTextAsLines(line: string): string[] { + return line.split("\n").map((l) => l.trim()) +} + +export function* getSources( + def: OwidColumnDef +): Generator { + const sourcesForDisplay = uniqBy(prepareSourcesForDisplay(def), "label") + + if (sourcesForDisplay.length === 0) return + else if (sourcesForDisplay.length === 1) { + yield "" + yield "### Source" + } else { + yield "" + yield "### Sources" + } + + for (const source of sourcesForDisplay) { + yield "" + yield `#### ${source.label}` + if (source.dataPublishedBy) + yield `Data published by: ${source.dataPublishedBy.trim()}` + + markdownNewlineEnding + if (source.retrievedOn) + yield `Retrieved on: ${source.retrievedOn.trim()}` + + markdownNewlineEnding + if (source.retrievedFrom) + yield `Retrieved from: ${source.retrievedFrom.trim()}` + + markdownNewlineEnding + } +} + +export function getSource(attribution: string, def: OwidColumnDef): string { + const processingLevelPhrase = + attribution.toLowerCase() !== "our world in data" + ? getPhraseForProcessingLevel(def.owidProcessingLevel) + : undefined + const fullProcessingPhrase = processingLevelPhrase + ? ` – ${processingLevelPhrase} by Our World In Data` + : "" + const source = `${attribution}${fullProcessingPhrase}` + return source +} + +export function getAttribution(def: OwidColumnDef): string { + const producers = uniq( + excludeUndefined((def.origins ?? []).map((o) => o.producer)) + ) + + const attributionFragments = + getAttributionFragmentsFromVariable(def) ?? 
producers + const attribution = attributionFragments.join(", ") + return attribution +} + +export function* getDescription( + def: OwidColumnDef +): Generator { + const description = def.descriptionShort || def.description + if (description) yield* yieldMultilineTextAsLines(description) +} + +export function getTitle(col: CoreColumn): string { + let title = col.titlePublicOrDisplayName.title + if ( + col.titlePublicOrDisplayName.attributionShort && + col.titlePublicOrDisplayName.titleVariant + ) + title = `${title} – ${col.titlePublicOrDisplayName.titleVariant} – ${col.titlePublicOrDisplayName.attributionShort}` + else if (col.titlePublicOrDisplayName.titleVariant) + title = `${title} – ${col.titlePublicOrDisplayName.titleVariant}` + else if (col.titlePublicOrDisplayName.attributionShort) + title = `${title} – ${col.titlePublicOrDisplayName.attributionShort}` + return title +} + +function* columnReadmeText(col: CoreColumn) { + const def = col.def as OwidColumnDef + + const title = getTitle(col) + yield "" + yield `## ${title}` + + yield* getDescription(def) + + yield* getKeyDataLines(def, col) + + yield "" + + const attribution = getAttribution(def) + + const source = getSource(attribution, def) + + yield* getCitationLines(def, col) + + yield `Source: ${source}` + + yield* getDescriptionLines(def, attribution) + + yield* getSources(def) + + yield* getDataProcessingLines(def) + yield "" +} + +export function constructReadme( + grapher: Grapher, + columns: CoreColumn[] +): string { + const isSingleColumn = columns.length === 1 + const sources = columns.flatMap((col) => [...columnReadmeText(col)]) + const sourcesAreShort = sources.length < 50 + let readme: string + if (isSingleColumn) + readme = `# ${grapher.title} - Data package + +This data package contains the data that powers the chart ["${grapher.title}"](${grapher.originUrl}) on the Our World in Data website. 
+ +## CSV Structure + +The high level structure of the CSV file is that each row is an observation for an entity (usually a country or region) and a timepoint (usually a year). + +The first two columns in the CSV file are "Entity" and "Code". "Entity" is the name of the entity (e.g. "United States"). "Code" is the OWID internal entity code that we use if the entity is a country or region. For normal countries, this is the same as the [ISO alpha-3](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) code of the entity (e.g. "USA") - for non-standard countries like historical countries these are custom codes. + +The third column is either "Year" or "Day". If the data is annual, this is "Year" and contains only the year as an integer. If the column is "Day", the column contains a date string in the form "YYYY-MM-DD". + +The final column is the data column, which is the time series that powers the chart. If the CSV data is downloaded using the "full data" option, then the column corresponds to the time series below. If the CSV data is downloaded using the "only selected data visible in the chart" option then the data column is transformed depending on the chart type and thus the association with the time series might not be as straightforward. + +## Metadata.json structure + +The .metadata.json file contains metadata about the data package. The "chart" key contains information to recreate the chart, like the title, subtitle etc. The "columns" key contains information about each of the columns in the csv, like the unit, timespan covered, citation for the data etc. + +## About the data + +Our World in Data is almost never the original producer of the data - almost all of the data we use has been compiled by others. If you want to re-use data, it is your responsibility to ensure that you adhere to the sources' license and to credit them correctly. Please note that a single time series may have more than one source - e.g. 
when we stitch together data from different time periods by different producers or when we calculate per capita metrics using population data from a second source. + +## Detailed information about the data + +${sources.join("\n")} + + ` + else + readme = `# ${grapher.title} - Data package + +This data package contains the data that powers the chart ["${grapher.title}"](${grapher.originUrl}) on the Our World in Data website. + +## CSV Structure + +The high level structure of the CSV file is that each row is an observation for an entity (usually a country or region) and a timepoint (usually a year). + +The first two columns in the CSV file are "Entity" and "Code". "Entity" is the name of the entity (e.g. "United States"). "Code" is the OWID internal entity code that we use if the entity is a country or region. For normal countries, this is the same as the [ISO alpha-3](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) code of the entity (e.g. "USA") - for non-standard countries like historical countries these are custom codes. + +The third column is either "Year" or "Day". If the data is annual, this is "Year" and contains only the year as an integer. If the column is "Day", the column contains a date string in the form "YYYY-MM-DD". + +The remaining columns are the data columns, each of which is a time series. If the CSV data is downloaded using the "full data" option, then each column corresponds to one time series below. If the CSV data is downloaded using the "only selected data visible in the chart" option then the data columns are transformed depending on the chart type and thus the association with the time series might not be as straightforward. + +## Metadata.json structure + +The .metadata.json file contains metadata about the data package. The "chart" key contains information to recreate the chart, like the title, subtitle etc. 
The "columns" key contains information about each of the columns in the csv, like the unit, timespan covered, citation for the data etc. + +## About the data + +Our World in Data is almost never the original producer of the data - almost all of the data we use has been compiled by others. If you want to re-use data, it is your responsibility to ensure that you adhere to the sources' license and to credit them correctly. Please note that a single time series may have more than one source - e.g. when we stitch together data from different time periods by different producers or when we calculate per capita metrics using population data from a second source. + +## Detailed information about each time series + +${sources.join("\n")} + + ` + return readme +} diff --git a/functions/grapher/[slug].ts b/functions/grapher/[slug].ts index 4b4f8caba58..d0b167c34aa 100644 --- a/functions/grapher/[slug].ts +++ b/functions/grapher/[slug].ts @@ -4,6 +4,10 @@ import { createRedirectResponse, Etag, fetchUnparsedGrapherConfig, + fetchCsvForGrapher, + fetchMetadataForGrapher, + fetchReadmeForGrapher, + fetchZipForGrapher, } from "../_common/grapherRenderer.js" import { IRequestStrict, Router, StatusError, error } from "itty-router" import { handleThumbnailRequest } from "../_common/reusableHandlers.js" @@ -14,6 +18,10 @@ const extensions = { configJson: ".config.json", png: ".png", svg: ".svg", + csv: ".csv", + metadata: ".metadata.json", + readme: ".readme.md", + zip: ".zip", } const router = Router< @@ -50,6 +58,30 @@ router "svg" ) ) + .get( + `/grapher/:slug${extensions.csv}`, + async ({ params: { slug } }, { searchParams }, env, etag, ctx) => + fetchCsvForGrapher({ type: "slug", id: slug }, env, searchParams) + ) + .get( + `/grapher/:slug${extensions.metadata}`, + async ({ params: { slug } }, { searchParams }, env, etag, ctx) => + fetchMetadataForGrapher( + { type: "slug", id: slug }, + env, + searchParams + ) + ) + .get( + `/grapher/:slug${extensions.readme}`, + async ({ params: 
{ slug } }, { searchParams }, env, etag, ctx) => + fetchReadmeForGrapher({ type: "slug", id: slug }, env, searchParams) + ) + .get( + `/grapher/:slug${extensions.zip}`, + async ({ params: { slug } }, { searchParams }, env, etag, ctx) => + fetchZipForGrapher({ type: "slug", id: slug }, env, searchParams) + ) .get( "/grapher/:slug", async ({ params: { slug } }, { searchParams }, env) => @@ -136,7 +168,9 @@ async function handleHtmlPageRequest( // { redirect: "manual" } // ) - const grapherPageResp = await env.ASSETS.fetch(url, { redirect: "manual" }) + const grapherPageResp = await env.ASSETS.fetch(env.url, { + redirect: "manual", + }) if (grapherPageResp.status === 404) { throw new StatusError(404) diff --git a/functions/package.json b/functions/package.json index ee9979ff2fa..a4f1240e464 100644 --- a/functions/package.json +++ b/functions/package.json @@ -4,6 +4,7 @@ "@ourworldindata/grapher": "workspace:^", "@ourworldindata/utils": "workspace:^", "itty-router": "^5.0.17", + "jszip": "^3.10.1", "stripe": "^14.20.0", "svg2png-wasm": "^1.4.1" }, diff --git a/packages/@ourworldindata/components/src/IndicatorKeyData/IndicatorKeyData.tsx b/packages/@ourworldindata/components/src/IndicatorKeyData/IndicatorKeyData.tsx index eae6ba63347..5853353610f 100644 --- a/packages/@ourworldindata/components/src/IndicatorKeyData/IndicatorKeyData.tsx +++ b/packages/@ourworldindata/components/src/IndicatorKeyData/IndicatorKeyData.tsx @@ -4,6 +4,7 @@ import { getPhraseForProcessingLevel, splitSourceTextIntoFragments, formatSourceDate, + getDateRange, } from "@ourworldindata/utils" import { DATAPAGE_SOURCES_AND_PROCESSING_SECTION_ID } from "../SharedDataPageConstants.js" import { SimpleMarkdownText } from "../SimpleMarkdownText.js" @@ -112,46 +113,3 @@ export const makeLinks = ({ link }: { link?: string }): React.ReactNode => { ) }) } - -const getDateRange = (dateRange: string): string | null => { - // This regex matches: - // Beginning of string - // Ignore whitespace - // a named group 
called start that matches: - // hyphen aka minus - // 1 or more digits - // Ignore whitespace - // hyphen aka minus OR en dash - // Ignore whitespace - // a named group called end that matches: - // hyphen aka minus - // 1 or more digits - // Ignore whitespace - // End of string - const dateRangeRegex = /^\s*(?(-)?\d+)\s*(-|–)\s*(?(-)?\d+)\s*$/ - const match = dateRange.match(dateRangeRegex) - if (match) { - const firstYearString = match.groups?.start - const lastYearString = match.groups?.end - if (!firstYearString || !lastYearString) return null - - const firstYear = parseInt(firstYearString, 10) - const lastYear = parseInt(lastYearString, 10) - let formattedFirstYear - - // if start year is before year 0, add BCE to the end - if (firstYear < 0) formattedFirstYear = `${Math.abs(firstYear)} BCE` - else formattedFirstYear = firstYear - - // if end year is before year 0, add BCE to the end or, if start year is after year 0, add CE to the end - let formattedLastYear - if (lastYear < 0) formattedLastYear = `${Math.abs(lastYear)} BCE` - else if (firstYear < 0) formattedLastYear = `${lastYear} CE` - else formattedLastYear = lastYear - - if (lastYear < 0 || firstYear < 0) - return `${formattedFirstYear} – ${formattedLastYear}` - else return `${formattedFirstYear}–${formattedLastYear}` - } - return null -} diff --git a/packages/@ourworldindata/core-table/src/CoreTable.ts b/packages/@ourworldindata/core-table/src/CoreTable.ts index b015a01ed8e..b1fefb42e71 100644 --- a/packages/@ourworldindata/core-table/src/CoreTable.ts +++ b/packages/@ourworldindata/core-table/src/CoreTable.ts @@ -36,6 +36,7 @@ import { CoreColumnDef, JsTypes, OwidTableSlugs, + OwidColumnDef, } from "@ourworldindata/types" import { AlignedTextTableOptions, @@ -903,11 +904,17 @@ export class CoreTable< return this.toDelimited("\t") } - toCsvWithColumnNames(): string { + toCsvWithColumnNames(useShortNames: boolean = false): string { const delimiter = "," const header = this.columnsAsArray - .map((col) => 
csvEscape(col.name)) + .map((col) => + csvEscape( + useShortNames && (col.def as OwidColumnDef).shortName + ? (col.def as OwidColumnDef).shortName + : col.name + ) + ) .join(delimiter) + "\n" const body = this.rows .map((row) => diff --git a/packages/@ourworldindata/core-table/src/OwidTable.ts b/packages/@ourworldindata/core-table/src/OwidTable.ts index fda142017bc..1d7e2fd2644 100644 --- a/packages/@ourworldindata/core-table/src/OwidTable.ts +++ b/packages/@ourworldindata/core-table/src/OwidTable.ts @@ -607,14 +607,14 @@ export class OwidTable extends CoreTable { } // Give our users a clean CSV of each Grapher. Assumes an Owid Table with entityName. - toPrettyCsv(): string { + toPrettyCsv(useShortNames: boolean = false): string { return this.dropColumns([ OwidTableSlugs.entityId, OwidTableSlugs.time, OwidTableSlugs.entityColor, ]) .sortBy([this.entityNameSlug]) - .toCsvWithColumnNames() + .toCsvWithColumnNames(useShortNames) } @imemo get entityNameColorIndex(): Map { diff --git a/packages/@ourworldindata/grapher/src/core/LegacyToOwidTable.ts b/packages/@ourworldindata/grapher/src/core/LegacyToOwidTable.ts index 72423a3fa9b..50a545572e2 100644 --- a/packages/@ourworldindata/grapher/src/core/LegacyToOwidTable.ts +++ b/packages/@ourworldindata/grapher/src/core/LegacyToOwidTable.ts @@ -613,6 +613,7 @@ const columnDefFromOwidVariable = ( presentation, catalogPath, updatePeriodDays, + shortName, } = variable // Without this the much used var 123 appears as "Countries Continent". We could rename in Grapher but not sure the effects of that. 
@@ -665,6 +666,7 @@ const columnDefFromOwidVariable = ( owidSchemaVersion: variable.schemaVersion, type, sort, + shortName, } } diff --git a/packages/@ourworldindata/grapher/src/modal/DownloadModal.scss b/packages/@ourworldindata/grapher/src/modal/DownloadModal.scss index 9981e76f0b5..5197ae66a36 100644 --- a/packages/@ourworldindata/grapher/src/modal/DownloadModal.scss +++ b/packages/@ourworldindata/grapher/src/modal/DownloadModal.scss @@ -34,6 +34,48 @@ margin-top: 4px; } + .grouped-menu-list + .grouped-menu-list { + margin-top: 8px; + } + .csv-options-list { + display: flex; + flex-direction: column; + gap: 8px; + padding: 7px 0; + button { + width: 100%; + + .option-icon { + display: flex; + flex-wrap: wrap; + width: 34px; + height: 24px; + justify-content: space-between; + margin-right: 8px; + span { + // the round-rects that make up the grid + display: inline-block; + width: 100%; + height: 100%; + border-radius: 2px; + background: $light-stroke; + } + } + + &.active span { + background: #a4b6ca; + } + + &:hover:not(.active) span { + background: $light-fill; + } + + &:active:not(.active) span { + background: $light-text; + } + } + } + .grouped-menu-item { display: flex; flex-direction: row; @@ -51,9 +93,21 @@ background-color: $hover-fill; } - &:active { + &.active { background-color: $active-fill; } + + &.active span { + background: #a4b6ca; + } + + &:hover:not(.active) span { + background: $light-fill; + } + + &:active:not(.active) span { + background: $light-text; + } } .grouped-menu-icon img { diff --git a/packages/@ourworldindata/grapher/src/modal/DownloadModal.tsx b/packages/@ourworldindata/grapher/src/modal/DownloadModal.tsx index 21dd4e3822b..adab4445fd6 100644 --- a/packages/@ourworldindata/grapher/src/modal/DownloadModal.tsx +++ b/packages/@ourworldindata/grapher/src/modal/DownloadModal.tsx @@ -1,14 +1,20 @@ import React from "react" -import { observable, computed, action } from "mobx" -import { observer } from "mobx-react" import { + Url, Bounds, 
DEFAULT_BOUNDS, isEmpty, triggerDownloadFromBlob, triggerDownloadFromUrl, } from "@ourworldindata/utils" -import { Checkbox, OverlayHeader } from "@ourworldindata/components" +import { observable, computed, action } from "mobx" +import { observer } from "mobx-react" +import { + Checkbox, + CodeSnippet, + OverlayHeader, + MarkdownTextWrap, +} from "@ourworldindata/components" import { LoadingIndicator } from "../loadingIndicator/LoadingIndicator" import { FontAwesomeIcon } from "@fortawesome/react-fontawesome/index.js" import { faDownload, faInfoCircle } from "@fortawesome/free-solid-svg-icons" @@ -20,6 +26,7 @@ import { } from "@ourworldindata/core-table" import { Modal } from "./Modal" import { GrapherExport } from "../captionedChart/StaticChartRasterizer.js" +import classnames from "classnames" export interface DownloadModalManager { displaySlug: string @@ -39,12 +46,20 @@ export interface DownloadModalManager { isOnChartOrMapTab?: boolean framePaddingVertical?: number showAdminControls?: boolean + bakedGrapherURL?: string + sourcesLine?: string + isSourcesModalOpen?: boolean } interface DownloadModalProps { manager: DownloadModalManager } +enum CsvFilterMode { + full, + visible, +} + @observer export class DownloadModal extends React.Component { @computed private get frameBounds(): Bounds { @@ -97,6 +112,8 @@ export class DownloadModal extends React.Component { @observable private isReady: boolean = false + @observable private csvFilterMode: CsvFilterMode = CsvFilterMode.full + @action.bound private export(): void { // render the graphic then cache data-urls for display & blobs for downloads this.manager @@ -136,6 +153,15 @@ export class DownloadModal extends React.Component { return this.manager.displaySlug } + @action.bound private onToggleCsvFilterMode(): () => void { + return (): void => { + this.csvFilterMode = + this.csvFilterMode === CsvFilterMode.full + ? 
CsvFilterMode.visible + : CsvFilterMode.full + } + } + @computed private get inputTable(): OwidTable { return this.manager.table ?? BlankOwidTable() } @@ -194,7 +220,7 @@ export class DownloadModal extends React.Component { if (manager.externalCsvLink) { triggerDownloadFromUrl(filename, manager.externalCsvLink) } else { - triggerDownloadFromBlob(filename, this.csvBlob) + triggerDownloadFromUrl(filename, this.csvFileUrl) } } @@ -217,6 +243,49 @@ export class DownloadModal extends React.Component { return this.hasDetails || !!this.manager.showAdminControls } + @computed protected get sourcesLine(): string { + return this.manager.sourcesLine?.replace(/\r\n|\n|\r/g, "") ?? "" + } + + @computed protected get sourcesText(): string { + return `**Data source:** ${this.sourcesLine}` + } + + @computed protected get csvFileUrl(): string { + const baseUrl = `${this.manager.bakedGrapherURL || ""}/${this.manager.displaySlug}.csv` + const searchParams = new URLSearchParams([ + ...Object.entries({ csvType: "filtered" }), + ...Array.from(new URLSearchParams(this.manager.queryStr).entries()), + ]).toString() + return this.csvFilterMode === CsvFilterMode.visible + ? `${baseUrl}?${searchParams}` + : baseUrl + } + private renderSources(): JSX.Element | null { + const sources = new MarkdownTextWrap({ + text: `**Data source:** ${this.sourcesLine}`, + fontSize: 13, + }) + + return ( +

+ {sources.renderHTML()} + {" – "} + { + e.stopPropagation() + + this.manager.isDownloadModalOpen = false + this.manager.isSourcesModalOpen = true + })} + > + Learn more about this data and citations + +

+ ) + } private renderReady(): React.ReactElement { const { manager, @@ -255,6 +324,21 @@ export class DownloadModal extends React.Component { opacity: this.isReady ? 1 : 0, } + const csvUrl = this.csvFileUrl + const metadataUrl = csvUrl.replace(".csv", ".metadata.json") + + const googleDocsCode = `=IMPORTDATA("${csvUrl}")` + + const pandasCode = `import pandas as pd +import requests + +# Fetch the data +df = pd.read_csv("${csvUrl}") + +# Fetch the metadata +metadata = requests.get("${metadataUrl}").json()` + + const rCode = `df <- read.csv("${csvUrl}")` return (
{manager.isOnChartOrMapTab && ( @@ -338,14 +422,92 @@ export class DownloadModal extends React.Component {
) : ( -
- +

Source

+

+ Whenever you use this data in a public context, + please make sure to credit the original source + and to verify that your use is permitted as per + the source's license. +

+

{this.renderSources()}

+

+ Download options +

+ +
+ + +
+

Download

+
+ + +
+

Code examples

+

+ Below are examples of how to load this data into + different data analysis tools. +

+

Excel/Google Sheets

+ -
+

Python with Pandas

+ +

R

+ )} diff --git a/packages/@ourworldindata/types/src/domainTypes/CoreTableTypes.ts b/packages/@ourworldindata/types/src/domainTypes/CoreTableTypes.ts index 2df5e36c638..5fa5ba92b00 100644 --- a/packages/@ourworldindata/types/src/domainTypes/CoreTableTypes.ts +++ b/packages/@ourworldindata/types/src/domainTypes/CoreTableTypes.ts @@ -259,6 +259,7 @@ export interface OwidColumnDef extends CoreColumnDef { catalogPath?: string owidProcessingLevel?: OwidProcessingLevel owidSchemaVersion?: number + shortName?: string } export const OwidEntityNameColumnDef = { diff --git a/packages/@ourworldindata/utils/src/index.ts b/packages/@ourworldindata/utils/src/index.ts index 8daf61ba2ce..0ddcc4bc3bc 100644 --- a/packages/@ourworldindata/utils/src/index.ts +++ b/packages/@ourworldindata/utils/src/index.ts @@ -137,6 +137,7 @@ export { splitSourceTextIntoFragments, prepareSourcesForDisplay, formatSourceDate, + getDateRange, getCitationLong, getCitationShort, grabMetadataForGdocLinkedIndicator, diff --git a/packages/@ourworldindata/utils/src/metadataHelpers.ts b/packages/@ourworldindata/utils/src/metadataHelpers.ts index 946c6a43888..6a99a3d3d2b 100644 --- a/packages/@ourworldindata/utils/src/metadataHelpers.ts +++ b/packages/@ourworldindata/utils/src/metadataHelpers.ts @@ -292,3 +292,46 @@ export function grabMetadataForGdocLinkedIndicator( ), } } + +export const getDateRange = (dateRange: string): string | null => { + // This regex matches: + // Beginning of string + // Ignore whitespace + // a named group called start that matches: + // hyphen aka minus + // 1 or more digits + // Ignore whitespace + // hyphen aka minus OR en dash + // Ignore whitespace + // a named group called end that matches: + // hyphen aka minus + // 1 or more digits + // Ignore whitespace + // End of string + const dateRangeRegex = /^\s*(?<start>(-)?\d+)\s*(-|–)\s*(?<end>(-)?\d+)\s*$/ + const match = dateRange.match(dateRangeRegex) + if (match) { + const firstYearString = match.groups?.start + const lastYearString =
match.groups?.end + if (!firstYearString || !lastYearString) return null + + const firstYear = parseInt(firstYearString, 10) + const lastYear = parseInt(lastYearString, 10) + let formattedFirstYear + + // if start year is before year 0, add BCE to the end + if (firstYear < 0) formattedFirstYear = `${Math.abs(firstYear)} BCE` + else formattedFirstYear = firstYear + + // if end year is before year 0, add BCE to the end or, if start year is after year 0, add CE to the end + let formattedLastYear + if (lastYear < 0) formattedLastYear = `${Math.abs(lastYear)} BCE` + else if (firstYear < 0) formattedLastYear = `${lastYear} CE` + else formattedLastYear = lastYear + + if (lastYear < 0 || firstYear < 0) + return `${formattedFirstYear} – ${formattedLastYear}` + else return `${formattedFirstYear}–${formattedLastYear}` + } + return null +} diff --git a/yarn.lock b/yarn.lock index 51f73fce493..3976301f94d 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2923,7 +2923,7 @@ __metadata: languageName: node linkType: hard -"@npmcli/package-json@npm:5.2.0, @npmcli/package-json@npm:^5.0.0, @npmcli/package-json@npm:^5.1.0": +"@npmcli/package-json@npm:5.2.0, @npmcli/package-json@npm:^5.1.0": version: 5.2.0 resolution: "@npmcli/package-json@npm:5.2.0" dependencies: @@ -2938,6 +2938,21 @@ __metadata: languageName: node linkType: hard +"@npmcli/package-json@npm:^5.0.0": + version: 5.1.0 + resolution: "@npmcli/package-json@npm:5.1.0" + dependencies: + "@npmcli/git": "npm:^5.0.0" + glob: "npm:^10.2.2" + hosted-git-info: "npm:^7.0.0" + json-parse-even-better-errors: "npm:^3.0.0" + normalize-package-data: "npm:^6.0.0" + proc-log: "npm:^4.0.0" + semver: "npm:^7.5.3" + checksum: 10/0e5cb5eff32cf80234525160a702c91a38e4b98ab74e34e2632b43c4350dbad170bd835989cc7d6e18d24798e3242e45b60f3d5e26bd128fe1c4529931105f8e + languageName: node + linkType: hard + "@npmcli/promise-spawn@npm:^7.0.0": version: 7.0.2 resolution: "@npmcli/promise-spawn@npm:7.0.2" @@ -8551,7 +8566,7 @@ __metadata: languageName: node 
linkType: hard -"dedent@npm:1.5.3, dedent@npm:^1.0.0": +"dedent@npm:1.5.3": version: 1.5.3 resolution: "dedent@npm:1.5.3" peerDependencies: @@ -8563,6 +8578,18 @@ __metadata: languageName: node linkType: hard +"dedent@npm:^1.0.0": + version: 1.5.1 + resolution: "dedent@npm:1.5.1" + peerDependencies: + babel-plugin-macros: ^3.1.0 + peerDependenciesMeta: + babel-plugin-macros: + optional: true + checksum: 10/fc00a8bc3dfb7c413a778dc40ee8151b6c6ff35159d641f36ecd839c1df5c6e0ec5f4992e658c82624a1a62aaecaffc23b9c965ceb0bbf4d698bfc16469ac27d + languageName: node + linkType: hard + "deep-extend@npm:^0.6.0": version: 0.6.0 resolution: "deep-extend@npm:0.6.0" @@ -8836,7 +8863,14 @@ __metadata: languageName: node linkType: hard -"dotenv@npm:^16.0.1, dotenv@npm:^16.0.3, dotenv@npm:^16.4.4, dotenv@npm:~16.4.5": +"dotenv@npm:^16.0.1, dotenv@npm:^16.0.3": + version: 16.3.1 + resolution: "dotenv@npm:16.3.1" + checksum: 10/dbb778237ef8750e9e3cd1473d3c8eaa9cc3600e33a75c0e36415d0fa0848197f56c3800f77924c70e7828f0b03896818cd52f785b07b9ad4d88dba73fbba83f + languageName: node + linkType: hard + +"dotenv@npm:^16.4.4, dotenv@npm:~16.4.5": version: 16.4.5 resolution: "dotenv@npm:16.4.5" checksum: 10/55a3134601115194ae0f924e54473459ed0d9fc340ae610b676e248cca45aa7c680d86365318ea964e6da4e2ea80c4514c1adab5adb43d6867fb57ff068f95c8 @@ -11586,6 +11620,13 @@ __metadata: languageName: node linkType: hard +"immediate@npm:~3.0.5": + version: 3.0.6 + resolution: "immediate@npm:3.0.6" + checksum: 10/f9b3486477555997657f70318cc8d3416159f208bec4cca3ff3442fd266bc23f50f0c9bd8547e1371a6b5e82b821ec9a7044a4f7b944798b25aa3cc6d5e63e62 + languageName: node + linkType: hard + "immutable@npm:^4.0.0, immutable@npm:^4.3.6": version: 4.3.6 resolution: "immutable@npm:4.3.6" @@ -13196,6 +13237,18 @@ __metadata: languageName: node linkType: hard +"jszip@npm:^3.10.1": + version: 3.10.1 + resolution: "jszip@npm:3.10.1" + dependencies: + lie: "npm:~3.3.0" + pako: "npm:~1.0.2" + readable-stream: "npm:~2.3.6" + setimmediate: 
"npm:^1.0.5" + checksum: 10/bfbfbb9b0a27121330ac46ab9cdb3b4812433faa9ba4a54742c87ca441e31a6194ff70ae12acefa5fe25406c432290e68003900541d948a169b23d30c34dd984 + languageName: node + linkType: hard + "just-diff-apply@npm:^5.2.0": version: 5.5.0 resolution: "just-diff-apply@npm:5.5.0" @@ -13461,6 +13514,15 @@ __metadata: languageName: node linkType: hard +"lie@npm:~3.3.0": + version: 3.3.0 + resolution: "lie@npm:3.3.0" + dependencies: + immediate: "npm:~3.0.5" + checksum: 10/f335ce67fe221af496185d7ce39c8321304adb701e122942c495f4f72dcee8803f9315ee572f5f8e8b08b9e8d7195da91b9fad776e8864746ba8b5e910adf76e + languageName: node + linkType: hard + "lilconfig@npm:3.0.0": version: 3.0.0 resolution: "lilconfig@npm:3.0.0" @@ -15282,6 +15344,7 @@ __metadata: "@ourworldindata/grapher": "workspace:^" "@ourworldindata/utils": "workspace:^" itty-router: "npm:^5.0.17" + jszip: "npm:^3.10.1" stripe: "npm:^14.20.0" svg2png-wasm: "npm:^1.4.1" languageName: unknown @@ -15457,6 +15520,13 @@ __metadata: languageName: node linkType: hard +"pako@npm:~1.0.2": + version: 1.0.11 + resolution: "pako@npm:1.0.11" + checksum: 10/1ad07210e894472685564c4d39a08717e84c2a68a70d3c1d9e657d32394ef1670e22972a433cbfe48976cb98b154ba06855dcd3fcfba77f60f1777634bec48c0 + languageName: node + linkType: hard + "papaparse@npm:^5.3.1": version: 5.3.1 resolution: "papaparse@npm:5.3.1" @@ -18072,6 +18142,13 @@ __metadata: languageName: node linkType: hard +"setimmediate@npm:^1.0.5": + version: 1.0.5 + resolution: "setimmediate@npm:1.0.5" + checksum: 10/76e3f5d7f4b581b6100ff819761f04a984fa3f3990e72a6554b57188ded53efce2d3d6c0932c10f810b7c59414f85e2ab3c11521877d1dea1ce0b56dc906f485 + languageName: node + linkType: hard + "setprototypeof@npm:1.2.0": version: 1.2.0 resolution: "setprototypeof@npm:1.2.0"