From 6809fd091848c36181644c6bd3fae2c92248852d Mon Sep 17 00:00:00 2001 From: Daniel Bachler Date: Tue, 30 Jul 2024 09:54:35 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9D=20improve=20rendering=20of=20readm?= =?UTF-8?q?e=20and=20metadata=20completeness?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- functions/_common/grapherRenderer.ts | 31 ++++++++++++++++++ functions/_common/readmeTools.ts | 47 +++++++++++++++++++++++----- 2 files changed, 70 insertions(+), 8 deletions(-) diff --git a/functions/_common/grapherRenderer.ts b/functions/_common/grapherRenderer.ts index 26d985ace7c..58605d60a61 100644 --- a/functions/_common/grapherRenderer.ts +++ b/functions/_common/grapherRenderer.ts @@ -282,6 +282,7 @@ export async function fetchMetadataForGrapher( descriptionFromProducer, descriptionKey, descriptionProcessing, + additionalInfo, shortUnit, unit, timespan, @@ -296,6 +297,8 @@ export async function fetchMetadataForGrapher( owidVariableId, shortName, } = col.def as OwidColumnDef + const lastUpdated = getLastUpdatedFromVariable(col.def) + const nextUpdate = getNextUpdateFromVariable(col.def) let consensedOrigins: | Partial< @@ -313,8 +316,10 @@ export async function fetchMetadataForGrapher( attribution, attributionShort, description, + citationFull, urlDownload, urlMain, + dateAccessed, } = origin return { attribution, @@ -322,6 +327,8 @@ export async function fetchMetadataForGrapher( description, urlDownload, urlMain, + dateAccessed, + citationFull, } }) @@ -334,6 +341,25 @@ export async function fetchMetadataForGrapher( ] } + const def = col.def as OwidColumnDef + + const citationShort = getCitationShort( + def.origins, + getAttributionFragmentsFromVariable(def), + def.owidProcessingLevel + ) + + const citationLong = getCitationLong( + col.titlePublicOrDisplayName, + def.origins ?? [], + col.source ?? 
{}, + getAttributionFragmentsFromVariable(def), + def.presentation?.attributionShort, + def.presentation?.titleVariant, + def.owidProcessingLevel, + undefined + ) + return [ useShortNames ? shortName : col.name, { @@ -354,6 +380,11 @@ export async function fetchMetadataForGrapher( catalogPath, sources: consensedOrigins, shortName, + additionalInfo, + lastUpdated, + nextUpdate, + citationShort, + citationLong, }, ] }) diff --git a/functions/_common/readmeTools.ts b/functions/_common/readmeTools.ts index 95b081df225..d56200fa6b5 100644 --- a/functions/_common/readmeTools.ts +++ b/functions/_common/readmeTools.ts @@ -25,7 +25,9 @@ export function* getCitationLines( def: OwidColumnDef, col: CoreColumn ): Generator { + yield "" yield "### How to cite this data" + yield "" yield "#### In-line citation" yield `If you have limited space (e.g. in data visualizations), you can use this abbreviated in-line citation:` + markdownNewlineEnding @@ -36,6 +38,8 @@ export function* getCitationLines( ) yield citationShort + yield "" + yield "#### Full citation" const citationLong = getCitationLong( col.titlePublicOrDisplayName, @@ -53,13 +57,17 @@ export function* getCitationLines( export function* getDataProcessingLines( def: OwidColumnDef ): Generator { + yield "" yield "### How we process data at Our World In Data" + yield "" yield `All data and visualizations on Our World in Data rely on data sourced from one or several original data providers. Preparing this original data involves several processing steps. 
Depending on the data, this can include standardizing country names and world region definitions, converting units, calculating derived indicators such as per capita measures, as well as adding or adapting metadata such as the name or the description given to an indicator.` yield `At the link below you can find a detailed description of the structure of our data pipeline, including links to all the code used to prepare data across Our World in Data.` yield `[Read about our data pipeline](https://docs.owid.io/projects/etl/)` - if (def.descriptionProcessing) - yield `#### Notes on our processing step for this indicator -${def.descriptionProcessing}` + if (def.descriptionProcessing) { + yield "" + yield `#### Notes on our processing step for this indicator` + yield def.descriptionProcessing + } } export function* getDescriptionLines( @@ -67,16 +75,20 @@ export function* getDescriptionLines( attribution: string ): Generator { const descriptionKey = def.descriptionKey - if (descriptionKey) - yield `### What you should know about this data -${descriptionKey.map((desc) => `* ${desc.trim()}`).join("\n")}` + if (descriptionKey) { + yield "" + yield `### What you should know about this data` + for (const desc of descriptionKey) yield `* ${desc.trim()}` + } if (def.descriptionFromProducer) { + yield "" yield `### How is this data described by its producer - ${attribution}?` yield def.descriptionFromProducer.trim() } if (def.additionalInfo) { + yield "" yield `### Additional information about this data` yield def.additionalInfo.trim() } @@ -121,10 +133,16 @@ export function* getSources( const sourcesForDisplay = uniqBy(prepareSourcesForDisplay(def), "label") if (sourcesForDisplay.length === 0) return - else if (sourcesForDisplay.length === 1) yield "### Source" - else yield "### Sources" + else if (sourcesForDisplay.length === 1) { + yield "" + yield "### Source" + } else { + yield "" + yield "### Sources" + } for (const source of sourcesForDisplay) { + yield "" yield `#### 
${source.label}` if (source.dataPublishedBy) yield `Data published by: ${source.dataPublishedBy.trim()}` + @@ -136,18 +154,22 @@ export function* getSources( yield `Retrieved from: ${source.retrievedFrom.trim()}` + markdownNewlineEnding if (source.description) { + yield "" yield "##### Description of the dataset" yield* yieldMultilineTextAsLines(source.description).map( (l) => `> ${l}` ) + yield "" } if (source.citation) { + yield "" yield "##### Citation" yield "This is the citation of the original data obtained from the source, prior to any processing or adaptation by Our World in Data. We would usually prefer to use the Our World In Data citation format given at the bottom." + markdownNewlineEnding yield* yieldMultilineTextAsLines(source.citation).map( (l) => `> ${l}` ) + yield "" } } } @@ -200,6 +222,7 @@ function* columnReadmeText(col: CoreColumn) { const def = col.def as OwidColumnDef const title = getTitle(col) + yield "" yield `## ${title}` yield* getDescription(def) @@ -236,12 +259,20 @@ export function constructReadme( This data package contains the data that powers the chart ["${grapher.title}"](${grapher.originUrl}) on the Our World in Data website. +## CSV Structure + The high level structure of the CSV file is that each row is an observation for an entity (usually a country or region) and a timepoint (usually a year). The first two columns in the CSV file are "Entity" and "Code". "Entity" is the name of the entity (e.g. "United States"). "Code" is the OWID internal entity code that we use if the entity is a country or region. For normal countries, this is the same as the [iso alpha-3](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) code of the entity (e.g. "USA") - for non-standard countries like historical countries these are custom codes. The third column is either "Year" or "Day". If the data is annual, this is "Year" and contains only the year as an integer. If the column is "Day", the column contains a date string in the form "YYYY-MM-DD". 
+## Metadata.json structure + +The .metadata.json file contains metadata about the data package. The "charts" key contains information to recreate the chart, like the title, subtitle etc. The "columns" key contains information about each of the columns in the csv, like the unit, timespan covered, citation for the data etc. + +## About the data + Our World in Data is almost never the original producer of the data - almost all of the data we use has been compiled by others. If you want to re-use data, make sure to check the licenses of the original producers. Follow the information below to do this. Please also note that a single time series may have more than one source - e.g. when we stich together data from different time periods by different producers or when we calculate per capita metrics (which adds a population data source). ## Detailed information about the data