diff --git a/dag/health.yml b/dag/health.yml index a49708bbdce..bf26bd592d1 100644 --- a/dag/health.yml +++ b/dag/health.yml @@ -922,3 +922,14 @@ steps: - data://meadow/who/2025-01-09/vaccination_schedules data://grapher/who/2025-01-09/vaccination_schedules: - data://garden/who/2025-01-09/vaccination_schedules + + # + # WHO Immunization data - reported incidence of vaccine-preventable diseases + # + data://meadow/who/2025-01-14/vaccine_preventable_incidence: + - snapshot://who/2025-01-14/vaccine_preventable_incidence.xlsx + data://garden/who/2025-01-14/vaccine_preventable_incidence: + - data://meadow/who/2025-01-14/vaccine_preventable_incidence + - data://garden/who/2025-01-09/vaccination_schedules + data://grapher/who/2025-01-14/vaccine_preventable_incidence: + - data://garden/who/2025-01-14/vaccine_preventable_incidence diff --git a/etl/steps/data/garden/who/2025-01-14/vaccine_preventable_incidence.countries.json b/etl/steps/data/garden/who/2025-01-14/vaccine_preventable_incidence.countries.json new file mode 100644 index 00000000000..16a5d68f5d6 --- /dev/null +++ b/etl/steps/data/garden/who/2025-01-14/vaccine_preventable_incidence.countries.json @@ -0,0 +1,223 @@ +{ + "Afghanistan": "Afghanistan", + "Albania": "Albania", + "Algeria": "Algeria", + "American Samoa": "American Samoa", + "Andorra": "Andorra", + "Angola": "Angola", + "Anguilla": "Anguilla", + "Antigua and Barbuda": "Antigua and Barbuda", + "Argentina": "Argentina", + "Armenia": "Armenia", + "Aruba": "Aruba", + "Australia": "Australia", + "Austria": "Austria", + "Azerbaijan": "Azerbaijan", + "Bahamas": "Bahamas", + "Bahrain": "Bahrain", + "Bangladesh": "Bangladesh", + "Barbados": "Barbados", + "Belarus": "Belarus", + "Belgium": "Belgium", + "Belize": "Belize", + "Benin": "Benin", + "Bermuda": "Bermuda", + "Bhutan": "Bhutan", + "Bolivia (Plurinational State of)": "Bolivia", + "Bosnia and Herzegovina": "Bosnia and Herzegovina", + "Botswana": "Botswana", + "Brazil": "Brazil", + "British Virgin Islands": "British Virgin Islands", 
+ "Brunei Darussalam": "Brunei", + "Bulgaria": "Bulgaria", + "Burkina Faso": "Burkina Faso", + "Burundi": "Burundi", + "Cabo Verde": "Cape Verde", + "Cambodia": "Cambodia", + "Cameroon": "Cameroon", + "Canada": "Canada", + "Cayman Islands": "Cayman Islands", + "Central African Republic": "Central African Republic", + "Chad": "Chad", + "Chile": "Chile", + "China": "China", + "China, Hong Kong SAR": "Hong Kong", + "China, Macao SAR": "Macao", + "Colombia": "Colombia", + "Comoros": "Comoros", + "Congo": "Congo", + "Cook Islands": "Cook Islands", + "Costa Rica": "Costa Rica", + "Croatia": "Croatia", + "Cuba": "Cuba", + "Cura\u00e7ao": "Curacao", + "Cyprus": "Cyprus", + "Czechia": "Czechia", + "C\u00f4te d'Ivoire": "Cote d'Ivoire", + "Democratic People's Republic of Korea": "North Korea", + "Democratic Republic of the Congo": "Democratic Republic of Congo", + "Denmark": "Denmark", + "Djibouti": "Djibouti", + "Dominica": "Dominica", + "Dominican Republic": "Dominican Republic", + "Ecuador": "Ecuador", + "Egypt": "Egypt", + "El Salvador": "El Salvador", + "Equatorial Guinea": "Equatorial Guinea", + "Eritrea": "Eritrea", + "Estonia": "Estonia", + "Eswatini": "Eswatini", + "Ethiopia": "Ethiopia", + "Fiji": "Fiji", + "Finland": "Finland", + "France": "France", + "French Polynesia": "French Polynesia", + "Gabon": "Gabon", + "Gambia": "Gambia", + "Georgia": "Georgia", + "Germany": "Germany", + "Ghana": "Ghana", + "Global": "World", + "Greece": "Greece", + "Grenada": "Grenada", + "Guam": "Guam", + "Guatemala": "Guatemala", + "Guinea": "Guinea", + "Guinea-Bissau": "Guinea-Bissau", + "Guyana": "Guyana", + "Haiti": "Haiti", + "Honduras": "Honduras", + "Hungary": "Hungary", + "Iceland": "Iceland", + "India": "India", + "Indonesia": "Indonesia", + "Iran (Islamic Republic of)": "Iran", + "Iraq": "Iraq", + "Ireland": "Ireland", + "Israel": "Israel", + "Italy": "Italy", + "Jamaica": "Jamaica", + "Japan": "Japan", + "Jordan": "Jordan", + "Kazakhstan": "Kazakhstan", + "Kenya": "Kenya", + 
"Kiribati": "Kiribati", + "Kuwait": "Kuwait", + "Kyrgyzstan": "Kyrgyzstan", + "Lao People's Democratic Republic": "Laos", + "Latvia": "Latvia", + "Lebanon": "Lebanon", + "Lesotho": "Lesotho", + "Liberia": "Liberia", + "Libya": "Libya", + "Lithuania": "Lithuania", + "Luxembourg": "Luxembourg", + "Madagascar": "Madagascar", + "Malawi": "Malawi", + "Malaysia": "Malaysia", + "Maldives": "Maldives", + "Mali": "Mali", + "Malta": "Malta", + "Marshall Islands": "Marshall Islands", + "Mauritania": "Mauritania", + "Mauritius": "Mauritius", + "Mexico": "Mexico", + "Micronesia (Federated States of)": "Micronesia (country)", + "Monaco": "Monaco", + "Mongolia": "Mongolia", + "Montenegro": "Montenegro", + "Montserrat": "Montserrat", + "Morocco": "Morocco", + "Mozambique": "Mozambique", + "Myanmar": "Myanmar", + "Namibia": "Namibia", + "Nauru": "Nauru", + "Nepal": "Nepal", + "New Caledonia": "New Caledonia", + "New Zealand": "New Zealand", + "Nicaragua": "Nicaragua", + "Niger": "Niger", + "Nigeria": "Nigeria", + "Niue": "Niue", + "North Macedonia": "North Macedonia", + "Northern Mariana Islands": "Northern Mariana Islands", + "Norway": "Norway", + "Oman": "Oman", + "Pakistan": "Pakistan", + "Palau": "Palau", + "Panama": "Panama", + "Papua New Guinea": "Papua New Guinea", + "Paraguay": "Paraguay", + "Peru": "Peru", + "Philippines": "Philippines", + "Poland": "Poland", + "Portugal": "Portugal", + "Qatar": "Qatar", + "Republic of Korea": "South Korea", + "Republic of Moldova": "Moldova", + "Romania": "Romania", + "Russian Federation": "Russia", + "Rwanda": "Rwanda", + "Saint Kitts and Nevis": "Saint Kitts and Nevis", + "Saint Lucia": "Saint Lucia", + "Saint Vincent and the Grenadines": "Saint Vincent and the Grenadines", + "Samoa": "Samoa", + "San Marino": "San Marino", + "Sao Tome and Principe": "Sao Tome and Principe", + "Saudi Arabia": "Saudi Arabia", + "Senegal": "Senegal", + "Serbia": "Serbia", + "Seychelles": "Seychelles", + "Sierra Leone": "Sierra Leone", + "Singapore": 
"Singapore", + "Sint Maarten (Dutch part)": "Sint Maarten (Dutch part)", + "Slovakia": "Slovakia", + "Slovenia": "Slovenia", + "Solomon Islands": "Solomon Islands", + "Somalia": "Somalia", + "South Africa": "South Africa", + "South Sudan": "South Sudan", + "Spain": "Spain", + "Sri Lanka": "Sri Lanka", + "Sudan": "Sudan", + "Suriname": "Suriname", + "Sweden": "Sweden", + "Switzerland": "Switzerland", + "Syrian Arab Republic": "Syria", + "Tajikistan": "Tajikistan", + "Thailand": "Thailand", + "Timor-Leste": "East Timor", + "Togo": "Togo", + "Tokelau": "Tokelau", + "Tonga": "Tonga", + "Trinidad and Tobago": "Trinidad and Tobago", + "Tunisia": "Tunisia", + "Turkmenistan": "Turkmenistan", + "Turks and Caicos Islands": "Turks and Caicos Islands", + "Tuvalu": "Tuvalu", + "Uganda": "Uganda", + "Ukraine": "Ukraine", + "United Arab Emirates": "United Arab Emirates", + "United Kingdom of Great Britain and Northern Ireland": "United Kingdom", + "United Republic of Tanzania": "Tanzania", + "United States of America": "United States", + "Uruguay": "Uruguay", + "Uzbekistan": "Uzbekistan", + "Vanuatu": "Vanuatu", + "Venezuela (Bolivarian Republic of)": "Venezuela", + "Viet Nam": "Vietnam", + "Wallis and Futuna": "Wallis and Futuna", + "Yemen": "Yemen", + "Zambia": "Zambia", + "Zimbabwe": "Zimbabwe", + "African Region": "African Region (WHO)", + "Eastern Mediterranean Region": "Eastern Mediterranean Region (WHO)", + "European Region": "European Region (WHO)", + "Kosovo (in accordance with UN Security Council resolution 1244 (1999))": "Kosovo", + "Netherlands (Kingdom of the)": "Netherlands", + "Region of the Americas": "Region of the Americas (WHO)", + "South-East Asia Region": "South-East Asia Region (WHO)", + "T\u00fcrkiye": "Turkey", + "Western Pacific Region": "Western Pacific Region (WHO)", + "occupied Palestinian territory, including east Jerusalem": "Palestine" +} \ No newline at end of file diff --git 
a/etl/steps/data/garden/who/2025-01-14/vaccine_preventable_incidence.excluded_countries.json b/etl/steps/data/garden/who/2025-01-14/vaccine_preventable_incidence.excluded_countries.json
new file mode 100644
index 00000000000..0d4f101c7a3
--- /dev/null
+++ b/etl/steps/data/garden/who/2025-01-14/vaccine_preventable_incidence.excluded_countries.json
@@ -0,0 +1,2 @@
+[
+]
diff --git a/etl/steps/data/garden/who/2025-01-14/vaccine_preventable_incidence.meta.yml b/etl/steps/data/garden/who/2025-01-14/vaccine_preventable_incidence.meta.yml
new file mode 100644
index 00000000000..788b8cccb6b
--- /dev/null
+++ b/etl/steps/data/garden/who/2025-01-14/vaccine_preventable_incidence.meta.yml
@@ -0,0 +1,31 @@
+# NOTE: To learn more about the fields, hover over their names.
+definitions:
+  common:
+    presentation:
+      topic_tags:
+        - Vaccination
+
+# Learn more about the available fields:
+# http://docs.owid.io/projects/etl/architecture/metadata/reference/
+dataset:
+  update_period_days: 365
+
+tables:
+  vaccine_preventable_incidence:
+    variables:
+      incidence_rate:
+        title: Incidence rate of << disease_description.lower() >> << denominator >>
+        presentation:
+          title_public: Incidence rate of << disease_description.lower() >> << denominator >>
+        unit: << denominator >>
+        display:
+          name: << disease_description >>
+  years_since_vaccine_introduction:
+    variables:
+      incidence_rate:
+        title: Incidence rate of << disease_description.lower() >> << denominator >> since vaccine introduction
+        presentation:
+          title_public: Incidence rate of << disease_description.lower() >> << denominator >> since vaccine introduction
+        unit: << denominator >>
+        display:
+          name: << disease_description >>
diff --git a/etl/steps/data/garden/who/2025-01-14/vaccine_preventable_incidence.py b/etl/steps/data/garden/who/2025-01-14/vaccine_preventable_incidence.py
new file mode 100644
index 00000000000..0e50b94bbb9
--- /dev/null
+++ b/etl/steps/data/garden/who/2025-01-14/vaccine_preventable_incidence.py
@@ -0,0 +1,115 @@
+"""Load a meadow dataset and create a garden dataset."""
+
+from owid.catalog import Table
+
+from etl.data_helpers import geo
+from etl.helpers import PathFinder, create_dataset
+
+# Get paths and naming conventions for current step.
+paths = PathFinder(__file__)
+
+
+def run(dest_dir: str) -> None:
+    #
+    # Load inputs.
+    #
+    # Load meadow dataset and the vaccination schedules dataset it is combined with.
+    ds_meadow = paths.load_dataset("vaccine_preventable_incidence")
+    ds_intro = paths.load_dataset("vaccination_schedules")
+
+    # Read the main table from each dataset.
+    tb = ds_meadow.read("vaccine_preventable_incidence")
+    tb_intro = ds_intro.read("vaccination_schedules")
+
+    #
+    # Process data.
+    #
+    tb = geo.harmonize_countries(
+        df=tb, countries_file=paths.country_mapping_path, excluded_countries_file=paths.excluded_countries_path
+    )
+    # Build the second table first: it still needs the columns that are dropped from `tb` below.
+    tb_intro = calculate_years_from_vaccine_introduction(tb, tb_intro)
+    tb = tb.drop(columns=["group", "code", "disease"])
+    tb = tb.format(["country", "year", "disease_description", "denominator"])
+    tb_intro = tb_intro.format(
+        ["country", "year", "disease_description", "denominator"], short_name="years_since_vaccine_introduction"
+    )
+
+    #
+    # Save outputs.
+    #
+    # Create a new garden dataset with the same metadata as the meadow dataset.
+    ds_garden = create_dataset(
+        dest_dir, tables=[tb, tb_intro], check_variables_metadata=True, default_metadata=ds_meadow.metadata
+    )
+
+    # Save changes in the new garden dataset.
+    ds_garden.save()
+
+
+def find_first_year(tb_intro: Table) -> Table:
+    """Find the first year each vaccine was introduced in each country.
+
+    Only rows whose `intro` value is in the accepted list below are considered.
+
+    Args:
+        tb_intro: Vaccination schedules table with `country`, `description`, `year` and `intro` columns.
+
+    Returns:
+        Table with one row per country and vaccine `description`, and the earliest year in `first_year`.
+    """
+    # `intro` values that are treated as the vaccine having been introduced.
+    introduced_statuses = [
+        "Entire country",
+        "Specific risk groups",
+        "Regions of the country",
+        "High risk areas",
+        "Adolescents",
+        # "Not routinely administered",
+        # "During outbreaks",
+        # "Demonstration projects",
+    ]
+    filtered_tb = tb_intro[tb_intro["intro"].isin(introduced_statuses)]
+    first_year_tb = filtered_tb.groupby(["country", "description"])["year"].min().reset_index()
+    first_year_tb = first_year_tb.rename(columns={"year": "first_year"})
+
+    return first_year_tb
+
+
+def calculate_years_from_vaccine_introduction(tb: Table, tb_intro: Table) -> Table:
+    """Re-index incidence data by the number of years since the vaccine was introduced.
+
+    Args:
+        tb: Harmonized incidence table with `country`, `year`, `disease_description`, `group`, `code` and
+            `disease` columns.
+        tb_intro: Vaccination schedules table, as expected by `find_first_year`.
+
+    Returns:
+        The incidence rows that have a known introduction year, with `year` replaced by the number of years
+        since introduction (negative for years before it).
+    """
+    # Maps vaccine descriptions (vaccination schedules) to disease names (incidence data).
+    vaccine_disease_dict = {
+        "Measles-containing vaccine 2nd dose": "Measles",
+        "aP (acellular pertussis) vaccine": "Pertussis",
+        "IPV (Inactivated polio vaccine)": "Polio",
+        "IPV (Inactivated polio vaccine) 2nd dose": "Polio",
+        "YF (Yellow fever) vaccine": "Yellow fever",
+        "Rubella vaccine": "Rubella",
+        "Japanese Encephalitis": "Japanese encephalitis",
+        "Typhoid vaccine": "Typhoid",
+        "Meningococcal meningitis vaccines (all strains)": "Invasive meningococcal disease",
+    }
+
+    first_year_tb = find_first_year(tb_intro)
+    first_year_tb["disease_description"] = first_year_tb["description"].replace(vaccine_disease_dict)
+    # Several vaccines can map to one disease (both IPV entries map to "Polio"). Keep the earliest
+    # introduction per country and disease so the merge below does not duplicate incidence rows.
+    first_year_tb = first_year_tb.groupby(["country", "disease_description"])["first_year"].min().reset_index()
+
+    tb = tb.merge(first_year_tb, on=["country", "disease_description"], how="inner")
+    tb["years_from_introduction"] = tb["year"] - tb["first_year"]
+    tb = tb.drop(columns=["first_year", "year", "group", "code", "disease"])
+    tb = tb.rename(columns={"years_from_introduction": "year"})
+
+    return tb
diff --git a/etl/steps/data/grapher/who/2025-01-14/vaccine_preventable_incidence.py b/etl/steps/data/grapher/who/2025-01-14/vaccine_preventable_incidence.py
new file mode 100644
index 00000000000..742fdcf8b1f
--- /dev/null
+++ 
b/etl/steps/data/grapher/who/2025-01-14/vaccine_preventable_incidence.py
@@ -0,0 +1,24 @@
+"""Grapher step: pass the garden tables of this dataset on to grapher."""
+
+from etl.helpers import PathFinder, create_dataset
+
+# Paths and naming conventions for the current step.
+paths = PathFinder(__file__)
+
+
+def run(dest_dir: str) -> None:
+    """Create the grapher dataset in `dest_dir` from the garden dataset."""
+    garden = paths.load_dataset("vaccine_preventable_incidence")
+
+    # Read both garden tables without resetting their index.
+    table_names = ("vaccine_preventable_incidence", "years_since_vaccine_introduction")
+    grapher_tables = [garden.read(name, reset_index=False) for name in table_names]
+
+    # Use the garden dataset's metadata as the default for the grapher dataset.
+    grapher = create_dataset(
+        dest_dir,
+        tables=grapher_tables,
+        check_variables_metadata=True,
+        default_metadata=garden.metadata,
+    )
+    grapher.save()
diff --git a/etl/steps/data/meadow/who/2025-01-14/vaccine_preventable_incidence.py b/etl/steps/data/meadow/who/2025-01-14/vaccine_preventable_incidence.py
new file mode 100644
index 00000000000..18a454d588f
--- /dev/null
+++ b/etl/steps/data/meadow/who/2025-01-14/vaccine_preventable_incidence.py
@@ -0,0 +1,23 @@
+"""Meadow step: load the incidence snapshot and store it as a formatted table."""
+
+from etl.helpers import PathFinder, create_dataset
+
+# Paths and naming conventions for the current step.
+paths = PathFinder(__file__)
+
+
+def run(dest_dir: str) -> None:
+    """Create the meadow dataset in `dest_dir` from the `vaccine_preventable_incidence.xlsx` snapshot."""
+    snapshot = paths.load_snapshot("vaccine_preventable_incidence.xlsx")
+
+    # Rename the `NAME` column to `country` and discard rows that have no value in it.
+    raw = snapshot.read().rename(columns={"NAME": "country"}).dropna(subset=["country"])
+
+    # Ensure all columns are snake-case, set an appropriate index, and sort conveniently.
+    formatted = raw.format(["country", "year", "disease_description", "denominator"])
+
+    # Use the snapshot's metadata as the default for the meadow dataset.
+    meadow = create_dataset(
+        dest_dir, tables=[formatted], check_variables_metadata=True, default_metadata=snapshot.metadata
+    )
+    meadow.save()
diff --git a/snapshots/who/2025-01-14/vaccine_preventable_incidence.py b/snapshots/who/2025-01-14/vaccine_preventable_incidence.py
new file mode 100644
index 00000000000..6e401fdc3e0
--- /dev/null
+++ b/snapshots/who/2025-01-14/vaccine_preventable_incidence.py
@@ -0,0 +1,24 @@
+"""Script to create a snapshot of dataset."""
+
+from pathlib import Path
+
+import click
+
+from etl.snapshot import Snapshot
+
+# Snapshot version, taken from the name of the directory that contains this script.
+SNAPSHOT_VERSION = Path(__file__).parent.name
+
+
+@click.command()
+@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot")
+def main(upload: bool) -> None:
+    # Snapshot URI for this file: namespace, version and file name.
+    uri = f"who/{SNAPSHOT_VERSION}/vaccine_preventable_incidence.xlsx"
+
+    # Download data from source, add file to DVC and upload to S3.
+    Snapshot(uri).create_snapshot(upload=upload)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/snapshots/who/2025-01-14/vaccine_preventable_incidence.xlsx.dvc b/snapshots/who/2025-01-14/vaccine_preventable_incidence.xlsx.dvc
new file mode 100644
index 00000000000..e58454eb600
--- /dev/null
+++ b/snapshots/who/2025-01-14/vaccine_preventable_incidence.xlsx.dvc
@@ -0,0 +1,32 @@
+# Learn more at:
+# http://docs.owid.io/projects/etl/architecture/metadata/reference/
+meta:
+  origin:
+    # Data product / Snapshot
+    title: WHO Immunization data - reported incidence
+    description: |-
+      Reported cases and incidences of vaccine-preventable diseases are collected annually through the WHO/UNICEF Joint Reporting Form on Immunization (JRF). Country data (including historical data) are updated and made available as data is received. Global and regional aggregate data are released annually in mid-July and updated thereafter as country data is received.
+
+      These data are used to provide global trends over time and to inform the Strategic Advisory Group of Experts on Immunization (SAGE) and other advisory bodies to make evidence-based decisions as they deliberate different policies (for example, the need for booster doses and targeted ages).
+    date_published: "2024-03-01"
+
+    # Citation
+    producer: World Health Organization
+    citation_full: |-
+      Reported Incidence - WHO/UNICEF Joint Reporting Form on Immunization (2024), World Health Organization and UNICEF.
+    attribution_short: WHO
+
+    # Files
+    url_main: https://immunizationdata.who.int/global?topic=Reported-cases-and-incidence&location=
+    url_download: https://srhdpeuwpubsa-geecgzbpd5h0fueu.z01.azurefd.net/whdh/WIISE/export/incidence-rate-data.xlsx
+    date_accessed: 2025-01-14
+
+    # License
+    license:
+      name: CC BY-NC-SA 3.0 IGO
+      url: https://www.who.int/about/policies/publishing/copyright
+
+outs:
+  - md5: 54f6f4f9df9bbbee2a236960819f4a8d
+    size: 3294849
+    path: vaccine_preventable_incidence.xlsx