Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

📊 Incidence rates of vaccine preventable diseases #3842

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions dag/health.yml
Original file line number Diff line number Diff line change
Expand Up @@ -922,3 +922,14 @@ steps:
- data://meadow/who/2025-01-09/vaccination_schedules
data://grapher/who/2025-01-09/vaccination_schedules:
- data://garden/who/2025-01-09/vaccination_schedules

#
# WHO - Incidence of vaccine preventable diseases
#
data://meadow/who/2025-01-14/vaccine_preventable_incidence:
- snapshot://who/2025-01-14/vaccine_preventable_incidence.xlsx
data://garden/who/2025-01-14/vaccine_preventable_incidence:
- data://meadow/who/2025-01-14/vaccine_preventable_incidence
- data://garden/who/2025-01-09/vaccination_schedules
data://grapher/who/2025-01-14/vaccine_preventable_incidence:
- data://garden/who/2025-01-14/vaccine_preventable_incidence
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
{
"Afghanistan": "Afghanistan",
"Albania": "Albania",
"Algeria": "Algeria",
"American Samoa": "American Samoa",
"Andorra": "Andorra",
"Angola": "Angola",
"Anguilla": "Anguilla",
"Antigua and Barbuda": "Antigua and Barbuda",
"Argentina": "Argentina",
"Armenia": "Armenia",
"Aruba": "Aruba",
"Australia": "Australia",
"Austria": "Austria",
"Azerbaijan": "Azerbaijan",
"Bahamas": "Bahamas",
"Bahrain": "Bahrain",
"Bangladesh": "Bangladesh",
"Barbados": "Barbados",
"Belarus": "Belarus",
"Belgium": "Belgium",
"Belize": "Belize",
"Benin": "Benin",
"Bermuda": "Bermuda",
"Bhutan": "Bhutan",
"Bolivia (Plurinational State of)": "Bolivia",
"Bosnia and Herzegovina": "Bosnia and Herzegovina",
"Botswana": "Botswana",
"Brazil": "Brazil",
"British Virgin Islands": "British Virgin Islands",
"Brunei Darussalam": "Brunei",
"Bulgaria": "Bulgaria",
"Burkina Faso": "Burkina Faso",
"Burundi": "Burundi",
"Cabo Verde": "Cape Verde",
"Cambodia": "Cambodia",
"Cameroon": "Cameroon",
"Canada": "Canada",
"Cayman Islands": "Cayman Islands",
"Central African Republic": "Central African Republic",
"Chad": "Chad",
"Chile": "Chile",
"China": "China",
"China, Hong Kong SAR": "Hong Kong",
"China, Macao SAR": "Macao",
"Colombia": "Colombia",
"Comoros": "Comoros",
"Congo": "Congo",
"Cook Islands": "Cook Islands",
"Costa Rica": "Costa Rica",
"Croatia": "Croatia",
"Cuba": "Cuba",
"Cura\u00e7ao": "Curacao",
"Cyprus": "Cyprus",
"Czechia": "Czechia",
"C\u00f4te d'Ivoire": "Cote d'Ivoire",
"Democratic People's Republic of Korea": "North Korea",
"Democratic Republic of the Congo": "Democratic Republic of Congo",
"Denmark": "Denmark",
"Djibouti": "Djibouti",
"Dominica": "Dominica",
"Dominican Republic": "Dominican Republic",
"Ecuador": "Ecuador",
"Egypt": "Egypt",
"El Salvador": "El Salvador",
"Equatorial Guinea": "Equatorial Guinea",
"Eritrea": "Eritrea",
"Estonia": "Estonia",
"Eswatini": "Eswatini",
"Ethiopia": "Ethiopia",
"Fiji": "Fiji",
"Finland": "Finland",
"France": "France",
"French Polynesia": "French Polynesia",
"Gabon": "Gabon",
"Gambia": "Gambia",
"Georgia": "Georgia",
"Germany": "Germany",
"Ghana": "Ghana",
"Global": "World",
"Greece": "Greece",
"Grenada": "Grenada",
"Guam": "Guam",
"Guatemala": "Guatemala",
"Guinea": "Guinea",
"Guinea-Bissau": "Guinea-Bissau",
"Guyana": "Guyana",
"Haiti": "Haiti",
"Honduras": "Honduras",
"Hungary": "Hungary",
"Iceland": "Iceland",
"India": "India",
"Indonesia": "Indonesia",
"Iran (Islamic Republic of)": "Iran",
"Iraq": "Iraq",
"Ireland": "Ireland",
"Israel": "Israel",
"Italy": "Italy",
"Jamaica": "Jamaica",
"Japan": "Japan",
"Jordan": "Jordan",
"Kazakhstan": "Kazakhstan",
"Kenya": "Kenya",
"Kiribati": "Kiribati",
"Kuwait": "Kuwait",
"Kyrgyzstan": "Kyrgyzstan",
"Lao People's Democratic Republic": "Laos",
"Latvia": "Latvia",
"Lebanon": "Lebanon",
"Lesotho": "Lesotho",
"Liberia": "Liberia",
"Libya": "Libya",
"Lithuania": "Lithuania",
"Luxembourg": "Luxembourg",
"Madagascar": "Madagascar",
"Malawi": "Malawi",
"Malaysia": "Malaysia",
"Maldives": "Maldives",
"Mali": "Mali",
"Malta": "Malta",
"Marshall Islands": "Marshall Islands",
"Mauritania": "Mauritania",
"Mauritius": "Mauritius",
"Mexico": "Mexico",
"Micronesia (Federated States of)": "Micronesia (country)",
"Monaco": "Monaco",
"Mongolia": "Mongolia",
"Montenegro": "Montenegro",
"Montserrat": "Montserrat",
"Morocco": "Morocco",
"Mozambique": "Mozambique",
"Myanmar": "Myanmar",
"Namibia": "Namibia",
"Nauru": "Nauru",
"Nepal": "Nepal",
"New Caledonia": "New Caledonia",
"New Zealand": "New Zealand",
"Nicaragua": "Nicaragua",
"Niger": "Niger",
"Nigeria": "Nigeria",
"Niue": "Niue",
"North Macedonia": "North Macedonia",
"Northern Mariana Islands": "Northern Mariana Islands",
"Norway": "Norway",
"Oman": "Oman",
"Pakistan": "Pakistan",
"Palau": "Palau",
"Panama": "Panama",
"Papua New Guinea": "Papua New Guinea",
"Paraguay": "Paraguay",
"Peru": "Peru",
"Philippines": "Philippines",
"Poland": "Poland",
"Portugal": "Portugal",
"Qatar": "Qatar",
"Republic of Korea": "South Korea",
"Republic of Moldova": "Moldova",
"Romania": "Romania",
"Russian Federation": "Russia",
"Rwanda": "Rwanda",
"Saint Kitts and Nevis": "Saint Kitts and Nevis",
"Saint Lucia": "Saint Lucia",
"Saint Vincent and the Grenadines": "Saint Vincent and the Grenadines",
"Samoa": "Samoa",
"San Marino": "San Marino",
"Sao Tome and Principe": "Sao Tome and Principe",
"Saudi Arabia": "Saudi Arabia",
"Senegal": "Senegal",
"Serbia": "Serbia",
"Seychelles": "Seychelles",
"Sierra Leone": "Sierra Leone",
"Singapore": "Singapore",
"Sint Maarten (Dutch part)": "Sint Maarten (Dutch part)",
"Slovakia": "Slovakia",
"Slovenia": "Slovenia",
"Solomon Islands": "Solomon Islands",
"Somalia": "Somalia",
"South Africa": "South Africa",
"South Sudan": "South Sudan",
"Spain": "Spain",
"Sri Lanka": "Sri Lanka",
"Sudan": "Sudan",
"Suriname": "Suriname",
"Sweden": "Sweden",
"Switzerland": "Switzerland",
"Syrian Arab Republic": "Syria",
"Tajikistan": "Tajikistan",
"Thailand": "Thailand",
"Timor-Leste": "East Timor",
"Togo": "Togo",
"Tokelau": "Tokelau",
"Tonga": "Tonga",
"Trinidad and Tobago": "Trinidad and Tobago",
"Tunisia": "Tunisia",
"Turkmenistan": "Turkmenistan",
"Turks and Caicos Islands": "Turks and Caicos Islands",
"Tuvalu": "Tuvalu",
"Uganda": "Uganda",
"Ukraine": "Ukraine",
"United Arab Emirates": "United Arab Emirates",
"United Kingdom of Great Britain and Northern Ireland": "United Kingdom",
"United Republic of Tanzania": "Tanzania",
"United States of America": "United States",
"Uruguay": "Uruguay",
"Uzbekistan": "Uzbekistan",
"Vanuatu": "Vanuatu",
"Venezuela (Bolivarian Republic of)": "Venezuela",
"Viet Nam": "Vietnam",
"Wallis and Futuna": "Wallis and Futuna",
"Yemen": "Yemen",
"Zambia": "Zambia",
"Zimbabwe": "Zimbabwe",
"African Region": "African Region (WHO)",
"Eastern Mediterranean Region": "Eastern Mediterranean Region (WHO)",
"European Region": "European Region (WHO)",
"Kosovo (in accordance with UN Security Council resolution 1244 (1999))": "Kosovo",
"Netherlands (Kingdom of the)": "Netherlands",
"Region of the Americas": "Region of the Americas (WHO)",
"South-East Asia Region": "South-East Asia Region (WHO)",
"T\u00fcrkiye": "Turkey",
"Western Pacific Region": "Western Pacific Region (WHO)",
"occupied Palestinian territory, including east Jerusalem": "Palestine"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# NOTE: To learn more about the fields, hover over their names.
definitions:
common:
presentation:
topic_tags:
- Vaccination

# Learn more about the available fields:
# http://docs.owid.io/projects/etl/architecture/metadata/reference/
dataset:
update_period_days: 365

tables:
vaccine_preventable_incidence:
variables:
incidence_rate:
title: Incidence rate of << disease_description.lower() >> << denominator >>
presentation:
title_public: Incidence rate of << disease_description.lower() >> << denominator >>
unit: << denominator >>
display:
name: << disease_description >>
years_since_vaccine_introduction:
variables:
incidence_rate:
title: Incidence rate of << disease_description.lower() >> << denominator >> since vaccine introduction
presentation:
title_public: Incidence rate of << disease_description.lower() >> << denominator >> since vaccine introduction
unit: << denominator >>
display:
name: << disease_description >>
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""Load a meadow dataset and create a garden dataset."""

from owid.catalog import Table

from etl.data_helpers import geo
from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Build the garden dataset of vaccine-preventable disease incidence.

    Two tables are written to ``dest_dir``:

    * the harmonized incidence table, indexed by country, year, disease and denominator;
    * a ``years_since_vaccine_introduction`` table, where ``year`` holds the value returned by
      ``calculate_years_from_vaccine_introduction`` instead of the calendar year.
    """
    # Both output tables share the same index columns.
    index_columns = ("country", "year", "disease_description", "denominator")

    #
    # Load inputs.
    #
    # Incidence data from meadow; its dataset metadata is reused for the garden dataset below.
    ds_meadow = paths.load_dataset("vaccine_preventable_incidence")
    tb = ds_meadow.read("vaccine_preventable_incidence")

    # Vaccination schedules, needed to date the introduction of each vaccine.
    tb_schedules = paths.load_dataset("vaccination_schedules").read("vaccination_schedules")

    #
    # Process data.
    #
    tb = geo.harmonize_countries(
        df=tb,
        countries_file=paths.country_mapping_path,
        excluded_countries_file=paths.excluded_countries_path,
    )

    # Derive the second table first: it still needs the columns that are dropped from `tb` right after.
    tb_since_intro = calculate_years_from_vaccine_introduction(tb, tb_schedules)
    tb_since_intro = tb_since_intro.format(list(index_columns), short_name="years_since_vaccine_introduction")

    tb = tb.drop(columns=["group", "code", "disease"])
    tb = tb.format(list(index_columns))

    #
    # Save outputs.
    #
    # Create a new garden dataset with the same metadata as the meadow dataset.
    ds_garden = create_dataset(
        dest_dir,
        tables=[tb, tb_since_intro],
        check_variables_metadata=True,
        default_metadata=ds_meadow.metadata,
    )

    # Save changes in the new garden dataset.
    ds_garden.save()


def find_first_year(tb_intro: Table) -> Table:
    """Return the earliest year each vaccine appears in a country's schedule.

    Only rows whose ``intro`` value is one of the statuses listed below are considered.
    The result has one row per (country, description) with the minimum ``year`` stored
    in a ``first_year`` column.
    """
    # Statuses that count as the vaccine having been introduced.
    # Left out on purpose (kept from the original code): "Not routinely administered",
    # "During outbreaks", "Demonstration projects".
    counted_statuses = [
        "Entire country",
        "Specific risk groups",
        "Regions of the country",
        "High risk areas",
        "Adolescents",
    ]

    is_counted = tb_intro["intro"].isin(counted_statuses)
    earliest_year = tb_intro[is_counted].groupby(["country", "description"])["year"].min()

    return earliest_year.reset_index().rename(columns={"year": "first_year"})


def calculate_years_from_vaccine_introduction(tb: Table, tb_intro: Table) -> Table:
    """Re-index incidence data by the number of years since the vaccine was introduced.

    Parameters
    ----------
    tb:
        Incidence table. Must contain the columns ``country``, ``year``, ``disease_description``,
        ``group``, ``code`` and ``disease``.
    tb_intro:
        Vaccination schedules table, passed to ``find_first_year`` to obtain the first year each
        vaccine was introduced in each country.

    Returns
    -------
    Table
        The rows of ``tb`` that have a matching vaccine introduction (inner merge), where ``year``
        now holds ``year - first_year`` (negative for years before the introduction). The columns
        ``group``, ``code`` and ``disease`` are dropped.
    """
    # Vaccine description (as used in the schedules table) -> disease description (as used in `tb`).
    vaccine_disease_dict = {
        "Measles-containing vaccine 2nd dose": "Measles",
        "aP (acellular pertussis) vaccine": "Pertussis",
        "IPV (Inactivated polio vaccine)": "Polio",
        "IPV (Inactivated polio vaccine) 2nd dose": "Polio",
        "YF (Yellow fever) vaccine": "Yellow fever",
        "Rubella vaccine": "Rubella",
        "Japanese Encephalitis": "Japanese encephalitis",
        "Typhoid vaccine": "Typhoid",
        "Meningococcal meningitis vaccines (all strains)": "Invasive meningococcal disease",
    }

    first_year_tb = find_first_year(tb_intro)
    first_year_tb["disease_description"] = first_year_tb["description"].replace(vaccine_disease_dict)

    # More than one vaccine can map to the same disease (both IPV entries map to "Polio").
    # Keep only the earliest introduction per country and disease; otherwise the merge below
    # would emit every incidence row once per matching vaccine.
    first_year_tb = first_year_tb.sort_values("first_year").drop_duplicates(
        subset=["country", "disease_description"], keep="first"
    )

    # Inner merge: incidence rows without a known introduction year are discarded.
    tb = tb.merge(first_year_tb, on=["country", "disease_description"], how="inner")
    tb["years_from_introduction"] = tb["year"] - tb["first_year"]

    # Replace the calendar year with the relative year and drop helper/unused columns.
    tb = tb.drop(columns=["first_year", "description", "year", "group", "code", "disease"])
    tb = tb.rename(columns={"years_from_introduction": "year"})

    return tb
Loading
Loading