Skip to content

Commit

Permalink
Merge pull request #1 from dsfsi/master
Browse files Browse the repository at this point in the history
Update to main
  • Loading branch information
TshepisoMokoena authored May 24, 2020
2 parents 92a609f + c41363b commit 5e425db
Show file tree
Hide file tree
Showing 18 changed files with 3,872 additions and 1,416 deletions.
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,13 @@ dmypy.json
.pyre/

data/Ghana_updates_moh/

# WIP files
.idea/
act.exe
actions-runner/
data/testing/covid-testing-all-observations.csv
twitter_developer_account_for_api

# Other build files
.vscode/
1,557 changes: 1,557 additions & 0 deletions data/line_lists/line-list-egypt.csv

Large diffs are not rendered by default.

112 changes: 56 additions & 56 deletions data/time_series/africa_cdc/africa_cdc_daily_time_series_cases.csv

Large diffs are not rendered by default.

112 changes: 56 additions & 56 deletions data/time_series/africa_cdc/africa_cdc_daily_time_series_deaths.csv

Large diffs are not rendered by default.

112 changes: 56 additions & 56 deletions data/time_series/africa_cdc/africa_cdc_daily_time_series_recovered.csv

Large diffs are not rendered by default.

112 changes: 56 additions & 56 deletions data/time_series/africa_cdc/africa_cdc_daily_time_series_tests.csv

Large diffs are not rendered by default.

108 changes: 54 additions & 54 deletions data/time_series/africa_daily_time_series_cases.csv

Large diffs are not rendered by default.

108 changes: 54 additions & 54 deletions data/time_series/africa_daily_time_series_deaths.csv

Large diffs are not rendered by default.

108 changes: 54 additions & 54 deletions data/time_series/africa_daily_time_series_recovered.csv

Large diffs are not rendered by default.

108 changes: 54 additions & 54 deletions data/time_series/africa_daily_time_series_tests.csv

Large diffs are not rendered by default.

1,408 changes: 924 additions & 484 deletions data/time_series/africa_daily_time_series_unpivoted.csv

Large diffs are not rendered by default.

352 changes: 231 additions & 121 deletions data/time_series/africa_daily_time_series_unpivoted_cases.csv

Large diffs are not rendered by default.

352 changes: 231 additions & 121 deletions data/time_series/africa_daily_time_series_unpivoted_deaths.csv

Large diffs are not rendered by default.

352 changes: 231 additions & 121 deletions data/time_series/africa_daily_time_series_unpivoted_recovered.csv

Large diffs are not rendered by default.

352 changes: 231 additions & 121 deletions data/time_series/africa_daily_time_series_unpivoted_tests.csv

Large diffs are not rendered by default.

Binary file added img/May_22_2020_6pm_EAT.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added img/May_23_2020_9am_EAT.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
25 changes: 17 additions & 8 deletions scripts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
timeseries_path = "data/time_series/"
# This is a hack for now, it will be replaced with similarity measures for more
# elegant accommodation of variants.
# Variants happen due to people mistyping or OCR artifacts: each key is an
# observed misspelling, each value is the canonical country name.
freq_missed = {"Cape Verde": "Cabo Verde", "ORC": "DRC",
               "Cdte d'ivoire": "Cote d'ivoire", "Cdte d'Ivoire": "Cote d'ivoire",
               "Cate d'ivoire": "Cote d'ivoire", "Cote d'Ivoire": "Cote d'ivoire",
               "Cate d'Ivoire": "Cote d'ivoire", "Céte d'ivoire": "Cote d'ivoire",
               "Céte d'Ivoire": "Cote d'ivoire", "Cte d'Ivoire": "Cote d'ivoire",
               "S20 Tome & Principe": "Sao Tome & Principe",
               "Sa0 Tome & Principe": "Sao Tome & Principe"}
Expand Down Expand Up @@ -72,9 +72,19 @@ def get_timeseries_filenames(files_path=timeseries_path, files_base="africa_dail
return get_filenames(files_path, files_base)

def read_time_series():
    """Read the Africa CDC time series and return only the first three series.

    Returns:
        tuple: (list of the first three DataFrames, list of their filenames),
        as produced by :func:`read_africa_cdc_time_series`.
    """
    # NOTE(review): which three series this yields depends on the ordering of
    # get_africa_cdc_filenames() — presumably cases/deaths/recovered; confirm.
    data_f, files = read_africa_cdc_time_series()
    return data_f[:3], files[:3]

def read_africa_cdc_time_series(use_country_asid=True):
    """Read every Africa CDC time-series CSV into a pandas DataFrame.

    Args:
        use_country_asid (bool): If True, use the 'Country/Region' column as
            the DataFrame index; otherwise keep it as a regular column.

    Returns:
        tuple: (list of DataFrames, list of the filenames they were read from),
        in the order returned by get_africa_cdc_filenames().
    """
    files = get_africa_cdc_filenames()
    # keep_default_na=False stops pandas turning literal "NA" strings into NaN
    # (presumably needed for Namibia's "NA" iso2 code — confirm against data).
    if use_country_asid:
        data_f = [pd.read_csv(f, index_col='Country/Region',
                              encoding="ISO-8859-1", keep_default_na=False)
                  for f in files]
    else:
        data_f = [pd.read_csv(f, encoding="ISO-8859-1", keep_default_na=False)
                  for f in files]
    return data_f, files

Expand Down Expand Up @@ -109,12 +119,11 @@ def unpivot_timeseries():
keys = ["Confirmed Cases", "Deaths", "Recovered Cases", "Tests"]
df_unp = "unpivoted_dataframe"
# First, get all the 4 files, unpivoted and sorted
#filenames = get_mixed_timeseries_filenames()
filenames = get_africa_cdc_filenames()
# filenames = get_mixed_timeseries_filenames()
# filenames = get_africa_cdc_filenames()
# print(filenames)
dfs = [pd.read_csv(filenames[i], keep_default_na=False) for i in range(len(keys))]
# df = dfs[0]
# print(df.loc[df['Country/Region'] == "Namibia"])
dfs, filenames = read_africa_cdc_time_series(use_country_asid=False) #[pd.read_csv(filenames[i], keep_default_na=False) for i in range(len(keys))]

data = {keys[i]:{"filename":filenames[i], \
"df": dfs[i], \
df_unp: dfs[i].melt(id_vars=["Country/Region", "iso2", "iso3", "Subregion", "Population-2020", "Lat", "Long"], var_name="Date", value_name="Values"), \
Expand Down

0 comments on commit 5e425db

Please sign in to comment.