-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcovid-19-cases-nyt.py
124 lines (113 loc) · 3.78 KB
/
covid-19-cases-nyt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# ---
# name: covid-19-cases-nyt
# deployed: true
# config: index
# title: Covid-19 Cases (New York Times)
# description: Returns data about Covid-19 cases from the New York Times Covid-19 GitHub Repository
# params:
# - name: properties
# type: array
# description: The properties to return, given as a string or array; defaults to all properties; see "Returns" for available properties
# required: false
# - name: filter
# type: array
# description: Search query to determine the rows to return, given as a string or array
# required: false
# returns:
# - name: state
# type: string
# description: The state name
# - name: county
# type: string
# description: The county name
# - name: cases
# type: number
# description: The number of cases in the state/county
# - name: deaths
# type: number
# description: The number of deaths in the state/county
# - name: date
# type: string
# description: The date of the information
# - name: fips
# type: integer
# description: The fips number
# examples:
# - '"date, state, county, cases, deaths"'
# - '"", "\"New York\""'
# - '"county, cases", "+Illinois +date:2020-04-01"'
# notes: |-
# Data from The New York Times, based on reports from state and local health agencies
# Additional Resources:
# * New York Times Covid-19 GitHub Repo Source Data: \
# https://github.com/nytimes/covid-19-data
# * New York Times Covid-19 Tracking Page: \
# https://www.nytimes.com/interactive/2020/us/coronavirus-us-cases.html
# ---
import csv
import json
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from contextlib import closing
from collections import OrderedDict
from time import sleep
def flex_handler(flex):
    """Write the NYT county-level Covid-19 data to the output as ndjson.

    Sets the output content type to 'application/x-ndjson', then writes
    every item produced by get_data() as a single JSON document followed
    by a newline, so the result can be loaded into an index.

    Args:
        flex: Handler context exposing ``output.content_type`` and
            ``output.write`` (presumably supplied by the Flex.io
            runtime -- confirm against the platform docs).
    """
    # location of the source csv file
    source_url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'

    output = flex.output

    # ndjson: one json object per line
    output.content_type = 'application/x-ndjson'

    # stream the rows through one at a time rather than collecting them
    for record in get_data(source_url):
        output.write(json.dumps(record) + "\n")
def get_data(url, timeout=(10, 60)):
    """Stream a remote CSV file and yield one formatted item per data row.

    The response is consumed line by line, so the whole file is never
    held in memory at once.

    Args:
        url: Address of the CSV file to download.
        timeout: Passed straight to requests as the request timeout; the
            default is a (connect, read) pair in seconds so that a
            stalled server cannot block the handler indefinitely.

    Yields:
        The value returned by get_item() for each row that
        csv.DictReader parses from the response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (raised by ``raise_for_status``).
    """
    # get the data
    headers = {
        'User-Agent': 'Flex.io Covid-19 Integration'
    }
    response = requests_retry_session().get(
        url,
        stream=True,
        headers=headers,
        timeout=timeout,
    )
    with closing(response) as r:
        # fail loudly on an error status; otherwise the error page would
        # be parsed as if it were csv and silently produce no rows
        r.raise_for_status()

        # get each line and return a dictionary item for each line
        lines = (line.decode('utf-8') for line in r.iter_lines())
        reader = csv.DictReader(lines, delimiter=',', quotechar='"')
        for row in reader:
            yield get_item(row)
def get_item(row):
    """Build the ordered output record for one parsed CSV row.

    Column names are matched case-insensitively by lowercasing every key
    of ``row`` first. The 'cases', 'deaths' and 'fips' values are passed
    through to_number(); 'state', 'county' and 'date' are copied as-is.
    A missing 'cases' or 'deaths' column defaults to 0 and any other
    missing column defaults to an empty string.

    Args:
        row: Mapping of column name to value for a single CSV row.

    Returns:
        An OrderedDict with the keys state, county, cases, deaths, date
        and fips, in that order.
    """
    # normalize the column names to lowercase
    lowered = {}
    for key, value in row.items():
        lowered[key.lower()] = value

    # assemble the fields in the order they should be emitted
    return OrderedDict([
        ('state', lowered.get('state', '')),
        ('county', lowered.get('county', '')),
        ('cases', to_number(lowered.get('cases', 0))),
        ('deaths', to_number(lowered.get('deaths', 0))),
        ('date', lowered.get('date', '')),
        ('fips', to_number(lowered.get('fips', ''))),
    ])
def requests_retry_session(
    retries=3,
    backoff_factor=0.3,
    status_forcelist=(429, 500, 502, 503, 504),
    session=None,
):
    """Return a requests session whose HTTP(S) requests are retried.

    Args:
        retries: Used as the total, read and connect retry limits.
        backoff_factor: Backoff factor handed to the Retry policy.
        status_forcelist: HTTP status codes handed to the Retry policy
            as statuses that should trigger a retry.
        session: Existing session to configure; a new requests.Session
            is created when this is not supplied.

    Returns:
        The session, with a retrying HTTPAdapter mounted for both the
        'http://' and 'https://' prefixes.
    """
    active_session = session or requests.Session()

    # one retry policy shared by both schemes
    retry_policy = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    for prefix in ('http://', 'https://'):
        active_session.mount(prefix, retrying_adapter)

    return active_session
def to_number(value):
    """Convert a value to a float, leaving it untouched if that fails.

    Args:
        value: The value to convert, typically a string read from a CSV
            cell.

    Returns:
        ``float(value)`` when the conversion succeeds; otherwise the
        original ``value`` unchanged (for example '' or None).
    """
    try:
        return float(value)
    except (TypeError, ValueError):
        # ValueError: text that is not a number, such as '' or 'Unknown'
        # TypeError: a non-string, non-number input such as None, which
        # csv.DictReader supplies for fields missing from a short row
        return value