Skip to content

Commit

Permalink
warn when posting more than 10k rows
Browse files Browse the repository at this point in the history
  • Loading branch information
fitnr committed Jan 22, 2022
1 parent 22fff3d commit 83bbc5e
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 15 deletions.
1 change: 1 addition & 0 deletions HISTORY
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
0.5.2
-----
* Accept either "zip" or "zipcode" as an argument in `CensusGeocode.address` (#24).
* Add warning to `CensusGeocode.batch()` when trying to send more than 10,000 records
* Move packaging metadata to setup.cfg and pyproject.toml
* Use github actions for automated tests

Expand Down
3 changes: 2 additions & 1 deletion src/censusgeocode/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def main():

except IndexError:
print("Address not found: {}".format(args.address), file=sys.stderr)
sys.exit(1)

elif args.csv:
if args.csv == "-":
Expand All @@ -94,7 +95,7 @@ def main():
writer.writerows(result)

else:
print("Address or csv file required")
print("Address or csv file required", file=sys.stderr)
sys.exit(1)


Expand Down
27 changes: 14 additions & 13 deletions src/censusgeocode/censusgeocode.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Census Geocoder wrapper
see http://geocoding.geo.census.gov/geocoder/Geocoding_Services_API.pdf
Accepts either named `lat` and `lng` or x and y inputs.
For details on the API, see:
http://geocoding.geo.census.gov/geocoder/Geocoding_Services_API.pdf
"""
import csv
import io
import warnings

import requests
from requests.exceptions import RequestException
Expand Down Expand Up @@ -140,24 +141,24 @@ def onelineaddress(self, address, **kwargs):

def set_benchmark(self, benchmark):
"""Set the Census Geocoding API benchmark the class will use.
See https://geocoding.geo.census.gov/geocoder/vintages?form for more."""
See: https://geocoding.geo.census.gov/geocoder/vintages?form"""
self._benchmark = benchmark

@property
def benchmark(self):
"""Give the Census Geocoding API benchmark the class is using.
See https://geocoding.geo.census.gov/geocoder/benchmarks for more."""
See: https://geocoding.geo.census.gov/geocoder/benchmarks"""
return getattr(self, "_benchmark")

def set_vintage(self, vintage):
"""Set the Census Geocoding API vintage the class will use.
See https://geocoding.geo.census.gov/geocoder/vintages?form for more."""
See: https://geocoding.geo.census.gov/geocoder/vintages?form"""
self._vintage = vintage

@property
def vintage(self):
"""Give the Census Geocoding API vintage the class is using.
See https://geocoding.geo.census.gov/geocoder/vintages?form for more."""
See: https://geocoding.geo.census.gov/geocoder/vintages?form"""
return getattr(self, "_vintage")

def _parse_batch_result(self, data, returntype):
Expand Down Expand Up @@ -190,13 +191,15 @@ def _post_batch(self, data=None, f=None, **kwargs):
returntype = kwargs.get("returntype", "geographies")
url = self._geturl("addressbatch", returntype)

if data is not None:
if data:
# For Python 3, compile data into a StringIO
f = io.StringIO()
writer = csv.DictWriter(f, fieldnames=["id", "street", "city", "state", "zip"])
for i, row in enumerate(data):
for i, row in enumerate(data, 1):
row.setdefault("id", i)
writer.writerow(row)
if i == 10001:
warnings.warn("Sending more than 10,000 records, the upper limit for the Census Geocoder. Request will likely fail")

f.seek(0)

Expand Down Expand Up @@ -226,12 +229,12 @@ def _post_batch(self, data=None, f=None, **kwargs):
def addressbatch(self, data, **kwargs):
"""
Send either a CSV file or data to the addressbatch API.
According to the Census, "there is currently an upper limit of 10,000 records per batch file."
If a file, can either be a file-like with a `read()` method, or a `str` that's a path to the
file. Either way, it must have no header and have fields id,street,city,state,zip
If data, should be an iterable of dicts with the above fields (although ID is optional).
"""
# Does data quack like a file handle?
Expand All @@ -251,8 +254,6 @@ class GeographyResult(dict):

"""Wrapper for geography objects returned by the Census Geocoding API"""

_coordkeys = ("CENTLON", "CENTLAT", "INTPTLON", "INTPTLAT")

def __init__(self, data):
self.input = data["result"].get("input", {})
super().__init__(data["result"]["geographies"])
Expand Down
15 changes: 14 additions & 1 deletion tests/test_censusgeocode.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-

"""Tests for censusgeocode"""
# This file is part of censusgeocode.
# https://github.com/fitnr/censusgeocode

Expand All @@ -9,6 +9,8 @@

import unittest
import vcr
import warnings

from censusgeocode import CensusGeocode
from censusgeocode.censusgeocode import AddressResult, GeographyResult

Expand Down Expand Up @@ -71,6 +73,7 @@ def test_address_return_type(self):

@vcr.use_cassette('tests/fixtures/test_benchmark_vintage.yaml')
def test_benchmark_vintage(self):
"""Initializing CensusGeocode with benchmark and vintage keywords works"""
bmark, vint = 'Public_AR_Census2020', 'Census2020_Current'

cg = CensusGeocode(benchmark=bmark, vintage=vint)
Expand All @@ -82,6 +85,7 @@ def test_benchmark_vintage(self):

@vcr.use_cassette('tests/fixtures/address-batch.yaml')
def test_addressbatch(self):
"""batch() function works"""
result = self.cg.addressbatch('tests/fixtures/batch.csv', returntype='locations')
assert isinstance(result, list)
resultdict = {int(r['id']): r for r in result}
Expand All @@ -94,3 +98,12 @@ def test_addressbatch(self):
assert resultdict[3]['tigerlineid'] == '59653655'
assert resultdict[3]['statefp'] == '36'
assert resultdict[2]['match'] is False

def test_warning10k(self):
    """Sending more than 10,000 records to batch raises a warning.

    Warnings are promoted to exceptions for the duration of the call, so the
    ``UserWarning`` issued for the 10,001st row propagates out of
    ``addressbatch`` and ``result`` keeps its initial empty value.
    """
    # A generator of empty dicts: one row past the 10,000-record limit.
    data = ({} for _ in range(10001))
    result = []
    # catch_warnings() restores the global warning filters on exit, so the
    # "error" filter installed here cannot leak into other tests.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        with self.assertRaises(UserWarning, msg="Get a warning when sending more than 10k rows to batch()"):
            result = self.cg.addressbatch(data)
    # The assignment above never completes because the call raised.
    self.assertEqual(result, [], "Result is empty")

0 comments on commit 83bbc5e

Please sign in to comment.