Skip to content

Commit

Permalink
codeowners
Browse files Browse the repository at this point in the history
  • Loading branch information
jlherzberg committed Oct 2, 2019
1 parent 5807959 commit c0f91a3
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 13 deletions.
2 changes: 1 addition & 1 deletion lib/tagnews/geoloc/tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ def best_geostring(self, extracted_strs_and_probs: tuple):
if is_neighborhood or len(geostring) >= 3:
consider[0].append((geostring))
consider[1].append((probs))
if consider:
if consider[0]:
avgs = [sum(i) / len(i) for i in consider[1]]
max_index = avgs.index(max(avgs))
return consider[0][max_index]
Expand Down
71 changes: 59 additions & 12 deletions lib/tagnews/tests/test_geocoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,36 +4,45 @@
import tagnews


class Test_GeoCoder():
class Test_GeoCoder:
@classmethod
def setup_class(cls):
cls.model = tagnews.GeoCoder()

def test_extract_geostrings(self):
self.model.extract_geostrings(
('This is example article text with a location of'
' 55th and Woodlawn where something happened.')
(
"This is example article text with a location of"
" 55th and Woodlawn where something happened."
)
)

def test_extract_geostring_probs(self):
article = ('This is example article text with a location of'
' 55th and Woodlawn where something happened.')
article = (
"This is example article text with a location of"
" 55th and Woodlawn where something happened."
)
words, probs = self.model.extract_geostring_probs(article)
max_prob = probs.max()
max_word = words[np.argmax(probs)]
geostrings = self.model.extract_geostrings(article,
prob_thresh=max_prob-0.001)
geostrings = self.model.extract_geostrings(
article, prob_thresh=max_prob - 0.001
)
assert max_word in [word for geostring in geostrings for word in geostring][0]

def test_extract_geostring_probs_word_not_in_glove(self):
"""
Regression test for issue #105.
"""
article = '___1234567890nonexistent0987654321___'
article = "___1234567890nonexistent0987654321___"
words, probs = self.model.extract_geostring_probs(article)

def test_lat_longs_from_geostring_lists(self):
geostring_lists = [['5500', 'S', 'Woodlawn'], ['100', 'N.', 'Wacker'], ['thigh']]
geostring_lists = [
["5500", "S", "Woodlawn"],
["100", "N.", "Wacker"],
["thigh"],
]
coords, scores = self.model.lat_longs_from_geostring_lists(
geostring_lists, sleep_secs=0.0
)
Expand All @@ -42,7 +51,45 @@ def test_lat_longs_from_geostring_lists(self):

def test_community_areas(self):
# Approximately 55th and Woodlawn, which is in Hyde Park.
coords = pd.DataFrame([[41.793465, -87.596930]],
columns=['lat', 'long'])
coords = pd.DataFrame([[41.793465, -87.596930]], columns=["lat", "long"])
com_area = self.model.community_area_from_coords(coords)
assert com_area == ['HYDE PARK']
assert com_area == ["HYDE PARK"]

def test_best_geostring(self):
    """Verify that best_geostring returns the expected values.

    Two cases are exercised:

    * the README example, a pair of candidate geostrings with per-word
      probabilities, where the first candidate has the higher mean
      probability (about 0.743 versus 0.724) and is the expected result;
    * an input with no candidates, for which ``None`` is expected.
    """
    # Example from the readme: (geostrings, per-word probabilities).
    readme_input = (
        [
            ["1700", "block", "of", "S.", "Halsted", "Ave."],
            ["55th", "and", "Woodlawn,"],
        ],
        [
            np.array(
                [
                    0.71738559,
                    0.81395197,
                    0.82227415,
                    0.79400611,
                    0.70529455,
                    0.60538059,
                ]
            ),
            np.array([0.79358339, 0.69696939, 0.68011874]),
        ],
    )
    readme_expected = ["1700", "block", "of", "S.", "Halsted", "Ave."]

    # Empty geostring example: nothing to choose from.
    empty_input, empty_expected = [(), ()], None

    cases = [
        (readme_input, readme_expected),
        (empty_input, empty_expected),
    ]
    # `case_input` rather than `input`, so the builtin is not shadowed.
    for case_input, expected_output in cases:
        actual_output = self.model.best_geostring(case_input)
        # The values are formatted in the same order as the message labels
        # (expected first, then actual), separated by a real newline.
        assert (
            actual_output == expected_output
        ), "ERROR: expected output != actual output for input {}\n {} != {}".format(
            case_input, expected_output, actual_output
        )

0 comments on commit c0f91a3

Please sign in to comment.