Fix best_geostring when no candidate geostring qualifies, and add a test.

`consider` is indexed as consider[0] / consider[1] (geostrings and their
probabilities), so as a two-element container it is truthy even when both
halves are empty; the guard must look at consider[0] instead. The new
test_best_geostring covers the README example and the empty-input case,
for which it expects None.

The test file is otherwise only reformatted (double quotes, wrapped calls).

NOTE(review): this patch was restored from a whitespace-collapsed copy.
Leading whitespace of the context lines in tag.py is inferred from the
Python nesting - confirm against the target file before applying.

diff --git a/lib/tagnews/geoloc/tag.py b/lib/tagnews/geoloc/tag.py
index 1e6d7b3..ddf596c 100644
--- a/lib/tagnews/geoloc/tag.py
+++ b/lib/tagnews/geoloc/tag.py
@@ -396,7 +396,7 @@ def best_geostring(self, extracted_strs_and_probs: tuple):
             if is_neighborhood or len(geostring) >= 3:
                 consider[0].append((geostring))
                 consider[1].append((probs))
-        if consider:
+        if consider[0]:
             avgs = [sum(i) / len(i) for i in consider[1]]
             max_index = avgs.index(max(avgs))
             return consider[0][max_index]
diff --git a/lib/tagnews/tests/test_geocoder.py b/lib/tagnews/tests/test_geocoder.py
index cb6e288..1ad6b75 100644
--- a/lib/tagnews/tests/test_geocoder.py
+++ b/lib/tagnews/tests/test_geocoder.py
@@ -4,36 +4,45 @@
 import tagnews
 
 
-class Test_GeoCoder():
+class Test_GeoCoder:
     @classmethod
     def setup_class(cls):
         cls.model = tagnews.GeoCoder()
 
     def test_extract_geostrings(self):
         self.model.extract_geostrings(
-            ('This is example article text with a location of'
-             ' 55th and Woodlawn where something happened.')
+            (
+                "This is example article text with a location of"
+                " 55th and Woodlawn where something happened."
+            )
         )
 
     def test_extract_geostring_probs(self):
-        article = ('This is example article text with a location of'
-                   ' 55th and Woodlawn where something happened.')
+        article = (
+            "This is example article text with a location of"
+            " 55th and Woodlawn where something happened."
+        )
         words, probs = self.model.extract_geostring_probs(article)
         max_prob = probs.max()
         max_word = words[np.argmax(probs)]
-        geostrings = self.model.extract_geostrings(article,
-                                                   prob_thresh=max_prob-0.001)
+        geostrings = self.model.extract_geostrings(
+            article, prob_thresh=max_prob - 0.001
+        )
         assert max_word in [word for geostring in geostrings for word in geostring][0]
 
     def test_extract_geostring_probs_word_not_in_glove(self):
         """
         Regression test for issue #105.
         """
-        article = '___1234567890nonexistent0987654321___'
+        article = "___1234567890nonexistent0987654321___"
         words, probs = self.model.extract_geostring_probs(article)
 
     def test_lat_longs_from_geostring_lists(self):
-        geostring_lists = [['5500', 'S', 'Woodlawn'], ['100', 'N.', 'Wacker'], ['thigh']]
+        geostring_lists = [
+            ["5500", "S", "Woodlawn"],
+            ["100", "N.", "Wacker"],
+            ["thigh"],
+        ]
         coords, scores = self.model.lat_longs_from_geostring_lists(
             geostring_lists, sleep_secs=0.0
         )
@@ -42,7 +51,45 @@ def test_lat_longs_from_geostring_lists(self):
 
     def test_community_areas(self):
         # Approximately 55th and Woodlawn, which is in Hyde Park.
-        coords = pd.DataFrame([[41.793465, -87.596930]],
-                              columns=['lat', 'long'])
+        coords = pd.DataFrame([[41.793465, -87.596930]], columns=["lat", "long"])
         com_area = self.model.community_area_from_coords(coords)
-        assert com_area == ['HYDE PARK']
+        assert com_area == ["HYDE PARK"]
+
+    def test_best_geostring(self):
+        """Verify that the best_geostring function returns expected values"""
+        # Example from the readme
+        input1 = (
+            [
+                ["1700", "block", "of", "S.", "Halsted", "Ave."],
+                ["55th", "and", "Woodlawn,"],
+            ],
+            [
+                np.array(
+                    [
+                        0.71738559,
+                        0.81395197,
+                        0.82227415,
+                        0.79400611,
+                        0.70529455,
+                        0.60538059,
+                    ]
+                ),
+                np.array(
+                    [
+                        0.79358339,
+                        0.69696939,
+                        0.68011874
+                    ]
+                ),
+            ],
+        )
+        output1 = ["1700", "block", "of", "S.", "Halsted", "Ave."]
+        # Empty geostring example
+        input2, output2 = [(), ()], None
+        for case_input, expected_output in zip([input1, input2], [output1, output2]):
+            actual_output = self.model.best_geostring(case_input)
+            assert (
+                actual_output == expected_output
+            ), "ERROR: expected output != actual output for input {}\n {} != {}".format(
+                case_input, expected_output, actual_output
+            )