codeowners

chicago-justice-project · Oct 2, 2019 · c0f91a3 · c0f91a3
1 parent 5807959
commit c0f91a3
Show file tree

Hide file tree

Showing 2 changed files with 60 additions and 13 deletions.
diff --git a/lib/tagnews/geoloc/tag.py b/lib/tagnews/geoloc/tag.py
@@ -396,7 +396,7 @@ def best_geostring(self, extracted_strs_and_probs: tuple):
             if is_neighborhood or len(geostring) >= 3:
                 consider[0].append((geostring))
                 consider[1].append((probs))
-        if consider:
+        if consider[0]:
             avgs = [sum(i) / len(i) for i in consider[1]]
             max_index = avgs.index(max(avgs))
             return consider[0][max_index]

diff --git a/lib/tagnews/tests/test_geocoder.py b/lib/tagnews/tests/test_geocoder.py
@@ -4,36 +4,45 @@
 import tagnews
 
 
-class Test_GeoCoder():
+class Test_GeoCoder:
     @classmethod
     def setup_class(cls):
         cls.model = tagnews.GeoCoder()
 
     def test_extract_geostrings(self):
         self.model.extract_geostrings(
-            ('This is example article text with a location of'
-             ' 55th and Woodlawn where something happened.')
+            (
+                "This is example article text with a location of"
+                " 55th and Woodlawn where something happened."
+            )
         )
 
     def test_extract_geostring_probs(self):
-        article = ('This is example article text with a location of'
-                   ' 55th and Woodlawn where something happened.')
+        article = (
+            "This is example article text with a location of"
+            " 55th and Woodlawn where something happened."
+        )
         words, probs = self.model.extract_geostring_probs(article)
         max_prob = probs.max()
         max_word = words[np.argmax(probs)]
-        geostrings = self.model.extract_geostrings(article,
-                                                   prob_thresh=max_prob-0.001)
+        geostrings = self.model.extract_geostrings(
+            article, prob_thresh=max_prob - 0.001
+        )
         assert max_word in [word for geostring in geostrings for word in geostring][0]
 
     def test_extract_geostring_probs_word_not_in_glove(self):
         """
         Regression test for issue #105.
         """
-        article = '___1234567890nonexistent0987654321___'
+        article = "___1234567890nonexistent0987654321___"
         words, probs = self.model.extract_geostring_probs(article)
 
     def test_lat_longs_from_geostring_lists(self):
-        geostring_lists = [['5500', 'S', 'Woodlawn'], ['100', 'N.', 'Wacker'], ['thigh']]
+        geostring_lists = [
+            ["5500", "S", "Woodlawn"],
+            ["100", "N.", "Wacker"],
+            ["thigh"],
+        ]
         coords, scores = self.model.lat_longs_from_geostring_lists(
             geostring_lists, sleep_secs=0.0
         )
@@ -42,7 +51,45 @@ def test_lat_longs_from_geostring_lists(self):
 
     def test_community_areas(self):
         # Approximately 55th and Woodlawn, which is in Hyde Park.
-        coords = pd.DataFrame([[41.793465, -87.596930]],
-                              columns=['lat', 'long'])
+        coords = pd.DataFrame([[41.793465, -87.596930]], columns=["lat", "long"])
         com_area = self.model.community_area_from_coords(coords)
-        assert com_area == ['HYDE PARK']
+        assert com_area == ["HYDE PARK"]
+
+    def test_best_geostring(self):
+        """Check best_geostring against a README example and an empty input.
+
+        The first case passes two candidate geostrings with per-word
+        probabilities and expects the six-word candidate back. The second
+        passes empty candidates and expects None.
+        """
+        # Example from the readme
+        input1 = (
+            [
+                ["1700", "block", "of", "S.", "Halsted", "Ave."],
+                ["55th", "and", "Woodlawn,"],
+            ],
+            [
+                np.array(
+                    [
+                        0.71738559,
+                        0.81395197,
+                        0.82227415,
+                        0.79400611,
+                        0.70529455,
+                        0.60538059,
+                    ]
+                ),
+                np.array([0.79358339, 0.69696939, 0.68011874]),
+            ],
+        )
+        output1 = ["1700", "block", "of", "S.", "Halsted", "Ave."]
+        # Empty geostring example
+        input2, output2 = [(), ()], None
+        cases = [(input1, output1), (input2, output2)]
+        # `case_input` rather than `input`, which would shadow the builtin.
+        for case_input, expected_output in cases:
+            actual_output = self.model.best_geostring(case_input)
+            # Format arguments follow the order the message names them in.
+            assert actual_output == expected_output, (
+                "ERROR: expected output != actual output for input {}\n  {} != {}"
+            ).format(case_input, expected_output, actual_output)