Skip to content

Commit

Permalink
adding neighborhoods, probs to geostring return
Browse files Browse the repository at this point in the history
  • Loading branch information
jlherzberg committed Sep 4, 2019
1 parent d5db6d1 commit 492c1b0
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 2 deletions.
8 changes: 6 additions & 2 deletions lib/tagnews/geoloc/tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,22 +291,26 @@ def extract_geostrings(self, s, prob_thresh=0.5):
geostrings : list of lists of strings
The list of extracted geostrings from the article text.
Each word is kept separated in the list.
Examle:
Example:
[['1300', 'W.', 'Halsted'], ['Ohio']]
"""
words, probs = self.extract_geostring_probs(s)
above_thresh = probs >= prob_thresh

words = ["filler"] + words + ["filler"]
probs = np.append(0, np.append(probs, 0))

above_thresh = np.concatenate([[False], above_thresh, [False]]).astype(np.int32)
switch_ons = np.where(np.diff(above_thresh) == 1)[0] + 1
switch_offs = np.where(np.diff(above_thresh) == -1)[0] + 1

geostrings = []
probstrings = []
for on, off in zip(switch_ons, switch_offs):
geostrings.append(words[on:off])
probstrings.append(probs[on:off])

return geostrings
return geostrings, probstrings

@staticmethod
def lat_longs_from_geostring_lists(geostring_lists, **kwargs):
Expand Down
97 changes: 97 additions & 0 deletions lib/tagnews/utils/neighborhoods.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Neighborhood names, one plain string per entry (these appear to be
# Chicago-area neighborhoods).
# NOTE(review): presumably these are matched against extracted geostrings /
# article text -- confirm against callers. Spelling therefore matters: a
# misspelled entry can never match the correctly spelled name in text.
# The original ordering (roughly, but not strictly, alphabetical) is kept so
# that any index-based use of this list is unaffected.
neighborhoods = [
    "Andersonville",
    "Archer Heights",
    "Ashburn",
    "Ashburn Estates",
    "Austin",
    "Avondale",  # spelling fixed (was "Avaondale")
    "Belmont Central",
    "Beverly",
    "Beverly Woods",
    "Brainerd",
    "Bridgeport",
    "Brighton Park",
    "Bronzeville",  # spelling fixed (was "Bronceville")
    "Bucktown",
    "Burnside",
    "Calumet Heights",
    "Canaryville",
    "Clearing",
    "Chatham",
    "Chinatown",
    "Cottage Grove Heights",
    "Cragin",
    "Dunning",
    "East Chicago",
    "Edison Park",
    "Edgebrook",
    "Edgewater",
    "Englewood",
    "Ford City",
    "Gage Park",
    "Galewood",
    "Garfield Park",
    "Garfield Ridge",
    "Gold Coast",
    "Grand Crossing",
    "Gresham",
    "Hamilton Park",
    "Humboldt Park",
    "Hyde Park",
    "Jefferson Park",
    "Kelvyn Park",
    "Kenwood",
    "Kilbourn Park",
    "Lake Meadows",
    "Lakeview",
    "Lawndale",
    "Lincoln Park",
    "Lincoln Square",
    "Little Village",
    "Logan Square",
    "Longwood Manor",
    "Loop",
    "Marquette Park",
    "McKinley Park",
    "Midway",
    "Morgan Park",
    "Montclare",
    "Mount Greenwood",
    "North Center",
    "Norwood Park",
    "Old Irving Park",
    "Old Town",
    "Park Manor",
    "Pilsen",
    "Princeton Park",
    "Portage Park",
    "Pullman",
    "Ravenswood",
    "River North",
    "River West",
    "Rogers Park",  # spelling fixed (was "Rodgers Park")
    "Roscoe Village",  # capitalization fixed (was "Roscoe VIllage")
    "Roseland",
    "Sauganash",
    "Schorsch Village",
    "Scottsdale",
    "South Chicago",
    "South Deering",
    "South Loop",
    "South Shore",
    "Streeterville",
    "Tri-Taylor",
    "Ukrainian Village",
    "United Center",
    "Uptown",
    "Vittum Park",
    "Washington Heights",
    "West Elsdon",
    "West Loop",
    "West Pullman",
    "Westlawn",
    "Wicker Park",
    "Woodlawn",
    "Wrigleyville",
    "Wrightwood",  # spelling fixed (was "Wrigtwood")
]

0 comments on commit 492c1b0

Please sign in to comment.