Skip to content

Commit

Permalink
Filter city by minimum population only after adding state
Browse files (browse the repository at this point in the history)
  • Loading branch information
nanglo123 committed Sep 23, 2024
1 parent 23da16b commit d33df73
Showing 1 changed file with 8 additions and 5 deletions.
13 changes: 8 additions & 5 deletions mira/dkg/resources/geonames.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,16 +109,14 @@ def get_cities(code_to_country, code_to_admin1, code_to_admin2, *, minimum_popul
),
)

cities_df = cities_df[cities_df.population.astype(int) > minimum_population]
cities_df.synonyms = cities_df.synonyms.str.split(",")

terms = {}
for term in code_to_country.values():
terms[term.identifier] = term

cols = ["geonames_id", "name", "synonyms", "country_code", "admin1", "admin2"]
for identifier, name, synonyms, country, admin1, admin2 in cities_df[cols].values:
terms[identifier] = term = Term.from_triple("geonames", identifier, name)
cols = ["geonames_id", "name", "synonyms", "country_code", "admin1",
"admin2", "population"]
for identifier, name, synonyms, country, admin1, admin2, population in (cities_df[cols].values):
if synonyms and not isinstance(synonyms, float):
for synoynm in synonyms:
term.append_synonym(synoynm)
Expand All @@ -135,6 +133,11 @@ def get_cities(code_to_country, code_to_admin1, code_to_admin2, *, minimum_popul

terms[admin1_term.identifier] = admin1_term

# We skip cities that don't meet the minimum population requirement
if int(population) < minimum_population:
continue
terms[identifier] = term = Term.from_triple("geonames", identifier,
name)
if pd.notna(admin2):
admin2_full = f"{country}.{admin1}.{admin2}"
admin2_term = code_to_admin2.get(admin2_full)
Expand Down

0 comments on commit d33df73

Please sign in to comment.