Skip to content

Commit

Permalink
Use the trip start and end location directly
Browse files Browse the repository at this point in the history
Instead of looking up the start and end places and reading their locations.

This has two advantages over the current implementation:
1. We don't have to make 2 separate database calls for each trip
    Note that we compute an nxn distance matrix, so this is likely to be a
    substantial savings
2. We can pass in an in-memory trip list. That makes it easier to write unit
    tests, and to use alternate load methods (e.g. for working with federated data;
    see e-mission/e-mission-eval-private-data@952c476)

@corinne-hcr reported that the place location and the trip start/end locations
are not identical. We don't have unit tests to verify this (alas!) but the
top-level results are not changed significantly.

So the ROI seems high enough; we are going ahead with this change.
e-mission#826 (comment)
  • Loading branch information
shankari committed Jul 24, 2021
1 parent 7a75990 commit 58a14a8
Showing 1 changed file with 8 additions and 16 deletions.
24 changes: 8 additions & 16 deletions emission/analysis/modelling/tour_model/similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,10 @@ def filter_too_short(all_trips, radius):
for t in all_trips:
logging.debug(f"Considering trip {t['_id']}: {t.data.start_fmt_time} -> {t.data.end_fmt_time}, {t.data.start_loc} -> {t.data.end_loc}")
try:
start_place = esda.get_entry(esda.CLEANED_PLACE_KEY,
t.data.start_place)
end_place = esda.get_entry(esda.CLEANED_PLACE_KEY,
t.data.end_place)
start_lon = start_place.data.location["coordinates"][0]
start_lat = start_place.data.location["coordinates"][1]
end_lon = end_place.data.location["coordinates"][0]
end_lat = end_place.data.location["coordinates"][1]
start_lon = t.data.start_loc["coordinates"][0]
start_lat = t.data.start_loc["coordinates"][1]
end_lon = t.data.end_loc["coordinates"][0]
end_lat = t.data.end_loc["coordinates"][1]
logging.debug("endpoints are = (%s, %s) and (%s, %s)" %
(start_lon, start_lat, end_lon, end_lat))
if within_radius(start_lat, start_lon, end_lat, end_lon, radius):
Expand Down Expand Up @@ -183,14 +179,10 @@ def evaluate_bins(self):
for bin in self.bins:
for b in bin:
tb = self.data[b]
start_place = esda.get_entry(esda.CLEANED_PLACE_KEY,
tb.data.start_place)
end_place = esda.get_entry(esda.CLEANED_PLACE_KEY,
tb.data.end_place)
start_lon = start_place.data.location["coordinates"][0]
start_lat = start_place.data.location["coordinates"][1]
end_lon = end_place.data.location["coordinates"][0]
end_lat = end_place.data.location["coordinates"][1]
start_lon = tb.data.start_loc["coordinates"][0]
start_lat = tb.data.start_loc["coordinates"][1]
end_lon = tb.data.end_loc["coordinates"][0]
end_lat = tb.data.end_loc["coordinates"][1]
path = [start_lat, start_lon, end_lat, end_lon]
points.append(path)
logging.debug("number of labels are %d, number of points are = %d" %
Expand Down

0 comments on commit 58a14a8

Please sign in to comment.