-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
177 lines (156 loc) · 6.89 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
"""
Main script: generates an HTML map showing the film shooting locations
closest to the given coordinates, for films released in the given year.
Marko Ruzak, APPS_2021, CS-1
6.02.22
"""
from haversine import haversine
import pandas as pd
import folium
import argparse
from geopy.geocoders import Nominatim
import re
# Name of the HTML file the generated map is saved to (see main()).
HTML_FILE_NAME = "map.html"
def argparser():
    """
    Build the command-line interface of the program.

    Four positional arguments are declared, in this order: latitude (float),
    longitude (float), year (int) and path (str).
    Run "python3 main.py --help" for the description of each of them.
    :return: the configured argparse.ArgumentParser; the caller is expected
    to invoke parse_args() on it to obtain the actual values.
    """
    cli = argparse.ArgumentParser(
        description='Process input parameters for generating'
                    ' geo map script.'
    )
    # (dest, metavar, converter, help text) of every positional argument,
    # in the order they have to be given on the command line.
    positionals = (
        ('latitude', 'Latitude', float,
         'latitude for generating geo map.'),
        ('longitude', 'Longitude', float,
         'longitude for generating geo map.'),
        ('year', 'Year', int,
         'year of publishing for movies, for generating geo map.'),
        ('path', 'Path', str,
         'Path to the film db file. '
         'Contains coordinates and year of publishing'),
    )
    for dest, label, converter, description in positionals:
        cli.add_argument(dest, metavar=label, type=converter, help=description)
    return cli
def file_parse(path: str) -> pd.DataFrame:
    """
    Parse the film locations file (using regular expressions) into a
    pandas DataFrame.

    A line is kept only when it contains, all together:
      * a title wrapped in double quotes,
      * a four-digit year wrapped in parentheses,
      * a closing curly brace somewhere after that year; everything that
        follows the first such brace is taken as the location.
    Every other line is silently skipped.
    NOTE(review): lines without a "{...}" block are therefore dropped, as in
    the previous implementation - confirm this is intended.

    :param path: path to the file to be parsed (decoded as ISO-8859-1).
    :return: DataFrame with one row per kept line, a 0..n-1 index and three
    string columns: Name (quotes included), Year, Location.
    Example:
            Name             Year  Location
        19  "#LoveMyRoomie"  2016  Brooklyn, New York, USA
    """
    # Patterns are compiled once, outside the per-line loop.
    name_pattern = re.compile(r'"(.*?)"')  # first text inside ""
    # The year is the first 4-digit group wrapped in parentheses. Searching
    # for a bare 4-digit run would pick up numbers from the title instead,
    # e.g. "1000 Ways to Die" (2008). Group 2 is the remainder of the line.
    year_pattern = re.compile(r"\(([0-9]{4})\)(.*)")

    rows = []
    with open(path, "r", encoding="ISO-8859-1") as file:  # Change if needed
        for line in file:
            name_match = name_pattern.search(line)
            year_match = year_pattern.search(line)
            if name_match is None or year_match is None:
                continue
            # Location is whatever follows the first "}" after the year.
            _, brace, location = year_match.group(2).partition("}")
            if not brace:
                continue
            rows.append(
                (name_match.group(), year_match.group(1), location.strip())
            )

    # Building the frame once avoids re-copying it for every parsed line.
    df = pd.DataFrame(rows, columns=["Name", "Year", "Location"])
    print("Parsed the file.")
    return df
def _resolve_coordinates(locator, location):
    """
    Geocode a location string, retrying with simplified queries.

    Queries are tried in this order until one of them resolves:
      1. the location as given;
      2. the location with parenthesised notes removed (only when it
         contains an opening parenthesis);
      3. the latest query without its first comma-separated component.

    :param locator: geocoder object exposing geocode(query)
    :param location: location string taken from the DataFrame
    :return: (latitude, longitude) tuple, or None if no query resolved
    """
    queries = [location]
    if "(" in location:
        without_notes = re.sub(r"\(.*?\)", "", location).strip()
        if without_notes and without_notes != location:
            queries.append(without_notes)
    _, comma, remainder = queries[-1].partition(",")
    remainder = remainder.strip()
    if comma and remainder:
        queries.append(remainder)

    for query in queries:
        found = locator.geocode(query)  # None when nothing was found
        if found is not None:
            return found.latitude, found.longitude
    return None


def get_top_coordinates(df: pd.DataFrame, year: int, latitude, longitude,
                        limit: int = 10):
    """
    Determine latitude/longitude of the shooting locations of movies of a
    specific year and pick the ones closest to the given point.

    Rows whose location cannot be geocoded are left out of the result.
    The result contains real rows only, so it may hold fewer than `limit`
    entries (or be empty) when few locations could be resolved.

    :param df: DataFrame object with all data parsed from the file
    (columns "Year" - string - and "Location" are read)
    :param year: year parameter inputted by the user
    :param latitude: latitude parameter inputted by the user
    :param longitude: longitude parameter inputted by the user
    :param limit: maximum number of locations to return (10 by default)
    :return: dict ordered from closest to farthest: key is the index of the
    movie in the DataFrame, value is a (latitude, longitude) tuple
    """
    locator = Nominatim(user_agent="webmap_lab")
    this_years = df.loc[df["Year"] == str(year)]

    # Many rows share the same location string; geocode each one only once.
    resolved = {}
    candidates = []
    for index, location in this_years["Location"].items():
        if location not in resolved:
            resolved[location] = _resolve_coordinates(locator, location)
        point = resolved[location]
        if point is None:
            continue
        distance = haversine((latitude, longitude), point, unit="km")
        candidates.append((distance, index, point))

    # Stable sort on distance only: on ties the earlier row is preferred.
    candidates.sort(key=lambda candidate: candidate[0])
    top_coord = {
        index: point for _, index, point in candidates[:max(limit, 0)]
    }
    print("Got closest to given movie shooting locations.")
    print(top_coord)
    return top_coord
def create_map(mapp: folium.Map, top_10: dict, df: pd.DataFrame, year: int):
    """
    Put one marker per selected movie location on the map.

    All markers are collected in a single feature group, which is then
    attached to the map. The movie name is used as the marker popup.
    :param mapp: folium.Map object the markers are added to
    :param top_10: dict mapping a DataFrame index to a (latitude, longitude)
    tuple
    :param df: DataFrame object with all film values ("Name" is read)
    :param year: year inputted by the user, used in the feature group name
    :return: the same map object, with the feature group added
    """
    layer = folium.FeatureGroup(name=f"All {year}'s Movie Locations")
    for row_id, coords in top_10.items():
        title = df.loc[row_id]["Name"]
        marker = folium.Marker(name=title, location=coords, popup=title)
        layer.add_child(marker)
    mapp.add_child(layer)
    print("Created map.")
    return mapp
def main():
    """
    Main function of our program.

    Reads the command-line parameters, creates a map centred on the given
    position (marked with a red circle), parses the film file, looks up the
    closest shooting locations for the given year, adds them to the map and
    saves the map to HTML_FILE_NAME.
    :return: None
    """
    input_params = argparser().parse_args()
    mapp = folium.Map(location=[input_params.latitude, input_params.longitude],
                      zoom_start=3)
    folium.CircleMarker(location=(input_params.latitude, input_params.longitude),
                        radius=5, popup='I am here!',
                        color='red', fill=True, fill_color='red').add_to(mapp)
    dataframe = file_parse(input_params.path)
    try:
        top_coordinates = get_top_coordinates(dataframe, input_params.year,
                                              input_params.latitude,
                                              input_params.longitude)
    except AttributeError as error:
        # Keep the best-effort behaviour, but bind an empty result so that
        # the map can still be built instead of failing on an unbound name.
        print(f"Could not determine movie locations ({error}); "
              "the map will only contain the given position.")
        top_coordinates = {}
    mapp = create_map(mapp, top_coordinates, dataframe, input_params.year)
    mapp.save(HTML_FILE_NAME)
    print(f"Job's done!. Generated map is stored to {HTML_FILE_NAME}")


if __name__ == "__main__":
    main()