ConnActivity · Nereuxofficial · Dec 14, 2023 · Dec 1, 2023 · Dec 1, 2023 · Dec 3, 2023
diff --git a/downloader.py b/downloader.py
@@ -43,12 +43,11 @@ def get_now_playing_movies(page=1):
     movies = tmdb.movies().now_playing(page=page)
     return movies
 
-
-def get_popular_people():
-    people = tmdb.people().popular()
+# NOTE(review): this wrapper only forwards `page` to tmdb.people().popular(); consider constructing the TMDb object once and calling its methods directly instead of keeping these pass-through helpers.
+def get_popular_people(page=1):
+    people = tmdb.people().popular(page=page)
     return people
 
-
 def get_person_details(person_id):
     person = tmdb.person(person_id)
     return person

diff --git a/eda/first_eda.ipynb b/eda/first_eda.ipynb
diff --git a/eda/second_eda.ipynb b/eda/second_eda.ipynb
diff --git a/eda/third_eda.ipynb b/eda/third_eda.ipynb
diff --git a/insert_actors.py b/insert_actors.py
@@ -0,0 +1,65 @@
+"""Download popular TMDb people and store them with their popularity.
+
+The script pages through the popular-people listing, fetches the details of
+every distinct person id, and then inserts the people and their popularity.
+"""
+import downloader
+import insert
+import time
+
+all_actor_ids = set()
+all_actors = []
+all_people_popularity = []
+
+# Listing pages START..PAGES (inclusive) are fetched.
+START = 1
+PAGES = 500
+
+start_time = time.time()
+
+for i in range(START, PAGES + 1):
+    print(f"Page {i} of {PAGES} started")
+    for actor in downloader.get_popular_people(i).results:
+        all_actor_ids.add(actor.id)
+        # Append keeps listing order; insert(i, actor) used the page number as a
+        # list index, which scrambled the order and shifted the list every call.
+        all_actors.append(actor)
+    print(f"Page {i} of {PAGES} finished")
+
+end_time = time.time()
+print(f"Iterating through pages took {end_time - start_time} seconds to complete")
+
+
+start_time = time.time()
+print("Downloading popularity details")
+for i, actor_id in enumerate(all_actor_ids):
+    try:
+        all_people_popularity.append(downloader.get_person_details(actor_id))
+    except Exception as e:
+        # Best effort: report the failed id and carry on with the rest.
+        print(f"Could not load actor with id {actor_id}")
+        print(e)
+    if i % 25 == 0:
+        print(f"{i} of {len(all_actor_ids)} loaded from API")
+
+end_time = time.time()
+print(f"Downloading details took {end_time - start_time} seconds to complete")
+
+
+start_time = time.time()
+print("Inserting actors into database")
+insert.insert_person(all_actors)
+
+end_time = time.time()
+print(f"Inserting actors took {end_time - start_time} seconds to complete")
+
+
+start_time = time.time()
+print("Inserting popularity into database")
+# NOTE(review): PersonPopularity is not imported anywhere in this file, so the
+# next line raises NameError; import it from its defining module -- TODO confirm.
+people_popularity = [PersonPopularity(person.id, person.popularity) for person in all_people_popularity]
+insert.insert_person_popularity(people_popularity)
+
+end_time = time.time()
+print(f"Inserting popularity took {end_time - start_time} seconds to complete")
diff --git a/requirements.txt b/requirements.txt
@@ -1,2 +1,10 @@
-postgres==4.0
-themoviedb==0.4.0
+themoviedb==0.4.0
+psycopg2-binary==2.9.9
+plotly
+seaborn
+matplotlib
+gensim
+nltk
+wordcloud
+scikit-learn
+pandas
diff --git a/script.py b/script.py
@@ -19,7 +19,7 @@
         all_movie_ids.add(movie.id)
     for changes in downloader.get_changes_for_all_movies(i):
         all_movie_ids.add(changes["id"])
-        all_changes.append(changes)
+        all_changes.append(changes)        
     print(f"Page {i} of {PAGES} finished")
 
 end_time = time.time()