diff --git a/model_development/word_cloud_search.py b/model_development/word_cloud_search.py index 4668d16..518619d 100644 --- a/model_development/word_cloud_search.py +++ b/model_development/word_cloud_search.py @@ -13,11 +13,12 @@ print("Loading BERT model...") print('Using CUDA' if torch.cuda.is_available() else 'Using MPS') + # Initialize pre-trained BERT model embedder = SentenceTransformer('bert-base-nli-mean-tokens', device='cuda' if torch.cuda.is_available() else 'mps') # Load embeddings from file or calculate them (takes a while) -load_embeddings = False +load_embeddings = True embeddings_file = 'corpus_embeddings.pkl' dataframe_file = 'movie_titles.pkl' @@ -53,9 +54,8 @@ def query_db(sql_query, conn): sql_query = """ SELECT title FROM movies - """ -# WHERE title IS NOT NULL AND budget > 0 AND revenue > 0 AND runtime >= 20 + movie_titles = query_db(sql_query, conn) # Print how many movies are in the database @@ -63,6 +63,7 @@ def query_db(sql_query, conn): # All movie titles corpus = movie_titles['title'] + # Calculate embeddings otherwise corpus_embeddings = embedder.encode(corpus, show_progress_bar=True) @@ -180,12 +181,7 @@ def find_similar_movies(movie_title, top_n, additional_k): This application allows you to find movies similar to a given title using BERT embeddings and TSNE for visualization. Just enter a movie title, select the number of similar movies you want to see, and the application will display a list of similar movies along with a 3D plot. -## Selection of movies: {total_movies} films -The movies are selected from the database based on the following criteria: -- The movie title is not null -- The movie has a budget > 0 -- The movie has a revenue > 0 -- The movie has a runtime >= 20 minutes +## Number of movies in the database: {total_movies}. """ with gr.Blocks() as iface: