Grid version for Windows using React-Electron

Allegra-Cohen · Dec 8, 2023 · e1cebe9 · e1cebe9
1 parent 8670a1e
commit e1cebe9
Show file tree

Hide file tree

Showing 89 changed files with 34,786 additions and 26,003 deletions.
diff --git a/habitus_ui_interface-main/backend/backend.py b/habitus_ui_interface-main/backend/backend.py
@@ -1,4 +1,5 @@
 import os.path
+import sys
 import pandas as pd
 
 from . import corpus_parser
@@ -71,9 +72,9 @@ def set_superfiles(self, supercorpus_filename, row_filename):
 	def get_grid(self, k: int, anchor: str, grid_filename: str, clustering_algorithm: str) -> Grid:
 		print("New grid -- processing documents ... ")
 		unique_filename = grid_filename.split(".")[0]
-		if self.set_up_corpus(anchor): # If the corpus setup went well
-			grid = Grid.generate(self.path, self.clean_supercorpus_filename, self.row_labels_filename, grid_filename, self.corpus, k, clustering_algorithm)
-			return grid
+		self.set_up_corpus(anchor)
+		grid = Grid.generate(self.path, self.clean_supercorpus_filename, self.row_labels_filename, grid_filename, self.corpus, k, clustering_algorithm)
+		return grid
 
 
 	def load_grid(self, unique_filename: str, clustering_algorithm: str) -> Grid:
@@ -102,9 +103,6 @@ def set_up_corpus(self, anchor: str):
 		columns = [column for column in data.columns if not column.startswith("Unnamed: 0") and column != 'stripped' and column != 'readable']
 		self.rows = [Row(row_name) for row_name in columns]
 		self.corpus = Corpus(self.path, self.clean_supercorpus_filename, self.row_labels_filename, self.rows, anchor, self.linguist)
-		if len(self.corpus.documents) < 4: # This is specific to the way I initialize the kmeans clusters (by pairs, thus you need four documents minimum for the minimum two clusters) -- so, can be done away with when clustering is improved
-			return False
-		return True
 
 	# Not sure if this should be in backend, or a method of Grid
 	def load_clusters(self, cells, col_names: list[str]):

diff --git a/habitus_ui_interface-main/backend/control_panel.py b/habitus_ui_interface-main/backend/control_panel.py
@@ -1,6 +1,12 @@
-import pandas as pd
 import sys
+import os.path
+
+backend_path = os.path.dirname(os.path.abspath(__file__))
+venv_path = os.path.join(backend_path, 'python_modules', 'venv')
+site_packages_path = os.path.join(venv_path, 'lib', 'python3.10.11', 'site-packages')
+sys.path.insert(0, site_packages_path)
 
+import pandas as pd
 from .frontend import Frontend
 
 class ControlPanel(Frontend):

diff --git a/habitus_ui_interface-main/backend/corpus.py b/habitus_ui_interface-main/backend/corpus.py
@@ -1,8 +1,15 @@
+import os.path
+import sys
+
+backend_path = os.path.dirname(os.path.abspath(__file__))
+venv_path = os.path.join(backend_path, 'python_modules', 'venv')
+site_packages_path = os.path.join(venv_path, 'lib', 'python3.10.11', 'site-packages')
+sys.path.insert(0, site_packages_path)
+
 import csv
 import gensim.downloader as api
 import json
 import numpy as np
-import os.path
 import pandas as pd
 import shutil
 import spacy
@@ -24,11 +31,10 @@ def __init__(self, path: str, clean_supercorpus_filename: str, row_labels_filena
 		self.rows = rows
 
 		self.model = None
-		self.model_filename = "../process_files/glove.6B.300d.txt"
+		self.model_filename = "./process_files/glove.6B.300d.txt"
 		self.linguist = linguist
-		self.documents: list[Document] = self.load_anchored_documents(anchor == 'load_all') # Reminder: This is a bool. Anchor is passed via self.
-		if len(self.documents) > 0: # Check or this will throw an error. Whether there are enough docs to keep going will be checked in backend.set_up_corpus()
-			self.initialize(preexisting)
+		self.documents: list[Document] = self.load_anchored_documents(anchor == 'load_all')
+		self.initialize(preexisting)
 
 	def initialize(self, preexisting = None):
 		vector_texts = [document.get_vector_text() for document in self.documents]

diff --git a/habitus_ui_interface-main/backend/corpus_parser.py b/habitus_ui_interface-main/backend/corpus_parser.py
@@ -1,7 +1,13 @@
-import io
+import sys
 import os
+
+backend_path = os.path.dirname(os.path.abspath(__file__))
+venv_path = os.path.join(backend_path, 'python_modules', 'venv')
+site_packages_path = os.path.join(venv_path, 'lib', 'python3.10.11', 'site-packages')
+sys.path.insert(0, site_packages_path)
+
+import io
 import pandas as pd
-import sys
 import textract
 
 from nltk.tokenize import sent_tokenize
@@ -102,13 +108,15 @@ def parse_supercorpus(corpus_name, input_dir, output_filepath):
     pd.DataFrame({'sentence': all_lines}).to_csv(output_file + output_extension, encoding = encoding)
 
     rows = pd.DataFrame({'readable': all_lines, 'label': row_labels})
+
+    print("rows", rows)
     # In this process the columns seem to get sorted.
-    df = pd.concat([rows.drop('label', 1), pd.get_dummies(rows.label)], axis = 1)
-    for col in df.columns:
+    #df = pd.concat([rows.drop('label', 1), pd.get_dummies(rows.label)], axis = 1)
+    for col in rows:
         if 'Unnamed:' in col:
-            df.drop(col, inplace = True)
-    df['all'] = 1
-    df.to_csv(output_file + '_row_labels.csv')
+            rows.drop(col, inplace = True)
+    rows['all'] = 1
+    rows.to_csv(output_file + '_row_labels.csv')
 
     print(f"Output: {output_file}\n")
 

diff --git a/habitus_ui_interface-main/backend/grid.py b/habitus_ui_interface-main/backend/grid.py
@@ -1,3 +1,11 @@
+import os.path
+import sys
+
+backend_path = os.path.dirname(os.path.abspath(__file__))
+venv_path = os.path.join(backend_path, 'python_modules', 'venv')
+site_packages_path = os.path.join(venv_path, 'lib', 'python3.10.11', 'site-packages')
+sys.path.insert(0, site_packages_path)
+
 import numpy as np
 import pandas as pd
 

diff --git a/habitus_ui_interface-main/backend/linguist.py b/habitus_ui_interface-main/backend/linguist.py
@@ -1,3 +1,11 @@
+import os.path
+import sys
+
+backend_path = os.path.dirname(os.path.abspath(__file__))
+venv_path = os.path.join(backend_path, 'python_modules', 'venv')
+site_packages_path = os.path.join(venv_path, 'lib', 'python3.10.11', 'site-packages')
+sys.path.insert(0, site_packages_path)
+
 import numpy as np
 import re
 import string