Skip to content

Commit

Permalink
grid version for windows using react-electron
Browse files: browse the repository at this point in the history
  • Loading branch information
arthurpolese committed Dec 8, 2023
1 parent 8670a1e commit e1cebe9
Show file tree
Hide file tree
Showing 89 changed files with 34,786 additions and 26,003 deletions.
10 changes: 4 additions & 6 deletions habitus_ui_interface-main/backend/backend.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os.path
import sys
import pandas as pd

from . import corpus_parser
Expand Down Expand Up @@ -71,9 +72,9 @@ def set_superfiles(self, supercorpus_filename, row_filename):
def get_grid(self, k: int, anchor: str, grid_filename: str, clustering_algorithm: str) -> Grid:
print("New grid -- processing documents ... ")
unique_filename = grid_filename.split(".")[0]
if self.set_up_corpus(anchor): # If the corpus setup went well
grid = Grid.generate(self.path, self.clean_supercorpus_filename, self.row_labels_filename, grid_filename, self.corpus, k, clustering_algorithm)
return grid
self.set_up_corpus(anchor)
grid = Grid.generate(self.path, self.clean_supercorpus_filename, self.row_labels_filename, grid_filename, self.corpus, k, clustering_algorithm)
return grid


def load_grid(self, unique_filename: str, clustering_algorithm: str) -> Grid:
Expand Down Expand Up @@ -102,9 +103,6 @@ def set_up_corpus(self, anchor: str):
columns = [column for column in data.columns if not column.startswith("Unnamed: 0") and column != 'stripped' and column != 'readable']
self.rows = [Row(row_name) for row_name in columns]
self.corpus = Corpus(self.path, self.clean_supercorpus_filename, self.row_labels_filename, self.rows, anchor, self.linguist)
if len(self.corpus.documents) < 4: # This is specific to the way I initialize the kmeans clusters (by pairs, thus you need four documents minimum for the minimum two clusters) -- so, can be done away with when clustering is improved
return False
return True

# Not sure if this should be in backend, or a method of Grid
def load_clusters(self, cells, col_names: list[str]):
Expand Down
8 changes: 7 additions & 1 deletion habitus_ui_interface-main/backend/control_panel.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
import pandas as pd
import sys
import os.path

backend_path = os.path.dirname(os.path.abspath(__file__))
venv_path = os.path.join(backend_path, 'python_modules', 'venv')
site_packages_path = os.path.join(venv_path, 'lib', 'python3.10.11', 'site-packages')
sys.path.insert(0, site_packages_path)

import pandas as pd
from .frontend import Frontend

class ControlPanel(Frontend):
Expand Down
16 changes: 11 additions & 5 deletions habitus_ui_interface-main/backend/corpus.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
import os.path
import sys

backend_path = os.path.dirname(os.path.abspath(__file__))
venv_path = os.path.join(backend_path, 'python_modules', 'venv')
site_packages_path = os.path.join(venv_path, 'lib', 'python3.10.11', 'site-packages')
sys.path.insert(0, site_packages_path)

import csv
import gensim.downloader as api
import json
import numpy as np
import os.path
import pandas as pd
import shutil
import spacy
Expand All @@ -24,11 +31,10 @@ def __init__(self, path: str, clean_supercorpus_filename: str, row_labels_filena
self.rows = rows

self.model = None
self.model_filename = "../process_files/glove.6B.300d.txt"
self.model_filename = "./process_files/glove.6B.300d.txt"
self.linguist = linguist
self.documents: list[Document] = self.load_anchored_documents(anchor == 'load_all') # Reminder: This is a bool. Anchor is passed via self.
if len(self.documents) > 0: # Check or this will throw an error. Whether there are enough docs to keep going will be checked in backend.set_up_corpus()
self.initialize(preexisting)
self.documents: list[Document] = self.load_anchored_documents(anchor == 'load_all')
self.initialize(preexisting)

def initialize(self, preexisting = None):
vector_texts = [document.get_vector_text() for document in self.documents]
Expand Down
22 changes: 15 additions & 7 deletions habitus_ui_interface-main/backend/corpus_parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
import io
import sys
import os

backend_path = os.path.dirname(os.path.abspath(__file__))
venv_path = os.path.join(backend_path, 'python_modules', 'venv')
site_packages_path = os.path.join(venv_path, 'lib', 'python3.10.11', 'site-packages')
sys.path.insert(0, site_packages_path)

import io
import pandas as pd
import sys
import textract

from nltk.tokenize import sent_tokenize
Expand Down Expand Up @@ -102,13 +108,15 @@ def parse_supercorpus(corpus_name, input_dir, output_filepath):
pd.DataFrame({'sentence': all_lines}).to_csv(output_file + output_extension, encoding = encoding)

rows = pd.DataFrame({'readable': all_lines, 'label': row_labels})

print("rows", rows)
# In this process the columns seem to get sorted.
df = pd.concat([rows.drop('label', 1), pd.get_dummies(rows.label)], axis = 1)
for col in df.columns:
#df = pd.concat([rows.drop('label', 1), pd.get_dummies(rows.label)], axis = 1)
for col in rows:
if 'Unnamed:' in col:
df.drop(col, inplace = True)
df['all'] = 1
df.to_csv(output_file + '_row_labels.csv')
rows.drop(col, inplace = True)
rows['all'] = 1
rows.to_csv(output_file + '_row_labels.csv')

print(f"Output: {output_file}\n")

Expand Down
8 changes: 8 additions & 0 deletions habitus_ui_interface-main/backend/grid.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
import os.path
import sys

backend_path = os.path.dirname(os.path.abspath(__file__))
venv_path = os.path.join(backend_path, 'python_modules', 'venv')
site_packages_path = os.path.join(venv_path, 'lib', 'python3.10.11', 'site-packages')
sys.path.insert(0, site_packages_path)

import numpy as np
import pandas as pd

Expand Down
8 changes: 8 additions & 0 deletions habitus_ui_interface-main/backend/linguist.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
import os.path
import sys

backend_path = os.path.dirname(os.path.abspath(__file__))
venv_path = os.path.join(backend_path, 'python_modules', 'venv')
site_packages_path = os.path.join(venv_path, 'lib', 'python3.10.11', 'site-packages')
sys.path.insert(0, site_packages_path)

import numpy as np
import re
import string
Expand Down
Loading

0 comments on commit e1cebe9

Please sign in to comment.