#1 Fixing UX Bettina's fixes

harmonydata · Nov 8, 2022 · 5960482 · 5960482
1 parent e20d89c
commit 5960482
Show file tree

Hide file tree

Showing 8 changed files with 50 additions and 32 deletions.
diff --git a/front_end/dash_callbacks/callbacks_gettext.py b/front_end/dash_callbacks/callbacks_gettext.py
@@ -35,7 +35,8 @@ def add_gettext_callbacks(dash_app):
                                Output("please_wait_message", "children"),
                                Output("filter_by_cat", "children"),
                                Output("harmony_graphic", "src"),
-                               Output("dropdown-edge", "options")
+                               Output("dropdown-edge", "options"),
+                               Output("add_row", "children")
                                ],
                        inputs=[Input("select_language", "value")
                                ]
@@ -112,6 +113,8 @@ def find_if_tooltip_cookie_present(language):
 The AI converts the text of each question into a vector in 1600 dimensions using a neural network called GPT-2. This technique is called a *document embedding*.
 
 The distance between any two questions is measured according to the cosine similarity metric between the two vectors. Two questions which are similar in meaning, even if worded differently or in different languages, will have a high degree of similarity between their vector representations. Questions which are very different tend to be far apart in the vector space.
+
+You can read more about how Harmony works [in this blog post](https://harmonydata.org/how-does-harmony-work/).
 """),
             _("Hide tip"),
             _("AI tool built by "),
@@ -122,5 +125,6 @@ def find_if_tooltip_cookie_present(language):
             _("Filter questions by category:"),
             dash_app.get_asset_url(_('harmony_flowchart_en.png')),
             [{"value": 1, "label": _("positive")}, {"value": -1, "label": _("negative")},
-            {"value": 0, "label": _("no connection")}]
+             {"value": 0, "label": _("no connection")}],
+            _("Add row")
         ]
diff --git a/front_end/dash_callbacks/callbacks_view_1.py b/front_end/dash_callbacks/callbacks_view_1.py
@@ -98,8 +98,7 @@ def wake_up_tika_web_app_on_page_load(location):
             Output("please_wait_message", "style"),
         ]
         ,
-        inputs=[Input("url", "href"),  # on startup
-                Input("file_table", "data"),  # Commented out temporarily to remove circular dependency warning
+        inputs=[Input("file_table", "data"),  # Commented out temporarily to remove circular dependency warning
                 Input("dataset", "value"),
                 Input('upload-data', 'contents'),
                 State('upload-data', 'filename'),
@@ -108,7 +107,7 @@ def wake_up_tika_web_app_on_page_load(location):
                 ],
         prevent_initial_call=True
     )
-    def user_uploaded_files(href, file_table,
+    def user_uploaded_files(file_table,
                             selected_datasets, all_file_contents, file_names, file_date, parsed_documents):
 
         print("file_names", file_names)
@@ -243,7 +242,6 @@ def display_questions(document_content, language, add_row, old_cols, old_data):
             old_data.append({c['id']: '' for c in old_cols})
             return [old_cols, old_data]
 
-
         if language == "pt":
             from application import pt_lang
             _ = pt_lang.gettext
@@ -252,9 +250,6 @@ def display_questions(document_content, language, add_row, old_cols, old_data):
 
         dfs = []
 
-
-
-
         for file_name, pages in document_content.items():
             if file_name.endswith("pdf"):
                 text = "\n".join(pages)

diff --git a/front_end/dash_callbacks/callbacks_view_2.py b/front_end/dash_callbacks/callbacks_view_2.py
@@ -33,12 +33,12 @@ def add_view_2_callbacks(dash_app):
             State("excerpt_table", "columns"),
             State("excerpt_table", "data"),
             State("excerpt_table", "derived_virtual_indices"),
-            State("excerpt_table", "derived_virtual_selected_rows"),
+            State("excerpt_table", "selected_rows"),
             State("similaritystore", "data"),
         ],
     )
     def find_similarity(_, tab, columns, data, filtered_rows, selected_rows, old_similarity_store):
-        print ("SEL", selected_rows)
+        print("SEL", selected_rows)
         """
         This function does the heavy lifting.
 
@@ -58,9 +58,9 @@ def find_similarity(_, tab, columns, data, filtered_rows, selected_rows, old_sim
 
         # If user has selected some rows with the checkbox, use only them.
         if selected_rows is not None and len(selected_rows) > 0:
-            data=[data[r] for r in selected_rows]
-        elif filtered_rows is not None and len(filtered_rows) > 0:
-            data = [data[r] for r in filtered_rows]
+            data = [data[r] for r in selected_rows]
+        # elif filtered_rows is not None and len(filtered_rows) > 0:
+        #     data = [data[r] for r in filtered_rows]
 
         df_questions = deserialise_questions_dataframe(columns, data)
 
@@ -170,12 +170,14 @@ def display_value_of_edge(selection, elements):
             State("excerpt_table", "columns"),
             State("excerpt_table", "data"),
             State("excerpt_table", "derived_virtual_indices"),
-            State("excerpt_table", "derived_virtual_selected_rows"),
+            State("excerpt_table", "selected_rows"),
             Input("manual_edges", "data"),
             Input("select_language", "value")
         ],
+        prevent_initial_call=True
     )
-    def display_similarity_graph(pickled, sensitivity, categories_to_display, columns, data,filtered_rows, selected_rows, manual_edges_serialisable, language):
+    def display_similarity_graph(pickled, sensitivity, categories_to_display, columns, data, filtered_rows,
+                                 selected_rows, manual_edges_serialisable, language):
         if language == "pt":
             from application import pt_lang
             _ = pt_lang.gettext
@@ -184,9 +186,9 @@ def display_similarity_graph(pickled, sensitivity, categories_to_display, column
 
         # If user has selected some rows with the checkbox, use only them.
         if selected_rows is not None and len(selected_rows) > 0:
-            data=[data[r] for r in selected_rows]
-        elif filtered_rows is not None and len(filtered_rows) > 0:
-            data = [data[r] for r in filtered_rows]
+            data = [data[r] for r in selected_rows]
+        # elif filtered_rows is not None and len(filtered_rows) > 0:
+        #     data = [data[r] for r in filtered_rows]
 
         matches = pkl.loads(codecs.decode(pickled.encode(), "base64"))
 

diff --git a/front_end/dash_layout/body.py b/front_end/dash_layout/body.py
@@ -119,7 +119,7 @@
                                 dcc.Dropdown(id="filter_questions", options=[], value=None, multi=False,
                                              style={'float': 'left', 'width': '50%', "margin-left": "20px"})],
                                 style={"display": "flex", "width": "100%"}),
-                            html.Button("+", id="add_row"),
+                            html.Button(id="add_row"),
 
                             dcc.Loading([
                                 dash_table.DataTable(

diff --git a/front_end/locale/pt/LC_MESSAGES/body.mo b/front_end/locale/pt/LC_MESSAGES/body.mo
diff --git a/front_end/locale/pt/LC_MESSAGES/body.po b/front_end/locale/pt/LC_MESSAGES/body.po
@@ -8,7 +8,7 @@ msgstr ""
 "Project-Id-Version: \n"
 "Report-Msgid-Bugs-To: \n"
 "POT-Creation-Date: 2022-10-28 15:14-0300\n"
-"PO-Revision-Date: 2022-11-02 16:46+0000\n"
+"PO-Revision-Date: 2022-11-08 12:02+0000\n"
 "Last-Translator: \n"
 "Language-Team: \n"
 "Language: pt_BR\n"
@@ -188,6 +188,9 @@ msgid ""
 "differently or in different languages, will have a high degree of similarity between their "
 "vector representations. Questions which are very different tend to be far apart in the vector "
 "space.\n"
+"\n"
+"You can read more about how Harmony works [in this blog post](https://harmonydata.org/how-"
+"does-harmony-work/).\n"
 msgstr ""
 "## Uso do gráfico\n"
 "\n"
@@ -213,6 +216,9 @@ msgstr ""
 "formuladas de forma diferente ou em idiomas diferentes, terão um alto grau de similaridade "
 "entre suas representações vetoriais. Perguntas muito diferentes, por outro lado, tendem a "
 "estar distanciadas no espaço vetorial.\n"
+"\n"
+"Você pode ler mais sobre como o Harmony funciona [nesta postagem do blog](https://harmonydata."
+"org/how-does-harmony-work/).\n"
 
 #: body.py:258
 msgid "➋ Check the matches"
@@ -327,3 +333,6 @@ msgstr "Categoria da Pergunta"
 
 msgid "File"
 msgstr "Arquivo"
+
+msgid "Add row"
+msgstr "Nova linha"
diff --git a/front_end/utils/excel_processor.py b/front_end/utils/excel_processor.py
@@ -49,7 +49,7 @@ def convert_jsonified_excel_to_questions_dataframe(excel_in_json_format: str) ->
     # Check if header row present, in which case remove it
     rows_to_delete = []
     for i in range(len(df_questions)):
-        if df_questions.question.iloc[i] is None or df_questions.question.iloc[i].lower() in ["question", "text",
+        if df_questions.question.iloc[i] is None or type(df_questions.question.iloc[i]) is not str or df_questions.question.iloc[i].lower() in ["question", "text",
                                                                                               "pergunta", "texto"]:
             rows_to_delete.append(i)
 

diff --git a/front_end/utils/question_category_classifier.py b/front_end/utils/question_category_classifier.py
@@ -1,6 +1,9 @@
 import bz2
 import pickle as pkl
 import re
+import traceback
+import sys
+
 
 from utils.language_utils import get_clean_language_code
 from utils.pt_en_dict import pt_en_map
@@ -35,11 +38,16 @@ def categorise_questions(self, df):
 
         parsed_questions = df.apply(lambda r: parse_questions(get_spacy_model(get_clean_language_code(r.language)), r.question), axis=1)
 
-        categories = self.model.predict(parsed_questions)
-        # Override if empty strings
-        for i in range(len(df)):
-            lc = df.question.iloc[i].strip().lower()
-            if lc == "":
-                categories[i] = ""
-
-        df["question_category"] = categories
+        try:
+            categories = self.model.predict(parsed_questions)
+            # Override if empty strings
+            for i in range(len(df)):
+                lc = df.question.iloc[i].strip().lower()
+                if lc == "":
+                    categories[i] = ""
+
+            df["question_category"] = categories
+        except:
+            print ("Exception categorising questions")
+            traceback.print_exception(*sys.exc_info())
+            df["question_category"] = "N/A"