From 63583ae70e53421252f7aab6d9dbeb8a397d8fd0 Mon Sep 17 00:00:00 2001 From: Dayenne Souza Date: Thu, 4 Apr 2024 12:15:17 -0300 Subject: [PATCH] add .exe files to build locally --- .gitattributes | 1 + .gitignore | 8 +- .streamlit/config.toml | 5 +- README.md | 11 +- app/Home.py | 8 +- app/__init__.py | 0 app/components/app_loader.py | 11 +- app/components/app_openai.py | 17 ++ app/components/app_terminator.py | 35 +++ app/javascript/styles.py | 7 - app/pages/Settings.py | 46 +++ app/pages/__init__.py | 0 app/util/AI_API.py | 7 +- app/util/Database.py | 10 +- app/util/Embedder.py | 29 +- app/util/SecretsHandler.py | 28 ++ app/util/__init__.py | 0 app/util/openai_instance.py | 30 ++ app/util/session_variables.py | 1 + app/wkhtmltox/__init__.py | 0 app/workflows/attribute_patterns/__init__.py | 0 app/workflows/data_synthesis/__init__.py | 0 app/workflows/group_narratives/__init__.py | 0 app/workflows/question_answering/__init__.py | 0 app/workflows/question_answering/functions.py | 12 +- app/workflows/question_answering/workflow.py | 8 +- app/workflows/record_matching/__init__.py | 0 app/workflows/risk_networks/__init__.py | 0 installer.cfg | 134 +++++++++ installer.nsi | 273 ++++++++++++++++++ installer_script.ps1 | 25 ++ requirements.txt | Bin 3878 -> 4016 bytes run_exe.py | 65 +++++ 33 files changed, 732 insertions(+), 39 deletions(-) create mode 100644 .gitattributes create mode 100644 app/__init__.py create mode 100644 app/components/app_openai.py create mode 100644 app/components/app_terminator.py create mode 100644 app/pages/Settings.py create mode 100644 app/pages/__init__.py create mode 100644 app/util/SecretsHandler.py create mode 100644 app/util/__init__.py create mode 100644 app/util/openai_instance.py create mode 100644 app/wkhtmltox/__init__.py create mode 100644 app/workflows/attribute_patterns/__init__.py create mode 100644 app/workflows/data_synthesis/__init__.py create mode 100644 app/workflows/group_narratives/__init__.py create mode 100644 
app/workflows/question_answering/__init__.py create mode 100644 app/workflows/record_matching/__init__.py create mode 100644 app/workflows/risk_networks/__init__.py create mode 100644 installer.cfg create mode 100644 installer.nsi create mode 100644 installer_script.ps1 create mode 100644 run_exe.py diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..ca8cde62 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*Toolkit.exe filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore index 750ce466..90292975 100644 --- a/.gitignore +++ b/.gitignore @@ -38,4 +38,10 @@ qa_mine outputs cache data -legacy \ No newline at end of file +legacy + +# Exe installer builders +build/nsis/* +!build/nsis/Intelligence Toolkit.exe + +app/wkhtmltox/wkhtmltox-0.12.6-1.msvc2015-win64_.exe \ No newline at end of file diff --git a/.streamlit/config.toml b/.streamlit/config.toml index b609c543..bef4c385 100644 --- a/.streamlit/config.toml +++ b/.streamlit/config.toml @@ -1,3 +1,6 @@ [server] enableXsrfProtection = false -maxUploadSize = 1000 \ No newline at end of file +maxUploadSize = 1000 + +[client] +toolbarMode = "viewer" diff --git a/README.md b/README.md index 9380a515..930328cf 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,6 @@ # Intelligence Toolkit The Intelligence Toolkit is a suite of interactive workflows for creating AI intelligence reports from real-world data sources. The toolkit is designed to help users identify patterns, answers, relationships, and risks within complex datasets, with generative AI ([OpenAI GPT models](https://platform.openai.com/docs/models/)) used to create reports on findings of interest. 
- # Developing ## Requirements @@ -72,6 +71,16 @@ After building, run the docker container with: Open [localhost:8501](http://localhost:8501) +## Building a Windows executable + +We use [Pynsist](https://pynsist.readthedocs.io/en/latest/), which together with [NSIS (Nullsoft Scriptable Install System)](https://nsis.sourceforge.io/) builds an executable for Windows. The installer packages the whole project and everything it needs to run (including Python) into an .exe that, when installed, runs the project on the user's localhost. + +To build locally, you need Pynsist installed (`pip install pynsist`) and NSIS installed, which you can [download here](https://nsis.sourceforge.io/Main_Page). + +**Tip**: Use Windows to build it, not Linux. + +Run `.\installer_script.ps1` in the root of the app. +It will download wkhtmltox, which is needed to generate reports, from its source. Then it will build an .exe into build\nsis that includes the wkhtmltox package installation. # Deploying diff --git a/app/Home.py b/app/Home.py index 6ba8561c..e2c2af8a 100644 --- a/app/Home.py +++ b/app/Home.py @@ -1,13 +1,19 @@ # Copyright (c) 2024 Microsoft Corporation. All rights reserved. import streamlit as st +import os from components.app_loader import load_multipage_app import util.mermaid as mermaid +def get_transparency_faq(): + file_path = os.path.join(os.path.dirname(__file__), 'TransparencyFAQ.md') + with open(file_path, 'r') as file: + return file.read() + def main(): st.set_page_config(layout="wide", initial_sidebar_state="expanded", page_title='Intelligence Toolkit | Home') load_multipage_app() - transparency_faq = open('./app/TransparencyFAQ.md', 'r').read() + transparency_faq = get_transparency_faq() st.markdown(transparency_faq + '\n\n' + f"""\ #### Which Intelligence Toolkit workflow is right for me and my data? 
diff --git a/app/__init__.py b/app/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/app/components/app_loader.py b/app/components/app_loader.py index 2a607d3e..8a037d6a 100644 --- a/app/components/app_loader.py +++ b/app/components/app_loader.py @@ -1,13 +1,22 @@ # Copyright (c) 2024 Microsoft Corporation. All rights reserved. from javascript.styles import add_styles import components.app_user as au -import streamlit as st +import components.app_terminator as at +import components.app_openai as ao def load_multipage_app(): #Load user if logged in user = au.app_user() user.view_get_info() + #Terminate app (if needed for .exe) + terminator = at.app_terminator() + terminator.terminate_app_btn() + + #OpenAI key set + app_openai = ao.app_openai() + app_openai.api_info() + #load css # add_styles() diff --git a/app/components/app_openai.py b/app/components/app_openai.py new file mode 100644 index 00000000..9081572f --- /dev/null +++ b/app/components/app_openai.py @@ -0,0 +1,17 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. +import streamlit as st +from util.openai_instance import get_key_env +from util.SecretsHandler import SecretsHandler + +class app_openai: + def _is_api_key_configured(self): + secrets = SecretsHandler() + if secrets.get_secret("api_key") != '': + return True + elif get_key_env() != '': + return True + return False + + def api_info(self): + if not self._is_api_key_configured(): + st.error("No OpenAI key found in the environment. Please add it in the settings.") \ No newline at end of file diff --git a/app/components/app_terminator.py b/app/components/app_terminator.py new file mode 100644 index 00000000..d4330284 --- /dev/null +++ b/app/components/app_terminator.py @@ -0,0 +1,35 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. 
+import streamlit as st +from util.session_variables import SessionVariables +import psutil +import keyboard +import time +import os + +class app_terminator: + + sv = None + + def __init__(self, sv = None): + if "off_btn_disabled" not in st.session_state: + st.session_state.off_btn_disabled = False + if sv is not None: + self.sv = sv + else: + self.sv = SessionVariables('home') + + def _on_click(self): + def click(): + st.session_state.off_btn_disabled = not st.session_state.off_btn_disabled + return click + + def terminate_app_btn(self): + if self.sv.mode.value != 'cloud': + exit_app = st.sidebar.button("🔴 Terminate application", disabled=st.session_state.off_btn_disabled, on_click=self._on_click) + if exit_app: + st.text("Shutting down application...") + time.sleep(2) + pid = os.getpid() + keyboard.press_and_release('ctrl+w') + p = psutil.Process(pid) + p.terminate() \ No newline at end of file diff --git a/app/javascript/styles.py b/app/javascript/styles.py index 459a6822..6f6a6bb6 100644 --- a/app/javascript/styles.py +++ b/app/javascript/styles.py @@ -12,14 +12,7 @@ } ''' -style_iframes = ''' - iframe { - display: none; - } -''' - def add_styles(): st.markdown(f'''''', unsafe_allow_html=True) diff --git a/app/pages/Settings.py b/app/pages/Settings.py new file mode 100644 index 00000000..fb506257 --- /dev/null +++ b/app/pages/Settings.py @@ -0,0 +1,46 @@ +import os +from util.openai_instance import get_key_env +from util.SecretsHandler import SecretsHandler +import streamlit as st +import time +from util.session_variables import SessionVariables + +key = 'openaikey' +def on_change(handler, key = None, value = None): + def change(): + handler.write_secret('api_key', st.session_state[key] if key else value) + return change + +def main(): + st.header("Settings") + sv = SessionVariables('home') + + if key not in st.session_state: + st.session_state[key] = '' + + secrets_handler = SecretsHandler() + placeholder = "Enter key here..." 
+ secret = secrets_handler.get_secret("api_key") + + is_mode_cloud = sv.mode.value == 'cloud' + + secret_input = st.text_input('Enter your OpenAI key', key=key, type="password", disabled=is_mode_cloud, placeholder=placeholder, value=secret, on_change=on_change(secrets_handler, key)) + + if secret and len(secret): + st.info("Your key is saved securely.") + clear_btn = st.button("Clear local key") + + if clear_btn: + on_change(secrets_handler, value='')() + time.sleep(0.3) + st.rerun() + + if secret_input and secret_input != secret: + st.rerun() + elif get_key_env() == '': + st.warning("No OpenAI key found in the environment. Please insert one above.") + elif not secret_input and not secret: + st.info("Using key from the environment.") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/app/pages/__init__.py b/app/pages/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/app/util/AI_API.py b/app/util/AI_API.py index c3d249e6..36278947 100644 --- a/app/util/AI_API.py +++ b/app/util/AI_API.py @@ -1,6 +1,8 @@ -from openai import OpenAI import tiktoken import json +from util.openai_instance import _OpenAI + +openai = _OpenAI() gen_model = 'gpt-4-turbo-preview' embed_model = 'text-embedding-3-small' @@ -10,7 +12,6 @@ default_temperature = 0 max_embed_tokens = 8191 -client = OpenAI() encoder = tiktoken.get_encoding(text_encoder) def prepare_messages_from_message(system_message, variables): @@ -41,7 +42,7 @@ def count_tokens_in_message_list(messages): def generate_text_from_message_list(messages, placeholder=None, prefix='', model=gen_model, temperature=default_temperature, max_tokens=max_gen_tokens): response = '' try: - responses = client.chat.completions.create( + responses = openai.client().chat.completions.create( model=model, temperature=temperature, max_tokens=max_tokens, diff --git a/app/util/Database.py b/app/util/Database.py index 6c70a821..f2fb788b 100644 --- a/app/util/Database.py +++ b/app/util/Database.py @@ -1,3 
+1,4 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. import os import duckdb @@ -6,7 +7,7 @@ def __init__(self, cache, db_name) -> None: if not os.path.exists(cache): os.makedirs(cache) - db_path = os.path.join(cache, f'{db_name}.db') + db_path = os.path.join(cache, f'_{db_name}.db') self.connection = duckdb.connect(database=db_path) def create_table(self, name, attributes = []): @@ -16,7 +17,12 @@ def select_embedding_from_hash(self, hash_text, username = ''): return self.connection.execute(f"SELECT embedding FROM embeddings WHERE hash_text = '{hash_text}' and username = '{username}'").fetchone() def insert_into_embeddings(self, hash_text, embedding, username = ''): - self.connection.execute(f"INSERT INTO embeddings VALUES ('{username}','{hash_text}', {embedding})") + self.connection.execute(f"INSERT OR IGNORE INTO embeddings VALUES ('{username}','{hash_text}', {embedding})") + + + def insert_multiple_into_embeddings(self, embeddings, username = ""): + embeddings = ''.join([f"('{username}','{embedding[0]}', {embedding[1]}), " for embedding in embeddings])[:-2] + self.connection.execute(f"INSERT OR IGNORE INTO embeddings VALUES {embeddings}") def execute(self, query): return self.connection.execute(query) diff --git a/app/util/Embedder.py b/app/util/Embedder.py index 6199474b..9fe113ac 100644 --- a/app/util/Embedder.py +++ b/app/util/Embedder.py @@ -1,9 +1,9 @@ -from openai import OpenAI +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. 
import tiktoken import numpy as np from util.Database import Database import util.session_variables -import streamlit as st +from util.openai_instance import _OpenAI gen_model = 'gpt-4-turbo-preview' embed_model = 'text-embedding-3-small' @@ -13,8 +13,7 @@ default_temperature = 0 max_embed_tokens = 8191 - -client = OpenAI() +openai = _OpenAI() encoder = tiktoken.get_encoding(text_encoder) class Embedder: @@ -25,7 +24,7 @@ def __init__(self, cache, model=embed_model, encoder=text_encoder, max_tokens=ma self.encoder = tiktoken.get_encoding(encoder) self.max_tokens = max_tokens self.connection = Database(cache, 'embeddings') - self.connection.create_table('embeddings', ['username STRING','hash_text STRING', 'embedding DOUBLE[]']) + self.connection.create_table('embeddings', ['username STRING','hash_text STRING UNIQUE', 'embedding DOUBLE[]']) def encode_all(self, texts): final_embeddings = [None] * len(texts) @@ -48,30 +47,34 @@ def encode_all(self, texts): bi += 1 batch = new_texts[i:i+2000] batch_texts = [x[1] for x in batch] - embeddings = [x.embedding for x in client.embeddings.create(input = batch_texts, model=self.model).data] + list_all_embeddings = [] + embeddings = [x.embedding for x in openai.client().embeddings.create(input = batch_texts, model=self.model).data] for j, (ix, text) in enumerate(batch): - # hsh = hash(text) - # self.connection.insert_into_embeddings(hsh, embeddings[j]) + hsh = hash(text) + list_all_embeddings.append((hsh, embeddings[j])) final_embeddings[ix] = np.array(embeddings[j]) + self.connection.insert_multiple_into_embeddings(list_all_embeddings) + pb.empty() return np.array(final_embeddings) - def encode(self, text): + def encode(self, text, auto_save = True): text = text.replace("\n", " ") hsh = hash(text) embeddings = self.connection.select_embedding_from_hash(hsh) if embeddings: - return np.array(embeddings[0]) + return embeddings[0] else: tokens = len(self.encoder.encode(text)) if tokens > self.max_tokens: text = 
text[:self.max_tokens] print('Truncated text to max tokens') try: - embedding = client.embeddings.create(input = [text], model=self.model).data[0].embedding - self.connection.insert_into_embeddings(hsh, embedding) - return np.array(embedding) + embedding = openai.client().embeddings.create(input = [text], model=self.model).data[0].embedding + if auto_save: + self.connection.insert_into_embeddings(hsh, embedding) + return embedding except: print(f'Error embedding text: {text}') return None diff --git a/app/util/SecretsHandler.py b/app/util/SecretsHandler.py new file mode 100644 index 00000000..d24cb41a --- /dev/null +++ b/app/util/SecretsHandler.py @@ -0,0 +1,28 @@ +import os +import streamlit as st + +class SecretsHandler: + _instance = None + _directory = ".streamlit" + + def __init__(self): + if not os.path.exists(self._directory): + os.makedirs(self._directory) + with(open(os.path.join(self._directory, "secrets.toml"), "w")) as f: + f.write("") + + def __new__(cls): + if cls._instance is None: + cls._instance = super().__new__(cls) + + return cls._instance + + def write_secret(self, key, value): + with(open(os.path.join(self._directory, "secrets.toml"), "w")) as f: + f.write(f"{key} = '{value}'") + + def get_secret(self, key) -> str: + if st.secrets and key in st.secrets: + return st.secrets[key] + return '' + \ No newline at end of file diff --git a/app/util/__init__.py b/app/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/app/util/openai_instance.py b/app/util/openai_instance.py new file mode 100644 index 00000000..6e7c6116 --- /dev/null +++ b/app/util/openai_instance.py @@ -0,0 +1,30 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. 
+from openai import OpenAI +import streamlit as st +import os +from util.SecretsHandler import SecretsHandler + +class _OpenAI: + _instance = None + _key = None + _secrets = None + + def __init__(self): + self._secrets = SecretsHandler() + + def client(self): + if self._secrets.get_secret("api_key") != '': + key = st.secrets["api_key"] + else: + key = get_key_env() + if key != self._key: + self._key = key + try: + self._instance = OpenAI(api_key=key) + except Exception as e: + print(f'Error creating OpenAI client: {e}') + + return self._instance + +def get_key_env(): + return os.environ['OPENAI_API_KEY'] if 'OPENAI_API_KEY' in os.environ else '' \ No newline at end of file diff --git a/app/util/session_variables.py b/app/util/session_variables.py index 6d70861b..766eefc8 100644 --- a/app/util/session_variables.py +++ b/app/util/session_variables.py @@ -11,4 +11,5 @@ def __init__(self, prefix): self.username = sv.SessionVariable('') + diff --git a/app/wkhtmltox/__init__.py b/app/wkhtmltox/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/app/workflows/attribute_patterns/__init__.py b/app/workflows/attribute_patterns/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/app/workflows/data_synthesis/__init__.py b/app/workflows/data_synthesis/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/app/workflows/group_narratives/__init__.py b/app/workflows/group_narratives/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/app/workflows/question_answering/__init__.py b/app/workflows/question_answering/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/app/workflows/question_answering/functions.py b/app/workflows/question_answering/functions.py index 7c5afa9d..1fbb49a3 100644 --- a/app/workflows/question_answering/functions.py +++ b/app/workflows/question_answering/functions.py @@ -1,10 +1,10 @@ +# Copyright (c) 2024 Microsoft Corporation. All rights reserved. 
import numpy as np import streamlit as st -import os import io import tiktoken import pdfplumber -from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_text_splitters import RecursiveCharacterTextSplitter import util.Embedder import workflows.question_answering.classes as classes import workflows.question_answering.config as config @@ -53,11 +53,15 @@ def chunk_files(sv, files): file_chunks.append((file, chunk)) file.set_text(doc_text) + all_embeddings_list = [] for cx, (file, chunk) in enumerate(file_chunks): pb.progress((cx+1) / len(file_chunks), f'Embedding chunk {cx+1} of {len(file_chunks)}...') - chunk_vec = embedder.encode(chunk) + formatted_chunk = chunk.replace("\n", " ") + chunk_vec = embedder.encode(formatted_chunk, False) + all_embeddings_list.append((hash(formatted_chunk), chunk_vec)) file.add_chunk(chunk, np.array(chunk_vec), cx+1) - + pb.progress(99, 'Saving embeddings...') + embedder.connection.insert_multiple_into_embeddings(all_embeddings_list) pb.empty() def update_question(sv, question_history, new_questions, placeholder, prefix): diff --git a/app/workflows/question_answering/workflow.py b/app/workflows/question_answering/workflow.py index 357b9fa8..49b1bc57 100644 --- a/app/workflows/question_answering/workflow.py +++ b/app/workflows/question_answering/workflow.py @@ -84,7 +84,7 @@ def create(): source_counts = Counter() used_chunks = set() while True: - qe = embedder.encode(question) + qe = np.array(embedder.encode(question)) iteration += 1 cosine_distances = sorted([(t, c, scipy.spatial.distance.cosine(qe, v)) for (t, c, v) in all_units], key=lambda x:x[2], reverse=False) chunk_index = sv.answering_target_matches.value @@ -165,11 +165,9 @@ def create(): raw_refs = qa['source'] file_page_refs = [tuple([int(x[1:]) for x in r.split(';')]) for r in raw_refs] - q_vec = embedder.encode(q) - a_vec = embedder.encode(a) + q_vec = np.array(embedder.encode(q)) + a_vec = np.array(embedder.encode(a)) - q_vec = np.array(q_vec) - 
a_vec = np.array(a_vec) qid = sv.answering_next_q_id.value sv.answering_next_q_id.value += 1 q = classes.Question(f, q, q_vec, 0, qid) diff --git a/app/workflows/record_matching/__init__.py b/app/workflows/record_matching/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/app/workflows/risk_networks/__init__.py b/app/workflows/risk_networks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/installer.cfg b/installer.cfg new file mode 100644 index 00000000..5ad67e69 --- /dev/null +++ b/installer.cfg @@ -0,0 +1,134 @@ +[Application] +name=Intelligence Toolkit +publisher=Microsoft +version=0.1.0 +# How to launch the app - this calls the 'main' function from the 'run_exe' module: +entry_point=run_exe:main + +[Build] +nsi_template=installer.nsi +installer_name=Intelligence Toolkit.exe + +[Python] +version=3.10.11 +bitness=64 + +[Include] +packages = app + +exclude = pkgs/AppData + pkgs/venv + pkgs/cache + pkgs/.streamlit/secrets.toml + +extra_wheel_sources=./wheels + +pypi_wheels = aiohttp==3.9.3 + aiosignal==1.3.1 + altair==5.2.0 + annotated-types==0.6.0 + anyio==4.3.0 + attrs==23.2.0 + blinker==1.7.0 + cachetools==5.3.2 + certifi==2024.2.2 + cffi==1.16.0 + charset-normalizer==3.3.2 + click==8.1.7 + colorama==0.4.6 + cryptography==42.0.5 + dataclasses-json==0.6.4 + distro==1.9.0 + frozenlist==1.4.1 + gitdb==4.0.11 + GitPython==3.1.42 + greenlet==3.0.3 + h11==0.14.0 + httpcore==1.0.3 + httpx==0.26.0 + idna==3.6 + importlib-metadata==7.0.1 + isodate==0.6.1 + Jinja2==3.1.3 + joblib==1.3.2 + jsonpatch==1.33 + jsonpointer==2.4 + jsonschema==4.21.1 + jsonschema-specifications==2023.12.1 + langchain-core==0.1.38 + langsmith==0.1.38 + markdown-it-py==3.0.0 + MarkupSafe==2.1.5 + marshmallow==3.20.2 + mdurl==0.1.2 + multidict==6.0.5 + mypy-extensions==1.0.0 + networkx==3.2.1 + numpy==1.26.4 + openai==1.12.0 + pac-synth==0.0.8 + packaging==23.2 + pandas==2.2.0 + patsy==0.5.6 + pdfminer.six==20221105 + pdfplumber==0.10.4 + pillow==10.2.0 + 
plotly==5.19.0 + plotly-express==0.4.1 + polars==0.20.10 + protobuf==4.25.3 + pyarrow==15.0.0 + pycparser==2.21 + pydantic==2.6.1 + pydantic_core==2.16.2 + pydeck==0.8.1b0 + Pygments==2.17.2 + pyparsing==3.1.1 + pypdfium2==4.27.0 + python-dateutil==2.8.2 + python-decouple==3.8 + python-louvain==0.16 + pytz==2024.1 + PyYAML==6.0.1 + rdflib==7.0.0 + referencing==0.33.0 + regex==2023.12.25 + requests==2.31.0 + rich==13.7.0 + rpds-py==0.18.0 + scikit-learn==1.4.1.post1 + scipy==1.12.0 + six==1.16.0 + smmap==5.0.1 + sniffio==1.3.0 + SQLAlchemy==2.0.27 + statsmodels==0.14.1 + streamlit==1.31.1 + streamlit-aggrid==0.3.4.post3 + streamlit-javascript==0.1.5 + streamlit-agraph==0.0.45 + tenacity==8.2.3 + threadpoolctl==3.3.0 + tiktoken==0.6.0 + toml==0.10.2 + toolz==0.12.1 + tornado==6.4 + tqdm==4.66.2 + typing-inspect==0.9.0 + typing_extensions==4.9.0 + tzdata==2024.1 + tzlocal==5.2 + urllib3==2.2.1 + validators==0.22.0 + watchdog==4.0.0 + yarl==1.9.4 + zipp==3.17.0 + duckdb==0.10.1 + pdfkit==1.0.0 + markdown2==2.4.13 + pynsist==2.8 + exceptiongroup==1.2.0 + psutil==5.9.8 + keyboard==0.13.5 + langchain-text-splitters==0.0.1 + orjson==3.10.0 diff --git a/installer.nsi b/installer.nsi new file mode 100644 index 00000000..8af69e3c --- /dev/null +++ b/installer.nsi @@ -0,0 +1,273 @@ +!define PRODUCT_NAME "[[ib.appname]]" +!define PRODUCT_VERSION "[[ib.version]]" +!define PY_VERSION "[[ib.py_version]]" +!define PY_MAJOR_VERSION "[[ib.py_major_version]]" +!define BITNESS "[[ib.py_bitness]]" +!define ARCH_TAG "[[arch_tag]]" +!define INSTALLER_NAME "[[ib.installer_name]]" +!define PRODUCT_ICON "[[icon]]" + +; Marker file to tell the uninstaller that it's a user installation +!define USER_INSTALL_MARKER _user_install_marker + +SetCompressor lzma + +!if "${NSIS_PACKEDVERSION}" >= 0x03000000 + Unicode true + ManifestDPIAware true +!endif + +!define MULTIUSER_EXECUTIONLEVEL Highest +!define MULTIUSER_INSTALLMODE_DEFAULT_CURRENTUSER +!define MULTIUSER_MUI +!define 
MULTIUSER_INSTALLMODE_COMMANDLINE +!define MULTIUSER_INSTALLMODE_INSTDIR "[[ib.appname]]" + +[% if ib.py_bitness == 64 %] +!define MULTIUSER_INSTALLMODE_FUNCTION correct_prog_files +[% endif %] +!include MultiUser.nsh +!include FileFunc.nsh + +[% block modernui %] +; Modern UI installer stuff +!include "MUI2.nsh" +!define MUI_ABORTWARNING +!define MUI_ICON "[[icon]]" +!define MUI_UNICON "[[icon]]" + +; UI pages +[% block ui_pages %] +!insertmacro MUI_PAGE_WELCOME +[% if license_file %] +!insertmacro MUI_PAGE_LICENSE [[license_file]] +[% endif %] +!insertmacro MULTIUSER_PAGE_INSTALLMODE +!insertmacro MUI_PAGE_DIRECTORY +!insertmacro MUI_PAGE_INSTFILES +!insertmacro MUI_PAGE_FINISH +[% endblock ui_pages %] +!insertmacro MUI_LANGUAGE "English" +[% endblock modernui %] + +Name "${PRODUCT_NAME} ${PRODUCT_VERSION}" +OutFile "${INSTALLER_NAME}" +ShowInstDetails show + +Var cmdLineInstallDir + +Section -SETTINGS + SetOutPath "$INSTDIR" + SetOverwrite ifnewer +SectionEnd + +[% block sections %] + +Section "!${PRODUCT_NAME}" sec_app + SetRegView [[ib.py_bitness]] + SectionIn RO + File ${PRODUCT_ICON} + + [% block install_pkgs %] + [# + Extend this block if you need to remove the pkgs directory if it already + exists from previous installations (when upgrading without uninstalling). 
+ https://github.com/takluyver/pynsist/issues/66 + + Example: + [% block install_pkgs %] + RMDir /r "$INSTDIR\pkgs" + [[ super() ]] + [% endblock install_pkgs %] + #] + ; Copy pkgs data + SetOutPath "$INSTDIR\pkgs" + File /r "pkgs\*.*" + [% endblock install_pkgs %] + + SetOutPath "$INSTDIR" + + ; Marker file for per-user install + StrCmp $MultiUser.InstallMode CurrentUser 0 +3 + FileOpen $0 "$INSTDIR\${USER_INSTALL_MARKER}" w + FileClose $0 + SetFileAttributes "$INSTDIR\${USER_INSTALL_MARKER}" HIDDEN + + [% block install_files %] + ; Install files + [% for destination, group in grouped_files %] + SetOutPath "[[destination]]" + [% for file in group %] + File "[[ file ]]" + [% endfor %] + [% endfor %] + + ; Install directories + [% for dir, destination in ib.install_dirs %] + SetOutPath "[[ pjoin(destination, dir) ]]" + File /r "[[dir]]\*.*" + [% endfor %] + [% endblock install_files %] + + [% block install_shortcuts %] + ; Install shortcuts + ; The output path becomes the working directory for shortcuts + SetOutPath "%HOMEDRIVE%\%HOMEPATH%" + [% if single_shortcut %] + [% for scname, sc in ib.shortcuts.items() %] + CreateShortCut "$SMPROGRAMS\[[scname]].lnk" "[[sc['target'] ]]" \ + '[[ sc['parameters'] ]]' "$INSTDIR\[[ sc['icon'] ]]" + [% endfor %] + [% else %] + [# Multiple shortcuts: create a directory for them #] + CreateDirectory "$SMPROGRAMS\${PRODUCT_NAME}" + [% for scname, sc in ib.shortcuts.items() %] + CreateShortCut "$SMPROGRAMS\${PRODUCT_NAME}\[[scname]].lnk" "[[sc['target'] ]]" \ + '[[ sc['parameters'] ]]' "$INSTDIR\[[ sc['icon'] ]]" + [% endfor %] + [% endif %] + SetOutPath "$INSTDIR" + [% endblock install_shortcuts %] + + [% block install_commands %] + [% if has_commands %] + DetailPrint "Setting up command-line launchers..." 
+ + StrCmp $MultiUser.InstallMode CurrentUser 0 AddSysPathSystem + ; Add to PATH for current user + nsExec::ExecToLog '[[ python ]] -Es "$INSTDIR\_system_path.py" add_user "$INSTDIR\bin"' + GoTo AddedSysPath + AddSysPathSystem: + ; Add to PATH for all users + nsExec::ExecToLog '[[ python ]] -Es "$INSTDIR\_system_path.py" add "$INSTDIR\bin"' + AddedSysPath: + [% endif %] + [% endblock install_commands %] + + + DetailPrint "Installing wkhtmltopdf..." + ExecWait '"$INSTDIR\pkgs\wkhtmltox\wkhtmltox-0.12.6-1.msvc2015-win64_.exe" /S' + + ; Byte-compile Python files. + DetailPrint "Byte-compiling Python modules..." + nsExec::ExecToLog '[[ python ]] -m compileall -q "$INSTDIR\pkgs"' + WriteUninstaller $INSTDIR\uninstall.exe + ; Add ourselves to Add/remove programs + WriteRegStr SHCTX "Software\Microsoft\Windows\CurrentVersion\Uninstall\${PRODUCT_NAME}" \ + "DisplayName" "${PRODUCT_NAME}" + WriteRegStr SHCTX "Software\Microsoft\Windows\CurrentVersion\Uninstall\${PRODUCT_NAME}" \ + "UninstallString" '"$INSTDIR\uninstall.exe"' + WriteRegStr SHCTX "Software\Microsoft\Windows\CurrentVersion\Uninstall\${PRODUCT_NAME}" \ + "InstallLocation" "$INSTDIR" + WriteRegStr SHCTX "Software\Microsoft\Windows\CurrentVersion\Uninstall\${PRODUCT_NAME}" \ + "DisplayIcon" "$INSTDIR\${PRODUCT_ICON}" + [% if ib.publisher is not none %] + WriteRegStr SHCTX "Software\Microsoft\Windows\CurrentVersion\Uninstall\${PRODUCT_NAME}" \ + "Publisher" "[[ib.publisher]]" + [% endif %] + WriteRegStr SHCTX "Software\Microsoft\Windows\CurrentVersion\Uninstall\${PRODUCT_NAME}" \ + "DisplayVersion" "${PRODUCT_VERSION}" + WriteRegDWORD SHCTX "Software\Microsoft\Windows\CurrentVersion\Uninstall\${PRODUCT_NAME}" \ + "NoModify" 1 + WriteRegDWORD SHCTX "Software\Microsoft\Windows\CurrentVersion\Uninstall\${PRODUCT_NAME}" \ + "NoRepair" 1 + + ; Check if we need to reboot + IfRebootFlag 0 noreboot + MessageBox MB_YESNO "A reboot is required to finish the installation. Do you wish to reboot now?" 
\ + /SD IDNO IDNO noreboot + Reboot + noreboot: +SectionEnd + +Section "Uninstall" + SetRegView [[ib.py_bitness]] + SetShellVarContext all + IfFileExists "$INSTDIR\${USER_INSTALL_MARKER}" 0 +3 + SetShellVarContext current + Delete "$INSTDIR\${USER_INSTALL_MARKER}" + + Delete $INSTDIR\uninstall.exe + Delete "$INSTDIR\${PRODUCT_ICON}" + RMDir /r "$INSTDIR\pkgs" + ;Delete saved data + ;Delete secrets + RMDir /r "$LocalAppData\Intelligence Toolkit" + + ; Remove ourselves from %PATH% + [% block uninstall_commands %] + [% if has_commands %] + nsExec::ExecToLog '[[ python ]] -Es "$INSTDIR\_system_path.py" remove "$INSTDIR\bin"' + [% endif %] + [% endblock uninstall_commands %] + + [% block uninstall_files %] + ; Uninstall files + [% for file, destination in ib.install_files %] + Delete "[[pjoin(destination, file)]]" + [% endfor %] + ; Uninstall directories + [% for dir, destination in ib.install_dirs %] + RMDir /r "[[pjoin(destination, dir)]]" + [% endfor %] + [% endblock uninstall_files %] + + [% block uninstall_shortcuts %] + ; Uninstall shortcuts + [% if single_shortcut %] + [% for scname in ib.shortcuts %] + Delete "$SMPROGRAMS\[[scname]].lnk" + [% endfor %] + [% else %] + RMDir /r "$SMPROGRAMS\${PRODUCT_NAME}" + [% endif %] + [% endblock uninstall_shortcuts %] + RMDir $INSTDIR + DeleteRegKey SHCTX "Software\Microsoft\Windows\CurrentVersion\Uninstall\${PRODUCT_NAME}" +SectionEnd + +[% endblock sections %] + +; Functions + +Function .onMouseOverSection + ; Find which section the mouse is over, and set the corresponding description. + FindWindow $R0 "#32770" "" $HWNDPARENT + GetDlgItem $R0 $R0 1043 ; description item (must be added to the UI) + + [% block mouseover_messages %] + StrCmp $0 ${sec_app} "" +2 + SendMessage $R0 ${WM_SETTEXT} 0 "STR:${PRODUCT_NAME}" + + [% endblock mouseover_messages %] +FunctionEnd + +Function .onInit + ; Multiuser.nsh breaks /D command line parameter. Parse /INSTDIR instead. 
+ ; Cribbing from https://nsis-dev.github.io/NSIS-Forums/html/t-299280.html + ${GetParameters} $0 + ClearErrors + ${GetOptions} '$0' "/INSTDIR=" $1 + IfErrors +2 ; Error means flag not found + StrCpy $cmdLineInstallDir $1 + ClearErrors + + !insertmacro MULTIUSER_INIT + + ; If cmd line included /INSTDIR, override the install dir set by MultiUser + StrCmp $cmdLineInstallDir "" +2 + StrCpy $INSTDIR $cmdLineInstallDir +FunctionEnd + +Function un.onInit + !insertmacro MULTIUSER_UNINIT +FunctionEnd + +[% if ib.py_bitness == 64 %] +Function correct_prog_files + ; The multiuser machinery doesn't know about the different Program files + ; folder for 64-bit applications. Override the install dir it set. + StrCmp $MultiUser.InstallMode AllUsers 0 +2 + StrCpy $INSTDIR "$PROGRAMFILES64\${MULTIUSER_INSTALLMODE_INSTDIR}" +FunctionEnd +[% endif %] \ No newline at end of file diff --git a/installer_script.ps1 b/installer_script.ps1 new file mode 100644 index 00000000..c65553e9 --- /dev/null +++ b/installer_script.ps1 @@ -0,0 +1,25 @@ +# Define the URL of the .exe file to download +$url = "https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox-0.12.6-1.msvc2015-win64.exe" + +$folderPath = "app/wkhtmltox" +$outputFilePath = Join-Path -Path $folderPath -ChildPath "wkhtmltox-0.12.6-1.msvc2015-win64_.exe" + +# Check if the file already exists in the folder +if (Test-Path $outputFilePath) { + Write-Host "File already exists. Skipping download." +} else { + + Write-Host "Downloading wkhtmltox from host: $url" + + Invoke-WebRequest -Uri $url -OutFile $outputFilePath + + if (Test-Path $outputFilePath) { + Write-Host "Download successful." + # Call pynsist with the installer configuration file + } else { + Write-Host "Download failed. Exiting script." 
+ exit 1 + } +} + +pynsist .\installer.cfg diff --git a/requirements.txt b/requirements.txt index bbda058c3b0f891d2947625284744c0026c54f7b..85cf12e7dec809561e130e1d89915abf92c6b2ee 100644 GIT binary patch delta 227 zcmX|*Jqp4=5QSe6K>{AYMmsx~LQt#w!E@SB3mL66QmkAz9r!Iup&IU_E4^jn$rRGlNuVtWZ6M2_ zW+>}_)`gr-d)zxRj)6#IhJ-AEvd23bZ(UgFG$+ccWT~Ta49(BLGK0sTS2I7O_{01; JJ|_)&qA#55CUF1& delta 70 zcmdlWzf5k!0v3K-23rOL20aEtFgBU&%P*;z%#hEJ3xuT%c?_8hB@C57H4s%GmBtK) MAXS?ev8ZqX00cG-$^ZZW diff --git a/run_exe.py b/run_exe.py new file mode 100644 index 00000000..3f0b2428 --- /dev/null +++ b/run_exe.py @@ -0,0 +1,65 @@ +import os +from subprocess import Popen, PIPE, STDOUT +import sys +import time +import webbrowser +import socket + +PORT = 8503 +def is_port_in_use(port: int) -> bool: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + return s.connect_ex(('localhost', port)) == 0 + +def main(): + + # Getting path to python executable (full path of deployed python on Windows) + executable = sys.executable + os.environ["DB_APP_DATA"] = os.environ["LOCALAPPDATA"] + os.environ["MODE"] = "exe" + + path_to_main = os.path.join(os.path.dirname(__file__), "app","Home.py") + + port_use = is_port_in_use(PORT) + print(f"Port {PORT} is in use: {port_use}") + if port_use: + webbrowser.open(f"http://localhost:{PORT}") + return + + # Running streamlit server in a subprocess and writing to log file + proc = Popen( + [ + executable, + "-m", + "streamlit", + "run", + path_to_main, + # The following option appears to be necessary to correctly start the streamlit server, + # but it should start without it. More investigations should be carried out. + "--server.headless=true", + "--global.developmentMode=false", + f"--server.port={PORT}" + ], + stdin=PIPE, + stdout=PIPE, + stderr=STDOUT, + text=True, + ) + + proc.stdin.close() + + # Force the opening (does not open automatically) of the browser tab after a brief delay to let + # the streamlit server start. 
+ time.sleep(10) + webbrowser.open(f"http://localhost:{PORT}") + + while True: + s = proc.stdout.read() + if not s: + break + print(s, end="") + + proc.wait() + + +if __name__ == "__main__": + main() \ No newline at end of file