add .exe files to build locally
dayesouza committed Apr 4, 2024
1 parent 297a1bd commit 63583ae
Showing 33 changed files with 732 additions and 39 deletions.
1 change: 1 addition & 0 deletions .gitattributes
@@ -0,0 +1 @@
*Toolkit.exe filter=lfs diff=lfs merge=lfs -text
8 changes: 7 additions & 1 deletion .gitignore
@@ -38,4 +38,10 @@ qa_mine
outputs
cache
data
legacy
legacy

# Exe installer builders
build/nsis/*
!build/nsis/Intelligence Toolkit.exe

app/wkhtmltox/wkhtmltox-0.12.6-1.msvc2015-win64_.exe
5 changes: 4 additions & 1 deletion .streamlit/config.toml
@@ -1,3 +1,6 @@
[server]
enableXsrfProtection = false
maxUploadSize = 1000
maxUploadSize = 1000

[client]
toolbarMode = "viewer"
11 changes: 10 additions & 1 deletion README.md
@@ -1,7 +1,6 @@
# Intelligence Toolkit
The Intelligence Toolkit is a suite of interactive workflows for creating AI intelligence reports from real-world data sources. The toolkit is designed to help users identify patterns, answers, relationships, and risks within complex datasets, with generative AI ([OpenAI GPT models](https://platform.openai.com/docs/models/)) used to create reports on findings of interest.


# Developing

## Requirements
@@ -72,6 +71,16 @@ After building, run the docker container with:

Open [localhost:8501](http://localhost:8501)

## Building a Windows executable

We use [Pynsist](https://pynsist.readthedocs.io/en/latest/) together with [NSIS (Nullsoft Scriptable Install System)](https://nsis.sourceforge.io/) to build a Windows executable. The build packages the whole project and everything it needs to run (including Python) into an .exe that, once installed, runs the project on the user's localhost.

To build locally, install Pynsist with `pip install pynsist` and install NSIS by [downloading it here](https://nsis.sourceforge.io/Main_Page).

**Tip**: Use Windows to build it, not Linux.

Run `.\installer_script.ps1` in the root of the app.
It downloads wkhtmltox (needed to generate reports) from the source, then builds an .exe into `build\nsis` that includes the wkhtmltox package installation.
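
Pynsist reads its build configuration from an `installer.cfg` file. That file is not part of this diff, so the sketch below is only an illustration of what a configuration for this project might look like; the version numbers, entry point, and included packages are assumptions, not the repository's actual settings.

```ini
; Hypothetical installer.cfg sketch: the real file in the repository may differ.
[Application]
name=Intelligence Toolkit
version=0.1.0
; Assumes a wrapper entry point that launches the Streamlit app.
entry_point=app.Home:main

[Python]
version=3.11.0
bitness=64

[Include]
; Pure-Python dependencies can be bundled as wheels from PyPI (versions are placeholders).
pypi_wheels=streamlit==1.32.0
    openai==1.14.0
; Project files copied into the install directory.
files=app/
```

With NSIS on the PATH and a configuration like this in place, `pynsist installer.cfg` (which `installer_script.ps1` presumably wraps) produces the installer under `build\nsis`.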

# Deploying

8 changes: 7 additions & 1 deletion app/Home.py
@@ -1,13 +1,19 @@
# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
import streamlit as st
import os
from components.app_loader import load_multipage_app
import util.mermaid as mermaid

def get_transparency_faq():
    file_path = os.path.join(os.path.dirname(__file__), 'TransparencyFAQ.md')
    with open(file_path, 'r') as file:
        return file.read()

def main():
    st.set_page_config(layout="wide", initial_sidebar_state="expanded", page_title='Intelligence Toolkit | Home')
    load_multipage_app()

    transparency_faq = open('./app/TransparencyFAQ.md', 'r').read()
    transparency_faq = get_transparency_faq()
    st.markdown(transparency_faq + '\n\n' + f"""\
#### Which Intelligence Toolkit workflow is right for me and my data?
Empty file added app/__init__.py
Empty file.
11 changes: 10 additions & 1 deletion app/components/app_loader.py
@@ -1,13 +1,22 @@
# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
from javascript.styles import add_styles
import components.app_user as au
import streamlit as st
import components.app_terminator as at
import components.app_openai as ao

def load_multipage_app():
    # Load user if logged in
    user = au.app_user()
    user.view_get_info()

    # Terminate app (if needed for .exe)
    terminator = at.app_terminator()
    terminator.terminate_app_btn()

    # OpenAI key set
    app_openai = ao.app_openai()
    app_openai.api_info()

    # load css
    # add_styles()

17 changes: 17 additions & 0 deletions app/components/app_openai.py
@@ -0,0 +1,17 @@
# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
import streamlit as st
from util.openai_instance import get_key_env
from util.SecretsHandler import SecretsHandler

class app_openai:
    def _is_api_key_configured(self):
        secrets = SecretsHandler()
        if secrets.get_secret("api_key") != '':
            return True
        elif get_key_env() != '':
            return True
        return False

    def api_info(self):
        if not self._is_api_key_configured():
            st.error("No OpenAI key found in the environment. Please add it in the settings.")
35 changes: 35 additions & 0 deletions app/components/app_terminator.py
@@ -0,0 +1,35 @@
# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
import streamlit as st
from util.session_variables import SessionVariables
import psutil
import keyboard
import time
import os

class app_terminator:

    sv = None

    def __init__(self, sv = None):
        if "off_btn_disabled" not in st.session_state:
            st.session_state.off_btn_disabled = False
        if sv is not None:
            self.sv = sv
        else:
            self.sv = SessionVariables('home')

    def _on_click(self):
        def click():
            st.session_state.off_btn_disabled = not st.session_state.off_btn_disabled
        return click

    def terminate_app_btn(self):
        if self.sv.mode.value != 'cloud':
            exit_app = st.sidebar.button("🔴 Terminate application", disabled=st.session_state.off_btn_disabled, on_click=self._on_click)
            if exit_app:
                st.text("Shutting down application...")
                time.sleep(2)
                pid = os.getpid()
                keyboard.press_and_release('ctrl+w')
                p = psutil.Process(pid)
                p.terminate()
7 changes: 0 additions & 7 deletions app/javascript/styles.py
@@ -12,14 +12,7 @@
}
'''

style_iframes = '''
iframe {
display: none;
}
'''

def add_styles():
    st.markdown(f'''<style>
        {style_sidebar}
        {style_iframes}
    </style>''', unsafe_allow_html=True)
46 changes: 46 additions & 0 deletions app/pages/Settings.py
@@ -0,0 +1,46 @@
import os
from util.openai_instance import get_key_env
from util.SecretsHandler import SecretsHandler
import streamlit as st
import time
from util.session_variables import SessionVariables

key = 'openaikey'
def on_change(handler, key = None, value = None):
    def change():
        handler.write_secret('api_key', st.session_state[key] if key else value)
    return change

def main():
    st.header("Settings")
    sv = SessionVariables('home')

    if key not in st.session_state:
        st.session_state[key] = ''

    secrets_handler = SecretsHandler()
    placeholder = "Enter key here..."
    secret = secrets_handler.get_secret("api_key")

    is_mode_cloud = sv.mode.value == 'cloud'

    secret_input = st.text_input('Enter your OpenAI key', key=key, type="password", disabled=is_mode_cloud, placeholder=placeholder, value=secret, on_change=on_change(secrets_handler, key))

    if secret and len(secret):
        st.info("Your key is saved securely.")
        clear_btn = st.button("Clear local key")

        if clear_btn:
            on_change(secrets_handler, value='')()
            time.sleep(0.3)
            st.rerun()

        if secret_input and secret_input != secret:
            st.rerun()
    elif get_key_env() == '':
        st.warning("No OpenAI key found in the environment. Please insert one above.")
    elif not secret_input and not secret:
        st.info("Using key from the environment.")

if __name__ == "__main__":
    main()
Empty file added app/pages/__init__.py
Empty file.
7 changes: 4 additions & 3 deletions app/util/AI_API.py
@@ -1,6 +1,8 @@
from openai import OpenAI
import tiktoken
import json
from util.openai_instance import _OpenAI

openai = _OpenAI()

gen_model = 'gpt-4-turbo-preview'
embed_model = 'text-embedding-3-small'
@@ -10,7 +12,6 @@
default_temperature = 0
max_embed_tokens = 8191

client = OpenAI()
encoder = tiktoken.get_encoding(text_encoder)

def prepare_messages_from_message(system_message, variables):
@@ -41,7 +42,7 @@ def count_tokens_in_message_list(messages):
def generate_text_from_message_list(messages, placeholder=None, prefix='', model=gen_model, temperature=default_temperature, max_tokens=max_gen_tokens):
    response = ''
    try:
        responses = client.chat.completions.create(
        responses = openai.client().chat.completions.create(
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
10 changes: 8 additions & 2 deletions app/util/Database.py
@@ -1,3 +1,4 @@
# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
import os
import duckdb

@@ -6,7 +7,7 @@ def __init__(self, cache, db_name) -> None:
        if not os.path.exists(cache):
            os.makedirs(cache)

        db_path = os.path.join(cache, f'{db_name}.db')
        db_path = os.path.join(cache, f'_{db_name}.db')
        self.connection = duckdb.connect(database=db_path)

    def create_table(self, name, attributes = []):
@@ -16,7 +17,12 @@ def select_embedding_from_hash(self, hash_text, username = ''):
        return self.connection.execute(f"SELECT embedding FROM embeddings WHERE hash_text = '{hash_text}' and username = '{username}'").fetchone()

    def insert_into_embeddings(self, hash_text, embedding, username = ''):
        self.connection.execute(f"INSERT INTO embeddings VALUES ('{username}','{hash_text}', {embedding})")
        self.connection.execute(f"INSERT OR IGNORE INTO embeddings VALUES ('{username}','{hash_text}', {embedding})")


    def insert_multiple_into_embeddings(self, embeddings, username = ""):
        embeddings = ''.join([f"('{username}','{embedding[0]}', {embedding[1]}), " for embedding in embeddings])[:-2]
        self.connection.execute(f"INSERT OR IGNORE INTO embeddings VALUES {embeddings}")

    def execute(self, query):
        return self.connection.execute(query)
Expand Down
29 changes: 16 additions & 13 deletions app/util/Embedder.py
@@ -1,9 +1,9 @@
from openai import OpenAI
# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
import tiktoken
import numpy as np
from util.Database import Database
import util.session_variables
import streamlit as st
from util.openai_instance import _OpenAI

gen_model = 'gpt-4-turbo-preview'
embed_model = 'text-embedding-3-small'
@@ -13,8 +13,7 @@
default_temperature = 0
max_embed_tokens = 8191


client = OpenAI()
openai = _OpenAI()
encoder = tiktoken.get_encoding(text_encoder)

class Embedder:
@@ -25,7 +24,7 @@ def __init__(self, cache, model=embed_model, encoder=text_encoder, max_tokens=ma
        self.encoder = tiktoken.get_encoding(encoder)
        self.max_tokens = max_tokens
        self.connection = Database(cache, 'embeddings')
        self.connection.create_table('embeddings', ['username STRING','hash_text STRING', 'embedding DOUBLE[]'])
        self.connection.create_table('embeddings', ['username STRING','hash_text STRING UNIQUE', 'embedding DOUBLE[]'])

    def encode_all(self, texts):
        final_embeddings = [None] * len(texts)
@@ -48,30 +47,34 @@ def encode_all(self, texts):
            bi += 1
            batch = new_texts[i:i+2000]
            batch_texts = [x[1] for x in batch]
            embeddings = [x.embedding for x in client.embeddings.create(input = batch_texts, model=self.model).data]
            list_all_embeddings = []
            embeddings = [x.embedding for x in openai.client().embeddings.create(input = batch_texts, model=self.model).data]
            for j, (ix, text) in enumerate(batch):
                # hsh = hash(text)
                # self.connection.insert_into_embeddings(hsh, embeddings[j])
                hsh = hash(text)
                list_all_embeddings.append((hsh, embeddings[j]))
                final_embeddings[ix] = np.array(embeddings[j])
            self.connection.insert_multiple_into_embeddings(list_all_embeddings)

        pb.empty()
        return np.array(final_embeddings)

    def encode(self, text):
    def encode(self, text, auto_save = True):
        text = text.replace("\n", " ")
        hsh = hash(text)
        embeddings = self.connection.select_embedding_from_hash(hsh)

        if embeddings:
            return np.array(embeddings[0])
            return embeddings[0]
        else:
            tokens = len(self.encoder.encode(text))
            if tokens > self.max_tokens:
                text = text[:self.max_tokens]
                print('Truncated text to max tokens')
            try:
                embedding = client.embeddings.create(input = [text], model=self.model).data[0].embedding
                self.connection.insert_into_embeddings(hsh, embedding)
                return np.array(embedding)
                embedding = openai.client().embeddings.create(input = [text], model=self.model).data[0].embedding
                if auto_save:
                    self.connection.insert_into_embeddings(hsh, embedding)
                return embedding
            except:
                print(f'Error embedding text: {text}')
                return None
28 changes: 28 additions & 0 deletions app/util/SecretsHandler.py
@@ -0,0 +1,28 @@
import os
import streamlit as st

class SecretsHandler:
    _instance = None
    _directory = ".streamlit"

    def __init__(self):
        if not os.path.exists(self._directory):
            os.makedirs(self._directory)
            with(open(os.path.join(self._directory, "secrets.toml"), "w")) as f:
                f.write("")

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)

        return cls._instance

    def write_secret(self, key, value):
        with(open(os.path.join(self._directory, "secrets.toml"), "w")) as f:
            f.write(f"{key} = '{value}'")

    def get_secret(self, key) -> str:
        if st.secrets and key in st.secrets:
            return st.secrets[key]
        return ''

Empty file added app/util/__init__.py
Empty file.