add .exe files to build locally
dayesouza committed Apr 4, 2024
1 parent 297a1bd commit 63583ae
Showing 33 changed files with 732 additions and 39 deletions.
1 change: 1 addition & 0 deletions .gitattributes
@@ -0,0 +1 @@
*Toolkit.exe filter=lfs diff=lfs merge=lfs -text
8 changes: 7 additions & 1 deletion .gitignore
@@ -38,4 +38,10 @@ qa_mine
outputs
cache
data
legacy
legacy

# Exe installer builders
build/nsis/*
!build/nsis/Intelligence Toolkit.exe

app/wkhtmltox/wkhtmltox-0.12.6-1.msvc2015-win64_.exe
5 changes: 4 additions & 1 deletion .streamlit/config.toml
@@ -1,3 +1,6 @@
[server]
enableXsrfProtection = false
maxUploadSize = 1000
maxUploadSize = 1000

[client]
toolbarMode = "viewer"
11 changes: 10 additions & 1 deletion README.md
@@ -1,7 +1,6 @@
# Intelligence Toolkit
The Intelligence Toolkit is a suite of interactive workflows for creating AI intelligence reports from real-world data sources. The toolkit is designed to help users identify patterns, answers, relationships, and risks within complex datasets, with generative AI ([OpenAI GPT models](https://platform.openai.com/docs/models/)) used to create reports on findings of interest.


# Developing

## Requirements
@@ -72,6 +71,16 @@ After building, run the docker container with:

Open [localhost:8501](http://localhost:8501)

## Building a Windows executable

We use [Pynsist](https://pynsist.readthedocs.io/en/latest/) together with [NSIS (Nullsoft Scriptable Install System)](https://nsis.sourceforge.io/) to build a Windows executable. The build packages the whole project and everything it needs to run (including Python) into an .exe that, once installed, runs the project on the user's localhost.

To build locally, install Pynsist with `pip install pynsist` and install NSIS by [downloading it here](https://nsis.sourceforge.io/Main_Page).

**Tip**: Use Windows to build it, not Linux.

Run `.\installer_script.ps1` in the root of the app.
It downloads wkhtmltox (needed to generate reports) from the source, then builds an .exe into `build\nsis` that includes the wkhtmltox package installation.
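
Pynsist reads its build configuration from an `installer.cfg` file. That file is not part of this diff, so the sketch below is only an illustration of what a configuration for this project might look like; the version numbers, entry point, and included packages are assumptions, not the repository's actual settings.

```ini
; Hypothetical installer.cfg sketch: the real file in the repository may differ.
[Application]
name=Intelligence Toolkit
version=0.1.0
; Assumes a wrapper entry point that launches the Streamlit app.
entry_point=app.Home:main

[Python]
version=3.11.0
bitness=64

[Include]
; Pure-Python dependencies can be bundled as wheels from PyPI (versions are placeholders).
pypi_wheels=streamlit==1.32.0
    openai==1.14.0
; Project files copied into the install directory.
files=app/
```

With NSIS on the PATH and a configuration like this in place, `pynsist installer.cfg` (which `installer_script.ps1` presumably wraps) produces the installer under `build\nsis`.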

# Deploying

8 changes: 7 additions & 1 deletion app/Home.py
@@ -1,13 +1,19 @@
# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
import streamlit as st
import os
from components.app_loader import load_multipage_app
import util.mermaid as mermaid

def get_transparency_faq():
    file_path = os.path.join(os.path.dirname(__file__), 'TransparencyFAQ.md')
    with open(file_path, 'r') as file:
        return file.read()

def main():
    st.set_page_config(layout="wide", initial_sidebar_state="expanded", page_title='Intelligence Toolkit | Home')
    load_multipage_app()

    transparency_faq = open('./app/TransparencyFAQ.md', 'r').read()
    transparency_faq = get_transparency_faq()
    st.markdown(transparency_faq + '\n\n' + f"""\
#### Which Intelligence Toolkit workflow is right for me and my data?
Empty file added app/__init__.py
Empty file.
11 changes: 10 additions & 1 deletion app/components/app_loader.py
@@ -1,13 +1,22 @@
# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
from javascript.styles import add_styles
import components.app_user as au
import streamlit as st
import components.app_terminator as at
import components.app_openai as ao

def load_multipage_app():
    # Load user if logged in
    user = au.app_user()
    user.view_get_info()

    # Terminate app (if needed for .exe)
    terminator = at.app_terminator()
    terminator.terminate_app_btn()

    # OpenAI key set
    app_openai = ao.app_openai()
    app_openai.api_info()

    # load css
    # add_styles()

17 changes: 17 additions & 0 deletions app/components/app_openai.py
@@ -0,0 +1,17 @@
# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
import streamlit as st
from util.openai_instance import get_key_env
from util.SecretsHandler import SecretsHandler

class app_openai:
    def _is_api_key_configured(self):
        secrets = SecretsHandler()
        if secrets.get_secret("api_key") != '':
            return True
        elif get_key_env() != '':
            return True
        return False

    def api_info(self):
        if not self._is_api_key_configured():
            st.error("No OpenAI key found in the environment. Please add it in the settings.")
35 changes: 35 additions & 0 deletions app/components/app_terminator.py
@@ -0,0 +1,35 @@
# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
import streamlit as st
from util.session_variables import SessionVariables
import psutil
import keyboard
import time
import os

class app_terminator:

    sv = None

    def __init__(self, sv = None):
        if "off_btn_disabled" not in st.session_state:
            st.session_state.off_btn_disabled = False
        if sv is not None:
            self.sv = sv
        else:
            self.sv = SessionVariables('home')

    def _on_click(self):
        def click():
            st.session_state.off_btn_disabled = not st.session_state.off_btn_disabled
        return click

    def terminate_app_btn(self):
        if self.sv.mode.value != 'cloud':
            exit_app = st.sidebar.button("🔴 Terminate application", disabled=st.session_state.off_btn_disabled, on_click=self._on_click)
            if exit_app:
                st.text("Shutting down application...")
                time.sleep(2)
                pid = os.getpid()
                keyboard.press_and_release('ctrl+w')
                p = psutil.Process(pid)
                p.terminate()
7 changes: 0 additions & 7 deletions app/javascript/styles.py
@@ -12,14 +12,7 @@
}
'''

style_iframes = '''
iframe {
display: none;
}
'''

def add_styles():
    st.markdown(f'''<style>
        {style_sidebar}
        {style_iframes}
    </style>''', unsafe_allow_html=True)
46 changes: 46 additions & 0 deletions app/pages/Settings.py
@@ -0,0 +1,46 @@
import os
from util.openai_instance import get_key_env
from util.SecretsHandler import SecretsHandler
import streamlit as st
import time
from util.session_variables import SessionVariables

key = 'openaikey'
def on_change(handler, key = None, value = None):
    def change():
        handler.write_secret('api_key', st.session_state[key] if key else value)
    return change

def main():
    st.header("Settings")
    sv = SessionVariables('home')

    if key not in st.session_state:
        st.session_state[key] = ''

    secrets_handler = SecretsHandler()
    placeholder = "Enter key here..."
    secret = secrets_handler.get_secret("api_key")

    is_mode_cloud = sv.mode.value == 'cloud'

    secret_input = st.text_input('Enter your OpenAI key', key=key, type="password", disabled=is_mode_cloud, placeholder=placeholder, value=secret, on_change=on_change(secrets_handler, key))

    if secret and len(secret):
        st.info("Your key is saved securely.")
        clear_btn = st.button("Clear local key")

        if clear_btn:
            on_change(secrets_handler, value='')()
            time.sleep(0.3)
            st.rerun()

        if secret_input and secret_input != secret:
            st.rerun()
    elif get_key_env() == '':
        st.warning("No OpenAI key found in the environment. Please insert one above.")
    elif not secret_input and not secret:
        st.info("Using key from the environment.")

if __name__ == "__main__":
    main()
Empty file added app/pages/__init__.py
Empty file.
7 changes: 4 additions & 3 deletions app/util/AI_API.py
@@ -1,6 +1,8 @@
from openai import OpenAI
import tiktoken
import json
from util.openai_instance import _OpenAI

openai = _OpenAI()

gen_model = 'gpt-4-turbo-preview'
embed_model = 'text-embedding-3-small'
@@ -10,7 +12,6 @@
default_temperature = 0
max_embed_tokens = 8191

client = OpenAI()
encoder = tiktoken.get_encoding(text_encoder)

def prepare_messages_from_message(system_message, variables):
@@ -41,7 +42,7 @@ def count_tokens_in_message_list(messages):
def generate_text_from_message_list(messages, placeholder=None, prefix='', model=gen_model, temperature=default_temperature, max_tokens=max_gen_tokens):
    response = ''
    try:
        responses = client.chat.completions.create(
        responses = openai.client().chat.completions.create(
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
10 changes: 8 additions & 2 deletions app/util/Database.py
@@ -1,3 +1,4 @@
# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
import os
import duckdb

@@ -6,7 +7,7 @@ def __init__(self, cache, db_name) -> None:
        if not os.path.exists(cache):
            os.makedirs(cache)

        db_path = os.path.join(cache, f'{db_name}.db')
        db_path = os.path.join(cache, f'_{db_name}.db')
        self.connection = duckdb.connect(database=db_path)

    def create_table(self, name, attributes = []):
@@ -16,7 +17,12 @@ def select_embedding_from_hash(self, hash_text, username = ''):
        return self.connection.execute(f"SELECT embedding FROM embeddings WHERE hash_text = '{hash_text}' and username = '{username}'").fetchone()

    def insert_into_embeddings(self, hash_text, embedding, username = ''):
        self.connection.execute(f"INSERT INTO embeddings VALUES ('{username}','{hash_text}', {embedding})")
        self.connection.execute(f"INSERT OR IGNORE INTO embeddings VALUES ('{username}','{hash_text}', {embedding})")


    def insert_multiple_into_embeddings(self, embeddings, username = ""):
        embeddings = ''.join([f"('{username}','{embedding[0]}', {embedding[1]}), " for embedding in embeddings])[:-2]
        self.connection.execute(f"INSERT OR IGNORE INTO embeddings VALUES {embeddings}")

    def execute(self, query):
        return self.connection.execute(query)
Expand Down
29 changes: 16 additions & 13 deletions app/util/Embedder.py
@@ -1,9 +1,9 @@
from openai import OpenAI
# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
import tiktoken
import numpy as np
from util.Database import Database
import util.session_variables
import streamlit as st
from util.openai_instance import _OpenAI

gen_model = 'gpt-4-turbo-preview'
embed_model = 'text-embedding-3-small'
@@ -13,8 +13,7 @@
default_temperature = 0
max_embed_tokens = 8191


client = OpenAI()
openai = _OpenAI()
encoder = tiktoken.get_encoding(text_encoder)

class Embedder:
@@ -25,7 +24,7 @@ def __init__(self, cache, model=embed_model, encoder=text_encoder, max_tokens=ma
        self.encoder = tiktoken.get_encoding(encoder)
        self.max_tokens = max_tokens
        self.connection = Database(cache, 'embeddings')
        self.connection.create_table('embeddings', ['username STRING','hash_text STRING', 'embedding DOUBLE[]'])
        self.connection.create_table('embeddings', ['username STRING','hash_text STRING UNIQUE', 'embedding DOUBLE[]'])

    def encode_all(self, texts):
        final_embeddings = [None] * len(texts)
@@ -48,30 +47,34 @@ def encode_all(self, texts):
            bi += 1
            batch = new_texts[i:i+2000]
            batch_texts = [x[1] for x in batch]
            embeddings = [x.embedding for x in client.embeddings.create(input = batch_texts, model=self.model).data]
            list_all_embeddings = []
            embeddings = [x.embedding for x in openai.client().embeddings.create(input = batch_texts, model=self.model).data]
            for j, (ix, text) in enumerate(batch):
                # hsh = hash(text)
                # self.connection.insert_into_embeddings(hsh, embeddings[j])
                hsh = hash(text)
                list_all_embeddings.append((hsh, embeddings[j]))
                final_embeddings[ix] = np.array(embeddings[j])
            self.connection.insert_multiple_into_embeddings(list_all_embeddings)

        pb.empty()
        return np.array(final_embeddings)

    def encode(self, text):
    def encode(self, text, auto_save = True):
        text = text.replace("\n", " ")
        hsh = hash(text)
        embeddings = self.connection.select_embedding_from_hash(hsh)

        if embeddings:
            return np.array(embeddings[0])
            return embeddings[0]
        else:
            tokens = len(self.encoder.encode(text))
            if tokens > self.max_tokens:
                text = text[:self.max_tokens]
                print('Truncated text to max tokens')
            try:
                embedding = client.embeddings.create(input = [text], model=self.model).data[0].embedding
                self.connection.insert_into_embeddings(hsh, embedding)
                return np.array(embedding)
                embedding = openai.client().embeddings.create(input = [text], model=self.model).data[0].embedding
                if auto_save:
                    self.connection.insert_into_embeddings(hsh, embedding)
                return embedding
            except:
                print(f'Error embedding text: {text}')
                return None
28 changes: 28 additions & 0 deletions app/util/SecretsHandler.py
@@ -0,0 +1,28 @@
import os
import streamlit as st

class SecretsHandler:
    _instance = None
    _directory = ".streamlit"

    def __init__(self):
        if not os.path.exists(self._directory):
            os.makedirs(self._directory)
            with(open(os.path.join(self._directory, "secrets.toml"), "w")) as f:
                f.write("")

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)

        return cls._instance

    def write_secret(self, key, value):
        with(open(os.path.join(self._directory, "secrets.toml"), "w")) as f:
            f.write(f"{key} = '{value}'")

    def get_secret(self, key) -> str:
        if st.secrets and key in st.secrets:
            return st.secrets[key]
        return ''

Empty file added app/util/__init__.py
Empty file.