first init

terateams · Dec 7, 2023 · 5d915ad · 5d915ad
commit 5d915ad
Show file tree

Hide file tree

Showing 29 changed files with 1,145 additions and 0 deletions.
diff --git a/.editotconfig b/.editotconfig
@@ -0,0 +1,23 @@
+root = true
+
+# Go source files
+[*.go]
+indent_style = tab
+indent_size = 4
+insert_final_newline = true
+
+[*.py]
+indent_style = space
+indent_size = 4
+
+[Makefile]
+indent_style = tab
+
+[*.js]
+charset = utf-8
+indent_style = space
+indent_size = 4
+
+[*.json]
+indent_style = space
+indent_size = 2
diff --git a/.github/workflows/docker-gptstudio-publish.yml b/.github/workflows/docker-gptstudio-publish.yml
@@ -0,0 +1,46 @@
+name: GPTStudio Build and Publish
+
+on:
+  # run it on push to the default repository branch
+  push:
+    branches: [main]
+  # run it during pull request
+  pull_request:
+
+jobs:
+  # Build the Docker image on every trigger; publish it only from main.
+  build-and-push-docker-image:
+    name: Build Docker image and push to repositories
+    # NOTE(review): nothing in this workflow gates the job on a compile/test
+    # job; it runs unconditionally on every push to main and pull request.
+    runs-on: ubuntu-latest
+
+    # steps to perform in job
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      # set up the Docker Buildx builder
+      - name: Set up Docker Buildx
+        id: buildx
+        uses: docker/setup-buildx-action@v2
+
+      - name: Login to DockerHub
+        # Secrets are not available to pull requests from forks, and nothing
+        # is pushed for pull requests anyway, so skip the login there.
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v2
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Build image and push to Docker Hub and GitHub Container Registry
+        # The id is required so the "Image digest" step can read this step's outputs.
+        id: docker_build
+        uses: docker/build-push-action@v2
+        with:
+          # relative path to the source directory that contains the Dockerfile
+          context: ./
+          file: ./Dockerfile
+          # Note: tags has to be all lower-case
+          tags: |
+            talkincode/gptstudio:latest
+          # build on feature branches, push only on main branch
+          push: ${{ github.ref == 'refs/heads/main' }}
+
+      - name: Image digest
+        run: echo ${{ steps.docker_build.outputs.digest }}
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,14 @@
+.idea
+__pycache__
+.vscode
+/release/
+release
+Dockerfile.local
+__debug_bin
+.DS_Store
+build
+/rundata/
+.env
+/venv/
+/playground/chroma_db/
+/playground/local_qdrant/
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,41 @@
+# Use the Mambaforge base image
+FROM condaforge/mambaforge:latest
+
+# Use the non-interactive frontend so apt-get never prompts
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Set the time zone (Asia/Shanghai)
+RUN echo "Asia/Shanghai" > /etc/timezone && \
+    ln -fs /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \
+    apt-get update && \
+    apt-get install -y tzdata && \
+    dpkg-reconfigure --frontend noninteractive tzdata
+
+# Install Tesseract-OCR (with Simplified Chinese data), Graphviz, fonts and FFmpeg
+RUN apt-get update && \
+    apt-get install -y tesseract-ocr tesseract-ocr-chi-sim graphviz fonts-wqy-microhei fonts-noto ffmpeg && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Set the working directory
+WORKDIR /app
+
+# Copy the project files
+COPY ./GPTStudio.py ./GPTStudio.py
+COPY ./pages ./pages
+COPY ./libs ./libs
+COPY ./config.toml ./.streamlit/config.toml
+COPY ./components ./components
+COPY requirements.txt ./requirements.txt
+
+# Install the project dependencies (OpenCV presumably comes from requirements.txt — confirm)
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Expose Streamlit's default port
+EXPOSE 8501
+
+# Run Python unbuffered so output appears in real time
+ENV PYTHONUNBUFFERED=1
+
+# Startup command
+CMD ["streamlit","run", "GPTStudio.py", "--server.port=8501"]
diff --git a/GPTStudio.py b/GPTStudio.py
@@ -0,0 +1,51 @@
+import streamlit as st
+from libs.msal import msal_auth
+
+msal_auth()
+
+
+def sidebar():
+    st.sidebar.markdown("""
+# 🦜GPTStudio
+- [GPTStudio Github](https://github.com/terateams/GPTService)
+- [Streamlit Website](https://streamlit.io)
+    """)
+    if st.sidebar.button('登出'):
+        st.session_state['authenticated'] = False
+        st.rerun()
+
+
+def show_page():
+    sidebar()
+    st.title("🦜GPTStudio")
+    st.markdown("""
+GPTStudio is a library of tools based on GPT (Generative Pre-trained Transformer).
+It is designed to provide developers and data scientists with powerful and easy-to-use GPT capabilities.
+It combines knowledge base management, GPT capabilities, and a collection of AI-based tools to make it a powerful and easy-to-use tool for anyone involved in AI and big data.
+making it ideal for any project involving AI and big models.
+
+## Key Features
+
+### Knowledge base retrieval:
+
+Provides an efficient search tool to help users quickly find relevant information in the knowledge base.
+
+### GPT Proficiency Test
+- **Model Capability Testing**: Allows users to test the performance and capability of GPT models with the assistance of the knowledge base.
+- **Real-time Feedback**: Provides real-time feedback to help users understand the response and accuracy of the model.
+
+### AI Tools Collection
+- **A wide range of AI tools**: including but not limited to text generation, language understanding, data analysis and many other AI-related tools.
+- **Large Model Support**: Supports integration with other large AI models to extend the capability and scope of the application.
+
+Translated with www.DeepL.com/Translator (free version)
+""")
+
+
+def main():
+    """Main app"""
+    show_page()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/License b/License
@@ -0,0 +1,9 @@
+MIT License
+
+Copyright (c) 2023 GPTStudio
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/Makefile b/Makefile
@@ -0,0 +1,12 @@
+# Build the linux/arm64 image and push it as talkincode/gptstudio:latest-arm64.
+# NOTE(review): GoArch is passed as a build arg; confirm the Dockerfile declares a matching ARG.
+arm64:
+	docker buildx build --build-arg GoArch="arm64" --platform=linux/arm64 -t \
+	talkincode/gptstudio:latest-arm64 .
+	docker push talkincode/gptstudio:latest-arm64
+
+# Build the linux/amd64 image and push it as talkincode/gptstudio:latest.
+fastpub:
+	docker buildx build --build-arg GoArch="amd64" --platform=linux/amd64 -t \
+	talkincode/gptstudio:latest .
+	docker push talkincode/gptstudio:latest
+
+
+# Both targets are commands, not files, so declare them phony.
+.PHONY: arm64 fastpub
diff --git a/README.md b/README.md
@@ -0,0 +1,79 @@
+
+       ______  _______   _________   ______    _                  __   _          
+     .' ___  ||_   __ \ |  _   _  |.' ____ \  / |_               |  ] (_)         
+    / .'   \_|  | |__) ||_/ | | \_|| (___ \_|`| |-'__   _    .--.| |  __   .--.   
+    | |   ____  |  ___/     | |     _.____`.  | | [  | | | / /'`\' | [  |/ .'`\ \ 
+    \ `.___]  |_| |_       _| |_   | \____) | | |, | \_/ |,| \__/  |  | || \__. | 
+     `._____.'|_____|     |_____|   \______.' \__/ '.__.'_/ '.__.;__][___]'.__.'  
+                                                                                  
+            
+# GPTStudio
+
+GPTStudio is a library of tools based on GPT (Generative Pre-trained Transformer).
+It is designed to provide developers and data scientists with powerful and easy-to-use GPT capabilities.
+It combines knowledge base management, GPT capabilities, and a collection of AI-based tools into
+a powerful and easy-to-use toolkit for anyone involved in AI and big data,
+making it ideal for any project involving AI and large models.
+
+## Key Features
+
+### Knowledge base retrieval:
+
+Provides an efficient search tool to help users quickly find relevant information in the knowledge base.
+
+### GPT Proficiency Test
+
+- **Model Capability Testing**: Allows users to test the performance and capability of GPT models with the assistance of the knowledge base.
+- **Real-time Feedback**: Provides real-time feedback to help users understand the response and accuracy of the model.
+
+### AI Tools Collection
+
+- **A wide range of AI tools**: including but not limited to text generation, language understanding, data analysis and many other AI-related tools.
+- **Large Model Support**: Supports integration with other large AI models to extend the capability and scope of the application.
+
+
+## Quick Start
+
+### docker-compose
+
+> Use the .env environment variable file or configure docker-compose.yml
+
+```yaml
+version: "3"
+services:
+  gptstudio:
+    container_name: "gptstudio"
+    image: talkincode/gptstudio:latest
+    logging:
+      driver: "json-file"
+      options:
+        max-size: "50m"
+    environment:
+        - GPT_SERVICE_ADDRESS=${GPT_SERVICE_ADDRESS}
+        - GPT_SERVICE_TOKEN=${GPT_SERVICE_TOKEN}
+        - OPENAI_API_TYPE=${OPENAI_API_TYPE}
+        - OPENAI_API_KEY=${OPENAI_API_KEY}
+        - AZURE_OPENAI_API_VERSION=${AZURE_OPENAI_API_VERSION}
+        - AZURE_OPENAI_API_BASE=${AZURE_OPENAI_API_BASE}
+        - AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY}
+        - MSAL_TENANTID=${MSAL_TENANTID}
+        - MSAL_APPID=${MSAL_APPID}
+        - DATA_DIR=/data
+    volumes:
+      - gptstudio-volume:/data
+    ports:
+      # host port 8898 -> Streamlit port inside the container
+      - "8898:8501"
+    # The image's working directory is /app, where GPTStudio.py is copied.
+    command: ["streamlit","run", "GPTStudio.py", "--server.port=8501"]
+    networks:
+      gptstudio_network:
+
+networks:
+  gptstudio_network:
+
+volumes:
+  gptstudio-volume:
+```
+
+## Contribute
+
+We welcome contributions of any kind, including but not limited to issues, pull requests, documentation, examples, etc.
diff --git a/components/__init__.py b/components/__init__.py
diff --git a/components/streamlit_tesseract_scanner/__init__.py b/components/streamlit_tesseract_scanner/__init__.py
@@ -0,0 +1,79 @@
+import base64
+from io import BytesIO
+from pathlib import Path
+from typing import Optional
+import cv2
+import numpy as np
+import pytesseract
+from pytesseract import Output
+
+import streamlit as st
+import streamlit.components.v1 as components
+
+# Register the "tesseract_scanner" Streamlit component; its frontend code
+# lives in the "frontend" folder next to this file.
+frontend_dir = (Path(__file__).parent / "frontend").absolute()
+_component_func = components.declare_component(
+    "tesseract_scanner", path=str(frontend_dir)
+)
+
+
+def tesseract_scanner(showimg: bool =False, 
+                      lang: str = 'eng',
+                      blacklist: str = None,
+                      whitelist: str = None,
+                      psm: str = '3',
+                      hrate: float=0.2, 
+                      key: Optional[str] = None
+                ) -> Optional[BytesIO]:
+    """
+    Add a descriptive docstring
+    """
+    b64_data: Optional[str] = _component_func(hrate=hrate, key=key)
+
+    if b64_data is None:
+        return None
+
+    raw_data = b64_data.split(",")[1]  # Strip the data: type prefix
+
+    component_value = BytesIO(base64.b64decode(raw_data))
+
+    # return component_value
+    # image = cv2.imdecode(np.frombuffer(component_value, np.uint8), cv2.IMREAD_COLOR)
+
+    image = base64.b64decode(raw_data)
+    image = np.fromstring(image, dtype=np.uint8)
+    image = cv2.imdecode(image, cv2.IMREAD_COLOR)
+    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
+
+    if showimg:
+        st.image(image)
+
+    # blacklist = '@*|©_Ⓡ®¢§š'
+    if blacklist:
+        custom_config = f'''--oem 3 --psm 11'''
+    else:
+        custom_config = f'''--oem 3 --psm 3'''
+
+    text = pytesseract.image_to_string(image, lang=lang, config=custom_config)
+    # text = text.split('\n')
+    # while("" in text): text.remove("")
+    # while(" " in text): text.remove(" ")
+    # text.remove("\x0c")
+
+    return text
+
+
+def main():
+    st.write("## Example")
+
+    blacklist='@*|©_Ⓡ®¢§š'
+    data = tesseract_scanner(showimg=False, lang='vie+eng', 
+                             blacklist=blacklist, psm=3)
+
+    if data is not None:
+        st.write(data)
+
+if __name__ == "__main__":
+    main()
diff --git a/components/streamlit_tesseract_scanner/frontend/index.html b/components/streamlit_tesseract_scanner/frontend/index.html
@@ -0,0 +1,19 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta http-equiv="X-UA-Compatible" content="IE=edge" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <!-- Frontend of the "tesseract_scanner" Streamlit component -->
+    <title>streamlit-tesseract-scanner</title>
+    <script src="./streamlit-component-lib.js"></script>
+    <script src="./main.js"></script>
+    <!--link rel="stylesheet" href="./style.css" / -->
+  </head>
+  <body>
+    <div id="container"> 
+      <!-- Range slider (1-100, default 20), the video element and a canvas; presumably driven by main.js — confirm -->
+      <input id="videoheight" type="range" min="1" max="100" value="20" style="width:100%">
+      <video id="video" autoplay="true"></video>      
+      <canvas id="canvas"></canvas>
+    </div>    
+  </body>
+</html>