Skip to content

Commit

Permalink
first init
Browse files Browse the repository at this point in the history
  • Loading branch information
jamiesun committed Dec 7, 2023
0 parents commit 5d915ad
Show file tree
Hide file tree
Showing 29 changed files with 1,145 additions and 0 deletions.
23 changes: 23 additions & 0 deletions .editotconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
root = true

# Go source files
[*.go]
indent_style = tab
indent_size = 4
insert_final_newline = true

[*.py]
indent_style = space
indent_size = 4

[Makefile]
indent_style = tab

[*.js]
charset = utf-8
indent_style = space
indent_size = 4

[*.json]
indent_style = space
indent_size = 2
46 changes: 46 additions & 0 deletions .github/workflows/docker-gptstudio-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
name: GPTStudio Build and Publish

on:
  # Run on every push to the default repository branch.
  push:
    branches: [main]
  # Also run on pull requests (the image is built but not pushed; see `push` below).
  pull_request:

jobs:
  # Build the Docker image and, on main, publish it.
  build-and-push-docker-image:
    name: Build Docker image and push to repositories
    runs-on: ubuntu-latest

    # Steps performed by the job.
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      # Set up the Docker Buildx builder.
      - name: Set up Docker Buildx
        id: buildx
        uses: docker/setup-buildx-action@v2

      - name: Login to DockerHub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build image and push to Docker Hub and GitHub Container Registry
        # The id is what lets the "Image digest" step read this step's outputs.
        id: docker_build
        uses: docker/build-push-action@v2
        with:
          # Relative path to the build context that contains the Dockerfile.
          context: ./
          file: ./Dockerfile
          # Note: tags have to be all lower-case.
          tags: |
            talkincode/gptstudio:latest
          # Build on every trigger, push only on the main branch.
          push: ${{ github.ref == 'refs/heads/main' }}

      - name: Image digest
        run: echo ${{ steps.docker_build.outputs.digest }}
14 changes: 14 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
.idea
__pycache__
.vscode
/release/
release
Dockerfile.local
__debug_bin
.DS_Store
build
/rundata/
.env
/venv/
/playground/chroma_db/
/playground/local_qdrant/
41 changes: 41 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Use the Mambaforge base image
FROM condaforge/mambaforge:latest

# Use the non-interactive frontend so apt-get never prompts during the build
ENV DEBIAN_FRONTEND=noninteractive

# Set the time zone (Asia/Shanghai)
RUN echo "Asia/Shanghai" > /etc/timezone && \
ln -fs /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \
apt-get update && \
apt-get install -y tzdata && \
dpkg-reconfigure --frontend noninteractive tzdata

# Install Tesseract-OCR (with Simplified Chinese data), Graphviz, fonts, and FFmpeg,
# then drop the apt caches
RUN apt-get update && \
apt-get install -y tesseract-ocr tesseract-ocr-chi-sim graphviz fonts-wqy-microhei fonts-noto ffmpeg && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Set the working directory
WORKDIR /app

# Copy the project files (config.toml is placed at ./.streamlit/config.toml)
COPY ./GPTStudio.py ./GPTStudio.py
COPY ./pages ./pages
COPY ./libs ./libs
COPY ./config.toml ./.streamlit/config.toml
COPY ./components ./components
COPY requirements.txt ./requirements.txt

# Install the project dependencies
# NOTE(review): the original note also mentions OpenCV; presumably it comes from requirements.txt — confirm
RUN pip install --no-cache-dir -r requirements.txt

# Expose Streamlit's default port
EXPOSE 8501

# Run Python unbuffered so output appears in real time
ENV PYTHONUNBUFFERED=1

# Startup command
CMD ["streamlit","run", "GPTStudio.py", "--server.port=8501"]
51 changes: 51 additions & 0 deletions GPTStudio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import streamlit as st
from libs.msal import msal_auth

# Call the MSAL auth helper at import time, before any page function is defined or run.
# NOTE(review): presumably this gates the page on the user being signed in — confirm in libs/msal.
msal_auth()


def sidebar():
    """Render the sidebar: project links plus a log-out button.

    Clicking the button clears the ``authenticated`` flag in the session
    state and reruns the script.
    """
    panel = st.sidebar
    panel.markdown("""
# 🦜GPTStudio
- [GPTStudio Github](https://github.com/terateams/GPTService)
- [Streamlit Website](https://streamlit.io)
""")

    # The button label is Chinese for "log out".
    logout_clicked = panel.button('登出')
    if not logout_clicked:
        return

    st.session_state['authenticated'] = False
    st.rerun()


def show_page():
    """Render the landing page: sidebar, title, and the project overview."""
    sidebar()
    st.title("🦜GPTStudio")
    # User-facing overview text, rendered as Markdown.
    st.markdown("""
GPTStudio is a library of tools based on GPT (Generative Pre-trained Transformer).
It is designed to provide developers and data scientists with powerful and easy-to-use GPT capabilities.
It combines knowledge base management, GPT capabilities, and a collection of AI-based tools to make it a powerful and easy-to-use tool for anyone involved in AI and big data,
making it ideal for any project involving AI and big models.
## Key Features
### Knowledge base retrieval:
Provides an efficient search tool to help users quickly find relevant information in the knowledge base.
### GPT Proficiency Test
- **Model Capability Testing**: Allows users to test the performance and capability of GPT models with the assistance of the knowledge base.
- **Real-time Feedback**: Provides real-time feedback to help users understand the response and accuracy of the model.
### AI Tools Collection
- **A wide range of AI tools**: including but not limited to text generation, language understanding, data analysis and many other AI-related tools.
- **Large Model Support**: Supports integration with other large AI models to extend the capability and scope of the application.
""")


def main():
    """Entry point of the app: render the landing page."""
    show_page()


# Only render when this file is executed as the main script.
if __name__ == "__main__":
    main()
9 changes: 9 additions & 0 deletions License
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
MIT License

Copyright (c) 2023 GPTStudio

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
12 changes: 12 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Build the linux/arm64 image and push it as :latest-arm64.
arm64:
	docker buildx build --build-arg GoArch="arm64" --platform=linux/arm64 -t \
	talkincode/gptstudio:latest-arm64 .
	docker push talkincode/gptstudio:latest-arm64

# Build the linux/amd64 image and push it as :latest.
fastpub:
	docker buildx build --build-arg GoArch="amd64" --platform=linux/amd64 -t \
	talkincode/gptstudio:latest .
	docker push talkincode/gptstudio:latest


# Both targets are commands, not files; declare them phony so a file named
# "arm64" or "fastpub" can never make them look up to date.
.PHONY: arm64 fastpub
79 changes: 79 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@

______ _______ _________ ______ _ __ _
.' ___ ||_ __ \ | _ _ |.' ____ \ / |_ | ] (_)
/ .' \_| | |__) ||_/ | | \_|| (___ \_|`| |-'__ _ .--.| | __ .--.
| | ____ | ___/ | | _.____`. | | [ | | | / /'`\' | [ |/ .'`\ \
\ `.___] |_| |_ _| |_ | \____) | | |, | \_/ |,| \__/ | | || \__. |
`._____.'|_____| |_____| \______.' \__/ '.__.'_/ '.__.;__][___]'.__.'
# GPTStudio

GPTStudio is a library of tools based on GPT (Generative Pre-trained Transformer).
It is designed to provide developers and data scientists with powerful and easy-to-use GPT capabilities.
It combines knowledge base management, GPT capabilities, and a collection of AI-based tools to make it
a powerful and easy-to-use tool for anyone involved in AI and big data,
making it ideal for any project involving AI and big models.

## Key Features

### Knowledge base retrieval:

Provides an efficient search tool to help users quickly find relevant information in the knowledge base.

### GPT Proficiency Test

- **Model Capability Testing**: Allows users to test the performance and capability of GPT models with the assistance of the knowledge base.
- **Real-time Feedback**: Provides real-time feedback to help users understand the response and accuracy of the model.

### AI Tools Collection

- **A wide range of AI tools**: including but not limited to text generation, language understanding, data analysis and many other AI-related tools.
- **Large Model Support**: Supports integration with other large AI models to extend the capability and scope of the application.


## Quick Start

### docker-compose

> Use the .env environment variable file or configure docker-compose.yml
```yaml
version: "3"
services:
  gptstudio:
    container_name: "gptstudio"
    image: talkincode/gptstudio:latest
    logging:
      driver: "json-file"
      options:
        max-size: "50m"
    environment:
      - GPT_SERVICE_ADDRESS=${GPT_SERVICE_ADDRESS}
      - GPT_SERVICE_TOKEN=${GPT_SERVICE_TOKEN}
      - OPENAI_API_TYPE=${OPENAI_API_TYPE}
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - AZURE_OPENAI_API_VERSION=${AZURE_OPENAI_API_VERSION}
      - AZURE_OPENAI_API_BASE=${AZURE_OPENAI_API_BASE}
      - AZURE_OPENAI_API_KEY=${AZURE_OPENAI_API_KEY}
      - MSAL_TENANTID=${MSAL_TENANTID}
      - MSAL_APPID=${MSAL_APPID}
      - DATA_DIR=/data
    volumes:
      - gptstudio-volume:/data
    ports:
      # host port 8898 -> the port Streamlit is started on below
      - "8898:8501"
    # Same command as the image's CMD: the script path is relative to the
    # image's working directory (/app).
    command: ["streamlit","run", "GPTStudio.py", "--server.port=8501"]
    networks:
      gptstudio_network:

networks:
  gptstudio_network:

volumes:
  gptstudio-volume:
```
## Contribute
We welcome contributions of any kind, including but not limited to issues, pull requests, documentation, examples, etc.
Empty file added components/__init__.py
Empty file.
79 changes: 79 additions & 0 deletions components/streamlit_tesseract_scanner/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import base64
from io import BytesIO
from pathlib import Path
from typing import Optional
import cv2
import numpy as np
import pytesseract
from pytesseract import Output

import streamlit as st
import streamlit.components.v1 as components

# Tell Streamlit that there is a component called "tesseract_scanner",
# and that the code to display that component is in the "frontend" folder
# located next to this file.
frontend_dir = (Path(__file__).parent / "frontend").absolute()
_component_func = components.declare_component(
"tesseract_scanner", path=str(frontend_dir)
)


def tesseract_scanner(showimg: bool = False,
                      lang: str = 'eng',
                      blacklist: Optional[str] = None,
                      whitelist: Optional[str] = None,
                      psm: str = '3',
                      hrate: float = 0.2,
                      key: Optional[str] = None
                      ) -> Optional[str]:
    """Run Tesseract OCR on the image delivered by the frontend component.

    Args:
        showimg: When true, display the binarized image with ``st.image``.
        lang: Tesseract language code(s), passed through to pytesseract.
        blacklist: Only its truthiness is used: a non-empty value selects
            ``--psm 11``, otherwise ``--psm 3`` is used. The characters
            themselves are not forwarded to Tesseract.
        whitelist: Accepted but currently unused.
        psm: Accepted but currently unused (the mode is chosen from
            ``blacklist``, see above).
        hrate: Forwarded to the frontend component.
        key: Forwarded to the frontend component.

    Returns:
        The recognized text, or ``None`` when the component returned no data.
    """
    b64_data: Optional[str] = _component_func(hrate=hrate, key=key)
    if b64_data is None:
        return None

    # Strip the "data:<type>," prefix and keep the base64 payload.
    raw_data = b64_data.split(",")[1]

    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    encoded = np.frombuffer(base64.b64decode(raw_data), dtype=np.uint8)
    image = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Binarize with an Otsu-selected threshold; [1] is the thresholded image.
    image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    if showimg:
        st.image(image)

    # NOTE(review): `psm`, `whitelist` and the blacklist characters are not
    # applied here; wiring them in would change OCR output, so confirm the
    # intended behavior before doing so.
    custom_config = '--oem 3 --psm 11' if blacklist else '--oem 3 --psm 3'

    return pytesseract.image_to_string(image, lang=lang, config=custom_config)


def main():
    """Demo page: run the scanner and write out whatever text it returns."""
    st.write("## Example")

    excluded_chars = '@*|©_Ⓡ®¢§š'
    recognized = tesseract_scanner(
        showimg=False,
        lang='vie+eng',
        blacklist=excluded_chars,
        psm=3,
    )

    # Nothing to show until the scanner returns a result.
    if recognized is None:
        return
    st.write(recognized)


if __name__ == "__main__":
    main()
19 changes: 19 additions & 0 deletions components/streamlit_tesseract_scanner/frontend/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<!DOCTYPE html>
<!-- Static frontend page of the tesseract_scanner component. -->
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<!-- NOTE(review): the title still reads "streamlit-camera-input-live"; it looks like a leftover name — confirm. -->
<title>streamlit-camera-input-live</title>
<!-- Both scripts are loaded from the same folder as this page. -->
<script src="./streamlit-component-lib.js"></script>
<script src="./main.js"></script>
<!--link rel="stylesheet" href="./style.css" / -->
</head>
<body>
<div id="container">
<!-- Range slider from 1 to 100, initially 20. Presumably read by main.js to size the video — confirm there. -->
<input id="videoheight" type="range" min="1" max="100" value="20" style="width:100%">
<!-- Autoplaying video element and a canvas. Presumably main.js uses the canvas to grab frames — confirm there. -->
<video id="video" autoplay="true"></video>
<canvas id="canvas"></canvas>
</div>
</body>
</html>
Loading

0 comments on commit 5d915ad

Please sign in to comment.