Skip to content

Commit

Permalink
Merge pull request #32 from SWM-SMART/dev
Browse files: browse the repository at this point in the history
🐛 fix: fix model path error
  • Loading branch information
minseok-oh authored Nov 14, 2023
2 parents e1b5490 + 3410055 commit 8555a10
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 39 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.11.1
FROM python:3.11

WORKDIR /code
COPY ./requirements.txt /code/requirements.txt
Expand Down
2 changes: 1 addition & 1 deletion app/api/v1/endpoints/stt.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def get_speech_to_text(
) -> SpeechText:

prefix = '.'.join(audio.key.split('.')[:-1])
s3_controller.download_file(S3_BUCKET_NAME, AUDIO_S3_PREFIX + audio.key, f'stt/static/{audio.key}')
s3_controller.download_file(S3_BUCKET_NAME, AUDIO_S3_PREFIX + audio.key, f'app/static/{audio.key}')

stt_controller.convert_to_wav(prefix)
stt_controller.speech_to_text(prefix)
Expand Down
2 changes: 1 addition & 1 deletion app/controller/keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ def __init__(self, llm: LLMController = Depends(LLMController)):
def get_keywords(self, document: Document) -> Keywords:
self.llm.set_document(document)
answer = self.llm.request(self.prompt).content
return Keywords(keywords=list(map(lambda word: word.strip()[1:-1], list(answer[1:-1].split(',')))))
return Keywords(keywords=list(map(lambda word: word.strip(), list(answer[1:-1].split(',')))))
48 changes: 26 additions & 22 deletions app/controller/mindmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
from app.schemas.mindmap import MindMap
from app.schemas.context import Keywords

from bs4 import BeautifulSoup
import re

from typing import List

class MindMapController:
prompt = "Question: 문맥 내에서 %s들의 계층 구조를 html의 <ul>, <li>로 알려줘 \nAnswer: <html>"
prompt = "Question: 문맥 내에서 %s들의 계층 구조를 MarkDown의 '-'로 알려줘 \nAnswer: -"
def __init__(self, llm: LLMController = Depends(LLMController)):
self.llm = llm

Expand All @@ -21,32 +21,36 @@ def delete_stopwords(self, html: str) -> str:
html.replace(stopword, '')
return html

def parse_html(self, html: str, keywords: List[str]) -> MindMap:
def parse_html(self, markdown: str, keywords: List[str]) -> MindMap:
mindmap = MindMap()
mindmap.keywords = keywords
keyword2index = {w: i for i, w in enumerate(keywords)}

soup = BeautifulSoup(html, 'html.parser')
prettified_html = soup.prettify()
prettified_html = list(map(lambda x: x.strip(), prettified_html.split('\n')))
prettified_html = [html for html in prettified_html if (html in keywords) or (html in ['<ul>', '</ul>'])]
keyword2index = {v: i for i, v in enumerate(keywords)}

current = 0
stack = []
for index, word in enumerate(prettified_html):
if word == '</ul>': stack.pop(len(stack)-1)
lines = markdown.split('\n')
for line in lines:
sep = line.split('- ')[0]
word = line.split('- ')[-1]
if word in keywords:
if len(stack) == 0:
stack.append(word)
mindmap.root = keyword2index[stack[0]]
sep = len(sep)
mindmap.graph[str(keyword2index[word])] = []
print(stack)

if sep == 0:
mindmap.root = sep

if current == sep:
if len(stack) != 0: stack.pop()
if len(stack) != 0: mindmap.graph[str(stack[-1])].append(keyword2index[word])
stack.append(keyword2index[word])
elif current < sep:
mindmap.graph[str(stack[-1])].append(keyword2index[word])
stack.append(keyword2index[word])
else:
if prettified_html[index-1] != '<ul>': stack.pop(len(stack)-1)
key = str(keyword2index[mindmap.root]) if len(stack) == 0 else str(keyword2index[stack[len(stack)-1]])

if key not in mindmap.graph.keys():
mindmap.graph[key] = [keyword2index[word]]
else:
mindmap.graph[key].append(keyword2index[word])
stack.append(word)
stack.pop()
if len(stack) != 0: mindmap.graph[str(stack[-1])].append(keyword2index[word])
current = sep
return mindmap

def get_mindmap(self, document: Document, keywords: List[str]) -> MindMap:
Expand Down
8 changes: 4 additions & 4 deletions app/controller/stt.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@ def __init__(self):
pass

def convert_to_wav(self, prefix):
output_path = f'stt/static/{prefix}.wav'
y, sr = librosa.load(f'stt/static/{prefix}.m4a', sr=16000)
output_path = f'app/static/{prefix}.wav'
y, sr = librosa.load(f'app/static/{prefix}.m4a', sr=16000)
sf.write(output_path, y, sr)

def speech_to_text(self, prefix):
os.system(f'./stt/whisper/main -m ./stt/whisper/models/ggml-medium.bin -l "ko" -f ./stt/static/{prefix}.wav -oj')
os.system(f'./app/whisper/main -m ./app/whisper/models/ggml-medium.bin -l "ko" -f ./app/static/{prefix}.wav -oj')

def get_speech_text(self, prefix):
with open(f'stt/static/{prefix}.wav.json', 'r') as json_file:
with open(f'app/static/{prefix}.wav.json', 'r') as json_file:
json_data = json.load(json_file)

text = ""
Expand Down
17 changes: 7 additions & 10 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@ attrs==23.1.0
audioread==3.0.1
backoff==2.2.1
bcrypt==4.0.1
beautifulsoup4==4.12.2
boto3==1.28.84
botocore==1.31.84
bs4==0.0.1
boto3==1.28.85
botocore==1.31.85
cachetools==5.3.2
certifi==2023.7.22
cffi==1.16.0
Expand Down Expand Up @@ -47,8 +45,8 @@ jsonpatch==1.33
jsonpointer==2.4
kubernetes==28.1.0
langchain==0.0.335
langchainhub==0.1.13
langsmith==0.0.63
langchainhub==0.1.14
langsmith==0.0.64
lazy_loader==0.3
librosa==0.10.1
llvmlite==0.41.1
Expand All @@ -65,7 +63,7 @@ numba==0.58.1
numpy==1.26.2
oauthlib==3.2.2
onnxruntime==1.16.2
openai==1.2.3
openai==1.2.4
opentelemetry-api==1.21.0
opentelemetry-exporter-otlp-proto-common==1.21.0
opentelemetry-exporter-otlp-proto-grpc==1.21.0
Expand All @@ -82,8 +80,8 @@ pulsar-client==3.3.0
pyasn1==0.5.0
pyasn1-modules==0.3.0
pycparser==2.21
pydantic==2.4.2
pydantic_core==2.10.1
pydantic==2.5.0
pydantic_core==2.14.1
pypdf==3.17.0
PyPika==0.48.9
python-dateutil==2.8.2
Expand All @@ -100,7 +98,6 @@ scipy==1.11.3
six==1.16.0
sniffio==1.3.0
soundfile==0.12.1
soupsieve==2.5
soxr==0.3.7
SQLAlchemy==2.0.23
starlette==0.27.0
Expand Down

0 comments on commit 8555a10

Please sign in to comment.