Merge pull request #32 from SWM-SMART/dev

🐛 fix: fix model path error
SWM-SMART · Nov 14, 2023 · 8555a10
2 parents e1b5490 + 3410055
commit 8555a10
Show file tree

Hide file tree

Showing 6 changed files with 40 additions and 39 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.11.1
+FROM python:3.11
 
 WORKDIR /code
 COPY ./requirements.txt /code/requirements.txt

diff --git a/app/api/v1/endpoints/stt.py b/app/api/v1/endpoints/stt.py
@@ -20,7 +20,7 @@ def get_speech_to_text(
     ) -> SpeechText:
 
     prefix = '.'.join(audio.key.split('.')[:-1])
-    s3_controller.download_file(S3_BUCKET_NAME, AUDIO_S3_PREFIX + audio.key, f'stt/static/{audio.key}')
+    s3_controller.download_file(S3_BUCKET_NAME, AUDIO_S3_PREFIX + audio.key, f'app/static/{audio.key}')
 
     stt_controller.convert_to_wav(prefix)
     stt_controller.speech_to_text(prefix)

diff --git a/app/controller/keywords.py b/app/controller/keywords.py
@@ -12,4 +12,4 @@ def __init__(self, llm: LLMController = Depends(LLMController)):
     def get_keywords(self, document: Document) -> Keywords:
         self.llm.set_document(document)
         answer = self.llm.request(self.prompt).content
-        return Keywords(keywords=list(map(lambda word: word.strip()[1:-1], list(answer[1:-1].split(',')))))
+        return Keywords(keywords=list(map(lambda word: word.strip(), list(answer[1:-1].split(',')))))
diff --git a/app/controller/mindmap.py b/app/controller/mindmap.py
@@ -5,12 +5,12 @@
 from app.schemas.mindmap import MindMap
 from app.schemas.context import Keywords
 
-from bs4 import BeautifulSoup
+import re
 
 from typing import List
 
 class MindMapController:
-    prompt = "Question: 문맥 내에서 %s들의 계층 구조를 html의 <ul>, <li>로 알려줘 \nAnswer: <html>"
+    prompt = "Question: 문맥 내에서 %s들의 계층 구조를 MarkDown의 '-'로 알려줘 \nAnswer: -"
     def __init__(self, llm: LLMController = Depends(LLMController)):
         self.llm = llm
 
@@ -21,32 +21,36 @@ def delete_stopwords(self, html: str) -> str:
                 html.replace(stopword, '')
         return html
 
-    def parse_html(self, html: str, keywords: List[str]) -> MindMap:
+    def parse_html(self, markdown: str, keywords: List[str]) -> MindMap:
         mindmap = MindMap()
         mindmap.keywords = keywords
-        keyword2index = {w: i for i, w in enumerate(keywords)}
-
-        soup = BeautifulSoup(html, 'html.parser')
-        prettified_html = soup.prettify()
-        prettified_html = list(map(lambda x: x.strip(), prettified_html.split('\n')))
-        prettified_html = [html for html in prettified_html if (html in keywords) or (html in ['<ul>', '</ul>'])]
+        keyword2index = {v: i for i, v in enumerate(keywords)}
 
+        current = 0
         stack = []
-        for index, word in enumerate(prettified_html):
-            if word == '</ul>': stack.pop(len(stack)-1)
+        lines = markdown.split('\n')
+        for line in lines:
+            sep = line.split('- ')[0]
+            word = line.split('- ')[-1]
             if word in keywords:
-                if len(stack) == 0:
-                    stack.append(word)
-                    mindmap.root = keyword2index[stack[0]]
+                sep = len(sep)
+                mindmap.graph[str(keyword2index[word])] = []
+                print(stack)
+
+                if sep == 0:
+                    mindmap.root = sep
+
+                if current == sep:
+                    if len(stack) != 0: stack.pop()
+                    if len(stack) != 0: mindmap.graph[str(stack[-1])].append(keyword2index[word])
+                    stack.append(keyword2index[word])
+                elif current < sep:
+                    mindmap.graph[str(stack[-1])].append(keyword2index[word])
+                    stack.append(keyword2index[word])
                 else:
-                    if prettified_html[index-1] != '<ul>': stack.pop(len(stack)-1)
-                    key = str(keyword2index[mindmap.root]) if len(stack) == 0 else str(keyword2index[stack[len(stack)-1]])
-
-                    if key not in mindmap.graph.keys():
-                        mindmap.graph[key] = [keyword2index[word]]
-                    else:
-                        mindmap.graph[key].append(keyword2index[word])
-                    stack.append(word)
+                    stack.pop()
+                    if len(stack) != 0: mindmap.graph[str(stack[-1])].append(keyword2index[word])
+                current = sep
         return mindmap
 
     def get_mindmap(self, document: Document, keywords: List[str]) -> MindMap:

diff --git a/app/controller/stt.py b/app/controller/stt.py
@@ -9,15 +9,15 @@ def __init__(self):
         pass
 
     def convert_to_wav(self, prefix):
-        output_path = f'stt/static/{prefix}.wav'        
-        y, sr = librosa.load(f'stt/static/{prefix}.m4a', sr=16000)
+        output_path = f'app/static/{prefix}.wav'        
+        y, sr = librosa.load(f'app/static/{prefix}.m4a', sr=16000)
         sf.write(output_path, y, sr)
 
     def speech_to_text(self, prefix):
-        os.system(f'./stt/whisper/main -m ./stt/whisper/models/ggml-medium.bin -l "ko" -f ./stt/static/{prefix}.wav -oj')
+        os.system(f'./app/whisper/main -m ./app/whisper/models/ggml-medium.bin -l "ko" -f ./app/static/{prefix}.wav -oj')
 
     def get_speech_text(self, prefix):
-        with open(f'stt/static/{prefix}.wav.json', 'r') as json_file:
+        with open(f'app/static/{prefix}.wav.json', 'r') as json_file:
             json_data = json.load(json_file)
 
         text = ""

diff --git a/requirements.txt b/requirements.txt
@@ -7,10 +7,8 @@ attrs==23.1.0
 audioread==3.0.1
 backoff==2.2.1
 bcrypt==4.0.1
-beautifulsoup4==4.12.2
-boto3==1.28.84
-botocore==1.31.84
-bs4==0.0.1
+boto3==1.28.85
+botocore==1.31.85
 cachetools==5.3.2
 certifi==2023.7.22
 cffi==1.16.0
@@ -47,8 +45,8 @@ jsonpatch==1.33
 jsonpointer==2.4
 kubernetes==28.1.0
 langchain==0.0.335
-langchainhub==0.1.13
-langsmith==0.0.63
+langchainhub==0.1.14
+langsmith==0.0.64
 lazy_loader==0.3
 librosa==0.10.1
 llvmlite==0.41.1
@@ -65,7 +63,7 @@ numba==0.58.1
 numpy==1.26.2
 oauthlib==3.2.2
 onnxruntime==1.16.2
-openai==1.2.3
+openai==1.2.4
 opentelemetry-api==1.21.0
 opentelemetry-exporter-otlp-proto-common==1.21.0
 opentelemetry-exporter-otlp-proto-grpc==1.21.0
@@ -82,8 +80,8 @@ pulsar-client==3.3.0
 pyasn1==0.5.0
 pyasn1-modules==0.3.0
 pycparser==2.21
-pydantic==2.4.2
-pydantic_core==2.10.1
+pydantic==2.5.0
+pydantic_core==2.14.1
 pypdf==3.17.0
 PyPika==0.48.9
 python-dateutil==2.8.2
@@ -100,7 +98,6 @@ scipy==1.11.3
 six==1.16.0
 sniffio==1.3.0
 soundfile==0.12.1
-soupsieve==2.5
 soxr==0.3.7
 SQLAlchemy==2.0.23
 starlette==0.27.0