From fe36ac3a0c15ac616707a84a764c070817fd05a7 Mon Sep 17 00:00:00 2001 From: Ryan R Rogers Date: Wed, 12 Jul 2023 21:12:50 -0400 Subject: [PATCH 1/8] add gpt webpage extraction --- .gitignore | 5 +- .vscode/settings.json | 6 + balancer/controllers/chatgpt.py | 50 +++++ balancer/settings.py | 3 + balancer/urls.py | 2 + env | 1 + poetry.lock | 383 ++++++++++++++++++++++++++------ pyproject.toml | 10 +- 8 files changed, 391 insertions(+), 69 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 balancer/controllers/chatgpt.py create mode 100644 env diff --git a/.gitignore b/.gitignore index 739889f5..88d0c9ce 100644 --- a/.gitignore +++ b/.gitignore @@ -161,4 +161,7 @@ cython_debug/ # Custom script -*git-via-* \ No newline at end of file +*git-via-* + +# test env +balancer-test-env/ \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..cca67acf --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "[python]": { + "editor.defaultFormatter": "ms-python.autopep8" + }, + "python.formatting.provider": "none" +} \ No newline at end of file diff --git a/balancer/controllers/chatgpt.py b/balancer/controllers/chatgpt.py new file mode 100644 index 00000000..e7bf624d --- /dev/null +++ b/balancer/controllers/chatgpt.py @@ -0,0 +1,50 @@ +from django.http import JsonResponse +from bs4 import BeautifulSoup +from nltk.stem import PorterStemmer +import requests +import openai +import tiktoken +import os +import json + +# remove before production +from django.views.decorators.csrf import csrf_exempt + + +@csrf_exempt +def extract_webpage(request): + openai.api_key = os.getenv('OPENAI_API_KEY') + data = json.loads(request.body) + webpage_url = data['webpage_url'] + print(webpage_url) + + response = requests.get(webpage_url) + soup = BeautifulSoup(response.text, 'html.parser') + text_contents = soup.find_all('p') + text_contents = [p.get_text() for p in text_contents] + text_contents = ' '.join(text_contents) + + stemmer = PorterStemmer() + text_contents = text_contents.split() + text_contents = [stemmer.stem(word) for word in text_contents] + text_contents = ' '.join(text_contents) + print(text_contents) + + tokens = get_tokens(text_contents, "cl100k_base") + + ai_response = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=[ + {"role": "system", "content": "Give a brief description of this medicine: %s" % tokens, }], + max_tokens=500, + ) + + return JsonResponse({'message': ai_response}) + + +def get_tokens(string: str, encoding_name: str) -> str: + encoding = tiktoken.get_encoding(encoding_name) + tokens = encoding.encode(string) + tokens = tokens[:3500] + output_string = encoding.decode(tokens) + return output_string diff --git a/balancer/settings.py b/balancer/settings.py index 04809782..e90886d2 100644 --- a/balancer/settings.py +++ b/balancer/settings.py @@ -11,6 +11,9 @@ """ from pathlib import Path +from dotenv import load_dotenv + +load_dotenv() # Build paths inside the project like this: BASE_DIR / 'subdir'. BASE_DIR = Path(__file__).resolve().parent.parent diff --git a/balancer/urls.py b/balancer/urls.py index 8ee17373..9865a6b0 100644 --- a/balancer/urls.py +++ b/balancer/urls.py @@ -16,7 +16,9 @@ """ from django.contrib import admin from django.urls import path +from balancer.controllers import chatgpt urlpatterns = [ path('admin/', admin.site.urls), + path('extract_webpage/', chatgpt.extract_webpage, name='post_web_extraction') ] diff --git a/env b/env new file mode 100644 index 00000000..9847a1df --- /dev/null +++ b/env @@ -0,0 +1 @@ +OPENAI_API_KEY= \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index cf524871..6b41ea1d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,33 @@ -# This file is automatically @generated by Poetry and should not be changed by hand. +[[package]] +name = "aiohttp" +version = "3.8.4" +description = "Async http client/server framework (asyncio)" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +aiosignal = ">=1.1.2" +async-timeout = ">=4.0.0a3,<5.0" +attrs = ">=17.3.0" +charset-normalizer = ">=2.0,<4.0" +frozenlist = ">=1.1.1" +multidict = ">=4.5,<7.0" +yarl = ">=1.0,<2.0" + +[package.extras] +speedups = ["aiodns", "brotli", "cchardet"] + +[[package]] +name = "aiosignal" +version = "1.3.1" +description = "aiosignal: a list of registered asynchronous callbacks" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +frozenlist = ">=1.1.0" [[package]] name = "asgiref" @@ -7,16 +36,77 @@ description = "ASGI specs, helper code, and adapters" category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "asgiref-3.7.2-py3-none-any.whl", hash = "sha256:89b2ef2247e3b562a16eef663bc0e2e703ec6468e2fa8a5cd61cd449786d4f6e"}, - {file = "asgiref-3.7.2.tar.gz", hash = "sha256:9e0ce3aa93a819ba5b45120216b23878cf6e8525eb3848653452b4192b92afed"}, -] [package.dependencies] typing-extensions = {version = ">=4", markers = "python_version < \"3.11\""} [package.extras] -tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] +tests = ["pytest", "pytest-asyncio", "mypy (>=0.800)"] + +[[package]] +name = "async-timeout" +version = "4.0.2" +description = "Timeout context manager for asyncio programs" +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "attrs" +version = "23.1.0" +description = "Classes Without Boilerplate" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.extras] +cov = ["attrs", "coverage[toml] (>=5.3)"] +dev = ["attrs", "pre-commit"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] +tests = ["attrs", "zope-interface"] +tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest-mypy-plugins", "pytest-xdist", "pytest (>=4.3.0)"] + +[[package]] +name = "beautifulsoup4" +version = "4.12.2" +description = "Screen-scraping library" +category = "main" +optional = false +python-versions = ">=3.6.0" + +[package.dependencies] +soupsieve = ">1.2" + +[package.extras] +html5lib = ["html5lib"] +lxml = ["lxml"] + +[[package]] +name = "certifi" +version = "2023.5.7" +description = "Python package for providing Mozilla's CA Bundle." +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "charset-normalizer" +version = "3.2.0" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" +optional = false +python-versions = ">=3.7.0" + +[[package]] +name = "click" +version = "8.1.4" +description = "Composable command line interface toolkit" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} [[package]] name = "colorama" @@ -25,22 +115,14 @@ description = "Cross-platform colored terminal text." category = "main" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -files = [ - {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, - {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, -] [[package]] name = "django" -version = "4.2.2" +version = "4.2.3" description = "A high-level Python web framework that encourages rapid development and clean, pragmatic design." category = "main" optional = false python-versions = ">=3.8" -files = [ - {file = "Django-4.2.2-py3-none-any.whl", hash = "sha256:672b3fa81e1f853bb58be1b51754108ab4ffa12a77c06db86aa8df9ed0c46fe5"}, - {file = "Django-4.2.2.tar.gz", hash = "sha256:2a6b6fbff5b59dd07bef10bcb019bee2ea97a30b2a656d51346596724324badf"}, -] [package.dependencies] asgiref = ">=3.6.0,<4" @@ -53,19 +135,31 @@ bcrypt = ["bcrypt"] [[package]] name = "exceptiongroup" -version = "1.1.1" +version = "1.1.2" description = "Backport of PEP 654 (exception groups)" category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"}, - {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"}, -] [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "frozenlist" +version = "1.4.0" +description = "A list-like structure which implements collections.abc.MutableSequence" +category = "main" +optional = false +python-versions = ">=3.8" + +[[package]] +name = "idna" +version = "3.4" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" +optional = false +python-versions = ">=3.5" + [[package]] name = "iniconfig" version = "2.0.0" @@ -73,10 +167,63 @@ description = "brain-dead simple config-ini parsing" category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, - {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, -] + +[[package]] +name = "joblib" +version = "1.3.1" +description = "Lightweight pipelining with Python functions" +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "multidict" +version = "6.0.4" +description = "multidict implementation" +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "nltk" +version = "3.8.1" +description = "Natural Language Toolkit" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +click = "*" +joblib = "*" +regex = ">=2021.8.3" +tqdm = "*" + +[package.extras] +all = ["scikit-learn", "python-crfsuite", "requests", "numpy", "pyparsing", "twython", "scipy", "matplotlib"] +corenlp = ["requests"] +machine_learning = ["numpy", "python-crfsuite", "scikit-learn", "scipy"] +plot = ["matplotlib"] +tgrep = ["pyparsing"] +twitter = ["twython"] + +[[package]] +name = "openai" +version = "0.27.8" +description = "Python client library for the OpenAI API" +category = "main" +optional = false +python-versions = ">=3.7.1" + +[package.dependencies] +aiohttp = "*" +requests = ">=2.20" +tqdm = "*" + +[package.extras] +datalib = ["numpy", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "openpyxl (>=3.0.7)"] +dev = ["black (>=21.6b0,<22.0.0)", "pytest (>=6.0.0,<7.0.0)", "pytest-asyncio", "pytest-mock"] +embeddings = ["scikit-learn (>=1.0.2)", "tenacity (>=8.0.1)", "matplotlib", "plotly", "numpy", "scipy", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "openpyxl (>=3.0.7)"] +wandb = ["wandb", "numpy", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "openpyxl (>=3.0.7)"] [[package]] name = "packaging" @@ -85,22 +232,14 @@ description = "Core utilities for Python packages" category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, - {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, -] [[package]] name = "pluggy" -version = "1.0.0" +version = "1.2.0" description = "plugin and hook calling mechanisms for python" category = "main" optional = false -python-versions = ">=3.6" -files = [ - {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, - {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, -] +python-versions = ">=3.7" [package.extras] dev = ["pre-commit", "tox"] @@ -108,15 +247,11 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "pytest" -version = "7.3.2" +version = "7.4.0" description = "pytest: simple powerful testing with Python" category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "pytest-7.3.2-py3-none-any.whl", hash = "sha256:cdcbd012c9312258922f8cd3f1b62a6580fdced17db6014896053d47cddf9295"}, - {file = "pytest-7.3.2.tar.gz", hash = "sha256:ee990a3cc55ba808b80795a79944756f315c67c12b56abd3ac993a7b8c17030b"}, -] [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} @@ -136,17 +271,58 @@ description = "A Django plugin for pytest." category = "main" optional = false python-versions = ">=3.5" -files = [ - {file = "pytest-django-4.5.2.tar.gz", hash = "sha256:d9076f759bb7c36939dbdd5ae6633c18edfc2902d1a69fdbefd2426b970ce6c2"}, - {file = "pytest_django-4.5.2-py3-none-any.whl", hash = "sha256:c60834861933773109334fe5a53e83d1ef4828f2203a1d6a0fa9972f4f75ab3e"}, -] [package.dependencies] pytest = ">=5.4.0" [package.extras] docs = ["sphinx", "sphinx-rtd-theme"] -testing = ["Django", "django-configurations (>=2.0)"] +testing = ["django", "django-configurations (>=2.0)"] + +[[package]] +name = "python-dotenv" +version = "1.0.0" +description = "Read key-value pairs from a .env file and set them as environment variables" +category = "main" +optional = false +python-versions = ">=3.8" + +[package.extras] +cli = ["click (>=5.0)"] + +[[package]] +name = "regex" +version = "2023.6.3" +description = "Alternative regular expression module, to replace re." +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "requests" +version = "2.31.0" +description = "Python HTTP for Humans." +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "soupsieve" +version = "2.4.1" +description = "A modern CSS selector implementation for Beautiful Soup." +category = "main" +optional = false +python-versions = ">=3.7" [[package]] name = "sqlparse" @@ -155,16 +331,27 @@ description = "A non-validating SQL parser." category = "main" optional = false python-versions = ">=3.5" -files = [ - {file = "sqlparse-0.4.4-py3-none-any.whl", hash = "sha256:5430a4fe2ac7d0f93e66f1efc6e1338a41884b7ddf2a350cedd20ccc4d9d28f3"}, - {file = "sqlparse-0.4.4.tar.gz", hash = "sha256:d446183e84b8349fa3061f0fe7f06ca94ba65b426946ffebe6e3e8295332420c"}, -] [package.extras] -dev = ["build", "flake8"] +dev = ["flake8", "build"] doc = ["sphinx"] test = ["pytest", "pytest-cov"] +[[package]] +name = "tiktoken" +version = "0.4.0" +description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" +category = "main" +optional = false +python-versions = ">=3.8" + +[package.dependencies] +regex = ">=2022.1.18" +requests = ">=2.26.0" + +[package.extras] +blobfile = ["blobfile (>=2)"] + [[package]] name = "tomli" version = "2.0.1" @@ -172,22 +359,31 @@ description = "A lil' TOML parser" category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, - {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, -] + +[[package]] +name = "tqdm" +version = "4.65.0" +description = "Fast, Extensible Progress Meter" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["py-make (>=0.1.0)", "twine", "wheel"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] [[package]] name = "typing-extensions" -version = "4.6.3" +version = "4.7.1" description = "Backported and Experimental Type Hints for Python 3.7+" category = "main" optional = false python-versions = ">=3.7" -files = [ - {file = "typing_extensions-4.6.3-py3-none-any.whl", hash = "sha256:88a4153d8505aabbb4e13aacb7c486c2b4a33ca3b3f807914a9b4c844c471c26"}, - {file = "typing_extensions-4.6.3.tar.gz", hash = "sha256:d91d5919357fe7f681a9f2b5b4cb2a5f1ef0a1e9f59c4d8ff0d3491e05c0ffd5"}, -] [[package]] name = "tzdata" @@ -196,12 +392,71 @@ description = "Provider of IANA time zone data" category = "main" optional = false python-versions = ">=2" -files = [ - {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"}, - {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, -] + +[[package]] +name = "urllib3" +version = "2.0.3" +description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" +optional = false +python-versions = ">=3.7" + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +secure = ["certifi", "cryptography (>=1.9)", "idna (>=2.0.0)", "pyopenssl (>=17.1.0)", "urllib3-secure-extra"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "yarl" +version = "1.9.2" +description = "Yet another URL library" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" [metadata] -lock-version = "2.0" +lock-version = "1.1" python-versions = "^3.10" -content-hash = "acad0887d3a0036b536ec3820a8d056cb55af74cede28a7ba94d63e35d587f60" +content-hash = "fb8279b3dd71a3767817f35d25fcdabc2490fa1fb4b0a9a9f50aa314ce9e6389" + +[metadata.files] +aiohttp = [] +aiosignal = [] +asgiref = [] +async-timeout = [] +attrs = [] +beautifulsoup4 = [] +certifi = [] +charset-normalizer = [] +click = [] +colorama = [] +django = [] +exceptiongroup = [] +frozenlist = [] +idna = [] +iniconfig = [] +joblib = [] +multidict = [] +nltk = [] +openai = [] +packaging = [] +pluggy = [] +pytest = [] +pytest-django = [] +python-dotenv = [] +regex = [] +requests = [] +soupsieve = [] +sqlparse = [] +tiktoken = [] +tomli = [] +tqdm = [] +typing-extensions = [] +tzdata = [] +urllib3 = [] +yarl = [] diff --git a/pyproject.toml b/pyproject.toml index c847138a..294d8cf0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,10 +11,12 @@ python = "^3.10" Django = "^4.2.2" pytest = "^7.3.2" pytest-django = "^4.5.2" - -[tool.poetry.group.dev.dependencies] - - +beautifulsoup4 = "^4.12.2" +openai = "^0.27.8" +requests = "^2.31.0" +tiktoken = "^0.4.0" +nltk = "^3.8.1" +python-dotenv = "^1.0.0" [build-system] requires = ["poetry-core"] From 4392fbfe7a314b48190760dbeef4a969cf5bedae Mon Sep 17 00:00:00 2001 From: Ryan R Rogers Date: Fri, 14 Jul 2023 17:45:26 -0400 Subject: [PATCH 2/8] add diagnosis from dev server --- balancer/controllers/chatgpt.py | 32 +++++++++++++++++++++++++++++++- balancer/urls.py | 3 ++- poetry.lock | 2 +- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/balancer/controllers/chatgpt.py b/balancer/controllers/chatgpt.py index e7bf624d..d68c71e4 100644 --- a/balancer/controllers/chatgpt.py +++ b/balancer/controllers/chatgpt.py @@ -10,13 +10,14 @@ # remove before production from django.views.decorators.csrf import csrf_exempt +# summarize web content extracted from url + @csrf_exempt def extract_webpage(request): openai.api_key = os.getenv('OPENAI_API_KEY') data = json.loads(request.body) webpage_url = data['webpage_url'] - print(webpage_url) response = requests.get(webpage_url) soup = BeautifulSoup(response.text, 'html.parser') @@ -41,6 +42,8 @@ def extract_webpage(request): return JsonResponse({'message': ai_response}) +# get first 3500 tokens from string + def get_tokens(string: str, encoding_name: str) -> str: encoding = tiktoken.get_encoding(encoding_name) @@ -48,3 +51,30 @@ def get_tokens(string: str, encoding_name: str) -> str: tokens = tokens[:3500] output_string = encoding.decode(tokens) return output_string + +# provide diagnosis information + +# remove before production + + +@csrf_exempt +def diagnosis(request): + openai.api_key = os.getenv('OPENAI_API_KEY') + data = json.loads(request.body) + + if data is not None: + diagnosis = data['diagnosis'] + print(diagnosis) + + ai_response = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=[ + {"role": "system", "content": "Diagnosis: %s" % diagnosis, }], + max_tokens=4000, + ) + + response_data = {'message': ai_response} + return JsonResponse(response_data) + + # Handle the case when data is None + return JsonResponse({'error': 'Invalid request'}) diff --git a/balancer/urls.py b/balancer/urls.py index 9865a6b0..a4be270e 100644 --- a/balancer/urls.py +++ b/balancer/urls.py @@ -20,5 +20,6 @@ urlpatterns = [ path('admin/', admin.site.urls), - path('extract_webpage/', chatgpt.extract_webpage, name='post_web_extraction') + path('extract_webpage/', chatgpt.extract_webpage, name='post_web_extraction'), + path('diagnosis/', chatgpt.diagnosis, name='post_diagnosis'), ] diff --git a/poetry.lock b/poetry.lock index 6b41ea1d..3ad23d60 100644 --- a/poetry.lock +++ b/poetry.lock @@ -99,7 +99,7 @@ python-versions = ">=3.7.0" [[package]] name = "click" -version = "8.1.4" +version = "8.1.5" description = "Composable command line interface toolkit" category = "main" optional = false From 018e464aeb8a71bcf4d7c4129f7fa40302a474b1 Mon Sep 17 00:00:00 2001 From: Ryan R Rogers Date: Fri, 14 Jul 2023 17:48:27 -0400 Subject: [PATCH 3/8] update .gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 88d0c9ce..20d7f5d2 100644 --- a/.gitignore +++ b/.gitignore @@ -164,4 +164,5 @@ cython_debug/ *git-via-* # test env -balancer-test-env/ \ No newline at end of file +balancer-test-env/ +.vscode \ No newline at end of file From ec4098d84bc67ab1e9c7c2f2bc573aa573577264 Mon Sep 17 00:00:00 2001 From: Ryan R Rogers Date: Thu, 20 Jul 2023 10:52:50 -0400 Subject: [PATCH 4/8] resolve code review issues --- .vscode/settings.json | 6 --- balancer/controllers/chatgpt.py | 61 ++++++++++++++---------- env => config/docker/examples/sample.env | 0 poetry.lock | 34 +++++-------- pyproject.toml | 7 +-- 5 files changed, 50 insertions(+), 58 deletions(-) delete mode 100644 .vscode/settings.json rename env => config/docker/examples/sample.env (100%) diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index cca67acf..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "[python]": { - "editor.defaultFormatter": "ms-python.autopep8" - }, - "python.formatting.provider": "none" -} \ No newline at end of file diff --git a/balancer/controllers/chatgpt.py b/balancer/controllers/chatgpt.py index d68c71e4..7594da5c 100644 --- a/balancer/controllers/chatgpt.py +++ b/balancer/controllers/chatgpt.py @@ -7,74 +7,83 @@ import os import json -# remove before production +# XXX: remove csrf_exempt usage before production from django.views.decorators.csrf import csrf_exempt -# summarize web content extracted from url - @csrf_exempt -def extract_webpage(request): - openai.api_key = os.getenv('OPENAI_API_KEY') +def extract_webpage(request: str) -> JsonResponse: + """Takes a URL and returns a summary of page's text content. + + Currently only uses the first 3500 tokens.""" + openai.api_key = os.getenv("OPENAI_API_KEY") data = json.loads(request.body) - webpage_url = data['webpage_url'] + webpage_url = data["webpage_url"] response = requests.get(webpage_url) - soup = BeautifulSoup(response.text, 'html.parser') - text_contents = soup.find_all('p') + soup = BeautifulSoup(response.text, "html.parser") + text_contents = soup.find_all("p") text_contents = [p.get_text() for p in text_contents] - text_contents = ' '.join(text_contents) + text_contents = " ".join(text_contents) stemmer = PorterStemmer() text_contents = text_contents.split() text_contents = [stemmer.stem(word) for word in text_contents] - text_contents = ' '.join(text_contents) - print(text_contents) + text_contents = " ".join(text_contents) tokens = get_tokens(text_contents, "cl100k_base") ai_response = openai.ChatCompletion.create( model="gpt-3.5-turbo", messages=[ - {"role": "system", "content": "Give a brief description of this medicine: %s" % tokens, }], + { + "role": "system", + "content": "Give a brief description of this medicine: %s" % tokens, + } + ], max_tokens=500, ) - return JsonResponse({'message': ai_response}) - -# get first 3500 tokens from string + return JsonResponse({"message": ai_response}) def get_tokens(string: str, encoding_name: str) -> str: + """Tokenize the first 3500 tokens of a string.""" encoding = tiktoken.get_encoding(encoding_name) tokens = encoding.encode(string) tokens = tokens[:3500] output_string = encoding.decode(tokens) return output_string -# provide diagnosis information - -# remove before production - @csrf_exempt -def diagnosis(request): - openai.api_key = os.getenv('OPENAI_API_KEY') +def diagnosis(request: str) -> JsonResponse: + """Takes a diagnosis and returns a table of the most commonly prescribed medications for that diagnosis.""" + openai.api_key = os.getenv("OPENAI_API_KEY") data = json.loads(request.body) if data is not None: - diagnosis = data['diagnosis'] - print(diagnosis) + diagnosis = data["diagnosis"] ai_response = openai.ChatCompletion.create( model="gpt-3.5-turbo", messages=[ - {"role": "system", "content": "Diagnosis: %s" % diagnosis, }], + { + "role": "system", + "content": """Please provide a table of the most commonly prescribed medications for %s. + The table should be in HTML format, without any tags. It should have a maximum width + of 630px, with a margin of 0 for the top and bottom. The table should consist of two columns: + 'Medication Class' and 'Medication Names'. Each cell should have a left padding and a border, + and the text in the 'Medication Class' and 'Medications' cells should be displayed in bold. + No other cells should be bold.""" + % diagnosis, + } + ], max_tokens=4000, ) - response_data = {'message': ai_response} + response_data = {"message": ai_response} return JsonResponse(response_data) # Handle the case when data is None - return JsonResponse({'error': 'Invalid request'}) + return JsonResponse({"error": "Invalid request"}) diff --git a/env b/config/docker/examples/sample.env similarity index 100% rename from env rename to config/docker/examples/sample.env diff --git a/poetry.lock b/poetry.lock index 3ad23d60..ae53344f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,6 +1,6 @@ [[package]] name = "aiohttp" -version = "3.8.4" +version = "3.8.5" description = "Async http client/server framework (asyncio)" category = "main" optional = false @@ -99,7 +99,7 @@ python-versions = ">=3.7.0" [[package]] name = "click" -version = "8.1.5" +version = "8.1.6" description = "Composable command line interface toolkit" category = "main" optional = false @@ -137,7 +137,7 @@ bcrypt = ["bcrypt"] name = "exceptiongroup" version = "1.1.2" description = "Backport of PEP 654 (exception groups)" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" @@ -164,7 +164,7 @@ python-versions = ">=3.5" name = "iniconfig" version = "2.0.0" description = "brain-dead simple config-ini parsing" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" @@ -229,7 +229,7 @@ wandb = ["wandb", "numpy", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)", "ope name = "packaging" version = "23.1" description = "Core utilities for Python packages" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" @@ -237,7 +237,7 @@ python-versions = ">=3.7" name = "pluggy" version = "1.2.0" description = "plugin and hook calling mechanisms for python" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" @@ -249,7 +249,7 @@ testing = ["pytest", "pytest-benchmark"] name = "pytest" version = "7.4.0" description = "pytest: simple powerful testing with Python" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" @@ -268,7 +268,7 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no name = "pytest-django" version = "4.5.2" description = "A Django plugin for pytest." -category = "main" +category = "dev" optional = false python-versions = ">=3.5" @@ -279,17 +279,6 @@ pytest = ">=5.4.0" docs = ["sphinx", "sphinx-rtd-theme"] testing = ["django", "django-configurations (>=2.0)"] -[[package]] -name = "python-dotenv" -version = "1.0.0" -description = "Read key-value pairs from a .env file and set them as environment variables" -category = "main" -optional = false -python-versions = ">=3.8" - -[package.extras] -cli = ["click (>=5.0)"] - [[package]] name = "regex" version = "2023.6.3" @@ -356,7 +345,7 @@ blobfile = ["blobfile (>=2)"] name = "tomli" version = "2.0.1" description = "A lil' TOML parser" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" @@ -395,7 +384,7 @@ python-versions = ">=2" [[package]] name = "urllib3" -version = "2.0.3" +version = "2.0.4" description = "HTTP library with thread-safe connection pooling, file post, and more." category = "main" optional = false @@ -422,7 +411,7 @@ multidict = ">=4.0" [metadata] lock-version = "1.1" python-versions = "^3.10" -content-hash = "fb8279b3dd71a3767817f35d25fcdabc2490fa1fb4b0a9a9f50aa314ce9e6389" +content-hash = "ebf9ec188179e84af9d74841c88e01aee825fa753ba6023559ffb02b9143b605" [metadata.files] aiohttp = [] @@ -448,7 +437,6 @@ packaging = [] pluggy = [] pytest = [] pytest-django = [] -python-dotenv = [] regex = [] requests = [] soupsieve = [] diff --git a/pyproject.toml b/pyproject.toml index 294d8cf0..d6b9bd4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,14 +9,15 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.10" Django = "^4.2.2" -pytest = "^7.3.2" -pytest-django = "^4.5.2" beautifulsoup4 = "^4.12.2" openai = "^0.27.8" requests = "^2.31.0" tiktoken = "^0.4.0" nltk = "^3.8.1" -python-dotenv = "^1.0.0" + +[tool.poetry.dev-dependencies] +pytest = "^7.3.2" +pytest-django = "^4.5.2" [build-system] requires = ["poetry-core"] From 2674ed2edb808257f966baf7e2ce5c0d2c017fc7 Mon Sep 17 00:00:00 2001 From: Ryan R Rogers Date: Thu, 20 Jul 2023 10:56:01 -0400 Subject: [PATCH 5/8] resolve chatgpt.py issues --- balancer/controllers/chatgpt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/balancer/controllers/chatgpt.py b/balancer/controllers/chatgpt.py index 7594da5c..a4a78fc7 100644 --- a/balancer/controllers/chatgpt.py +++ b/balancer/controllers/chatgpt.py @@ -16,7 +16,7 @@ def extract_webpage(request: str) -> JsonResponse: """Takes a URL and returns a summary of page's text content. Currently only uses the first 3500 tokens.""" - openai.api_key = os.getenv("OPENAI_API_KEY") + openai.api_key = os.environ.get("OPENAI_API_KEY") data = json.loads(request.body) webpage_url = data["webpage_url"] @@ -59,7 +59,7 @@ def get_tokens(string: str, encoding_name: str) -> str: @csrf_exempt def diagnosis(request: str) -> JsonResponse: """Takes a diagnosis and returns a table of the most commonly prescribed medications for that diagnosis.""" - openai.api_key = os.getenv("OPENAI_API_KEY") + openai.api_key = os.environ.get("OPENAI_API_KEY") data = json.loads(request.body) if data is not None: From 60f7246419756f1aa84a2787504c3417b6b40d3e Mon Sep 17 00:00:00 2001 From: Ryan R Rogers Date: Thu, 20 Jul 2023 11:05:35 -0400 Subject: [PATCH 6/8] remove dotenv, update endpoint name --- balancer/controllers/chatgpt.py | 2 +- balancer/settings.py | 75 ++++++++++++++++----------------- balancer/urls.py | 6 +-- 3 files changed, 40 insertions(+), 43 deletions(-) diff --git a/balancer/controllers/chatgpt.py b/balancer/controllers/chatgpt.py index a4a78fc7..0c8f4b14 100644 --- a/balancer/controllers/chatgpt.py +++ b/balancer/controllers/chatgpt.py @@ -12,7 +12,7 @@ @csrf_exempt -def extract_webpage(request: str) -> JsonResponse: +def extract_text(request: str) -> JsonResponse: """Takes a URL and returns a summary of page's text content. Currently only uses the first 3500 tokens.""" diff --git a/balancer/settings.py b/balancer/settings.py index e90886d2..c9e4e153 100644 --- a/balancer/settings.py +++ b/balancer/settings.py @@ -11,9 +11,6 @@ """ from pathlib import Path -from dotenv import load_dotenv - -load_dotenv() # Build paths inside the project like this: BASE_DIR / 'subdir'. BASE_DIR = Path(__file__).resolve().parent.parent @@ -23,7 +20,7 @@ # See https://docs.djangoproject.com/en/4.2/howto/deployment/checklist/ # SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = 'django-insecure-vi%ccle5(!=4=c!=03z5bqr+ljh#0#xa^#af#-&2e#4lmhn&h8' +SECRET_KEY = "django-insecure-vi%ccle5(!=4=c!=03z5bqr+ljh#0#xa^#af#-&2e#4lmhn&h8" # SECURITY WARNING: don't run with debug turned on in production! DEBUG = True @@ -34,52 +31,52 @@ # Application definition INSTALLED_APPS = [ - 'django.contrib.admin', - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.sessions', - 'django.contrib.messages', - 'django.contrib.staticfiles', + "django.contrib.admin", + "django.contrib.auth", + "django.contrib.contenttypes", + "django.contrib.sessions", + "django.contrib.messages", + "django.contrib.staticfiles", ] MIDDLEWARE = [ - 'django.middleware.security.SecurityMiddleware', - 'django.contrib.sessions.middleware.SessionMiddleware', - 'django.middleware.common.CommonMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', - 'django.contrib.messages.middleware.MessageMiddleware', - 'django.middleware.clickjacking.XFrameOptionsMiddleware', + "django.middleware.security.SecurityMiddleware", + "django.contrib.sessions.middleware.SessionMiddleware", + "django.middleware.common.CommonMiddleware", + "django.middleware.csrf.CsrfViewMiddleware", + "django.contrib.auth.middleware.AuthenticationMiddleware", + "django.contrib.messages.middleware.MessageMiddleware", + "django.middleware.clickjacking.XFrameOptionsMiddleware", ] -ROOT_URLCONF = 'balancer.urls' +ROOT_URLCONF = "balancer.urls" TEMPLATES = [ { - 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'DIRS': [], - 'APP_DIRS': True, - 'OPTIONS': { - 'context_processors': [ - 'django.template.context_processors.debug', - 'django.template.context_processors.request', - 'django.contrib.auth.context_processors.auth', - 'django.contrib.messages.context_processors.messages', + "BACKEND": "django.template.backends.django.DjangoTemplates", + "DIRS": [], + "APP_DIRS": True, + "OPTIONS": { + "context_processors": [ + "django.template.context_processors.debug", + "django.template.context_processors.request", + "django.contrib.auth.context_processors.auth", + "django.contrib.messages.context_processors.messages", ], }, }, ] -WSGI_APPLICATION = 'balancer.wsgi.application' +WSGI_APPLICATION = "balancer.wsgi.application" # Database # https://docs.djangoproject.com/en/4.2/ref/settings/#databases DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.sqlite3', - 'NAME': BASE_DIR / 'db.sqlite3', + "default": { + "ENGINE": "django.db.backends.sqlite3", + "NAME": BASE_DIR / "db.sqlite3", } } @@ -89,16 +86,16 @@ AUTH_PASSWORD_VALIDATORS = [ { - 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', + "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", }, { - 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', + "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator", }, { - 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', + "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator", }, { - 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', + "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator", }, ] @@ -106,9 +103,9 @@ # Internationalization # https://docs.djangoproject.com/en/4.2/topics/i18n/ -LANGUAGE_CODE = 'en-us' +LANGUAGE_CODE = "en-us" -TIME_ZONE = 'UTC' +TIME_ZONE = "UTC" USE_I18N = True @@ -118,9 +115,9 @@ # Static files (CSS, JavaScript, Images) # https://docs.djangoproject.com/en/4.2/howto/static-files/ -STATIC_URL = 'static/' +STATIC_URL = "static/" # Default primary key field type # https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field -DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' +DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" diff --git a/balancer/urls.py b/balancer/urls.py index a4be270e..8a5de288 100644 --- a/balancer/urls.py +++ b/balancer/urls.py @@ -19,7 +19,7 @@ from balancer.controllers import chatgpt urlpatterns = [ - path('admin/', admin.site.urls), - path('extract_webpage/', chatgpt.extract_webpage, name='post_web_extraction'), - path('diagnosis/', chatgpt.diagnosis, name='post_diagnosis'), + path("admin/", admin.site.urls), + path("extract_text/", chatgpt.extract_text, name="post_web_text"), + path("diagnosis/", chatgpt.diagnosis, name="post_diagnosis"), ] From 3e689df311bfaf77ee101ca97cea5edd31e3c5f4 Mon Sep 17 00:00:00 2001 From: Jayson Dorsett Date: Thu, 20 Jul 2023 21:28:56 -0400 Subject: [PATCH 7/8] refactor: include dotenv until docker env vars --- balancer/settings.py | 4 ++++ pyproject.toml | 1 + 2 files changed, 5 insertions(+) diff --git a/balancer/settings.py b/balancer/settings.py index c9e4e153..947718f6 100644 --- a/balancer/settings.py +++ b/balancer/settings.py @@ -11,6 +11,10 @@ """ from pathlib import Path +from dotenv import load_dotenv + +load_dotenv() + # Build paths inside the project like this: BASE_DIR / 'subdir'. BASE_DIR = Path(__file__).resolve().parent.parent diff --git a/pyproject.toml b/pyproject.toml index d6b9bd4e..1a36546e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ openai = "^0.27.8" requests = "^2.31.0" tiktoken = "^0.4.0" nltk = "^3.8.1" +python-dotenv = "^1.0.0" [tool.poetry.dev-dependencies] pytest = "^7.3.2" From 58e1cb924f241299c9ef96e997727ed73ba89d30 Mon Sep 17 00:00:00 2001 From: Jayson Dorsett Date: Thu, 20 Jul 2023 21:29:06 -0400 Subject: [PATCH 8/8] docs: format --- balancer/controllers/chatgpt.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/balancer/controllers/chatgpt.py b/balancer/controllers/chatgpt.py index 0c8f4b14..a7897836 100644 --- a/balancer/controllers/chatgpt.py +++ b/balancer/controllers/chatgpt.py @@ -13,9 +13,11 @@ @csrf_exempt def extract_text(request: str) -> JsonResponse: - """Takes a URL and returns a summary of page's text content. + """ + Takes a URL and returns a summary of page's text content. - Currently only uses the first 3500 tokens.""" + Currently only uses the first 3500 tokens. + """ openai.api_key = os.environ.get("OPENAI_API_KEY") data = json.loads(request.body) webpage_url = data["webpage_url"]