AUTH-421 Fargate FastAPI container #15287

Merged · 6 commits · May 31, 2024
1 change: 1 addition & 0 deletions .eslintignore
@@ -33,3 +33,4 @@ app-testing/files
# app testing don't format the snapshots
app-testing/tests/__snapshots__
opentrons-ai-server/package
opentrons-ai-server/api/storage/index/
6 changes: 5 additions & 1 deletion opentrons-ai-server/.gitignore
@@ -1,9 +1,13 @@
.env
*.env
results
package
function.zip
requirements.txt
test.env
cached_token.txt
tests/helpers/cached_token.txt
tests/helpers/prod_cached_token.txt
tests/helpers/staging_cached_token.txt
tests/helpers/*_cached_token.txt
tests/helpers/test.env
tests/helpers/*.env
17 changes: 10 additions & 7 deletions opentrons-ai-server/Dockerfile
@@ -1,13 +1,16 @@
FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.12
FROM --platform=linux/amd64 python:3.12-slim

COPY --from=public.ecr.aws/datadog/lambda-extension:57 /opt/. /opt/
ENV PYTHONUNBUFFERED True
ENV DOCKER_RUNNING True

WORKDIR ${LAMBDA_TASK_ROOT}
WORKDIR /code

COPY requirements.txt .
COPY ./requirements.txt /code/requirements.txt

RUN pip install -r requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY api ${LAMBDA_TASK_ROOT}/api
COPY ./api /code/api

CMD [ "datadog_lambda.handler.handler" ]
EXPOSE 8000

CMD ["ddtrace-run", "uvicorn", "api.handler.fast:app", "--proxy-headers", "--host", "0.0.0.0", "--port", "8000", "--timeout-keep-alive", "190", "--workers", "3"]
10 changes: 9 additions & 1 deletion opentrons-ai-server/Makefile
@@ -67,13 +67,17 @@ gen-requirements:
@echo "Generating requirements.txt from Pipfile.lock..."
python -m pipenv requirements > requirements.txt

ENV ?= sandbox
ENV ?= local

.PHONY: deploy
deploy: gen-requirements
@echo "Deploying to environment: $(ENV)"
python -m pipenv run python deploy.py --env $(ENV)

.PHONY: prompted-deploy
prompted-deploy: gen-requirements
python -m pipenv run python deploy.py

.PHONY: direct-chat-completion
direct-chat-completion:
python -m pipenv run python -m api.domain.openai_predict
@@ -89,3 +93,7 @@ live-client:
.PHONY: test-live
test-live:
python -m pipenv run python -m pytest tests -m live --env $(ENV)

.PHONY: run
run:
python -m pipenv run python -m api.handler.local_run --env $(ENV)
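
The new `run` target invokes `api.handler.local_run` with an `--env` flag. That module isn't shown in this diff; a sketch of such a local entrypoint, with the flag name taken from the Makefile and the reload behavior assumed from the README note about restarting on changes, could be:

```python
# Hypothetical sketch of api/handler/local_run.py, for local development only.
import argparse

import uvicorn


def main() -> None:
    parser = argparse.ArgumentParser(description="Run the FastAPI app locally")
    parser.add_argument("--env", default="local", help="environment name (flag from the Makefile)")
    args = parser.parse_args()
    print(f"Starting local server for env={args.env}")
    # reload=True restarts the server whenever source files change.
    uvicorn.run("api.handler.fast:app", host="0.0.0.0", port=8000, reload=True)


if __name__ == "__main__":
    main()
```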
21 changes: 12 additions & 9 deletions opentrons-ai-server/Pipfile
@@ -4,26 +4,29 @@ verify_ssl = true
name = "pypi"

[packages]
openai = "==1.25.1"
openai = "==1.30.4"
Contributor review comment: Better to stick to 1.25.1; AI projects update fast, and this may break things unexpectedly.

python-dotenv = "==1.0.1"
httpx = "==0.27.0"
llama-index = "==0.10.24"
datadog-lambda = "==5.94.0"
pydantic = "==2.7.1"
fastapi = "==0.111.0"
ddtrace = "==2.8.5"
pydantic-settings = "==2.2.1"
pyjwt = {extras = ["crypto"], version = "*"}
python-json-logger = "==2.0.7"

[dev-packages]
docker = "==7.0.0"
pytest = "==8.2.0"
ruff = "==0.4.2"
docker = "==7.1.0"
pytest = "==8.2.1"
ruff = "==0.4.6"
mypy = "==1.10.0"
black = "==24.4.2"
types-requests = "*"
boto3 = "==1.34.97"
boto3-stubs = "==1.34.97"
boto3 = "==1.34.114"
boto3-stubs = "==1.34.114"
rich = "==13.7.1"
pyjwt = "==2.8.0"
cryptography = "==42.0.7"
types-docker = "==7.0.0.20240513"
types-docker = "==7.0.0.20240528"

[requires]
python_version = "3.12"
1,132 changes: 852 additions & 280 deletions opentrons-ai-server/Pipfile.lock

Large diffs are not rendered by default.

43 changes: 21 additions & 22 deletions opentrons-ai-server/README.md
@@ -20,42 +20,41 @@ The Opentrons AI application's server.
1. This will create a `.python-version` file in this directory
1. select the node version with `nvs` or `nvm` currently 18.19\*
1. Install pipenv and python dependencies `make setup`
1. to build and deploy you must have
1. AWS credentials and the right roles
1. docker installed

## For building and deploying

1. AWS credentials and config
1. docker

## Install a dev dependency

`python -m pipenv install pytest==8.2.0 --dev`

## Install a production dependency

`python -m pipenv install openai==1.25.1`
`python -m pipenv install openai==1.30.4`
Contributor review comment: We should not keep changing the version. Reproducibility is already difficult due to the GPT model itself.


## Lambda Code Organizations and Separation of Concerns
## FastAPI Code Organization and Separation of Concerns

- handler
- the lambda handler
- the router and request/response handling
- domain
- the business logic
- business logic
- integration
- the integration with other services
- integration with other services

## Dev process

1. Make your changes
1. Fix what can be automatically then lent and unit test like CI will `make pre-commit`
1. Fix what can be automatically then lint and unit test like CI will `make pre-commit`
1. `make pre-commit` passes
1. deploy to sandbox `make deploy test-live ENV=sandbox AWS_PROFILE=the-profile`

## Custom runtime

- Due to the size requirements of `llama-index` and our data we switched to a custom runtime
- This also allows us to use HTTP streaming
- The runtime is defined in the `Dockerfile`
- deploy.py contains the steps to
1. build the container image
1. tag the container image (currently uses the epoch until versioning in place)
1. log into and push to the correct ECR
1. create a new lambda version against the new image
1. await the function to be ready
1. run locally `make run` this runs the FastAPI server directly at localhost:8000
1. this watches for changes and restarts the server
1. test locally `make test-live` (ENV=local is the default in the Makefile)
1. use the live client `make live-client`

## ECS Fargate

- Our first version of this service is a long running POST that may take from 1-3 minutes to complete
- This forces us to use CloudFront(Max 180) + Load Balancer + ECS Fargate FastAPI container
- An AWS service ticket is needed to increase the max CloudFront response time from 60 to 180 seconds
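
Because a single request can run for 1 to 3 minutes, callers need timeouts well above client-library defaults. A minimal sketch using `httpx` (already a dependency in the Pipfile); the URL and payload shape are placeholders, not taken from this PR:

```python
# Illustrative client call only: the endpoint URL and JSON shape are placeholders.
import httpx

# Read timeout sized to the CloudFront/uvicorn limits described above.
timeout = httpx.Timeout(connect=10.0, read=190.0, write=10.0, pool=10.0)

with httpx.Client(timeout=timeout) as client:
    response = client.post(
        "https://example.invalid/chat/completions",
        json={"message": "Generate a serial dilution protocol"},
    )
    response.raise_for_status()
    print(response.json())
```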
28 changes: 14 additions & 14 deletions opentrons-ai-server/api/domain/openai_predict.py
@@ -1,3 +1,4 @@
import logging
from pathlib import Path
from typing import List, Tuple

@@ -22,7 +23,9 @@
tools,
)
from api.domain.utils import refine_characters
from api.settings import Settings, is_running_on_lambda
from api.settings import Settings

logger = logging.getLogger(__name__)

ROOT_PATH: Path = Path(Path(__file__)).parent.parent.parent

@@ -37,7 +40,7 @@ def __init__(self, settings: Settings) -> None:

def get_docs_all(self, query: str) -> Tuple[str, str, str]:
commands = self.extract_atomic_description(query)
print(f"commands: {commands}")
logger.info("Commands", extra={"commands": commands})

# define file paths for storage
example_command_path = str(ROOT_PATH / "api" / "storage" / "index" / "commands")
@@ -84,28 +87,27 @@ class atomic_descr(BaseModel):
output_cls=atomic_descr,
prompt_template_str=prompt_template_str.format(protocol_description=protocol_description),
verbose=False,
llm=li_OpenAI(model=self.settings.OPENAI_MODEL_NAME),
llm=li_OpenAI(model=self.settings.openai_model_name, api_key=self.settings.openai_api_key.get_secret_value()),
)
details = program(protocol_description=protocol_description)
descriptions = []
print("=" * 50)
for x in details.desc:
if x not in ["Modules:", "Adapter:", "Labware:", "Pipette mount:", "Commands:", "Well Allocation:", "No modules"]:
descriptions.append(x)
return descriptions

def refine_response(self, assitant_message: str) -> str:
if assitant_message is None:
def refine_response(self, assistant_message: str) -> str:
if assistant_message is None:
return ""
system_message: ChatCompletionMessageParam = {
"role": "system",
"content": f"{general_rules_1}\n Please leave useful comments for each command.",
}

user_message: ChatCompletionMessageParam = {"role": "user", "content": assitant_message}
user_message: ChatCompletionMessageParam = {"role": "user", "content": assistant_message}

response = self.client.chat.completions.create(
model=self.settings.OPENAI_MODEL_NAME,
model=self.settings.openai_model_name,
messages=[system_message, user_message],
stream=False,
temperature=0.005,
@@ -137,7 +139,7 @@ def predict(self, prompt: str, chat_completion_message_params: List[ChatCompleti
messages.append(user_message)

response: ChatCompletion = self.client.chat.completions.create(
model=self.settings.OPENAI_MODEL_NAME,
model=self.settings.openai_model_name,
messages=messages,
stream=False,
temperature=0.005,
@@ -155,7 +157,7 @@ def predict(self, prompt: str, chat_completion_message_params: List[ChatCompleti
assistant_message.content = str(self.refine_response(assistant_message.content))

if assistant_message.tool_calls and assistant_message.tool_calls[0]:
print("Simulation is started.")
logger.info("Simulation has started")
if assistant_message.tool_calls[0]:
assistant_message.content = str(assistant_message.tool_calls[0].function)
messages.append({"role": assistant_message.role, "content": assistant_message.content})
@@ -167,7 +169,7 @@ def predict(self, prompt: str, chat_completion_message_params: List[ChatCompleti
ChatCompletionFunctionMessageParam(role="function", name=tool_call.function.name, content=str(function_response))
)
response2: ChatCompletion = self.client.chat.completions.create(
model=self.settings.OPENAI_MODEL_NAME,
model=self.settings.openai_model_name,
messages=messages,
stream=False,
temperature=0,
@@ -183,12 +185,10 @@ def predict(self, prompt: str, chat_completion_message_params: List[ChatCompleti

def main() -> None:
"""Intended for testing this class locally."""
if is_running_on_lambda():
return
from rich import print
from rich.prompt import Prompt

settings = Settings.build()
settings = Settings()
openai = OpenAIPredict(settings)
prompt = Prompt.ask("Type a prompt to send to the OpenAI API:")
completion = openai.predict(prompt)
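
The diff above replaces `print` calls with `logging` calls that attach structured data via `extra=`. How those fields surface depends on the configured formatter; with `python-json-logger`, newly added to the Pipfile, a minimal setup (assumed here, the real configuration is not part of this diff) would be:

```python
# Assumed logging setup; the service's actual configuration lives elsewhere.
import logging

from pythonjsonlogger import jsonlogger

handler = logging.StreamHandler()
handler.setFormatter(jsonlogger.JsonFormatter("%(asctime)s %(levelname)s %(name)s %(message)s"))
logging.basicConfig(level=logging.INFO, handlers=[handler])

logger = logging.getLogger("api.domain.openai_predict")
# Keys passed via `extra` become top-level fields in the JSON record.
logger.info("Commands", extra={"commands": ["transfer 10 uL", "mix 3 times"]})
# -> {"asctime": "...", "levelname": "INFO", "message": "Commands", "commands": [...]}
```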
12 changes: 7 additions & 5 deletions opentrons-ai-server/api/domain/prompts.py
@@ -1,4 +1,5 @@
import json
import logging
import uuid
from typing import Any, Dict, Iterable

@@ -7,7 +8,9 @@

from api.settings import Settings

settings = Settings.build()
settings: Settings = Settings()
logger = logging.getLogger(__name__)


def generate_unique_name() -> str:
@@ -25,23 +27,23 @@ def send_post_request(payload: str) -> str:
response = requests.post(url, json=data, headers=headers)

if response.status_code != 200:
print("Error: " + response.text)
logger.error("Error: " + response.text)
return "Error: " + response.text

# Check the response before returning it
# ToDo clean up code
response_data: Dict[str, Any] = response.json()
if "error_message" in response_data:
print("Error in response:", response_data["error_message"])
logger.error("Error in response:", response_data["error_message"])
return str(response_data["error_message"])
elif "protocol_name" in response_data:
# print("Protocol executed successfully. Run log:", response_data["run_log"])
logger.debug("Protocol executed successfully", extra={"response_data": response_data["run_log"]})

return str(response_data["run_status"])
# ToDo if run_log option is on
# return response_data["run_log"]
else:
print("Unexpected response:", response_data)
logger.info("Unexpected response:", extra={"response_data": response_data})
return "Unexpected response"


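
Both changed modules now construct `Settings()` directly and read lower-case fields such as `openai_model_name` and `openai_api_key` (a secret). The `api/settings.py` changes themselves are not in this diff; a `pydantic-settings` sketch consistent with that usage, with only the field names taken from the calls above and everything else assumed, might be:

```python
# Assumed sketch of api/settings.py; field names come from the calls in this
# diff, while defaults and env handling are illustrative.
from pydantic import SecretStr
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    openai_model_name: str = "gpt-4"
    openai_api_key: SecretStr = SecretStr("")


if __name__ == "__main__":
    settings = Settings()
    print(settings.openai_model_name)
    # SecretStr keeps the key out of reprs; unwrap it only when calling the API.
    print(settings.openai_api_key.get_secret_value())
```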