diff --git a/example-apps/chatbot-rag-app/.flaskenv b/example-apps/chatbot-rag-app/.flaskenv
deleted file mode 100644
index 88612f8d..00000000
--- a/example-apps/chatbot-rag-app/.flaskenv
+++ /dev/null
@@ -1,4 +0,0 @@
-FLASK_APP=api/app.py
-FLASK_RUN_PORT=4000
-# Production mode ensures we don't run into problems.
-FLASK_ENV=production
diff --git a/example-apps/chatbot-rag-app/Dockerfile b/example-apps/chatbot-rag-app/Dockerfile
index 317ebd69..bca2b036 100644
--- a/example-apps/chatbot-rag-app/Dockerfile
+++ b/example-apps/chatbot-rag-app/Dockerfile
@@ -1,4 +1,4 @@
-FROM node:20-alpine AS build-step
+FROM node:22-alpine AS build-step
 WORKDIR /app
 ENV PATH=/node_modules/.bin:$PATH
 COPY frontend ./frontend
@@ -28,7 +28,4 @@ COPY api ./api
 COPY data ./data
 EXPOSE 4000

-# The only thing different from running local is that in docker we need to
-# listen on all IPs, not just localhost.
-ENV FLASK_RUN_HOST=0.0.0.0
-CMD [ "flask", "run"]
+CMD [ "python", "api/app.py"]
diff --git a/example-apps/chatbot-rag-app/README.md b/example-apps/chatbot-rag-app/README.md
index 0419fae4..81e53614 100644
--- a/example-apps/chatbot-rag-app/README.md
+++ b/example-apps/chatbot-rag-app/README.md
@@ -26,11 +26,27 @@ use-cases. Visit the [Install Elasticsearch](https://www.elastic.co/search-labs/

 Once you decided your approach, edit your `.env` file accordingly.

-### Elasticsearch index and chat_history index
+### Running your own Elastic Stack with Docker

-By default, the app will use the `workplace-app-docs` index and the chat
-history index will be `workplace-app-docs-chat-history`. If you want to change
-these, edit `ES_INDEX` and `ES_INDEX_CHAT_HISTORY` entries in your `.env` file.
+If you'd like to start Elastic locally, you can use the provided
+[docker-compose-elastic.yml](docker-compose-elastic.yml) file. This starts
+Elasticsearch, Kibana, and APM Server and only requires Docker installed.
+
+Use docker compose to run Elastic stack in the background:
+
+```bash
+docker compose -f docker-compose-elastic.yml up --force-recreate -d
+```
+
+Then, you can view Kibana at http://localhost:5601/app/home#/
+
+If asked for a username and password, use username: elastic and password: elastic.
+
+Clean up when finished, like this:
+
+```bash
+docker compose -f docker-compose-elastic.yml down
+```

 ## Connecting to LLM

@@ -67,6 +83,12 @@ docker compose up --build --force-recreate
 *Note*: First time creating the index can fail on timeout. Wait a few minutes
 and retry.

+Clean up when finished, like this:
+
+```bash
+docker compose down
+```
+
 ### Run locally

 If you want to run this example with Python and Node.js, you need to do a few
@@ -95,9 +117,8 @@ correct packages installed:
 ```bash
 python3 -m venv .venv
 source .venv/bin/activate
-# install dev requirements for pip-compile and dotenv
-pip install pip-tools "python-dotenv[cli]"
-pip-compile
+# Install dotenv which is a portable way to load environment variables.
+pip install "python-dotenv[cli]"
 pip install -r requirements.txt
 ```

@@ -105,13 +126,7 @@ pip install -r requirements.txt
 First, ingest the data into elasticsearch:

 ```bash
-$ dotenv run -- flask create-index
-".elser_model_2" model not available, downloading it now
-Model downloaded, starting deployment
-Loading data from ./data/data.json
-Loaded 15 documents
-Split 15 documents into 26 chunks
-Creating Elasticsearch sparse vector store in http://localhost:9200
+FLASK_APP=api/app.py dotenv run -- flask create-index
 ```

 *Note*: First time creating the index can fail on timeout. Wait a few minutes
@@ -121,12 +136,33 @@ and retry.
 Now, run the app, which listens on http://localhost:4000

 ```bash
-$ dotenv run -- flask run
- * Serving Flask app 'api/app.py'
- * Debug mode: off
+dotenv run -- python api/app.py
 ```

-## Customizing the app
+## Advanced
+
+### Updating package versions
+
+To update package versions, recreate [requirements.txt](requirements.txt) and
+reinstall like this. Once checked in, any commands above will use updates.
+
+```bash
+rm -rf .venv
+python3 -m venv .venv
+source .venv/bin/activate
+# Install dev requirements for pip-compile
+pip install pip-tools
+# Recreate requirements.txt
+pip-compile
+# Install main dependencies
+pip install -r requirements.txt
+```
+
+### Elasticsearch index and chat_history index
+
+By default, the app will use the `workplace-app-docs` index and the chat
+history index will be `workplace-app-docs-chat-history`. If you want to change
+these, edit `ES_INDEX` and `ES_INDEX_CHAT_HISTORY` entries in your `.env` file.

 ### Indexing your own data

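The local run above ingests data with `flask create-index` into the `workplace-app-docs` index. If you want to confirm the ingest step worked independently of the app, you can ask Elasticsearch directly. A minimal sketch, assuming the `elasticsearch` Python client is available in the virtualenv and the local `elastic`/`elastic` credentials from the compose setup are in use:

```python
import os

from elasticsearch import Elasticsearch

# Defaults mirror the local setup in the README; override via .env / environment if needed.
es = Elasticsearch(
    os.getenv("ELASTICSEARCH_URL", "http://localhost:9200"),
    basic_auth=("elastic", "elastic"),
)
index = os.getenv("ES_INDEX", "workplace-app-docs")
if es.indices.exists(index=index):
    print(f"{index} contains {es.count(index=index)['count']} documents")
else:
    print(f"{index} is missing; re-run 'flask create-index'")
```
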
diff --git a/example-apps/chatbot-rag-app/api/app.py b/example-apps/chatbot-rag-app/api/app.py
index 826b6349..a94f4a24 100644
--- a/example-apps/chatbot-rag-app/api/app.py
+++ b/example-apps/chatbot-rag-app/api/app.py
@@ -37,6 +37,5 @@ def create_index():
     index_data.main()


-# Unless we run through flask, we can miss critical settings or telemetry signals.
 if __name__ == "__main__":
-    raise RuntimeError("Run via the parent directory: 'flask run'")
+    app.run(host="0.0.0.0", port=4000, debug=False)
diff --git a/example-apps/chatbot-rag-app/api/llm_integrations.py b/example-apps/chatbot-rag-app/api/llm_integrations.py
index ac34f1be..7da4bd60 100644
--- a/example-apps/chatbot-rag-app/api/llm_integrations.py
+++ b/example-apps/chatbot-rag-app/api/llm_integrations.py
@@ -11,60 +11,47 @@


 def init_openai_chat(temperature):
+    # Include streaming usage as this allows recording of LLM metrics
     return ChatOpenAI(
-        model=os.getenv("CHAT_MODEL"), streaming=True, temperature=temperature
+        model=os.getenv("CHAT_MODEL"),
+        streaming=True,
+        temperature=temperature,
+        model_kwargs={"stream_options": {"include_usage": True}},
     )


 def init_vertex_chat(temperature):
-    VERTEX_PROJECT_ID = os.getenv("VERTEX_PROJECT_ID")
-    VERTEX_REGION = os.getenv("VERTEX_REGION", "us-central1")
-    vertexai.init(project=VERTEX_PROJECT_ID, location=VERTEX_REGION)
-    return ChatVertexAI(streaming=True, temperature=temperature)
+    return ChatVertexAI(
+        model_name=os.getenv("CHAT_MODEL"), streaming=True, temperature=temperature
+    )


 def init_azure_chat(temperature):
+    # Include streaming usage as this allows recording of LLM metrics
     return AzureChatOpenAI(
-        model=os.getenv("CHAT_DEPLOYMENT"), streaming=True, temperature=temperature
+        model=os.getenv("CHAT_DEPLOYMENT"),
+        streaming=True,
+        temperature=temperature,
+        model_kwargs={"stream_options": {"include_usage": True}},
     )


 def init_bedrock(temperature):
-    AWS_ACCESS_KEY = os.getenv("AWS_ACCESS_KEY")
-    AWS_SECRET_KEY = os.getenv("AWS_SECRET_KEY")
-    AWS_REGION = os.getenv("AWS_REGION")
-    AWS_MODEL_ID = os.getenv("AWS_MODEL_ID", "anthropic.claude-v2")
     return ChatBedrock(
-        region_name=AWS_REGION,
-        aws_access_key_id=AWS_ACCESS_KEY,
-        aws_secret_access_key=AWS_SECRET_KEY,
-        model_id=AWS_MODEL_ID,
+        model_id=os.getenv("CHAT_MODEL"),
         streaming=True,
         model_kwargs={"temperature": temperature},
     )


 def init_mistral_chat(temperature):
-    MISTRAL_API_ENDPOINT = os.getenv("MISTRAL_API_ENDPOINT")
-    MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
-    MISTRAL_MODEL = os.getenv("MISTRAL_MODEL", "Mistral-large")
-    kwargs = {
-        "mistral_api_key": MISTRAL_API_KEY,
-        "temperature": temperature,
-    }
-    if MISTRAL_API_ENDPOINT:
-        kwargs["endpoint"] = MISTRAL_API_ENDPOINT
-    if MISTRAL_MODEL:
-        kwargs["model"] = MISTRAL_MODEL
-    return ChatMistralAI(**kwargs)
+    return ChatMistralAI(
+        model=os.getenv("CHAT_MODEL"), streaming=True, temperature=temperature
+    )


 def init_cohere_chat(temperature):
-    COHERE_API_KEY = os.getenv("COHERE_API_KEY")
-    COHERE_MODEL = os.getenv("COHERE_MODEL")
-    return ChatCohere(
-        cohere_api_key=COHERE_API_KEY, model=COHERE_MODEL, temperature=temperature
-    )
+    return ChatCohere(model=os.getenv("CHAT_MODEL"), temperature=temperature)


 MAP_LLM_TYPE_TO_CHAT_MODEL = {
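The refactor above standardizes each provider on a single `CHAT_MODEL` setting and, for the OpenAI-compatible chats, opts into streaming usage so token counts can be recorded. A rough sketch of exercising one of these constructors on its own, assuming `OPENAI_API_KEY` is set, `langchain-openai` is installed, and the script runs from the `api` directory; the model name here is only an example, not a value the app prescribes:

```python
import os

from llm_integrations import init_openai_chat  # helper defined in the diff above

os.environ.setdefault("CHAT_MODEL", "gpt-4o-mini")  # example model, adjust to your account

chat = init_openai_chat(temperature=0.2)
# Streaming yields message chunks; with include_usage enabled the final chunk
# also carries usage metadata (token counts) that telemetry can pick up.
for chunk in chat.stream("Say hello in one short sentence."):
    print(chunk.content, end="", flush=True)
print()
```
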
"Mistral-large") - kwargs = { - "mistral_api_key": MISTRAL_API_KEY, - "temperature": temperature, - } - if MISTRAL_API_ENDPOINT: - kwargs["endpoint"] = MISTRAL_API_ENDPOINT - if MISTRAL_MODEL: - kwargs["model"] = MISTRAL_MODEL - return ChatMistralAI(**kwargs) + return ChatMistralAI( + model=os.getenv("CHAT_MODEL"), streaming=True, temperature=temperature + ) def init_cohere_chat(temperature): - COHERE_API_KEY = os.getenv("COHERE_API_KEY") - COHERE_MODEL = os.getenv("COHERE_MODEL") - return ChatCohere( - cohere_api_key=COHERE_API_KEY, model=COHERE_MODEL, temperature=temperature - ) + return ChatCohere(model=os.getenv("CHAT_MODEL"), temperature=temperature) MAP_LLM_TYPE_TO_CHAT_MODEL = { diff --git a/example-apps/chatbot-rag-app/docker-compose-elastic.yml b/example-apps/chatbot-rag-app/docker-compose-elastic.yml new file mode 100644 index 00000000..6d2b0b8b --- /dev/null +++ b/example-apps/chatbot-rag-app/docker-compose-elastic.yml @@ -0,0 +1,91 @@ +name: elastic-stack + +services: + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:8.17.0 + container_name: elasticsearch + ports: + - 9200:9200 + environment: + - node.name=elasticsearch + - cluster.name=docker-cluster + - discovery.type=single-node + - ELASTIC_PASSWORD=elastic + - bootstrap.memory_lock=true + - xpack.security.enabled=true + - xpack.security.http.ssl.enabled=false + - xpack.security.transport.ssl.enabled=false + - xpack.license.self_generated.type=trial + - ES_JAVA_OPTS=-Xmx8g + ulimits: + memlock: + soft: -1 + hard: -1 + healthcheck: + test: ["CMD-SHELL", "curl -s http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=500ms"] + retries: 300 + interval: 1s + + elasticsearch_settings: + depends_on: + elasticsearch: + condition: service_healthy + image: docker.elastic.co/elasticsearch/elasticsearch:8.17.0 + container_name: elasticsearch_settings + restart: 'no' + command: > + bash -c ' + # gen-ai assistants in kibana save state in a way that requires security to be enabled, so we need to create + # a kibana system user before starting it. 
+ echo "Setup the kibana_system password"; + until curl -s -u "elastic:elastic" -X POST http://elasticsearch:9200/_security/user/kibana_system/_password -d "{\"password\":\"elastic\"}" -H "Content-Type: application/json" | grep -q "^{}"; do sleep 5; done; + ' + + kibana: + image: docker.elastic.co/kibana/kibana:8.17.0 + container_name: kibana + depends_on: + elasticsearch_settings: + condition: service_completed_successfully + ports: + - 5601:5601 + environment: + - SERVERNAME=kibana + - ELASTICSEARCH_HOSTS=http://elasticsearch:9200 + - ELASTICSEARCH_USERNAME=kibana_system + - ELASTICSEARCH_PASSWORD=elastic + # Non-default settings from here: + # https://github.com/elastic/apm-server/blob/main/testing/docker/kibana/kibana.yml + - MONITORING_UI_CONTAINER_ELASTICSEARCH_ENABLED=true + - XPACK_SECURITY_ENCRYPTIONKEY=fhjskloppd678ehkdfdlliverpoolfcr + - XPACK_ENCRYPTEDSAVEDOBJECTS_ENCRYPTIONKEY=fhjskloppd678ehkdfdlliverpoolfcr + - SERVER_PUBLICBASEURL=http://127.0.0.1:5601 + healthcheck: + test: ["CMD-SHELL", "curl -s http://localhost:5601/api/status | grep -q 'All services are available'"] + retries: 300 + interval: 1s + + apm-server: + image: docker.elastic.co/apm/apm-server:8.17.0 + container_name: apm-server + depends_on: + elasticsearch: + condition: service_healthy + command: > + apm-server + -E apm-server.kibana.enabled=true + -E apm-server.kibana.host=http://kibana:5601 + -E apm-server.kibana.username=elastic + -E apm-server.kibana.password=elastic + -E output.elasticsearch.hosts=["http://elasticsearch:9200"] + -E output.elasticsearch.username=elastic + -E output.elasticsearch.password=elastic + cap_add: ["CHOWN", "DAC_OVERRIDE", "SETGID", "SETUID"] + cap_drop: ["ALL"] + ports: + - 8200:8200 + healthcheck: + test: ["CMD-SHELL", "bash -c 'echo -n > /dev/tcp/127.0.0.1/8200'"] + retries: 300 + interval: 1s + diff --git a/example-apps/chatbot-rag-app/docker-compose.yml b/example-apps/chatbot-rag-app/docker-compose.yml index 634fa831..f6f2de0b 100644 --- a/example-apps/chatbot-rag-app/docker-compose.yml +++ b/example-apps/chatbot-rag-app/docker-compose.yml @@ -1,13 +1,20 @@ +name: chatbot-rag-app + services: ingest-data: build: context: . container_name: ingest-data restart: 'no' + environment: + # host.docker.internal means connect to the host machine, e.g. your laptop + ELASTICSEARCH_URL: "http://host.docker.internal:9200" + FLASK_APP: api/app.py env_file: - .env - - .flaskenv command: flask create-index + extra_hosts: + - "host.docker.internal:host-gateway" api-frontend: depends_on: @@ -16,8 +23,12 @@ services: container_name: api-frontend build: context: . + environment: + # host.docker.internal means connect to the host machine, e.g. 
diff --git a/example-apps/chatbot-rag-app/env.example b/example-apps/chatbot-rag-app/env.example
index d078293f..f060c491 100644
--- a/example-apps/chatbot-rag-app/env.example
+++ b/example-apps/chatbot-rag-app/env.example
@@ -28,24 +28,31 @@ ES_INDEX_CHAT_HISTORY=workplace-app-docs-chat-history

 # Uncomment and complete if you want to use Bedrock LLM
 # LLM_TYPE=bedrock
-# AWS_ACCESS_KEY=
-# AWS_SECRET_KEY=
-# AWS_REGION=
-# AWS_MODEL_ID=
+# AWS_ACCESS_KEY_ID=
+# AWS_SECRET_ACCESS_KEY=
+# AWS_DEFAULT_REGION=
+# CHAT_MODEL=anthropic.claude-3-5-sonnet-20240620-v1:0

 # Uncomment and complete if you want to use Vertex AI
 # LLM_TYPE=vertex
-# VERTEX_PROJECT_ID=
-# VERTEX_REGION=
+## Project that has the service "aiplatform.googleapis.com" enabled
+# GOOGLE_CLOUD_PROJECT=
+# GOOGLE_CLOUD_REGION=
+# CHAT_MODEL=gemini-1.5-flash-002
+## Needed if you haven't run `gcloud auth application-default login`
 # GOOGLE_APPLICATION_CREDENTIALS=

 # Uncomment and complete if you want to use Mistral AI
 # LLM_TYPE=mistral
+## Key in https://console.mistral.ai/api-keys/
 # MISTRAL_API_KEY=
-# MISTRAL_API_ENDPOINT=
-# MISTRAL_MODEL=
+## 'API Endpoints' from https://docs.mistral.ai/getting-started/models/models_overview/
+# CHAT_MODEL=open-mistral-nemo
+## Only set this if not using the default Mistral base URL
+# MISTRAL_BASE_URL=

 # Uncomment and complete if you want to use Cohere
 # LLM_TYPE=cohere
+## Key in https://dashboard.cohere.com/api-keys
 # COHERE_API_KEY=
-# COHERE_MODEL=
+# CHAT_MODEL=command-r7b-12-2024
diff --git a/example-apps/chatbot-rag-app/requirements.txt b/example-apps/chatbot-rag-app/requirements.txt
index 21fea6a1..2e2f2f04 100644
--- a/example-apps/chatbot-rag-app/requirements.txt
+++ b/example-apps/chatbot-rag-app/requirements.txt
@@ -1,5 +1,5 @@
 #
-# This file is autogenerated by pip-compile with Python 3.12
+# This file is autogenerated by pip-compile with Python 3.13
 # by the following command:
 #
 #    pip-compile
@@ -352,7 +352,6 @@ types-requests==2.32.0.20241016
     # via cohere
 typing-extensions==4.12.2
     # via
-    #   anyio
     #   cohere
     #   huggingface-hub
     #   langchain-core
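With every provider section above converging on `LLM_TYPE`, `CHAT_MODEL`, and a provider-specific credential, a small pre-flight check can catch an incomplete `.env` before starting the app. This is a hypothetical helper, not part of the app; it only uses variable names that appear in env.example:

```python
import os

# Variable names taken from env.example; the mapping itself is illustrative only.
REQUIRED_BY_LLM_TYPE = {
    "bedrock": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION", "CHAT_MODEL"],
    "vertex": ["GOOGLE_CLOUD_PROJECT", "GOOGLE_CLOUD_REGION", "CHAT_MODEL"],
    "mistral": ["MISTRAL_API_KEY", "CHAT_MODEL"],
    "cohere": ["COHERE_API_KEY", "CHAT_MODEL"],
}


def missing_settings(llm_type: str) -> list[str]:
    """Return the required settings that are not present in the environment."""
    return [name for name in REQUIRED_BY_LLM_TYPE.get(llm_type, []) if not os.getenv(name)]


if __name__ == "__main__":
    llm_type = os.getenv("LLM_TYPE", "openai")
    missing = missing_settings(llm_type)
    if missing:
        raise SystemExit(f"Missing settings for LLM_TYPE={llm_type}: {', '.join(missing)}")
    print(f"Environment looks complete for LLM_TYPE={llm_type}")
```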