Merge branch 'main' into pydantic-setting
laipz8200 authored Jun 19, 2024
2 parents 3fbe0dd + d160d1e commit 5700bcd
Showing 26 changed files with 987 additions and 90 deletions.
80 changes: 39 additions & 41 deletions api/README.md
@@ -11,24 +11,34 @@
docker-compose -f docker-compose.middleware.yaml -p dify up -d
cd ../api
```

2. Copy `.env.example` to `.env`
3. Generate a `SECRET_KEY` in the `.env` file.

```bash for Linux
sed -i "/^SECRET_KEY=/c\SECRET_KEY=$(openssl rand -base64 42)" .env
```

```bash for Mac
secret_key=$(openssl rand -base64 42)
sed -i '' "/^SECRET_KEY=/c\\
SECRET_KEY=${secret_key}" .env
```
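The two variants above exist because GNU and BSD `sed` handle `-i` differently. As a portable alternative (a sketch, not the project's documented method), the same edit can be done with Python's standard library:

```python
# Portable SECRET_KEY rewrite that sidesteps GNU/BSD sed differences.
# The demo .env created below stands in for the file copied from .env.example.
import secrets
import tempfile
from pathlib import Path

env_file = Path(tempfile.mkdtemp()) / ".env"
env_file.write_text("CONSOLE_API_URL=\nSECRET_KEY=\n")  # demo content

key = secrets.token_urlsafe(42)  # urandom-backed, like `openssl rand`
updated = [
    f"SECRET_KEY={key}" if line.startswith("SECRET_KEY=") else line
    for line in env_file.read_text().splitlines()
]
env_file.write_text("\n".join(updated) + "\n")
```

Point `env_file` at the real `api/.env` when using this outside the demo.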

4. Create environment.

Dify API service uses [Poetry](https://python-poetry.org/docs/) to manage dependencies. You can execute `poetry shell` to activate the environment.

> Using pip can be found [below](#usage-with-pip).
5. Install dependencies

```bash
poetry env use 3.10
poetry install
```

If a contributor has forgotten to update `pyproject.toml` after changing the dependencies, you can run the following shell commands instead.

```bash
poetry add $(cat requirements.txt) # install dependencies of production and update pyproject.toml
poetry add $(cat requirements-dev.txt) --group dev # install dependencies of development and update pyproject.toml
```

6. Run migrate

Before the first launch, migrate the database to the latest version.

```bash
poetry run python -m flask db upgrade
```

7. Start backend

```bash
poetry run python -m flask run --host 0.0.0.0 --port=5001 --debug
```

8. Start Dify [web](../web) service.
9. Set up your application by visiting `http://localhost:3000`...
10. If you need to debug local async processing, please start the worker service.

```bash
poetry run python -m celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail
```

The celery worker handles async tasks such as dataset importing and document indexing.


## Testing

1. Install dependencies for both the backend and the test environment

```bash
poetry install --with dev
```

2. Run the tests locally, with the mocked system environment variables defined in the `tool.pytest_env` section of `pyproject.toml`

```bash
cd ../
poetry run -C api bash dev/pytest/pytest_all_tests.sh
```


## Usage with pip

> [!NOTE]
@@ -92,7 +100,7 @@
docker-compose -f docker-compose.middleware.yaml -p dify up -d
cd ../api
```

2. Copy `.env.example` to `.env`
3. Generate a `SECRET_KEY` in the `.env` file.

@@ -101,49 +109,39 @@
```

4. Create environment.

If you use Anaconda, create a new environment and activate it:

```bash
conda create --name dify python=3.10
conda activate dify
```

5. Install dependencies

```bash
pip install -r requirements.txt
```

6. Run migrate

Before the first launch, migrate the database to the latest version.

```bash
flask db upgrade
```

7. Start backend:

```bash
flask run --host 0.0.0.0 --port=5001 --debug
```
8. Set up your application by visiting <http://localhost:5001/console/api/setup> or other APIs...
9. If you need to debug local async processing, please start the worker service.

```bash
celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail
```

The celery worker handles async tasks such as dataset importing and document indexing.
8 changes: 8 additions & 0 deletions api/commands.py
@@ -327,6 +327,14 @@ def migrate_knowledge_vector_database():
"vector_store": {"class_prefix": collection_name}
}
dataset.index_struct = json.dumps(index_struct_dict)
elif vector_type == VectorType.OPENSEARCH:
dataset_id = dataset.id
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {
"type": VectorType.OPENSEARCH,
"vector_store": {"class_prefix": collection_name}
}
dataset.index_struct = json.dumps(index_struct_dict)
else:
raise ValueError(f"Vector store {vector_type} is not supported.")

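Every branch of this migration writes an `index_struct` of the same shape, varying only the type tag. A minimal sketch of the OpenSearch payload (the collection name below is hypothetical; the real one comes from `Dataset.gen_collection_name_by_id`):

```python
import json

# Hypothetical collection name; Dify derives the real one from the dataset id.
collection_name = "Vector_index_example_Node"

index_struct_dict = {
    "type": "opensearch",  # stand-in for the VectorType.OPENSEARCH tag
    "vector_store": {"class_prefix": collection_name},
}
serialized = json.dumps(index_struct_dict)
```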
7 changes: 7 additions & 0 deletions api/config.py
@@ -161,6 +161,13 @@ def __init__(self):
self.MILVUS_SECURE = get_env('MILVUS_SECURE')
self.MILVUS_DATABASE = get_env('MILVUS_DATABASE')

# OpenSearch settings
self.OPENSEARCH_HOST = get_env('OPENSEARCH_HOST')
self.OPENSEARCH_PORT = get_env('OPENSEARCH_PORT')
self.OPENSEARCH_USER = get_env('OPENSEARCH_USER')
self.OPENSEARCH_PASSWORD = get_env('OPENSEARCH_PASSWORD')
self.OPENSEARCH_SECURE = get_bool_env('OPENSEARCH_SECURE')

# weaviate settings
self.WEAVIATE_ENDPOINT = get_env('WEAVIATE_ENDPOINT')
self.WEAVIATE_API_KEY = get_env('WEAVIATE_API_KEY')
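The new block follows the same `get_env` pattern as the rest of `config.py`. A rough, self-contained sketch of how such lookups resolve (the helper implementations here are assumptions, not Dify's exact code):

```python
import os

# Hypothetical stand-ins for Dify's get_env / get_bool_env helpers.
def get_env(key: str, default: str = "") -> str:
    return os.environ.get(key, default)

def get_bool_env(key: str) -> bool:
    return get_env(key).lower() == "true"

# Illustrative values for a local OpenSearch instance.
os.environ["OPENSEARCH_HOST"] = "localhost"
os.environ["OPENSEARCH_PORT"] = "9200"
os.environ["OPENSEARCH_SECURE"] = "false"

opensearch_host = get_env("OPENSEARCH_HOST")
opensearch_port = get_env("OPENSEARCH_PORT")
opensearch_secure = get_bool_env("OPENSEARCH_SECURE")
```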
4 changes: 2 additions & 2 deletions api/controllers/console/datasets/datasets.py
@@ -503,7 +503,7 @@ def get(self):
'semantic_search'
]
}
case VectorType.QDRANT | VectorType.WEAVIATE:
case VectorType.QDRANT | VectorType.WEAVIATE | VectorType.OPENSEARCH:
return {
'retrieval_method': [
'semantic_search', 'full_text_search', 'hybrid_search'
@@ -525,7 +525,7 @@ def get(self, vector_type):
'semantic_search'
]
}
case VectorType.QDRANT | VectorType.WEAVIATE:
case VectorType.QDRANT | VectorType.WEAVIATE | VectorType.OPENSEARCH:
return {
'retrieval_method': [
'semantic_search', 'full_text_search', 'hybrid_search'
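Both endpoints encode the same rule: stores matched by the `QDRANT | WEAVIATE | OPENSEARCH` arm advertise full-text and hybrid search on top of semantic search, while the arms shown returning only `semantic_search` do not. A condensed sketch of that mapping (string tags stand in for the `VectorType` enum):

```python
# Stores whose match arm returns all three retrieval methods above.
HYBRID_CAPABLE = {"qdrant", "weaviate", "opensearch"}

def retrieval_methods(vector_type: str) -> list[str]:
    if vector_type in HYBRID_CAPABLE:
        return ["semantic_search", "full_text_search", "hybrid_search"]
    return ["semantic_search"]
```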
2 changes: 0 additions & 2 deletions api/core/agent/cot_agent_runner.py
@@ -61,8 +61,6 @@ def run(self, message: Message,
# convert tools into ModelRuntime Tool format
tool_instances, self._prompt_messages_tools = self._init_prompt_tools()

prompt_messages = self._organize_prompt_messages()

function_call_state = True
llm_usage = {
'usage': None
30 changes: 23 additions & 7 deletions api/core/agent/cot_chat_agent_runner.py
@@ -5,6 +5,7 @@
AssistantPromptMessage,
PromptMessage,
SystemPromptMessage,
TextPromptMessageContent,
UserPromptMessage,
)
from core.model_runtime.utils.encoders import jsonable_encoder
@@ -25,6 +25,21 @@ def _organize_system_prompt(self) -> SystemPromptMessage:

return SystemPromptMessage(content=system_prompt)

def _organize_user_query(self, query, prompt_messages: list[PromptMessage] = None) -> list[PromptMessage]:
"""
Organize user query
"""
if self.files:
prompt_message_contents = [TextPromptMessageContent(data=query)]
for file_obj in self.files:
prompt_message_contents.append(file_obj.prompt_message_content)

prompt_messages.append(UserPromptMessage(content=prompt_message_contents))
else:
prompt_messages.append(UserPromptMessage(content=query))

return prompt_messages

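The new helper's behavior can be modeled in isolation: with attached files the query becomes a list of content parts (text first, then one part per file); without files it stays a plain string. A simplified sketch with stand-in classes (not Dify's actual message types):

```python
from dataclasses import dataclass
from typing import Any

@dataclass
class UserPromptMessage:  # stand-in for Dify's message class
    content: Any

def organize_user_query(query: str, files: list, prompt_messages: list) -> list:
    """Append the user query, inlining file content parts when files exist."""
    if files:
        parts: list = [{"type": "text", "data": query}]
        parts += [{"type": "file", "data": f} for f in files]
        prompt_messages.append(UserPromptMessage(content=parts))
    else:
        prompt_messages.append(UserPromptMessage(content=query))
    return prompt_messages

msgs = organize_user_query("describe this image", ["cat.png"], [])
```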
def _organize_prompt_messages(self) -> list[PromptMessage]:
"""
Organize
@@ -51,27 +67,27 @@ def _organize_prompt_messages(self) -> list[PromptMessage]:
assistant_messages = [assistant_message]

# query messages
query_messages = self._organize_user_query(self._query, [])

if assistant_messages:
# organize historic prompt messages
historic_messages = self._organize_historic_prompt_messages([
system_message,
*query_messages,
*assistant_messages,
UserPromptMessage(content='continue')
])
messages = [
system_message,
*historic_messages,
*query_messages,
*assistant_messages,
UserPromptMessage(content='continue')
]
else:
# organize historic prompt messages
historic_messages = self._organize_historic_prompt_messages([system_message, *query_messages])
messages = [system_message, *historic_messages, *query_messages]

# join all messages
return messages