Skip to content

Commit

Permalink
Merge branch 'main' into feat/new-login
Browse files Browse the repository at this point in the history
* main: (35 commits)
  fix #9409 (#9433)
  update dataset clean rule (#9426)
  add clean 7 days datasets (#9424)
  fix: resolve overlap issue with API Extension selector and modal (#9407)
  refactor: update the default values of top-k parameter in vdb to be consistent (#9367)
  fix: incorrect webapp image displayed (#9401)
  Fix/economical knowledge retrieval (#9396)
  feat: add timezone conversion for time tool (#9393)
  fix: Deprecated gemma2-9b model in Fireworks AI Provider (#9373)
  feat: storybook (#9324)
  fix: use gpt-4o-mini for validating credentials (#9387)
  feat: Enable baiduvector intergration test (#9369)
  fix: remove the stream option of zhipu and gemini (#9319)
  fix: add missing vikingdb param in docker .env.example (#9334)
  feat: add minimax abab6.5t support (#9365)
  fix: (#9336 followup) skip poetry preperation in style workflow when no change in api folder (#9362)
  feat: add glm-4-flashx, deprecated chatglm_turbo (#9357)
  fix: Azure OpenAI o1 max_completion_token and get_num_token_from_messages error (#9326)
  fix: In the output, the order of 'ta' is sometimes reversed as 'at'. #8015 (#8791)
  refactor: Add an enumeration type and use the factory pattern to obtain the corresponding class (#9356)
  ...
  • Loading branch information
ZhouhaoJiang committed Oct 17, 2024
2 parents 5c8d149 + fdcf87c commit abe6a1b
Show file tree
Hide file tree
Showing 250 changed files with 7,606 additions and 9,662 deletions.
7 changes: 3 additions & 4 deletions .github/workflows/api-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,17 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: Install Poetry
uses: abatilo/actions-poetry@v3

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'poetry'
cache-dependency-path: |
api/pyproject.toml
api/poetry.lock
- name: Install Poetry
uses: abatilo/actions-poetry@v3

- name: Check Poetry lockfile
run: |
poetry check -C api --lock
Expand Down
7 changes: 3 additions & 4 deletions .github/workflows/db-migration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,17 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: Install Poetry
uses: abatilo/actions-poetry@v3

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'poetry'
cache-dependency-path: |
api/pyproject.toml
api/poetry.lock
- name: Install Poetry
uses: abatilo/actions-poetry@v3

- name: Install dependencies
run: poetry install -C api

Expand Down
7 changes: 4 additions & 3 deletions .github/workflows/style.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,16 @@ jobs:
with:
files: api/**

- name: Install Poetry
uses: abatilo/actions-poetry@v3

- name: Set up Python
uses: actions/setup-python@v5
if: steps.changed-files.outputs.any_changed == 'true'
with:
python-version: '3.10'

- name: Install Poetry
if: steps.changed-files.outputs.any_changed == 'true'
uses: abatilo/actions-poetry@v3

- name: Python dependencies
if: steps.changed-files.outputs.any_changed == 'true'
run: poetry install -C api --only lint
Expand Down
1 change: 1 addition & 0 deletions api/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,4 @@
cd ../
poetry run -C api bash dev/pytest/pytest_all_tests.sh
```

9 changes: 7 additions & 2 deletions api/configs/feature/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,11 +531,16 @@ class DataSetConfig(BaseSettings):
Configuration for dataset management
"""

CLEAN_DAY_SETTING: PositiveInt = Field(
description="Interval in days for dataset cleanup operations",
PLAN_SANDBOX_CLEAN_DAY_SETTING: PositiveInt = Field(
description="Interval in days for dataset cleanup operations - plan: sandbox",
default=30,
)

PLAN_PRO_CLEAN_DAY_SETTING: PositiveInt = Field(
description="Interval in days for dataset cleanup operations - plan: pro and team",
default=7,
)

DATASET_OPERATOR_ENABLED: bool = Field(
description="Enable or disable dataset operator functionality",
default=False,
Expand Down
2 changes: 1 addition & 1 deletion api/configs/middleware/vdb/oracle_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class OracleConfig(BaseSettings):
default=None,
)

ORACLE_PORT: Optional[PositiveInt] = Field(
ORACLE_PORT: PositiveInt = Field(
description="Port number on which the Oracle database server is listening (default is 1521)",
default=1521,
)
Expand Down
2 changes: 1 addition & 1 deletion api/configs/middleware/vdb/pgvector_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class PGVectorConfig(BaseSettings):
default=None,
)

PGVECTOR_PORT: Optional[PositiveInt] = Field(
PGVECTOR_PORT: PositiveInt = Field(
description="Port number on which the PostgreSQL server is listening (default is 5433)",
default=5433,
)
Expand Down
2 changes: 1 addition & 1 deletion api/configs/middleware/vdb/pgvectors_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class PGVectoRSConfig(BaseSettings):
default=None,
)

PGVECTO_RS_PORT: Optional[PositiveInt] = Field(
PGVECTO_RS_PORT: PositiveInt = Field(
description="Port number on which the PostgreSQL server with PGVecto.RS is listening (default is 5431)",
default=5431,
)
Expand Down
36 changes: 24 additions & 12 deletions api/configs/middleware/vdb/vikingdb_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,39 @@ class VikingDBConfig(BaseModel):
"""

VIKINGDB_ACCESS_KEY: Optional[str] = Field(
default=None, description="The Access Key provided by Volcengine VikingDB for API authentication."
description="The Access Key provided by Volcengine VikingDB for API authentication."
"Refer to the following documentation for details on obtaining credentials:"
"https://www.volcengine.com/docs/6291/65568",
default=None,
)

VIKINGDB_SECRET_KEY: Optional[str] = Field(
default=None, description="The Secret Key provided by Volcengine VikingDB for API authentication."
description="The Secret Key provided by Volcengine VikingDB for API authentication.",
default=None,
)
VIKINGDB_REGION: Optional[str] = Field(
default="cn-shanghai",

VIKINGDB_REGION: str = Field(
description="The region of the Volcengine VikingDB service.(e.g., 'cn-shanghai', 'cn-beijing').",
default="cn-shanghai",
)
VIKINGDB_HOST: Optional[str] = Field(
default="api-vikingdb.mlp.cn-shanghai.volces.com",

VIKINGDB_HOST: str = Field(
description="The host of the Volcengine VikingDB service.(e.g., 'api-vikingdb.volces.com', \
'api-vikingdb.mlp.cn-shanghai.volces.com')",
default="api-vikingdb.mlp.cn-shanghai.volces.com",
)
VIKINGDB_SCHEME: Optional[str] = Field(
default="http",

VIKINGDB_SCHEME: str = Field(
description="The scheme of the Volcengine VikingDB service.(e.g., 'http', 'https').",
default="http",
)
VIKINGDB_CONNECTION_TIMEOUT: Optional[int] = Field(
default=30, description="The connection timeout of the Volcengine VikingDB service."

VIKINGDB_CONNECTION_TIMEOUT: int = Field(
description="The connection timeout of the Volcengine VikingDB service.",
default=30,
)
VIKINGDB_SOCKET_TIMEOUT: Optional[int] = Field(
default=30, description="The socket timeout of the Volcengine VikingDB service."

VIKINGDB_SOCKET_TIMEOUT: int = Field(
description="The socket timeout of the Volcengine VikingDB service.",
default=30,
)
2 changes: 1 addition & 1 deletion api/configs/packaging/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ class PackagingInfo(BaseSettings):

CURRENT_VERSION: str = Field(
description="Dify version",
default="0.9.1",
default="0.9.2",
)

COMMIT_SHA: str = Field(
Expand Down
78 changes: 7 additions & 71 deletions api/controllers/console/datasets/hit_testing.py
Original file line number Diff line number Diff line change
@@ -1,88 +1,24 @@
import logging
from flask_restful import Resource

from flask_login import current_user
from flask_restful import Resource, marshal, reqparse
from werkzeug.exceptions import Forbidden, InternalServerError, NotFound

import services
from controllers.console import api
from controllers.console.app.error import (
CompletionRequestError,
ProviderModelCurrentlyNotSupportError,
ProviderNotInitializeError,
ProviderQuotaExceededError,
)
from controllers.console.datasets.error import DatasetNotInitializedError
from controllers.console.datasets.hit_testing_base import DatasetsHitTestingBase
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from core.errors.error import (
LLMBadRequestError,
ModelCurrentlyNotSupportError,
ProviderTokenNotInitError,
QuotaExceededError,
)
from core.model_runtime.errors.invoke import InvokeError
from fields.hit_testing_fields import hit_testing_record_fields
from libs.login import login_required
from services.dataset_service import DatasetService
from services.hit_testing_service import HitTestingService


class HitTestingApi(Resource):
class HitTestingApi(Resource, DatasetsHitTestingBase):
@setup_required
@login_required
@account_initialization_required
def post(self, dataset_id):
dataset_id_str = str(dataset_id)

dataset = DatasetService.get_dataset(dataset_id_str)
if dataset is None:
raise NotFound("Dataset not found.")

try:
DatasetService.check_dataset_permission(dataset, current_user)
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))

parser = reqparse.RequestParser()
parser.add_argument("query", type=str, location="json")
parser.add_argument("retrieval_model", type=dict, required=False, location="json")
parser.add_argument("external_retrieval_model", type=dict, required=False, location="json")
args = parser.parse_args()

HitTestingService.hit_testing_args_check(args)

try:
response = HitTestingService.retrieve(
dataset=dataset,
query=args["query"],
account=current_user,
retrieval_model=args["retrieval_model"],
external_retrieval_model=args["external_retrieval_model"],
limit=10,
)
dataset = self.get_and_validate_dataset(dataset_id_str)
args = self.parse_args()
self.hit_testing_args_check(args)

return {"query": response["query"], "records": marshal(response["records"], hit_testing_record_fields)}
except services.errors.index.IndexNotInitializedError:
raise DatasetNotInitializedError()
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
except QuotaExceededError:
raise ProviderQuotaExceededError()
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
except LLMBadRequestError:
raise ProviderNotInitializeError(
"No Embedding Model or Reranking Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
)
except InvokeError as e:
raise CompletionRequestError(e.description)
except ValueError as e:
raise ValueError(str(e))
except Exception as e:
logging.exception("Hit testing failed.")
raise InternalServerError(str(e))
return self.perform_hit_testing(dataset, args)


api.add_resource(HitTestingApi, "/datasets/<uuid:dataset_id>/hit-testing")
85 changes: 85 additions & 0 deletions api/controllers/console/datasets/hit_testing_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import logging

from flask_login import current_user
from flask_restful import marshal, reqparse
from werkzeug.exceptions import Forbidden, InternalServerError, NotFound

import services.dataset_service
from controllers.console.app.error import (
CompletionRequestError,
ProviderModelCurrentlyNotSupportError,
ProviderNotInitializeError,
ProviderQuotaExceededError,
)
from controllers.console.datasets.error import DatasetNotInitializedError
from core.errors.error import (
LLMBadRequestError,
ModelCurrentlyNotSupportError,
ProviderTokenNotInitError,
QuotaExceededError,
)
from core.model_runtime.errors.invoke import InvokeError
from fields.hit_testing_fields import hit_testing_record_fields
from services.dataset_service import DatasetService
from services.hit_testing_service import HitTestingService


class DatasetsHitTestingBase:
    """Reusable building blocks for dataset hit-testing endpoints.

    All helpers are static methods so that a resource class can mix this
    base in and call them in sequence: look up and authorize the dataset,
    parse the JSON request body, validate the arguments, then run the
    retrieval and translate failures into API errors.
    """

    @staticmethod
    def get_and_validate_dataset(dataset_id: str):
        """Return the dataset for ``dataset_id`` after a permission check.

        Args:
            dataset_id: Identifier passed to ``DatasetService.get_dataset``.

        Returns:
            The dataset object returned by ``DatasetService.get_dataset``.

        Raises:
            NotFound: If the lookup returns ``None``.
            Forbidden: If the permission check for ``current_user`` raises
                ``NoPermissionError``; the original error is kept as the cause.
        """
        dataset = DatasetService.get_dataset(dataset_id)
        if dataset is None:
            raise NotFound("Dataset not found.")

        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            raise Forbidden(str(e)) from e

        return dataset

    @staticmethod
    def hit_testing_args_check(args):
        """Validate parsed request arguments via ``HitTestingService``."""
        HitTestingService.hit_testing_args_check(args)

    @staticmethod
    def parse_args():
        """Parse the hit-testing request body.

        Returns:
            The parsed arguments with the keys ``query``, ``retrieval_model``
            and ``external_retrieval_model``, all read from the JSON body.
        """
        parser = reqparse.RequestParser()

        parser.add_argument("query", type=str, location="json")
        parser.add_argument("retrieval_model", type=dict, required=False, location="json")
        parser.add_argument("external_retrieval_model", type=dict, required=False, location="json")
        return parser.parse_args()

    @staticmethod
    def perform_hit_testing(dataset, args):
        """Run retrieval against ``dataset`` and marshal the result.

        Args:
            dataset: Dataset object forwarded to ``HitTestingService.retrieve``.
            args: Mapping providing ``query``, ``retrieval_model`` and
                ``external_retrieval_model``.

        Returns:
            A dict with the echoed ``query`` and the ``records`` marshalled
            with ``hit_testing_record_fields``.

        Raises:
            DatasetNotInitializedError: The index is not initialized.
            ProviderNotInitializeError: Provider token missing, or the LLM
                rejected the request.
            ProviderQuotaExceededError: Provider quota exceeded.
            ProviderModelCurrentlyNotSupportError: Model not supported.
            CompletionRequestError: Model invocation failed.
            ValueError: Re-raised as a plain ``ValueError`` with the same message.
            InternalServerError: Any other failure (logged with traceback).

        Every translated error keeps the original exception as ``__cause__``.
        """
        try:
            response = HitTestingService.retrieve(
                dataset=dataset,
                query=args["query"],
                account=current_user,
                retrieval_model=args["retrieval_model"],
                external_retrieval_model=args["external_retrieval_model"],
                limit=10,
            )
            return {"query": response["query"], "records": marshal(response["records"], hit_testing_record_fields)}
        except services.errors.index.IndexNotInitializedError as e:
            raise DatasetNotInitializedError() from e
        except ProviderTokenNotInitError as ex:
            raise ProviderNotInitializeError(ex.description) from ex
        except QuotaExceededError as e:
            raise ProviderQuotaExceededError() from e
        except ModelCurrentlyNotSupportError as e:
            raise ProviderModelCurrentlyNotSupportError() from e
        except LLMBadRequestError as e:
            raise ProviderNotInitializeError(
                "No Embedding Model or Reranking Model available. Please configure a valid provider "
                "in the Settings -> Model Provider."
            ) from e
        except InvokeError as e:
            raise CompletionRequestError(e.description) from e
        except ValueError as e:
            # Handled before the generic clause so a ValueError is not
            # converted into an InternalServerError below.
            raise ValueError(str(e)) from e
        except Exception as e:
            logging.exception("Hit testing failed.")
            raise InternalServerError(str(e)) from e
3 changes: 1 addition & 2 deletions api/controllers/service_api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
bp = Blueprint("service_api", __name__, url_prefix="/v1")
api = ExternalApi(bp)


from . import index
from .app import app, audio, completion, conversation, file, message, workflow
from .dataset import dataset, document, segment
from .dataset import dataset, document, hit_testing, segment
2 changes: 0 additions & 2 deletions api/controllers/service_api/app/completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from werkzeug.exceptions import InternalServerError, NotFound

import services
from constants import UUID_NIL
from controllers.service_api import api
from controllers.service_api.app.error import (
AppUnavailableError,
Expand Down Expand Up @@ -108,7 +107,6 @@ def post(self, app_model: App, end_user: EndUser):
parser.add_argument("conversation_id", type=uuid_value, location="json")
parser.add_argument("retriever_from", type=str, required=False, default="dev", location="json")
parser.add_argument("auto_generate_name", type=bool, required=False, default=True, location="json")
parser.add_argument("parent_message_id", type=uuid_value, required=False, default=UUID_NIL, location="json")

args = parser.parse_args()

Expand Down
Loading

0 comments on commit abe6a1b

Please sign in to comment.