Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
  • Loading branch information
takatost committed Sep 10, 2024
2 parents 5f2b382 + 7e88556 commit 79cd626
Show file tree
Hide file tree
Showing 776 changed files with 23,323 additions and 22,817 deletions.
2 changes: 1 addition & 1 deletion api/configs/feature/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,7 @@ class DataSetConfig(BaseSettings):
)

TIDB_SERVERLESS_NUMBER: PositiveInt = Field(
description='number of tidb serverless cluster',
description="number of tidb serverless cluster",
default=500,
)

Expand Down
26 changes: 13 additions & 13 deletions api/configs/middleware/vdb/tidb_on_qdrant_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,56 +10,56 @@ class TidbOnQdrantConfig(BaseSettings):
"""

TIDB_ON_QDRANT_URL: Optional[str] = Field(
description='Tidb on Qdrant url',
description="Tidb on Qdrant url",
default=None,
)

TIDB_ON_QDRANT_API_KEY: Optional[str] = Field(
description='Tidb on Qdrant api key',
description="Tidb on Qdrant api key",
default=None,
)

TIDB_ON_QDRANT_CLIENT_TIMEOUT: NonNegativeInt = Field(
description='Tidb on Qdrant client timeout in seconds',
description="Tidb on Qdrant client timeout in seconds",
default=20,
)

TIDB_ON_QDRANT_GRPC_ENABLED: bool = Field(
description='whether enable grpc support for Tidb on Qdrant connection',
description="whether enable grpc support for Tidb on Qdrant connection",
default=False,
)

TIDB_ON_QDRANT_GRPC_PORT: PositiveInt = Field(
description='Tidb on Qdrant grpc port',
description="Tidb on Qdrant grpc port",
default=6334,
)

TIDB_PUBLIC_KEY: Optional[str] = Field(
description='Tidb account public key',
description="Tidb account public key",
default=None,
)

TIDB_PRIVATE_KEY: Optional[str] = Field(
description='Tidb account private key',
description="Tidb account private key",
default=None,
)

TIDB_API_URL: Optional[str] = Field(
description='Tidb API url',
description="Tidb API url",
default=None,
)

TIDB_IAM_API_URL: Optional[str] = Field(
description='Tidb IAM API url',
description="Tidb IAM API url",
default=None,
)

TIDB_REGION: Optional[str] = Field(
description='Tidb serverless region',
default='regions/aws-us-east-1',
description="Tidb serverless region",
default="regions/aws-us-east-1",
)

TIDB_PROJECT_ID: Optional[str] = Field(
description='Tidb project id',
description="Tidb project id",
default=None,
)
)
30 changes: 20 additions & 10 deletions api/controllers/console/datasets/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,17 +565,27 @@ class DatasetRetrievalSettingApi(Resource):
def get(self):
vector_type = dify_config.VECTOR_STORE
match vector_type:
case VectorType.MILVUS | VectorType.RELYT | VectorType.PGVECTOR | VectorType.TIDB_VECTOR \
| VectorType.CHROMA | VectorType.TENCENT:
return {
'retrieval_method': [
RetrievalMethod.SEMANTIC_SEARCH.value
]
}
case VectorType.QDRANT | VectorType.WEAVIATE | VectorType.OPENSEARCH | (
VectorType.ANALYTICDB) | VectorType.MYSCALE | VectorType.ORACLE | VectorType.TIDB_ON_QDRANT | VectorType.ELASTICSEARCH:
case (
VectorType.MILVUS
| VectorType.RELYT
| VectorType.PGVECTOR
| VectorType.TIDB_VECTOR
| VectorType.CHROMA
| VectorType.TENCENT
):
return {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]}
case (
VectorType.QDRANT
| VectorType.WEAVIATE
| VectorType.OPENSEARCH
| (VectorType.ANALYTICDB)
| VectorType.MYSCALE
| VectorType.ORACLE
| VectorType.TIDB_ON_QDRANT
| VectorType.ELASTICSEARCH
):
return {
'retrieval_method': [
"retrieval_method": [
RetrievalMethod.SEMANTIC_SEARCH.value,
RetrievalMethod.FULL_TEXT_SEARCH.value,
RetrievalMethod.HYBRID_SEARCH.value,
Expand Down
41 changes: 21 additions & 20 deletions api/controllers/console/datasets/datasets_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,28 +287,32 @@ def post(self):
raise Forbidden()

parser = reqparse.RequestParser()
parser.add_argument('indexing_technique', type=str, choices=Dataset.INDEXING_TECHNIQUE_LIST, required=True,
nullable=False, location='json')
parser.add_argument('data_source', type=dict, required=True, nullable=True, location='json')
parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json')
parser.add_argument('doc_form', type=str, default='text_model', required=False, nullable=False, location='json')
parser.add_argument('doc_language', type=str, default='English', required=False, nullable=False,
location='json')
parser.add_argument('retrieval_model', type=dict, required=False, nullable=False,
location='json')
parser.add_argument('embedding_model', type=str, required=False, nullable=True,
location='json')
parser.add_argument('embedding_model_provider', type=str, required=False, nullable=True,
location='json')
parser.add_argument(
"indexing_technique",
type=str,
choices=Dataset.INDEXING_TECHNIQUE_LIST,
required=True,
nullable=False,
location="json",
)
parser.add_argument("data_source", type=dict, required=True, nullable=True, location="json")
parser.add_argument("process_rule", type=dict, required=True, nullable=True, location="json")
parser.add_argument("doc_form", type=str, default="text_model", required=False, nullable=False, location="json")
parser.add_argument(
"doc_language", type=str, default="English", required=False, nullable=False, location="json"
)
parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json")
parser.add_argument("embedding_model", type=str, required=False, nullable=True, location="json")
parser.add_argument("embedding_model_provider", type=str, required=False, nullable=True, location="json")
args = parser.parse_args()

# The role of the current user in the ta table must be admin, owner, editor, or dataset_operator
if not current_user.is_dataset_editor:
raise Forbidden()

if args['indexing_technique'] == 'high_quality':
if args['embedding_model'] is None or args['embedding_model_provider'] is None:
raise ValueError('embedding model and embedding model provider are required for high quality indexing.')
if args["indexing_technique"] == "high_quality":
if args["embedding_model"] is None or args["embedding_model_provider"] is None:
raise ValueError("embedding model and embedding model provider are required for high quality indexing.")
try:
model_manager = ModelManager()
model_manager.get_default_model_instance(
Expand Down Expand Up @@ -409,10 +413,7 @@ def get(self, dataset_id, batch):
dataset_id = str(dataset_id)
batch = str(batch)
documents = self.get_batch_documents(dataset_id, batch)
response = {
"total_segments": 0,
"preview": []
}
response = {"total_segments": 0, "preview": []}
if not documents:
return response
data_process_rule = documents[0].dataset_process_rule
Expand Down
2 changes: 1 addition & 1 deletion api/controllers/console/tag/tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@


def _validate_name(name):
if not name or len(name) < 1 or len(name) > 40:
if not name or len(name) < 1 or len(name) > 50:
raise ValueError("Name must be between 1 to 50 characters.")
return name

Expand Down
2 changes: 1 addition & 1 deletion api/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
import core.moderation.base
import core.moderation.base
Loading

0 comments on commit 79cd626

Please sign in to comment.