diff --git a/web/app/components/datasets/create/index.tsx b/web/app/components/datasets/create/index.tsx index 5fdebe317f8d37..4c7d14a2af104f 100644 --- a/web/app/components/datasets/create/index.tsx +++ b/web/app/components/datasets/create/index.tsx @@ -127,7 +127,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { {(step === 2 && (!datasetId || (datasetId && !!detail))) && file.file)} diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index 2d1bda399ec728..a4422ebc3c381c 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -1,4 +1,3 @@ -/* eslint-disable no-mixed-operators */ 'use client' import React, { useEffect, useLayoutEffect, useRef, useState } from 'react' import { useTranslation } from 'react-i18next' @@ -11,7 +10,7 @@ import { groupBy } from 'lodash-es' import PreviewItem, { PreviewType } from './preview-item' import LanguageSelect from './language-select' import s from './index.module.css' -import type { CreateDocumentReq, CustomFile, FullDocumentDetail, FileIndexingEstimateResponse as IndexingEstimateResponse, NotionInfo, PreProcessingRule, Rules, createDocumentResponse } from '@/models/datasets' +import type { CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, IndexingEstimateResponse, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' import { createDocument, createFirstDocument, @@ -33,13 +32,14 @@ import { useDatasetDetailContext } from '@/context/dataset-detail' import I18n from '@/context/i18n' import { IS_CE_EDITION } from '@/config' +type ValueOf = T[keyof T] type StepTwoProps = { isSetting?: boolean documentDetail?: FullDocumentDetail hasSetAPIKEY: boolean onSetting: () => void datasetId?: string - indexingType?: string + indexingType?: ValueOf dataSourceType: DataSourceType files: CustomFile[] notionPages?: NotionPage[] @@ -89,21 +89,23 @@ const StepTwo = ({ const [rules, setRules] = useState([]) const [defaultConfig, setDefaultConfig] = useState() const hasSetIndexType = !!indexingType - const [indexType, setIndexType] = useState( - indexingType - || hasSetAPIKEY + const [indexType, setIndexType] = useState>( + (indexingType + || hasSetAPIKEY) ? IndexingType.QUALIFIED : IndexingType.ECONOMICAL, ) const [docForm, setDocForm] = useState( - datasetId && documentDetail ? documentDetail.doc_form : DocForm.TEXT, + (datasetId && documentDetail) ? documentDetail.doc_form : DocForm.TEXT, ) const [docLanguage, setDocLanguage] = useState(locale === 'en' ? 'English' : 'Chinese') const [QATipHide, setQATipHide] = useState(false) const [previewSwitched, setPreviewSwitched] = useState(false) const [showPreview, { setTrue: setShowPreview, setFalse: hidePreview }] = useBoolean() - const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState(null) - const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState(null) + const [customFileIndexingEstimate, setCustomFileIndexingEstimate] = useState(null) + const [automaticFileIndexingEstimate, setAutomaticFileIndexingEstimate] = useState(null) + const [estimateTokes, setEstimateTokes] = useState | null>(null) + const fileIndexingEstimate = (() => { return segmentationType === SegmentType.AUTO ? automaticFileIndexingEstimate : customFileIndexingEstimate })() @@ -153,7 +155,7 @@ const StepTwo = ({ } const resetRules = () => { if (defaultConfig) { - setSegmentIdentifier(defaultConfig.segmentation.separator === '\n' ? '\\n' : defaultConfig.segmentation.separator || '\\n') + setSegmentIdentifier((defaultConfig.segmentation.separator === '\n' ? '\\n' : defaultConfig.segmentation.separator) || '\\n') setMax(defaultConfig.segmentation.max_tokens) setRules(defaultConfig.pre_processing_rules) } @@ -161,12 +163,14 @@ const StepTwo = ({ const fetchFileIndexingEstimate = async (docForm = DocForm.TEXT) => { // eslint-disable-next-line @typescript-eslint/no-use-before-define - const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm)) - if (segmentationType === SegmentType.CUSTOM) + const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm)!) + if (segmentationType === SegmentType.CUSTOM) { setCustomFileIndexingEstimate(res) - - else + } + else { setAutomaticFileIndexingEstimate(res) + indexType === IndexingType.QUALIFIED && setEstimateTokes({ tokens: res.tokens, total_price: res.total_price }) + } } const confirmChangeCustomConfig = () => { @@ -179,8 +183,8 @@ const StepTwo = ({ const getIndexing_technique = () => indexingType || indexType const getProcessRule = () => { - const processRule: any = { - rules: {}, // api will check this. It will be removed after api refactored. + const processRule: ProcessRule = { + rules: {} as any, // api will check this. It will be removed after api refactored. mode: segmentationType, } if (segmentationType === SegmentType.CUSTOM) { @@ -220,37 +224,35 @@ const StepTwo = ({ }) as NotionInfo[] } - const getFileIndexingEstimateParams = (docForm: DocForm) => { - let params + const getFileIndexingEstimateParams = (docForm: DocForm): IndexingEstimateParams | undefined => { if (dataSourceType === DataSourceType.FILE) { - params = { + return { info_list: { data_source_type: dataSourceType, file_info_list: { - file_ids: files.map(file => file.id), + file_ids: files.map(file => file.id) as string[], }, }, - indexing_technique: getIndexing_technique(), + indexing_technique: getIndexing_technique() as string, process_rule: getProcessRule(), doc_form: docForm, doc_language: docLanguage, - dataset_id: datasetId, + dataset_id: datasetId as string, } } if (dataSourceType === DataSourceType.NOTION) { - params = { + return { info_list: { data_source_type: dataSourceType, notion_info_list: getNotionInfo(), }, - indexing_technique: getIndexing_technique(), + indexing_technique: getIndexing_technique() as string, process_rule: getProcessRule(), doc_form: docForm, doc_language: docLanguage, - dataset_id: datasetId, + dataset_id: datasetId as string, } } - return params } const getCreationParams = () => { @@ -291,7 +293,7 @@ const StepTwo = ({ try { const res = await fetchDefaultProcessRule({ url: '/datasets/process-rule' }) const separator = res.rules.segmentation.separator - setSegmentIdentifier(separator === '\n' ? '\\n' : separator || '\\n') + setSegmentIdentifier((separator === '\n' ? '\\n' : separator) || '\\n') setMax(res.rules.segmentation.max_tokens) setRules(res.rules.pre_processing_rules) setDefaultConfig(res.rules) @@ -306,7 +308,7 @@ const StepTwo = ({ const rules = documentDetail.dataset_process_rule.rules const separator = rules.segmentation.separator const max = rules.segmentation.max_tokens - setSegmentIdentifier(separator === '\n' ? '\\n' : separator || '\\n') + setSegmentIdentifier((separator === '\n' ? '\\n' : separator) || '\\n') setMax(max) setRules(rules.pre_processing_rules) setDefaultConfig(rules) @@ -330,7 +332,7 @@ const StepTwo = ({ res = await createFirstDocument({ body: params, }) - updateIndexingTypeCache && updateIndexingTypeCache(indexType) + updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) updateResultCache && updateResultCache(res) } else { @@ -338,7 +340,7 @@ const StepTwo = ({ datasetId, body: params, }) - updateIndexingTypeCache && updateIndexingTypeCache(indexType) + updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) updateResultCache && updateResultCache(res) } if (mutateDatasetRes) @@ -549,9 +551,9 @@ const StepTwo = ({
{t('datasetCreation.stepTwo.qualifiedTip')}
{t('datasetCreation.stepTwo.emstimateCost')}
{ - fileIndexingEstimate + estimateTokes ? ( -
{formatNumber(fileIndexingEstimate.tokens)} tokens(${formatNumber(fileIndexingEstimate.total_price)})
+
{formatNumber(estimateTokes.tokens)} tokens(${formatNumber(estimateTokes.total_price)})
) : (
{t('datasetCreation.stepTwo.calculating')}
diff --git a/web/models/datasets.ts b/web/models/datasets.ts index cfaa965e08c8c5..be0b281e69e490 100644 --- a/web/models/datasets.ts +++ b/web/models/datasets.ts @@ -183,15 +183,22 @@ export type DocumentListResponse = { limit: number } -export type CreateDocumentReq = { +export type DocumentReq = { original_document_id?: string indexing_technique?: string doc_form: 'text_model' | 'qa_model' doc_language: string - data_source: DataSource process_rule: ProcessRule } +export type CreateDocumentReq = DocumentReq & { + data_source: DataSource +} + +export type IndexingEstimateParams = DocumentReq & Partial & { + dataset_id: string +} + export type DataSource = { type: DataSourceType info_list: { diff --git a/web/service/datasets.ts b/web/service/datasets.ts index aada9c641b50ec..63120d39dfb658 100644 --- a/web/service/datasets.ts +++ b/web/service/datasets.ts @@ -10,6 +10,7 @@ import type { FileIndexingEstimateResponse, HitTestingRecordsResponse, HitTestingResponse, + IndexingEstimateParams, IndexingEstimateResponse, IndexingStatusBatchResponse, IndexingStatusResponse, @@ -189,7 +190,7 @@ export const fetchTestingRecords: Fetcher(`/datasets/${datasetId}/queries`, { params }) } -export const fetchFileIndexingEstimate: Fetcher = (body: any) => { +export const fetchFileIndexingEstimate: Fetcher = (body: IndexingEstimateParams) => { return post('/datasets/indexing-estimate', { body }) }