Skip to content

Commit

Permalink
Merge branch 'langgenius:main' into main
Browse files (browse the repository at this point in the history)
  • Loading branch information
XiaoBa-Yu authored Dec 26, 2024
2 parents 7213914 + 8339d2c commit 10c8f50
Show file tree
Hide file tree
Showing 206 changed files with 9,026 additions and 3,103 deletions.
2 changes: 1 addition & 1 deletion api/configs/feature/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,7 @@ class RagEtlConfig(BaseSettings):

UNSTRUCTURED_API_KEY: Optional[str] = Field(
description="API key for Unstructured.io service",
default=None,
default="",
)

SCARF_NO_ANALYTICS: Optional[str] = Field(
Expand Down
7 changes: 3 additions & 4 deletions api/core/rag/extractor/extract_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,11 @@ def extract(
input_file = Path(file_path)
file_extension = input_file.suffix.lower()
etl_type = dify_config.ETL_TYPE
unstructured_api_url = dify_config.UNSTRUCTURED_API_URL
unstructured_api_key = dify_config.UNSTRUCTURED_API_KEY
assert unstructured_api_url is not None, "unstructured_api_url is required"
assert unstructured_api_key is not None, "unstructured_api_key is required"
extractor: Optional[BaseExtractor] = None
if etl_type == "Unstructured":
unstructured_api_url = dify_config.UNSTRUCTURED_API_URL
unstructured_api_key = dify_config.UNSTRUCTURED_API_KEY or ""

if file_extension in {".xlsx", ".xls"}:
extractor = ExcelExtractor(file_path)
elif file_extension == ".pdf":
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import base64
import logging
from typing import Optional

from bs4 import BeautifulSoup # type: ignore

Expand All @@ -15,7 +16,7 @@ class UnstructuredEmailExtractor(BaseExtractor):
file_path: Path to the file to load.
"""

def __init__(self, file_path: str, api_url: str, api_key: str):
def __init__(self, file_path: str, api_url: Optional[str] = None, api_key: str = ""):
"""Initialize with file path."""
self._file_path = file_path
self._api_url = api_url
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(
self,
file_path: str,
api_url: Optional[str] = None,
api_key: Optional[str] = None,
api_key: str = "",
):
"""Initialize with file path."""
self._file_path = file_path
Expand All @@ -30,9 +30,6 @@ def extract(self) -> list[Document]:
if self._api_url:
from unstructured.partition.api import partition_via_api

if self._api_key is None:
raise ValueError("api_key is required")

elements = partition_via_api(filename=self._file_path, api_url=self._api_url, api_key=self._api_key)
else:
from unstructured.partition.epub import partition_epub
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from typing import Optional

from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.models.document import Document
Expand All @@ -24,7 +25,7 @@ class UnstructuredMarkdownExtractor(BaseExtractor):
if the specified encoding fails.
"""

def __init__(self, file_path: str, api_url: str, api_key: str):
def __init__(self, file_path: str, api_url: Optional[str] = None, api_key: str = ""):
"""Initialize with file path."""
self._file_path = file_path
self._api_url = api_url
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from typing import Optional

from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.models.document import Document
Expand All @@ -14,7 +15,7 @@ class UnstructuredMsgExtractor(BaseExtractor):
file_path: Path to the file to load.
"""

def __init__(self, file_path: str, api_url: str, api_key: str):
def __init__(self, file_path: str, api_url: Optional[str] = None, api_key: str = ""):
"""Initialize with file path."""
self._file_path = file_path
self._api_url = api_url
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from typing import Optional

from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.models.document import Document
Expand All @@ -14,7 +15,7 @@ class UnstructuredPPTExtractor(BaseExtractor):
file_path: Path to the file to load.
"""

def __init__(self, file_path: str, api_url: str, api_key: str):
def __init__(self, file_path: str, api_url: Optional[str] = None, api_key: str = ""):
"""Initialize with file path."""
self._file_path = file_path
self._api_url = api_url
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from typing import Optional

from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.models.document import Document
Expand All @@ -14,7 +15,7 @@ class UnstructuredPPTXExtractor(BaseExtractor):
file_path: Path to the file to load.
"""

def __init__(self, file_path: str, api_url: str, api_key: str):
def __init__(self, file_path: str, api_url: Optional[str] = None, api_key: str = ""):
"""Initialize with file path."""
self._file_path = file_path
self._api_url = api_url
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from typing import Optional

from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.models.document import Document
Expand All @@ -14,7 +15,7 @@ class UnstructuredXmlExtractor(BaseExtractor):
file_path: Path to the file to load.
"""

def __init__(self, file_path: str, api_url: str, api_key: str):
def __init__(self, file_path: str, api_url: Optional[str] = None, api_key: str = ""):
"""Initialize with file path."""
self._file_path = file_path
self._api_url = api_url
Expand Down
3 changes: 2 additions & 1 deletion api/core/tools/utils/text_processing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,6 @@ def remove_leading_symbols(text: str) -> str:
str: The text with leading punctuation or symbols removed.
"""
# Match Unicode ranges for punctuation and symbols
pattern = r"^[\u2000-\u206F\u2E00-\u2E7F\u3000-\u303F!\"#$%&'()*+,\-./:;<=>?@\[\]^_`{|}~]+"
# FIXME: this pattern is a confusing quick fix for #11868; consider refactoring it later
pattern = r"^[\u2000-\u206F\u2E00-\u2E7F\u3000-\u303F!\"#$%&'()*+,./:;<=>?@^_`~]+"
return re.sub(pattern, "", text)
Original file line number Diff line number Diff line change
Expand Up @@ -7,85 +7,36 @@ import { useTranslation } from 'react-i18next'
import { useBoolean } from 'ahooks'
import {
Cog8ToothIcon,
// CommandLineIcon,
Squares2X2Icon,
// eslint-disable-next-line sort-imports
PuzzlePieceIcon,
DocumentTextIcon,
PaperClipIcon,
QuestionMarkCircleIcon,
} from '@heroicons/react/24/outline'
import {
Cog8ToothIcon as Cog8ToothSolidIcon,
// CommandLineIcon as CommandLineSolidIcon,
DocumentTextIcon as DocumentTextSolidIcon,
} from '@heroicons/react/24/solid'
import Link from 'next/link'
import { RiApps2AddLine, RiInformation2Line } from '@remixicon/react'
import s from './style.module.css'
import classNames from '@/utils/classnames'
import { fetchDatasetDetail, fetchDatasetRelatedApps } from '@/service/datasets'
import type { RelatedApp, RelatedAppResponse } from '@/models/datasets'
import type { RelatedAppResponse } from '@/models/datasets'
import AppSideBar from '@/app/components/app-sidebar'
import Divider from '@/app/components/base/divider'
import AppIcon from '@/app/components/base/app-icon'
import Loading from '@/app/components/base/loading'
import FloatPopoverContainer from '@/app/components/base/float-popover-container'
import DatasetDetailContext from '@/context/dataset-detail'
import { DataSourceType } from '@/models/datasets'
import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
import { LanguagesSupported } from '@/i18n/language'
import { useStore } from '@/app/components/app/store'
import { AiText, ChatBot, CuteRobot } from '@/app/components/base/icons/src/vender/solid/communication'
import { Route } from '@/app/components/base/icons/src/vender/solid/mapsAndTravel'
import { getLocaleOnClient } from '@/i18n'
import { useAppContext } from '@/context/app-context'
import Tooltip from '@/app/components/base/tooltip'
import LinkedAppsPanel from '@/app/components/base/linked-apps-panel'

export type IAppDetailLayoutProps = {
children: React.ReactNode
params: { datasetId: string }
}

type ILikedItemProps = {
type?: 'plugin' | 'app'
appStatus?: boolean
detail: RelatedApp
isMobile: boolean
}

const LikedItem = ({
type = 'app',
detail,
isMobile,
}: ILikedItemProps) => {
return (
<Link className={classNames(s.itemWrapper, 'px-2', isMobile && 'justify-center')} href={`/app/${detail?.id}/overview`}>
<div className={classNames(s.iconWrapper, 'mr-0')}>
<AppIcon size='tiny' iconType={detail.icon_type} icon={detail.icon} background={detail.icon_background} imageUrl={detail.icon_url} />
{type === 'app' && (
<span className='absolute bottom-[-2px] right-[-2px] w-3.5 h-3.5 p-0.5 bg-white rounded border-[0.5px] border-[rgba(0,0,0,0.02)] shadow-sm'>
{detail.mode === 'advanced-chat' && (
<ChatBot className='w-2.5 h-2.5 text-[#1570EF]' />
)}
{detail.mode === 'agent-chat' && (
<CuteRobot className='w-2.5 h-2.5 text-indigo-600' />
)}
{detail.mode === 'chat' && (
<ChatBot className='w-2.5 h-2.5 text-[#1570EF]' />
)}
{detail.mode === 'completion' && (
<AiText className='w-2.5 h-2.5 text-[#0E9384]' />
)}
{detail.mode === 'workflow' && (
<Route className='w-2.5 h-2.5 text-[#f79009]' />
)}
</span>
)}
</div>
{!isMobile && <div className={classNames(s.appInfo, 'ml-2')}>{detail?.name || '--'}</div>}
</Link>
)
}

const TargetIcon = ({ className }: SVGProps<SVGElement>) => {
return <svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg" className={className ?? ''}>
<g clipPath="url(#clip0_4610_6951)">
Expand Down Expand Up @@ -117,65 +68,80 @@ const BookOpenIcon = ({ className }: SVGProps<SVGElement>) => {
type IExtraInfoProps = {
isMobile: boolean
relatedApps?: RelatedAppResponse
expand: boolean
}

const ExtraInfo = ({ isMobile, relatedApps }: IExtraInfoProps) => {
const ExtraInfo = ({ isMobile, relatedApps, expand }: IExtraInfoProps) => {
const locale = getLocaleOnClient()
const [isShowTips, { toggle: toggleTips, set: setShowTips }] = useBoolean(!isMobile)
const { t } = useTranslation()

const hasRelatedApps = relatedApps?.data && relatedApps?.data?.length > 0
const relatedAppsTotal = relatedApps?.data?.length || 0

useEffect(() => {
setShowTips(!isMobile)
}, [isMobile, setShowTips])

return <div className='w-full flex flex-col items-center'>
<Divider className='mt-5' />
{(relatedApps?.data && relatedApps?.data?.length > 0) && (
return <div>
{hasRelatedApps && (
<>
{!isMobile && <div className='w-full px-2 pb-1 pt-4 uppercase text-xs text-gray-500 font-medium'>{relatedApps?.total || '--'} {t('common.datasetMenus.relatedApp')}</div>}
{!isMobile && (
<Tooltip
position='right'
noDecoration
needsDelay
popupContent={
<LinkedAppsPanel
relatedApps={relatedApps.data}
isMobile={isMobile}
/>
}
>
<div className='inline-flex items-center system-xs-medium-uppercase text-text-secondary space-x-1 cursor-pointer'>
<span>{relatedAppsTotal || '--'} {t('common.datasetMenus.relatedApp')}</span>
<RiInformation2Line className='w-4 h-4' />
</div>
</Tooltip>
)}

{isMobile && <div className={classNames(s.subTitle, 'flex items-center justify-center !px-0 gap-1')}>
{relatedApps?.total || '--'}
{relatedAppsTotal || '--'}
<PaperClipIcon className='h-4 w-4 text-gray-700' />
</div>}
{relatedApps?.data?.map((item, index) => (<LikedItem key={index} isMobile={isMobile} detail={item} />))}
</>
)}
{!relatedApps?.data?.length && (
<FloatPopoverContainer
placement='bottom-start'
open={isShowTips}
toggle={toggleTips}
isMobile={isMobile}
triggerElement={
<div className={classNames('h-7 w-7 inline-flex justify-center items-center rounded-lg bg-transparent', isShowTips && '!bg-gray-50')}>
<QuestionMarkCircleIcon className='h-4 w-4 flex-shrink-0 text-gray-500' />
{!hasRelatedApps && !expand && (
<Tooltip
position='right'
noDecoration
needsDelay
popupContent={
<div className='p-4 w-[240px] bg-components-panel-bg-blur border-[0.5px] border-components-panel-border rounded-xl'>
<div className='inline-flex p-2 rounded-lg border-[0.5px] border-components-panel-border-subtle bg-background-default-subtle'>
<RiApps2AddLine className='h-4 w-4 text-text-tertiary' />
</div>
<div className='text-xs text-text-tertiary my-2'>{t('common.datasetMenus.emptyTip')}</div>
<a
className='inline-flex items-center text-xs text-text-accent mt-2 cursor-pointer'
href={
locale === LanguagesSupported[1]
? 'https://docs.dify.ai/v/zh-hans/guides/knowledge-base/integrate-knowledge-within-application'
: 'https://docs.dify.ai/guides/knowledge-base/integrate-knowledge-within-application'
}
target='_blank' rel='noopener noreferrer'
>
<BookOpenIcon className='mr-1' />
{t('common.datasetMenus.viewDoc')}
</a>
</div>
}
>
<div className={classNames('mt-5 p-3', isMobile && 'border-[0.5px] border-gray-200 shadow-lg rounded-lg bg-white w-[160px]')}>
<div className='flex items-center justify-start gap-2'>
<div className={s.emptyIconDiv}>
<Squares2X2Icon className='w-3 h-3 text-gray-500' />
</div>
<div className={s.emptyIconDiv}>
<PuzzlePieceIcon className='w-3 h-3 text-gray-500' />
</div>
</div>
<div className='text-xs text-gray-500 mt-2'>{t('common.datasetMenus.emptyTip')}</div>
<a
className='inline-flex items-center text-xs text-primary-600 mt-2 cursor-pointer'
href={
locale === LanguagesSupported[1]
? 'https://docs.dify.ai/v/zh-hans/guides/knowledge-base/integrate-knowledge-within-application'
: 'https://docs.dify.ai/guides/knowledge-base/integrate-knowledge-within-application'
}
target='_blank' rel='noopener noreferrer'
>
<BookOpenIcon className='mr-1' />
{t('common.datasetMenus.viewDoc')}
</a>
<div className='inline-flex items-center system-xs-medium-uppercase text-text-secondary space-x-1 cursor-pointer'>
<span>{t('common.datasetMenus.noRelatedApp')}</span>
<RiInformation2Line className='w-4 h-4' />
</div>
</FloatPopoverContainer>
</Tooltip>
)}
</div>
}
Expand Down Expand Up @@ -235,7 +201,7 @@ const DatasetDetailLayout: FC<IAppDetailLayoutProps> = (props) => {
}, [isMobile, setAppSiderbarExpand])

if (!datasetRes && !error)
return <Loading />
return <Loading type='app' />

return (
<div className='grow flex overflow-hidden'>
Expand All @@ -246,15 +212,15 @@ const DatasetDetailLayout: FC<IAppDetailLayoutProps> = (props) => {
desc={datasetRes?.description || '--'}
isExternal={datasetRes?.provider === 'external'}
navigation={navigation}
extraInfo={!isCurrentWorkspaceDatasetOperator ? mode => <ExtraInfo isMobile={mode === 'collapse'} relatedApps={relatedApps} /> : undefined}
extraInfo={!isCurrentWorkspaceDatasetOperator ? mode => <ExtraInfo isMobile={mode === 'collapse'} relatedApps={relatedApps} expand={mode === 'collapse'} /> : undefined}
iconType={datasetRes?.data_source_type === DataSourceType.NOTION ? 'notion' : 'dataset'}
/>}
<DatasetDetailContext.Provider value={{
indexingTechnique: datasetRes?.indexing_technique,
dataset: datasetRes,
mutateDatasetRes: () => mutateDatasetRes(),
}}>
<div className="bg-white grow overflow-hidden">{children}</div>
<div className="bg-background-default-subtle grow overflow-hidden">{children}</div>
</DatasetDetailContext.Provider>
</div>
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ const Settings = async () => {
const { t } = await translate(locale, 'dataset-settings')

return (
<div className='bg-white h-full overflow-y-auto'>
<div className='h-full overflow-y-auto'>
<div className='px-6 py-3'>
<div className='mb-1 text-lg font-semibold text-gray-900'>{t('title')}</div>
<div className='text-sm text-gray-500'>{t('desc')}</div>
<div className='mb-1 system-xl-semibold text-text-primary'>{t('title')}</div>
<div className='system-sm-regular text-text-tertiary'>{t('desc')}</div>
</div>
<Form />
</div>
Expand Down
Loading

0 comments on commit 10c8f50

Please sign in to comment.