-
Notifications
You must be signed in to change notification settings - Fork 3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Knowledge #1567
Knowledge #1567
Changes from 19 commits
75322b2
dc314c1
a8a2f80
1a35114
6131dba
617ee98
4af263c
59165cb
86ede83
7b59c5b
98a708c
10f445e
cb03ee6
cdf5233
b907938
352d053
b2c06d5
cbfcde7
4831dcb
d579c5a
b104404
70910dd
c8bf242
cbfdbe3
e882725
efa8a37
de742c8
914067d
0c5b6f2
705ee16
58bf2d5
ec2fe6f
8373c9b
e7d816f
787f2ea
b185b9e
4663997
76da972
fe18da5
23276cb
3c4504b
44ab749
52189a4
8a54042
8564f55
38c0d61
9329119
6359b64
c0ad457
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
from abc import ABC, abstractmethod | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Question: I imagine that the path of this file is not correct. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Looks right? The abstract class could be inside the source dir. |
||
from typing import List | ||
|
||
from crewai.knowledge.embedder.base_embedder import BaseEmbedder | ||
|
||
|
||
class BaseKnowledgeSource(ABC):
    """Interface that every knowledge source implementation must satisfy.

    Instances carry the chunking configuration (``chunk_size`` and
    ``chunk_overlap``) and a ``chunks`` list that starts out empty.
    """

    def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200) -> None:
        """Store the chunking settings and start with no chunks.

        Args:
            chunk_size: Value stored as ``self.chunk_size``.
            chunk_overlap: Value stored as ``self.chunk_overlap``.
        """
        self.chunks: List[str] = []
        self.chunk_overlap = chunk_overlap
        self.chunk_size = chunk_size

    @abstractmethod
    def load_content(self):
        """Load the source's content and preprocess it."""

    @abstractmethod
    def add(self, embedder: BaseEmbedder) -> None:
        """Chunk the content, compute embeddings, and add it to the knowledge base."""

    @abstractmethod
    def query(self, embedder: BaseEmbedder, query: str, top_k: int = 3) -> str:
        """Run a semantic search against the knowledge base."""
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,6 +39,16 @@ Repository = "https://github.com/crewAIInc/crewAI" | |
[project.optional-dependencies] | ||
tools = ["crewai-tools>=0.14.0"] | ||
agentops = ["agentops>=0.3.0"] | ||
fastembed = ["fastembed>=0.4.1"] | ||
pdfplumber = [ | ||
"pdfplumber>=0.11.4", | ||
] | ||
pandas = [ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Suggestion: I'm wondering if we need to keep "pandas" as an optional dependency. I took a look at the code, and it seems we're only using it to read Excel files and save them as CSVs. Maybe we could find some lighter libraries to handle that? Just a thought! If the lib is still required, maybe we should go with "polars".
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These are optional deps; maybe this can be a fast follow? |
||
"pandas>=2.2.3", | ||
] | ||
openpyxl = [ | ||
"openpyxl>=3.1.5", | ||
] | ||
mem0 = ["mem0ai>=0.1.29"] | ||
|
||
[tool.uv] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,19 @@ | ||
import os | ||
import shutil | ||
import subprocess | ||
from typing import Any, List, Literal, Optional, Union | ||
from typing import Any, List, Literal, Optional, Union, Dict, Any | ||
|
||
from pydantic import Field, InstanceOf, PrivateAttr, model_validator | ||
|
||
from crewai.agents import CacheHandler | ||
from crewai.agents.agent_builder.base_agent import BaseAgent | ||
from crewai.agents.crew_agent_executor import CrewAgentExecutor | ||
from crewai.cli.constants import ENV_VARS | ||
from crewai.knowledge.knowledge import Knowledge | ||
from crewai.llm import LLM | ||
from crewai.memory.contextual.contextual_memory import ContextualMemory | ||
from crewai.tools.agent_tools.agent_tools import AgentTools | ||
from crewai.tools import BaseTool | ||
from crewai.tools.agent_tools.agent_tools import AgentTools | ||
from crewai.utilities import Converter, Prompts | ||
from crewai.utilities.constants import TRAINED_AGENTS_DATA_FILE, TRAINING_DATA_FILE | ||
from crewai.utilities.token_counter_callback import TokenCalcHandler | ||
|
@@ -52,6 +53,7 @@ class Agent(BaseAgent): | |
role: The role of the agent. | ||
goal: The objective of the agent. | ||
backstory: The backstory of the agent. | ||
knowledge: The knowledge base of the agent. | ||
config: Dict representation of agent configuration. | ||
llm: The language model that will run the agent. | ||
function_calling_llm: The language model that will handle the tool calling for this agent, it overrides the crew function_calling_llm. | ||
|
@@ -119,6 +121,7 @@ class Agent(BaseAgent): | |
default="safe", | ||
description="Mode for code execution: 'safe' (using Docker) or 'unsafe' (direct execution).", | ||
) | ||
_knowledge: Optional[Knowledge] = PrivateAttr(default=None) | ||
|
||
@model_validator(mode="after") | ||
def post_init_setup(self): | ||
|
@@ -227,6 +230,12 @@ def post_init_setup(self): | |
if self.allow_code_execution: | ||
self._validate_docker_installation() | ||
|
||
# Initialize the Knowledge object if knowledge_sources are provided | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nitpick: But in here you can do
Or even remove the = None |
||
if self.crew and self.crew.knowledge: | ||
self._knowledge = self.crew.knowledge | ||
else: | ||
self._knowledge = None | ||
|
||
return self | ||
|
||
def _setup_agent_executor(self): | ||
|
@@ -272,6 +281,21 @@ def execute_task( | |
if memory.strip() != "": | ||
task_prompt += self.i18n.slice("memory").format(memory=memory) | ||
|
||
# Integrate the knowledge base | ||
if self.crew and self.crew.knowledge: | ||
knowledge_snippets: List[Dict[str, Any]] = self.crew.knowledge.query( | ||
[task.prompt()] | ||
) | ||
if knowledge_snippets: | ||
valid_snippets = [ | ||
result["context"] | ||
for result in knowledge_snippets | ||
if result and result.get("context") | ||
] | ||
if valid_snippets: | ||
formatted_knowledge = "\n".join(valid_snippets) | ||
task_prompt += f"\n\nAdditional Information:\n{formatted_knowledge}" | ||
|
||
tools = tools or self.tools or [] | ||
self.create_agent_executor(tools=tools, task=task) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -136,24 +136,25 @@ def log_tasks_outputs() -> None: | |
@click.option("-l", "--long", is_flag=True, help="Reset LONG TERM memory") | ||
@click.option("-s", "--short", is_flag=True, help="Reset SHORT TERM memory") | ||
@click.option("-e", "--entities", is_flag=True, help="Reset ENTITIES memory") | ||
@click.option("-kn", "--knowledge", is_flag=True, help="Reset KNOWLEDGE storage") | ||
@click.option( | ||
"-k", | ||
"--kickoff-outputs", | ||
is_flag=True, | ||
help="Reset LATEST KICKOFF TASK OUTPUTS", | ||
) | ||
@click.option("-a", "--all", is_flag=True, help="Reset ALL memories") | ||
def reset_memories(long, short, entities, kickoff_outputs, all): | ||
def reset_memories(long, short, entities, knowledge, kickoff_outputs, all): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nitpick: Maybe add type-hints 😅 |
||
""" | ||
Reset the crew memories (long, short, entity, latest_crew_kickoff_ouputs). This will delete all the data saved. | ||
""" | ||
try: | ||
if not all and not (long or short or entities or kickoff_outputs): | ||
if not all and not (long or short or entities or knowledge or kickoff_outputs): | ||
click.echo( | ||
"Please specify at least one memory type to reset using the appropriate flags." | ||
) | ||
return | ||
reset_memories_command(long, short, entities, kickoff_outputs, all) | ||
reset_memories_command(long, short, entities, knowledge, kickoff_outputs, all) | ||
except Exception as e: | ||
click.echo(f"An error occurred while resetting memories: {e}", err=True) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
from abc import ABC, abstractmethod | ||
from typing import List | ||
|
||
import numpy as np | ||
|
||
|
||
class BaseEmbedder(ABC):
    """Interface for models that turn text into embedding arrays."""

    @abstractmethod
    def embed_chunks(self, chunks: List[str]) -> np.ndarray:
        """Embed a list of text chunks.

        Args:
            chunks: The text chunks to embed.

        Returns:
            An array holding the embeddings.
        """

    @abstractmethod
    def embed_texts(self, texts: List[str]) -> np.ndarray:
        """Embed a list of texts.

        Args:
            texts: The texts to embed.

        Returns:
            An array holding the embeddings.
        """

    @abstractmethod
    def embed_text(self, text: str) -> np.ndarray:
        """Embed one text.

        Args:
            text: The text to embed.

        Returns:
            The embedding array for ``text``.
        """

    @property
    @abstractmethod
    def dimension(self) -> int:
        """Return the dimension of the embeddings."""
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Question: Do you think we need the `--all-extras` option in this case? It seems like we'll have to install all the optional dependencies to be able to run our tests. What do you think?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, there are a bunch of optional deps that were brought in, like `pdfplumber` for our PdfKnowledgeSource.