Merge branch 'main' into patch-1
patcher9 authored Nov 21, 2024
2 parents 827ad0f + f8ca49d commit 2a7067f
Showing 50 changed files with 4,379 additions and 855 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
@@ -26,7 +26,7 @@ jobs:
run: uv python install 3.11.9

- name: Install the project
run: uv sync --dev
run: uv sync --dev --all-extras

- name: Run tests
run: uv run pytest tests
run: uv run pytest tests -vv
4 changes: 2 additions & 2 deletions README.md
@@ -100,7 +100,7 @@ You can now start developing your crew by editing the files in the `src/my_proje

#### Example of a simple crew with a sequential process:

Instatiate your crew:
Instantiate your crew:

```shell
crewai create crew latest-ai-development
@@ -399,7 +399,7 @@ Data collected includes:
- Roles of agents in a crew
- Understand high level use cases so we can build better tools, integrations and examples about it
- Tools names available
- Understand out of the publically available tools, which ones are being used the most so we can improve them
- Understand out of the publicly available tools, which ones are being used the most so we can improve them

Users can opt-in to Further Telemetry, sharing the complete telemetry data by setting the `share_crew` attribute to `True` on their Crews. Enabling `share_crew` results in the collection of detailed crew and task execution data, including `goal`, `backstory`, `context`, and `output` of tasks. This enables a deeper insight into usage patterns while respecting the user's choice to share.
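
As a minimal sketch (the agent and task below are placeholders used only to show where the flag goes), opting in looks like this:

```python
from crewai import Agent, Crew, Task

# Placeholder agent and task, defined only to illustrate the share_crew flag.
agent = Agent(role="Researcher", goal="Research a topic", backstory="An example agent.")
task = Task(description="Summarize the topic", expected_output="A short summary", agent=agent)

# share_crew=True opts this crew in to Further Telemetry, which includes the
# detailed execution data described above (goal, backstory, context, output).
crew = Crew(agents=[agent], tasks=[task], share_crew=True)
```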

75 changes: 75 additions & 0 deletions docs/concepts/knowledge.mdx
@@ -0,0 +1,75 @@
---
title: Knowledge
description: What is knowledge in CrewAI and how to use it.
icon: book
---

# Using Knowledge in CrewAI

## Introduction

The Knowledge class in CrewAI provides a powerful way to manage and query knowledge sources for your AI agents. This guide will show you how to implement knowledge management in your CrewAI projects.
Additionally, we provide dedicated source types for generating knowledge from strings, text files, PDFs, and spreadsheets. You can support any other source type by extending the `KnowledgeSource` class.
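
For example, a text-file source can be constructed much like the string source used below. The module path and the `file_path` argument in this sketch follow the same naming pattern as the string source but are assumptions, so verify them against your installed CrewAI version:

```python
# Assumed import path, mirroring string_knowledge_source; confirm it exists in your version.
from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource

# Build a knowledge source from a local text file (parameter name is an assumption).
text_source = TextFileKnowledgeSource(
    file_path="user_preferences.txt",
    metadata={"preference": "personal"},
)
```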

## Basic Implementation

Here's a simple example of how to use the Knowledge class:

```python
from crewai import Agent, Task, Crew, Process, LLM
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource

# Create a knowledge source
content = "Users name is John. He is 30 years old and lives in San Francisco."
string_source = StringKnowledgeSource(
    content=content, metadata={"preference": "personal"}
)

llm = LLM(model="gpt-4o-mini", temperature=0)

# Create an agent with the knowledge store
agent = Agent(
    role="About User",
    goal="You know everything about the user.",
    backstory="""You are a master at understanding people and their preferences.""",
    verbose=True,
    allow_delegation=False,
    llm=llm,
)
task = Task(
    description="Answer the following questions about the user: {question}",
    expected_output="An answer to the question.",
    agent=agent,
)

crew = Crew(
    agents=[agent],
    tasks=[task],
    verbose=True,
    process=Process.sequential,
    # Enable knowledge by adding the sources here. You can also add more sources to the sources list.
    knowledge={"sources": [string_source], "metadata": {"preference": "personal"}},
)

result = crew.kickoff(inputs={"question": "What city does John live in and how old is he?"})
```


## Embedder Configuration

You can also configure the embedder for the knowledge store. This is useful when you want the knowledge store to use a different embedder than the one used by the agents.

```python
...
string_source = StringKnowledgeSource(
    content="Users name is John. He is 30 years old and lives in San Francisco.",
    metadata={"preference": "personal"},
)
crew = Crew(
    ...
    knowledge={
        "sources": [string_source],
        "metadata": {"preference": "personal"},
        "embedder_config": {"provider": "openai", "config": {"model": "text-embedding-3-small"}},
    },
)
```
8 changes: 5 additions & 3 deletions docs/concepts/llms.mdx
@@ -47,6 +47,8 @@ researcher:
# llm: ollama/llama3:70b
# llm: groq/llama-3.2-90b-vision-preview
# llm: watsonx/meta-llama/llama-3-1-70b-instruct
# llm: nvidia_nim/meta/llama3-70b-instruct
# llm: sambanova/Meta-Llama-3.1-8B-Instruct
# ...
```

@@ -308,8 +310,8 @@ These are examples of how to configure LLMs for your agent.
from crewai import LLM

llm = LLM(
model="perplexity/mistral-7b-instruct",
base_url="https://api.perplexity.ai/v1",
model="llama-3.1-sonar-large-128k-online",
base_url="https://api.perplexity.ai/",
api_key="your-api-key-here"
)
agent = Agent(llm=llm, ...)
@@ -398,4 +400,4 @@ This is particularly useful when working with OpenAI-compatible APIs or when you
- **API Errors**: Check your API key, network connection, and rate limits.
- **Unexpected Outputs**: Refine your prompts and adjust temperature or top_p.
- **Performance Issues**: Consider using a more powerful model or optimizing your queries.
- **Timeout Errors**: Increase the `timeout` parameter or optimize your input.
- **Timeout Errors**: Increase the `timeout` parameter or optimize your input.
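
For example, a longer timeout can be passed when constructing the LLM (a minimal sketch; 120 seconds is an arbitrary illustrative value, not a recommended default):

```python
from crewai import LLM

# Raise the per-request timeout for slow or long-running completions.
llm = LLM(
    model="gpt-4o-mini",
    timeout=120,  # seconds; arbitrary example value
)
```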
6 changes: 4 additions & 2 deletions docs/tools/githubsearchtool.mdx
@@ -34,20 +34,23 @@ from crewai_tools import GithubSearchTool
# Initialize the tool for semantic searches within a specific GitHub repository
tool = GithubSearchTool(
    github_repo='https://github.com/example/repo',
    gh_token='your_github_personal_access_token',
    content_types=['code', 'issue']  # Options: code, repo, pr, issue
)

# OR

# Initialize the tool without a specific repository, so the agent can search any repository it learns about during its execution
tool = GithubSearchTool(
    gh_token='your_github_personal_access_token',
    content_types=['code', 'issue']  # Options: code, repo, pr, issue
)
```

## Arguments

- `github_repo` : The URL of the GitHub repository where the search will be conducted. This is a mandatory field and specifies the target repository for your search.
- `gh_token` : Your GitHub Personal Access Token (PAT) required for authentication. You can create one in your GitHub account settings under Developer Settings > Personal Access Tokens.
- `content_types` : Specifies the types of content to include in your search. You must provide a list of content types from the following options: `code` for searching within the code,
`repo` for searching within the repository's general information, `pr` for searching within pull requests, and `issue` for searching within issues.
This field is mandatory and allows tailoring the search to specific content types within the GitHub repository.
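
As a sketch, the token can also be read from an environment variable instead of being hard-coded (the variable name `GITHUB_PAT` is just an example):

```python
import os

from crewai_tools import GithubSearchTool

# Supply the PAT from the environment rather than embedding it in source code.
tool = GithubSearchTool(
    gh_token=os.environ["GITHUB_PAT"],  # example variable name
    content_types=['code', 'issue']  # Options: code, repo, pr, issue
)
```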
@@ -77,5 +80,4 @@ tool = GithubSearchTool(
),
),
)
)
```
)
10 changes: 10 additions & 0 deletions pyproject.toml
@@ -39,6 +39,16 @@ Repository = "https://github.com/crewAIInc/crewAI"
[project.optional-dependencies]
tools = ["crewai-tools>=0.14.0"]
agentops = ["agentops>=0.3.0"]
fastembed = ["fastembed>=0.4.1"]
pdfplumber = [
    "pdfplumber>=0.11.4",
]
pandas = [
    "pandas>=2.2.3",
]
openpyxl = [
    "openpyxl>=3.1.5",
]
mem0 = ["mem0ai>=0.1.29"]

[tool.uv]
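As a rough sketch, these extras can be pulled in at install time; the `crewai[...]` syntax below assumes the package is published under the `crewai` name, and the `uv` line mirrors the CI workflow change above:

```shell
# Install crewai together with the knowledge-related extras added here
pip install "crewai[pdfplumber,pandas,openpyxl,fastembed]"

# Or, for development, sync every extra as the CI workflow now does
uv sync --dev --all-extras
```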
14 changes: 13 additions & 1 deletion src/crewai/__init__.py
@@ -1,7 +1,9 @@
import warnings

from crewai.agent import Agent
from crewai.crew import Crew
from crewai.flow.flow import Flow
from crewai.knowledge.knowledge import Knowledge
from crewai.llm import LLM
from crewai.pipeline import Pipeline
from crewai.process import Process
@@ -15,4 +17,14 @@
module="pydantic.main",
)
__version__ = "0.80.0"
__all__ = ["Agent", "Crew", "Process", "Task", "Pipeline", "Router", "LLM", "Flow"]
__all__ = [
    "Agent",
    "Crew",
    "Process",
    "Task",
    "Pipeline",
    "Router",
    "LLM",
    "Flow",
    "Knowledge",
]
15 changes: 14 additions & 1 deletion src/crewai/agent.py
@@ -11,8 +11,8 @@
from crewai.cli.constants import ENV_VARS
from crewai.llm import LLM
from crewai.memory.contextual.contextual_memory import ContextualMemory
from crewai.tools.agent_tools.agent_tools import AgentTools
from crewai.tools import BaseTool
from crewai.tools.agent_tools.agent_tools import AgentTools
from crewai.utilities import Converter, Prompts
from crewai.utilities.constants import TRAINED_AGENTS_DATA_FILE, TRAINING_DATA_FILE
from crewai.utilities.token_counter_callback import TokenCalcHandler
@@ -52,6 +52,7 @@ class Agent(BaseAgent):
role: The role of the agent.
goal: The objective of the agent.
backstory: The backstory of the agent.
knowledge: The knowledge base of the agent.
config: Dict representation of agent configuration.
llm: The language model that will run the agent.
function_calling_llm: The language model that will handle the tool calling for this agent, it overrides the crew function_calling_llm.
@@ -272,6 +273,18 @@ def execute_task(
if memory.strip() != "":
task_prompt += self.i18n.slice("memory").format(memory=memory)

# Integrate the knowledge base
if self.crew and self.crew.knowledge:
knowledge_snippets = self.crew.knowledge.query([task.prompt()])
valid_snippets = [
result["context"]
for result in knowledge_snippets
if result and result.get("context")
]
if valid_snippets:
formatted_knowledge = "\n".join(valid_snippets)
task_prompt += f"\n\nAdditional Information:\n{formatted_knowledge}"

tools = tools or self.tools or []
self.create_agent_executor(tools=tools, task=task)

14 changes: 11 additions & 3 deletions src/crewai/cli/cli.py
@@ -136,24 +136,32 @@ def log_tasks_outputs() -> None:
@click.option("-l", "--long", is_flag=True, help="Reset LONG TERM memory")
@click.option("-s", "--short", is_flag=True, help="Reset SHORT TERM memory")
@click.option("-e", "--entities", is_flag=True, help="Reset ENTITIES memory")
@click.option("-kn", "--knowledge", is_flag=True, help="Reset KNOWLEDGE storage")
@click.option(
"-k",
"--kickoff-outputs",
is_flag=True,
help="Reset LATEST KICKOFF TASK OUTPUTS",
)
@click.option("-a", "--all", is_flag=True, help="Reset ALL memories")
def reset_memories(long, short, entities, kickoff_outputs, all):
def reset_memories(
    long: bool,
    short: bool,
    entities: bool,
    knowledge: bool,
    kickoff_outputs: bool,
    all: bool,
) -> None:
"""
Reset the crew memories (long, short, entity, latest_crew_kickoff_ouputs). This will delete all the data saved.
"""
try:
if not all and not (long or short or entities or kickoff_outputs):
if not all and not (long or short or entities or knowledge or kickoff_outputs):
click.echo(
"Please specify at least one memory type to reset using the appropriate flags."
)
return
reset_memories_command(long, short, entities, kickoff_outputs, all)
reset_memories_command(long, short, entities, knowledge, kickoff_outputs, all)
except Exception as e:
click.echo(f"An error occurred while resetting memories: {e}", err=True)

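For instance, assuming this function is exposed as the `crewai reset-memories` command (the command registration itself is not shown in this diff), the new flag would be used like this:

```shell
# Reset only the knowledge storage
crewai reset-memories --knowledge

# Reset everything, including knowledge
crewai reset-memories --all
```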
23 changes: 8 additions & 15 deletions src/crewai/cli/constants.py
@@ -123,21 +123,14 @@
],
"ollama": ["ollama/llama3.1", "ollama/mixtral"],
"watson": [
"watsonx/google/flan-t5-xxl",
"watsonx/google/flan-ul2",
"watsonx/bigscience/mt0-xxl",
"watsonx/eleutherai/gpt-neox-20b",
"watsonx/ibm/mpt-7b-instruct2",
"watsonx/bigcode/starcoder",
"watsonx/meta-llama/llama-2-70b-chat",
"watsonx/meta-llama/llama-2-13b-chat",
"watsonx/ibm/granite-13b-instruct-v1",
"watsonx/ibm/granite-13b-chat-v1",
"watsonx/google/flan-t5-xl",
"watsonx/ibm/granite-13b-chat-v2",
"watsonx/ibm/granite-13b-instruct-v2",
"watsonx/elyza/elyza-japanese-llama-2-7b-instruct",
"watsonx/ibm-mistralai/mixtral-8x7b-instruct-v01-q",
"watsonx/meta-llama/llama-3-1-70b-instruct",
"watsonx/meta-llama/llama-3-1-8b-instruct",
"watsonx/meta-llama/llama-3-2-11b-vision-instruct",
"watsonx/meta-llama/llama-3-2-1b-instruct",
"watsonx/meta-llama/llama-3-2-90b-vision-instruct",
"watsonx/meta-llama/llama-3-405b-instruct",
"watsonx/mistral/mistral-large",
"watsonx/ibm/granite-3-8b-instruct",
],
"bedrock": [
"bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
15 changes: 14 additions & 1 deletion src/crewai/cli/reset_memories_command.py
@@ -5,9 +5,17 @@
from crewai.memory.long_term.long_term_memory import LongTermMemory
from crewai.memory.short_term.short_term_memory import ShortTermMemory
from crewai.utilities.task_output_storage_handler import TaskOutputStorageHandler
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage


def reset_memories_command(long, short, entity, kickoff_outputs, all) -> None:
def reset_memories_command(
    long,
    short,
    entity,
    knowledge,
    kickoff_outputs,
    all,
) -> None:
"""
Reset the crew memories.
@@ -17,6 +25,7 @@ def reset_memories_command(long, short, entity, kickoff_outputs, all) -> None:
entity (bool): Whether to reset the entity memory.
kickoff_outputs (bool): Whether to reset the latest kickoff task outputs.
all (bool): Whether to reset all memories.
knowledge (bool): Whether to reset the knowledge.
"""

try:
@@ -25,6 +34,7 @@ def reset_memories_command(long, short, entity, kickoff_outputs, all) -> None:
EntityMemory().reset()
LongTermMemory().reset()
TaskOutputStorageHandler().reset()
KnowledgeStorage().reset()
click.echo("All memories have been reset.")
else:
if long:
@@ -40,6 +50,9 @@ def reset_memories_command(long, short, entity, kickoff_outputs, all) -> None:
if kickoff_outputs:
TaskOutputStorageHandler().reset()
click.echo("Latest Kickoff outputs stored has been reset.")
if knowledge:
KnowledgeStorage().reset()
click.echo("Knowledge has been reset.")

except subprocess.CalledProcessError as e:
click.echo(f"An error occurred while resetting the memories: {e}", err=True)