Merge branch 'main' into releases/rc-trulens-eval-0.23.0
joshreini1 authored Feb 16, 2024
2 parents b89ebdc + 5b3e8fc commit 89a28ad
Showing 22 changed files with 347 additions and 317 deletions.
3 changes: 0 additions & 3 deletions docs/trulens_eval/api/endpoint.md

This file was deleted.

3 changes: 3 additions & 0 deletions docs/trulens_eval/api/endpoint/base.md
@@ -0,0 +1,3 @@
# Endpoint

::: trulens_eval.feedback.provider.endpoint.base
3 changes: 3 additions & 0 deletions docs/trulens_eval/api/endpoint/openai.md
@@ -0,0 +1,3 @@
# OpenAI Endpoint

::: trulens_eval.feedback.provider.endpoint.openai
8 changes: 0 additions & 8 deletions docs/trulens_eval/api/provider/llmprovider.md
@@ -1,11 +1,3 @@
# LLM Provider

LLMProvider is the base class containing feedback functions that use an LLM for
evaluation. This includes:

* [OpenAI][trulens_eval.feedback.provider.openai.OpenAI] and subclass [AzureOpenAI][trulens_eval.feedback.provider.openai.AzureOpenAI].
* [Bedrock][trulens_eval.feedback.provider.bedrock.Bedrock].
* [LiteLLM][trulens_eval.feedback.provider.litellm.LiteLLM].
* [Langchain][trulens_eval.feedback.provider.langchain.Langchain].

::: trulens_eval.feedback.provider.base.LLMProvider
2 changes: 1 addition & 1 deletion docs/trulens_eval/api/utils/python.md
@@ -6,4 +6,4 @@

::: trulens_eval.utils.threading

::: trulens_eval.utils.asynchro
6 changes: 4 additions & 2 deletions mkdocs.yml
@@ -218,8 +218,10 @@ nav:
- AWS Bedrock: trulens_eval/api/provider/bedrock.md
- LiteLLM: trulens_eval/api/provider/litellm.md
- 🦜️🔗 Langchain: trulens_eval/api/provider/langchain.md
- 🤗 HuggingFace: trulens_eval/api/provider/huggingface.md
- Endpoint:
- Base: trulens_eval/api/endpoint/base.md
- OpenAI: trulens_eval/api/endpoint/openai.md
- Utils:
- trulens_eval/api/utils/python.md
- trulens_eval/api/utils/serial.md
11 changes: 7 additions & 4 deletions trulens_eval/TECHDEBT.md
@@ -13,11 +13,11 @@ lives.
See `instruments.py` docstring for discussion why these are done.

- We inspect the call stack in the process of tracking method invocation. It may
be possible to replace this with `contextvars` (see the sketch after this list).

- "HACK012" -- In the optional imports scheme, we have to make sure that imports
that happen from outside of trulens raise exceptions instead of
producing dummies without raising exceptions.

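Below is a minimal, hypothetical sketch (not trulens_eval code; names are made up) of the `contextvars` alternative mentioned above: the invocation context is stored in a `ContextVar` that instrumented callees read, instead of inspecting the call stack.

```python
import contextvars

# Hypothetical context holder; the real tracking machinery in trulens differs.
current_record = contextvars.ContextVar("current_record", default=None)


def instrumented_call(app_name: str):
    # Establish the tracking context for everything called underneath.
    token = current_record.set({"app": app_name, "calls": []})
    try:
        inner_step()
        return current_record.get()
    finally:
        current_record.reset(token)


def inner_step():
    record = current_record.get()
    if record is not None:
        # No stack inspection needed to find the active record.
        record["calls"].append("inner_step")


print(instrumented_call("my_app"))  # {'app': 'my_app', 'calls': ['inner_step']}
```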
## Method overriding

@@ -59,7 +59,8 @@ See `instruments.py` docstring for discussion why these are done.
otherwise.

- "HACK005" -- `model_validate` inside `WithClassInfo` is implemented in
decorated method because pydantic doesn't call it otherwise. It is uncertain
whether this is a pydantic bug.

- We dump attributes marked to be excluded by pydantic except our own classes.
This is because some objects are of interest despite being marked to exclude.
@@ -88,6 +89,8 @@ See `instruments.py` docstring for discussion why these are done.

- "HACK012" -- same but with `Queue`.

- Similarly, we define `NoneType` for older python versions.

- "HACK013" -- when using `from __future__ import annotations` for more
convenient type annotation specification, one may have to call pydantic's
`BaseModel.model_rebuild` after all types referenced in annotations in that file
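For HACK013, here is a minimal sketch of when `model_rebuild` becomes necessary under postponed annotations. This is generic pydantic v2 usage, not trulens_eval code; `Container` and `Item` are illustrative names.

```python
from __future__ import annotations

from pydantic import BaseModel


class Container(BaseModel):
    # With postponed annotations this is just the string "Item", and Item
    # is not yet defined when this class is created.
    item: Item


class Item(BaseModel):
    value: int


# Resolve the forward reference now that Item exists; without this call,
# validating a Container raises a "class is not fully defined" error.
Container.model_rebuild()

print(Container(item=Item(value=1)))
```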
71 changes: 35 additions & 36 deletions trulens_eval/trulens_eval/feedback/groundedness.py
@@ -36,42 +36,40 @@
class Groundedness(WithClassInfo, SerialModel):
"""
Measures Groundedness.

Currently the groundedness functions work well with a summarizer. This
class will use an LLM to find the relevant strings in a text. The
groundedness_provider can either be an LLM provider (such as OpenAI) or
NLI with huggingface.

Usage:
```python
from trulens_eval.feedback import Groundedness
from trulens_eval.feedback.provider.openai import OpenAI
openai_provider = OpenAI()
groundedness_imp = Groundedness(groundedness_provider=openai_provider)
```

Usage:
```python
from trulens_eval.feedback import Groundedness
from trulens_eval.feedback.provider.hugs import Huggingface
huggingface_provider = Huggingface()
groundedness_imp = Groundedness(groundedness_provider=huggingface_provider)
```

Args:
groundedness_provider: Provider to use for evaluating groundedness. This
should be [OpenAI][trulens_eval.feedback.provider.openai.OpenAI] LLM
or [HuggingFace][trulens_eval.feedback.provider.hugs.Huggingface]
NLI. Defaults to `OpenAI`.
"""

groundedness_provider: Provider

def __init__(
self, groundedness_provider: Optional[Provider] = None, **kwargs
):
if groundedness_provider is None:
logger.warning("Provider not provided. Using OpenAI.")
groundedness_provider = OpenAI()
@@ -81,32 +79,33 @@ def __init__(
def groundedness_measure_with_cot_reasons(
self, source: str, statement: str
) -> Tuple[float, dict]:
"""
A measure to track if the source material supports each sentence in
the statement using an LLM provider.

The LLM will process the entire statement at once, using chain of
thought methodology to emit the reasons.

Usage on RAG Contexts:
```python
from trulens_eval import Feedback
from trulens_eval.feedback import Groundedness
from trulens_eval.feedback.provider.openai import OpenAI
grounded = feedback.Groundedness(groundedness_provider=OpenAI())

f_groundedness = feedback.Feedback(grounded.groundedness_measure_with_cot_reasons).on(
Select.Record.app.combine_documents_chain._call.args.inputs.input_documents[:].page_content # See note below
).on_output().aggregate(grounded.grounded_statements_aggregator)
```

The `on(...)` selector can be changed. See [Feedback Function Guide : Selectors](https://www.trulens.org/trulens_eval/feedback_function_guide/#selector-details)

Args:
source: The source that should support the statement.
statement: The statement to check groundedness.

Returns:
A measure between 0 and 1, where 1 means each sentence is grounded in the source.
"""
groundedness_scores = {}
if not isinstance(self.groundedness_provider, LLMProvider):
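As a usage note for the signature above, a minimal sketch of calling the measure directly and unpacking its `(score, reasons)` result. It assumes an OpenAI key is configured; the source and statement strings are placeholders.

```python
from trulens_eval.feedback import Groundedness
from trulens_eval.feedback.provider.openai import OpenAI

grounded = Groundedness(groundedness_provider=OpenAI())

source = "TruLens provides feedback functions for evaluating LLM applications."
statement = "TruLens provides feedback functions."

score, reasons = grounded.groundedness_measure_with_cot_reasons(
    source=source, statement=statement
)
print(score)    # float between 0 and 1; 1 means every sentence is supported
print(reasons)  # dict with chain-of-thought reasons for the statement's sentences
```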
106 changes: 52 additions & 54 deletions trulens_eval/trulens_eval/feedback/provider/base.py
@@ -1,11 +1,7 @@
from abc import ABC
from abc import abstractmethod
import logging
from typing import ClassVar, Dict, Optional, Sequence, Tuple
import warnings

import pydantic

from trulens_eval.feedback import prompts
from trulens_eval.feedback.provider.endpoint.base import Endpoint
from trulens_eval.utils.generated import re_0_10_rating
@@ -35,12 +31,18 @@ class LLMProvider(Provider):
This is an abstract class and needs to be initialized as one of these:
* [OpenAI][trulens_eval.feedback.provider.openai.OpenAI] and subclass
[AzureOpenAI][trulens_eval.feedback.provider.openai.AzureOpenAI].
* [Bedrock][trulens_eval.feedback.provider.bedrock.Bedrock].
* [LiteLLM][trulens_eval.feedback.provider.litellm.LiteLLM]. LiteLLM provides an
interface to a [wide range of
models](https://docs.litellm.ai/docs/providers).
* [Langchain][trulens_eval.feedback.provider.langchain.Langchain].
"""

# NOTE(piotrm): "model_" prefix for attributes is "protected" by pydantic v2
# by default. Need the below adjustment but this means we don't get any
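The adjustment the note refers to is elided in this view. As an assumption about what it does, the usual pydantic v2 pattern for allowing `model_`-prefixed attributes is to clear `protected_namespaces`, at the cost of losing the warnings that protection provides:

```python
from pydantic import BaseModel, ConfigDict


class ExampleProvider(BaseModel):
    # Hypothetical model, not the trulens_eval class: clearing
    # protected_namespaces silences pydantic v2's warning about fields
    # whose names start with "model_".
    model_config = ConfigDict(protected_namespaces=())

    model_engine: str = "gpt-3.5-turbo"


print(ExampleProvider().model_engine)
```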
@@ -389,21 +391,20 @@ def sentiment(self, text: str) -> float:
Uses chat completion model. A function that completes a template to
check the sentiment of some text.
Usage:
```python
feedback = Feedback(provider.sentiment).on_output()
```

The `on_output()` selector can be changed. See [Feedback Function
Guide](https://www.trulens.org/trulens_eval/feedback_function_guide/)

Args:
text: The text to evaluate sentiment of.

Returns:
A value between 0 and 1. 0 being "negative sentiment" and 1
being "positive sentiment".
"""
system_prompt = prompts.SENTIMENT_SYSTEM_PROMPT + text
return self.generate_score(system_prompt=system_prompt)
@@ -519,21 +520,20 @@ def conciseness(self, text: str) -> float:
Uses chat completion model. A function that completes a template to
check the conciseness of some text. Prompt credit to Langchain Eval.
Usage:
```python
feedback = Feedback(provider.conciseness).on_output()
```

The `on_output()` selector can be changed. See [Feedback Function
Guide](https://www.trulens.org/trulens_eval/feedback_function_guide/)

Args:
text: The text to evaluate the conciseness of.

Returns:
A value between 0.0 (not concise) and 1.0 (concise).
"""
return self._langchain_evaluate(
text=text, criteria=prompts.LANGCHAIN_CONCISENESS_PROMPT
@@ -544,22 +544,21 @@ def conciseness_with_cot_reasons(self, text: str) -> Tuple[float, Dict]:
Uses chat completion model. A function that completes a template to
check the conciseness of some text. Prompt credit to Langchain Eval.
Usage:
```python
feedback = Feedback(provider.conciseness).on_output()
```

The `on_output()` selector can be changed. See [Feedback Function
Guide](https://www.trulens.org/trulens_eval/feedback_function_guide/)

Args:
text: The text to evaluate the conciseness of.

Returns:
A value between 0.0 (not concise) and 1.0 (concise)

A dictionary containing the reasons for the evaluation.
"""
return self._langchain_evaluate_with_cot_reasons(
text=text, criteria=prompts.LANGCHAIN_CONCISENESS_PROMPT
@@ -570,20 +569,19 @@ def correctness(self, text: str) -> float:
Uses chat completion model. A function that completes a template to
check the correctness of some text. Prompt credit to Langchain Eval.
Usage:
```python
feedback = Feedback(provider.correctness).on_output()
```

The `on_output()` selector can be changed. See [Feedback Function
Guide](https://www.trulens.org/trulens_eval/feedback_function_guide/)

Args:
text: A prompt to an agent.

Returns:
A value between 0.0 (not correct) and 1.0 (correct).
"""
return self._langchain_evaluate(
text=text, criteria=prompts.LANGCHAIN_CORRECTNESS_PROMPT
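Tying the docstrings above together, a minimal sketch (assuming an OpenAI key is configured) that wires these provider methods into `Feedback` objects exactly as their Usage sections describe:

```python
from trulens_eval import Feedback
from trulens_eval.feedback.provider.openai import OpenAI

provider = OpenAI()

# Each selector evaluates the app's output, per the docstrings above.
f_sentiment = Feedback(provider.sentiment).on_output()
f_conciseness = Feedback(provider.conciseness).on_output()
f_correctness = Feedback(provider.correctness).on_output()
```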