landing-ai · CamiloInx · Jul 12, 2024 · Jul 12, 2024 · Jul 12, 2024 · Jul 12, 2024
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
@@ -33,8 +33,8 @@ jobs:
           make install-zeroshot-counting
           make test-zeroshot-counting
 
-      - name: depth-estimation tests
+      - name: depth-anything-v2 tests
         run: |
-          make install-depth-estimation
-          make test-depth-estimation
+          make install-depth-anything-v2
+          make test-depth-anything-v2
 
diff --git a/Makefile b/Makefile
@@ -22,7 +22,7 @@ install-zeroshot-counting:
 	# Install loca dependencies only
 	$(POETRY) install -E loca-model
 
-install-depth-estimation:
+install-depth-anything-v2:
 	# Install depth-anything-v2 dependencies only
 	$(POETRY) install -E depth-anything-v2-model --no-interaction
 
@@ -55,8 +55,8 @@ test-zeroshot-counting:
 	# Run zeroshot-counting unit tests
 	$(POETRY) run pytest tests/tools/test_loca.py
 
-test-depth-estimation:
-	# Run depth-estimation unit tests
+test-depth-anything-v2:
+	# Run depth-anything-v2 unit tests
 	$(POETRY) run pytest tests/tools/test_depth_anything_v2.py
 
 test-florencev2:

diff --git a/docs/depth_estimation.md b/docs/depth_anything_v2.md
@@ -5,15 +5,15 @@ This example demonstrates using the Depth-Anything-V2 tool for depth estimation
 
 
 ```python
-from vision_agent_tools.tools.depth_estimation import DepthEstimation
+from vision_agent_tools.tools.depth_anything_v2 import DepthAnythingV2
 
 # (replace this path with your own!)
 test_image = "path/to/your/image.jpg"
 
 # Load the image
 image = Image.open(test_image)
 # Initialize the depth map estimation model.
-depth_estimate = DepthEstimation()
+depth_estimate = DepthAnythingV2()
 
 # Run the inference
 results = depth_estimate(image)
@@ -22,4 +22,4 @@ results = depth_estimate(image)
 print(results.map)
 ```
 
-::: vision_agent_tools.tools.depth_estimation
+::: vision_agent_tools.tools.depth_anything_v2
diff --git a/tests/tools/test_depth_anything_v2.py b/tests/tools/test_depth_anything_v2.py
@@ -1,12 +1,12 @@
 from PIL import Image
 
-from vision_agent_tools.tools.depth_estimation import DepthEstimation
+from vision_agent_tools.tools.depth_anything_v2 import DepthAnythingV2
 
 
 def test_successful_depth_estimation():
     test_image = "tomatoes.jpg"
     image = Image.open(f"tests/tools/data/loca/{test_image}")
 
-    depth = DepthEstimation()
+    depth = DepthAnythingV2()
     results = depth(image)
     assert len(results.map) > 0
diff --git a/vision_agent_tools/tools/depth_estimation.py b/vision_agent_tools/tools/depth_anything_v2.py
@@ -12,15 +12,21 @@
 from .utils import download, CHECKPOINT_DIR
 from typing import Union, Any
 from vision_agent_tools.tools.shared_types import BaseTool
-from depth_anything_v2.dpt import DepthAnythingV2
+from depth_anything_v2.dpt import DepthAnythingV2 as DepthAnythingV2Model
 from pydantic import BaseModel
 
 
 class DepthMap(BaseModel):
+    """Represents the depth map of an image.
+
+    Attributes:
+        map (Any): HxW raw depth map of the image.
+    """
+
     map: Any
 
 
-class DepthEstimation(BaseTool):
+class DepthAnythingV2(BaseTool):
     """
     Tool for depth estimation using the Depth-Anything-V2 model from the paper
     [Depth Anything V2](https://github.com/DepthAnything/Depth-Anything-V2).
@@ -41,7 +47,7 @@ def __init__(self) -> None:
             "depth_anything_v2_vits.pth",
         )
         # init model
-        self._model = DepthAnythingV2(
+        self._model = DepthAnythingV2Model(
             encoder="vits", features=64, out_channels=[48, 96, 192, 384]
         )
 
@@ -78,8 +84,6 @@ def __call__(self, image: Union[str, Image.Image]) -> DepthMap:
             image = cv2.imread(image)
         elif isinstance(image, Image.Image):
             image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
-        else:
-            raise ValueError("Invalid image type. Expected str or PIL.Image.Image.")
 
         depth = self._model.infer_image(image)  # HxW raw depth map
 

diff --git a/vision_agent_tools/tools/florencev2.py b/vision_agent_tools/tools/florencev2.py
@@ -15,6 +15,7 @@ class PromptTask(str, Enum):
     Valid task_prompts options for the Florence2 model.
 
     """
+
     CAPTION = "<CAPTION>"
     """"""
     CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
-Original file line number
+Diff line change
@@ Expand Up / @@ -15,6 +15,7 @@ class PromptTask(str, Enum): @@
         Valid task_prompts options for the Florence2 model.
         """
         CAPTION = "<CAPTION>"
         """"""
         CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
@@ Expand Down @@