[AL-2409] Enable local dataset usage with Indra #2573

Merged: 25 commits, Sep 12, 2023
11 changes: 9 additions & 2 deletions deeplake/api/dataset.py
@@ -1,6 +1,7 @@
import os

import deeplake
import jwt
import pathlib
import posixpath
from typing import Dict, Optional, Union, List
@@ -11,6 +12,7 @@
from deeplake.auto.unstructured.yolo.yolo import YoloDataset
from deeplake.client.client import DeepLakeBackendClient
from deeplake.client.log import logger
from deeplake.client.utils import get_user_name, read_token
from deeplake.core.dataset import Dataset, dataset_factory
from deeplake.core.tensor import Tensor
from deeplake.core.meta.dataset_meta import DatasetMeta
@@ -361,6 +363,7 @@ def empty(
lock_enabled: Optional[bool] = True,
lock_timeout: Optional[int] = 0,
verbose: bool = True,
username: str = "public",
Contributor: Why do we have a username parameter when it is inferred from credentials?

Author: Does a token count as creds?

(See the sketch below for how the username is inferred from the token.)

) -> Dataset:
"""Creates an empty dataset

@@ -383,8 +386,8 @@ def empty(
org_id (str, Optional): Organization id to be used for enabling enterprise features. Only applicable for local datasets.
verbose (bool): If True, logs will be printed. Defaults to True.
lock_timeout (int): Number of seconds to wait before throwing a LockException. If None, wait indefinitely
lock_enabled (bool): If true, the dataset manages a write lock. NOTE: Only set to False if you are managing concurrent access externally. Defaults to ``True``.

lock_enabled (bool): If true, the dataset manages a write lock. NOTE: Only set to False if you are managing concurrent access externally.
username (str): Username to be used for creating a dataset in the Deep Lake Tensor Database.
Returns:
Dataset: Dataset created using the arguments provided.

@@ -435,6 +438,7 @@ def empty(
"lock_timeout": lock_timeout,
},
token=token,
username=username,
)
except Exception as e:
if isinstance(e, UserNotLoggedInException):
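
Re the username discussion above: this PR infers the username client-side by decoding the JWT token, as the VectorStore changes further down show. A minimal sketch of that inference, assuming a standard Activeloop JWT that carries an "id" claim:

    import jwt  # PyJWT

    def username_from_token(token):
        # Fallback used throughout this PR when no token is available.
        if token is None:
            return "public"
        # Decode without verifying the signature, mirroring the PR's own code.
        return jwt.decode(token, options={"verify_signature": False})["id"]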
@@ -481,6 +485,7 @@ def load(
check_integrity: bool = True,
lock_timeout: Optional[int] = 0,
lock_enabled: Optional[bool] = True,
username: str = "public",
) -> Dataset:
"""Loads an existing dataset

@@ -546,6 +551,7 @@ def load(
reset (bool): If the specified dataset cannot be loaded due to a corrupted HEAD state of the branch being loaded,
setting ``reset=True`` will reset HEAD changes and load the previous version.
check_integrity (bool): If the param is True it will do integrity check during dataset loading otherwise the check is not performed
username (str): Username to be used for loading a dataset in the Deep Lake Tensor Database.

..
# noqa: DAR101
@@ -598,6 +604,7 @@
"load",
{"lock_enabled": lock_enabled, "lock_timeout": lock_timeout},
token=token,
username=username,
)
except Exception as e:
if isinstance(e, UserNotLoggedInException):
18 changes: 18 additions & 0 deletions deeplake/client/client.py
@@ -39,6 +39,7 @@
GET_PRESIGNED_URL_SUFFIX,
CONNECT_DATASET_SUFFIX,
REMOTE_QUERY_SUFFIX,
ORG_PERMISSION_SUFFIX,
)
from deeplake.client.log import logger
import jwt # should add it to requirements.txt
@@ -509,3 +510,20 @@ def remote_query(
).json()

return response

def has_indra_org_permission(self, org_id: str) -> Dict[str, Any]:
"""Queries a remote dataset.

Args:
org_id (str): The organization to which the dataset belongs.

Returns:
Dict[str, Any]: The json response containing org permissions.
"""
response = self.request(
"GET",
ORG_PERMISSION_SUFFIX.format(org_id),
endpoint=self.endpoint(),
).json()

return response
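
A hedged usage sketch of the new helper; the client constructor arguments and the response shape are assumptions, not confirmed by this diff:

    from deeplake.client.client import DeepLakeBackendClient

    client = DeepLakeBackendClient(token=token)  # constructor args assumed
    perms = client.has_indra_org_permission("my_org")
    # The response shape is not shown here; inspect it before relying on keys.
    print(perms)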
1 change: 1 addition & 0 deletions deeplake/client/config.py
@@ -30,3 +30,4 @@
DEFAULT_REQUEST_TIMEOUT = 170

DEEPLAKE_AUTH_TOKEN = "ACTIVELOOP_TOKEN"
ORG_PERMISSION_SUFFIX = "/api/organizations/{}/features/dataset_query"
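
The new suffix is a plain format string, so the request path is built with str.format, e.g.:

    ORG_PERMISSION_SUFFIX.format("my_org")
    # -> "/api/organizations/my_org/features/dataset_query"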
3 changes: 3 additions & 0 deletions deeplake/core/dataset/dataset.py
@@ -4684,3 +4684,6 @@ def _temp_write_access(self):

def _get_storage_repository(self) -> Optional[str]:
return getattr(self.base_storage, "repository", None)

def get_user_name(self) -> Optional[str]:
Contributor: Is this used?

return getattr(self.base_storage, "user_name", None)
56 changes: 45 additions & 11 deletions deeplake/core/vectorstore/deeplake_vectorstore.py
@@ -1,6 +1,7 @@
import logging
import pathlib
from typing import Optional, Any, Iterable, List, Dict, Union, Callable
from typing import Optional, Any, List, Dict, Union, Callable
import jwt

import numpy as np

@@ -15,10 +16,10 @@
from deeplake.constants import (
DEFAULT_VECTORSTORE_TENSORS,
)
from deeplake.client.utils import read_token
from deeplake.core.vectorstore import utils
from deeplake.core.vectorstore.vector_search import vector_search
from deeplake.core.vectorstore.vector_search import dataset as dataset_utils
from deeplake.core.vectorstore.vector_search import filter as filter_utils

from deeplake.util.bugout_reporter import (
feature_report_path,
@@ -45,6 +46,7 @@ def __init__(
verbose: bool = True,
runtime: Optional[Dict] = None,
creds: Optional[Union[Dict, str]] = None,
org_id: Optional[str] = None,
**kwargs: Any,
) -> None:
"""Creates an empty VectorStore or loads an existing one if it exists at the specified ``path``.
@@ -105,6 +107,14 @@ def __init__(
Danger:
Setting ``overwrite`` to ``True`` will delete all of your data if the Vector Store exists! Be very careful when setting this parameter.
"""
self.token = token or read_token(from_env=True)
self.username = "public"
self.path = path
if self.token is not None:
self.username = jwt.decode(self.token, options={"verify_signature": False})[
"id"
]

feature_report_path(
path,
"vs.initialize",
@@ -118,11 +128,12 @@
"read_only": read_only,
"ingestion_batch_size": ingestion_batch_size,
"exec_option": exec_option,
"token": token,
"token": self.token,
"verbose": verbose,
"runtime": runtime,
},
token=token,
token=self.token,
username=self.username,
)

self.ingestion_batch_size = ingestion_batch_size
@@ -134,19 +145,21 @@
self.dataset = dataset_utils.create_or_load_dataset(
tensor_params,
path,
token,
self.token,
creds,
logger,
read_only,
exec_option,
embedding_function,
overwrite,
runtime,
org_id,
self.username,
**kwargs,
)
self.embedding_function = embedding_function
self.exec_option = utils.parse_exec_option(
self.dataset, exec_option, _INDRA_INSTALLED
self.dataset, exec_option, _INDRA_INSTALLED, self.username
)
self.verbose = verbose
self.tensor_params = tensor_params
@@ -233,7 +246,8 @@ def add(
Optional[List[str]]: List of ids if ``return_ids`` is set to True. Otherwise, None.
"""

deeplake_reporter.feature_report(
feature_report_path(
path=self.path,
feature_name="vs.add",
parameters={
"tensors": list(tensors.keys()) if tensors else None,
@@ -242,6 +256,8 @@
"embedding_function": True if embedding_function is not None else False,
"embedding_data": True if embedding_data is not None else False,
},
token=self.token,
username=self.username,
)

(
@@ -367,7 +383,8 @@ def search(
Dict: Dictionary where keys are tensor names and values are the results of the search
"""

deeplake_reporter.feature_report(
feature_report_path(
path=self.path,
feature_name="vs.search",
parameters={
"embedding_data": True if embedding_data is not None else False,
@@ -382,6 +399,8 @@
"return_tensors": return_tensors,
"return_view": return_view,
},
token=self.token,
username=self.username,
)

if exec_option is None and self.exec_option != "python" and callable(filter):
@@ -481,7 +500,8 @@ def delete(
ValueError: If neither ``ids``, ``filter``, ``query``, nor ``delete_all`` are specified, or if an invalid ``exec_option`` is provided.
"""

deeplake_reporter.feature_report(
feature_report_path(
path=self.path,
feature_name="vs.delete",
parameters={
"ids": True if ids is not None else False,
@@ -491,6 +511,8 @@
"exec_option": exec_option,
"delete_all": delete_all,
},
token=self.token,
username=self.username,
)

if not row_ids:
@@ -575,7 +597,8 @@ def update_embedding(
embedding_source_tensor (Union[str, List[str]], optional): Name of tensor with data that needs to be converted to embeddings. Defaults to `text`.
embedding_tensor (Optional[Union[str, List[str]]], optional): Name of the tensor with embeddings. Defaults to None.
"""
deeplake_reporter.feature_report(
feature_report_path(
path=self.path,
feature_name="vs.delete",
parameters={
"ids": True if ids is not None else False,
@@ -584,6 +607,8 @@
"filter": True if filter is not None else False,
"exec_option": exec_option,
},
token=self.token,
username=self.username,
)

(
@@ -635,12 +660,21 @@ def delete_by_path(
Danger:
This method permanently deletes all of your data if the Vector Store exists! Be very careful when using this method.
"""
token = token or read_token(from_env=True)
username = "public"
if token:
username = jwt.decode(token, options={"verify_signature": False})["id"]

feature_report_path(
path,
"vs.delete_by_path",
{},
parameters={
"path": path,
"token": token,
"force": force,
},
token=token,
username=username,
)
deeplake.delete(path, large_ok=True, token=token, force=force)

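Putting the VectorStore changes together with the tests below, a minimal sketch of the new behavior (the import path is assumed; the class name follows the tests):

    from deeplake.core.vectorstore.deeplake_vectorstore import VectorStore

    # Without a token, a local store falls back to the "python" exec option;
    # with a token, compute_engine becomes available for local datasets.
    db = VectorStore(path="./my_local_vectorstore", token=my_token)  # my_token assumed
    print(db.exec_option)  # expected: "compute_engine"
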
95 changes: 95 additions & 0 deletions deeplake/core/vectorstore/test_deeplake_vectorstore.py
@@ -23,6 +23,8 @@
DatasetHandlerError,
)
from deeplake.core.vectorstore.vector_search import dataset as dataset_utils
from deeplake.cli.auth import login, logout
from click.testing import CliRunner


EMBEDDING_DIM = 100
@@ -1784,3 +1786,96 @@ def test_read_only():
def test_delete_by_path_wrong_path():
with pytest.raises(DatasetHandlerError):
VectorStore.delete_by_path("some_path")


@requires_libdeeplake
def test_exec_option_with_auth(local_path, hub_cloud_path, hub_cloud_dev_token):
db = VectorStore(path=local_path)
assert db.exec_option == "python"

db = VectorStore(
path=local_path,
token=hub_cloud_dev_token,
)
assert db.exec_option == "compute_engine"

db = VectorStore(
path=hub_cloud_path,
token=hub_cloud_dev_token,
)
assert db.exec_option == "compute_engine"

db = VectorStore(
path=hub_cloud_path + "_tensor_db",
token=hub_cloud_dev_token,
runtime={"tensor_db": True},
)
assert db.exec_option == "tensor_db"


@requires_libdeeplake
def test_exec_option_cli(
local_path,
hub_cloud_path,
hub_cloud_dev_token,
hub_cloud_dev_credentials,
):
runner = CliRunner()
username, password = hub_cloud_dev_credentials
# Testing exec_option when the cli login and logout commands are executed
runner.invoke(login, f"-u {username} -p {password}")

# local dataset and logged in with cli
db = VectorStore(
path=local_path,
)
assert db.exec_option == "compute_engine"

# hub cloud dataset and logged in with cli
db = VectorStore(
path=hub_cloud_path,
)
assert db.exec_option == "compute_engine"

# logging out with cli
runner.invoke(logout)

# local dataset and logged out with cli
db = VectorStore(
path=local_path,
)
assert db.exec_option == "python"

# logging in with cli token
runner.invoke(login, f"-t {hub_cloud_dev_token}")
db = VectorStore(
path=local_path,
)
assert db.exec_option == "compute_engine"


@requires_libdeeplake
@pytest.mark.parametrize(
"path",
[
"s3_path",
"gcs_path",
"azure_path",
],
indirect=True,
)
def test_exec_option_with_connected_datasets(
hub_cloud_dev_token,
hub_cloud_path,
hub_cloud_dev_managed_creds_key,
path,
):
db = VectorStore(path, overwrite=True)

db.dataset.connect(
creds_key=hub_cloud_dev_managed_creds_key,
dest_path=hub_cloud_path,
token=hub_cloud_dev_token,
)
db.dataset.add_creds_key(hub_cloud_dev_managed_creds_key, managed=True)
assert db.exec_option == "compute_engine"
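
One more hedged note tying back to config.py above: read_token(from_env=True) presumably falls back to the ACTIVELOOP_TOKEN environment variable (DEEPLAKE_AUTH_TOKEN), so the token can also come from the environment rather than an explicit argument. A sketch, with the exact fallback behavior an assumption:

    import os

    os.environ["ACTIVELOOP_TOKEN"] = "<your token>"  # hypothetical value
    db = VectorStore(path="./my_local_vectorstore")  # token picked up from env (assumed)
    assert db.exec_option == "compute_engine"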