Skip to content

Commit

Permalink
Added allow_delete flag to dataset (#2763)
Browse files Browse the repository at this point in the history
Added allow_delete flag to dataset which is stored in dataset_meta.json
  • Loading branch information
nvoxland-al authored Feb 21, 2024
1 parent 4e1eb75 commit 914d802
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 1 deletion.
28 changes: 27 additions & 1 deletion deeplake/api/dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import os

import deeplake
Expand Down Expand Up @@ -53,7 +54,7 @@
from deeplake.util.auto import get_most_common_extension
from deeplake.util.bugout_reporter import feature_report_path, deeplake_reporter
from deeplake.util.delete_entry import remove_path_from_backend
from deeplake.util.keys import dataset_exists
from deeplake.util.keys import dataset_exists, get_dataset_meta_key, FIRST_COMMIT_ID
from deeplake.util.exceptions import (
AgreementError,
DatasetHandlerError,
Expand Down Expand Up @@ -237,6 +238,11 @@ def init(

if ds_exists:
if overwrite:
if not dataset._allow_delete(cache_chain):
raise DatasetHandlerError(
"Dataset overwrite failed. The dataset is marked as delete_allowed=false. To allow overwrite, you must first run `allow_delete(True)` on the dataset."
)

try:
cache_chain.clear()
except Exception as e:
Expand Down Expand Up @@ -457,6 +463,11 @@ def empty(
raise

if overwrite and dataset_exists(cache_chain):
if not dataset._allow_delete(cache_chain):
raise DatasetHandlerError(
"Dataset overwrite failed. The dataset is marked as delete_allowed=false. To allow overwrite, you must first run `allow_delete(True)` on the dataset."
)

try:
cache_chain.clear()
except Exception as e:
Expand Down Expand Up @@ -866,6 +877,7 @@ def delete(
ds = deeplake.load(path, verbose=False, token=token, creds=creds)
except UserNotLoggedInException:
raise UserNotLoggedInException from None

ds.delete(large_ok=large_ok)
if verbose:
logger.info(f"{path} dataset deleted successfully.")
Expand Down Expand Up @@ -1275,6 +1287,11 @@ def deepcopy(

if dataset_exists(cache_chain):
if overwrite:
if not dataset._allow_delete(cache_chain):
raise DatasetHandlerError(
"Dataset overwrite failed. The dataset is marked as delete_allowed=false. To allow overwrite, you must first run `allow_delete(True)` on the dataset."
)

try:
cache_chain.clear()
except Exception as e:
Expand Down Expand Up @@ -2062,3 +2079,12 @@ def query(query_string: str, token: Optional[str] = "") -> Dataset:
from deeplake.enterprise.libdeeplake_query import universal_query

return universal_query(query_string=query_string, token=token)

@staticmethod
def _allow_delete(storage, commit_id=None) -> bool:
    """Return ``True`` unless the dataset's meta explicitly sets allow_delete=False.

    Reads dataset_meta.json for ``commit_id`` (defaulting to the first commit)
    directly from ``storage``, so the check works before a Dataset object is
    constructed (e.g. when deciding whether an overwrite may clear storage).

    Args:
        storage: Mapping-like storage provider; indexing with a meta key
            returns the raw JSON bytes of dataset_meta.json.
        commit_id (str, optional): Commit whose meta should be inspected.
            Defaults to ``FIRST_COMMIT_ID``.

    Returns:
        bool: ``False`` only when the stored meta carries a falsy
        ``allow_delete`` entry; ``True`` otherwise.
    """
    meta = json.loads(
        storage[get_dataset_meta_key(commit_id or FIRST_COMMIT_ID)].decode("utf-8")
    )
    # A missing key means the dataset predates this flag — treat it as deletable.
    return bool(meta.get("allow_delete", True))
20 changes: 20 additions & 0 deletions deeplake/api/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,23 @@ def test_persistence_bug(local_ds_generator):

ds = local_ds_generator()
np.testing.assert_array_equal(ds[tensor_name].numpy(), np.array([[1], [2]]))


def test_allow_delete(local_ds_generator, local_path):
    """Verify the allow_delete flag guards delete/overwrite and persists across loads."""
    ds = local_ds_generator()
    # Newly created datasets are deletable by default.
    assert ds.allow_delete is True

    ds.allow_delete = False
    assert ds.allow_delete is False

    # The flag is stored in dataset_meta.json, so a fresh load must see it.
    ds2 = deeplake.load(ds.path)
    assert ds2.allow_delete is False

    # Each guarded operation gets its own `pytest.raises` block: with a single
    # shared block, execution stops at the first raise and the remaining
    # operations are never exercised.
    with pytest.raises(DatasetHandlerError):
        deeplake.empty(ds.path, overwrite=True)
    with pytest.raises(DatasetHandlerError):
        deeplake.deepcopy(src=ds.path, dest=local_path, overwrite=True)
    with pytest.raises(DatasetHandlerError):
        ds.delete()

    # Re-enabling the flag makes deletion succeed again.
    ds.allow_delete = True
    assert ds.allow_delete is True
    ds.delete()
20 changes: 20 additions & 0 deletions deeplake/core/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@
BadRequestException,
SampleAppendError,
SampleExtendError,
DatasetHandlerError,
)
from deeplake.util.keys import (
dataset_exists,
Expand Down Expand Up @@ -2040,6 +2041,16 @@ def _set_read_only(self, value: bool, err: bool):
def read_only(self, value: bool):
self._set_read_only(value, True)

@property
def allow_delete(self) -> bool:
    """Whether this dataset may be deleted (or overwritten) in storage.

    The flag is persisted in dataset_meta.json. Change it by assigning to
    this property, e.g. ``ds.allow_delete = True``.
    """
    return self.meta.allow_delete

@allow_delete.setter
def allow_delete(self, value: bool):
    self.meta.allow_delete = value
    # Flush immediately so the flag is written to storage and survives reloads.
    self.flush()

def pytorch(
self,
transform: Optional[Callable] = None,
Expand Down Expand Up @@ -2587,12 +2598,18 @@ def delete(self, large_ok=False):
Raises:
DatasetTooLargeToDelete: If the dataset is larger than 1 GB and ``large_ok`` is ``False``.
DatasetHandlerError: If the dataset is marked as delete_allowed=False.
"""

deeplake_reporter.feature_report(
feature_name="delete", parameters={"large_ok": large_ok}
)

if not self.allow_delete:
raise DatasetHandlerError(
"The dataset is marked as delete_allowed=false. To delete this dataset, you must first run `allow_delete(True)` on the dataset."
)

if hasattr(self, "_view_entry"):
self._view_entry.delete()
return
Expand Down Expand Up @@ -2652,6 +2669,9 @@ def __str__(self):
if self.read_only:
mode_str = f"read_only=True, "

if not self.allow_delete:
mode_str = f"allow_delete=False, "

index_str = f"index={self.index}, "
if self.index.is_trivial():
index_str = ""
Expand Down
13 changes: 13 additions & 0 deletions deeplake/core/meta/dataset_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def __init__(self):
self.tensor_names = {}
self.hidden_tensors = []
self.default_index = Index().to_json()
self._allow_delete = True

@property
def visible_tensors(self):
Expand All @@ -33,16 +34,28 @@ def nbytes(self):
# TODO: can optimize this
return len(self.tobytes())

@property
def allow_delete(self):
    """Whether the dataset backed by this meta may be deleted (defaults to True)."""
    return self._allow_delete

@allow_delete.setter
def allow_delete(self, value):
    """Update the flag and mark the meta dirty so the change gets persisted."""
    # Tuple assignment keeps the flag update and the dirty-marking together.
    self._allow_delete, self.is_dirty = value, True

def __getstate__(self) -> Dict[str, Any]:
    """Return a picklable snapshot of the meta state.

    Mutable container members are shallow-copied so later mutation of the
    live meta object cannot alias into the serialized snapshot.
    """
    state = super().__getstate__()
    for attr in ("tensors", "groups", "tensor_names", "hidden_tensors", "default_index"):
        state[attr] = getattr(self, attr).copy()
    # Stored under the public name; deserialization maps it back to _allow_delete.
    state["allow_delete"] = self._allow_delete
    return state

def __setstate__(self, d):
if "allow_delete" in d:
d["_allow_delete"] = d.pop("allow_delete")
self.__dict__.update(d)

def add_tensor(self, name, key, hidden=False):
Expand Down

0 comments on commit 914d802

Please sign in to comment.