Skip to content

Commit

Permalink
Ensure username is stored for use in commit messages (#2772)
Browse files Browse the repository at this point in the history
Ensure the username is stored for use in commit messages and other places, even if the token was passed as an argument to deeplake
  • Loading branch information
nvoxland-al authored Feb 21, 2024
1 parent 914d802 commit d2e3f00
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 10 deletions.
1 change: 0 additions & 1 deletion deeplake/api/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import os

import deeplake
import jwt
import pathlib
import posixpath
import warnings
Expand Down
3 changes: 1 addition & 2 deletions deeplake/client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ def __init__(self, token: Optional[str] = None):
)

self.version = deeplake.__version__
self._token_from_env = False
self.auth_header = None
self.token = (
token
Expand All @@ -76,7 +75,7 @@ def __init__(self, token: Optional[str] = None):
if orgs == ["public"]:
self.token = token or self.get_token()
self.auth_header = f"Bearer {self.token}"
if self._token_from_env:
else:
username = self.get_user_profile()["name"]
if get_reporting_config().get("username") != username:
save_reporting_config(True, username=username)
Expand Down
12 changes: 12 additions & 0 deletions deeplake/core/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import pathlib
import numpy as np
from time import time, sleep

from jwt import DecodeError
from tqdm import tqdm

import deeplake
Expand Down Expand Up @@ -332,6 +334,16 @@ def maybe_flush(self):
self._flush_vc_info()
self.storage.flush()

@property
def username(self) -> str:
    """Return the user name carried by this dataset's token.

    The token is decoded as a JWT *without* verifying its signature; the
    value of its ``"id"`` claim is returned.

    Returns:
        str: The ``"id"`` claim of the token, or ``"public"`` when there is
        no token, the token cannot be decoded, or it has no ``"id"`` claim.
    """
    if not self.token:
        return "public"

    try:
        claims = jwt.decode(self.token, options={"verify_signature": False})
        return claims["id"]
    except (DecodeError, KeyError):
        # A malformed token and a well-formed token without an "id" claim
        # are handled the same way: fall back to the anonymous user rather
        # than failing the caller.
        return "public"

@property
def num_samples(self) -> int:
"""Returns the length of the smallest tensor.
Expand Down
68 changes: 68 additions & 0 deletions deeplake/core/tests/test_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import os

from deeplake.client.config import DEEPLAKE_AUTH_TOKEN
from deeplake.core import LRUCache
from deeplake.core.storage.memory import MemoryProvider

from deeplake.core.dataset import Dataset


def test_token_and_username(hub_cloud_dev_token):
    """Check how ``Dataset.token`` and ``Dataset.username`` are resolved.

    Covers: no token at all, an undecodable token, a valid token, and the
    interaction between an explicit token and the auth-token environment
    variable.
    """

    def make_dataset(**dataset_kwargs):
        # Every case uses a fresh, purely in-memory dataset.
        backing = LRUCache(
            cache_storage=MemoryProvider(),
            cache_size=0,
            next_storage=MemoryProvider(),
        )
        return Dataset(storage=backing, **dataset_kwargs)

    assert DEEPLAKE_AUTH_TOKEN not in os.environ

    anonymous = make_dataset()
    assert anonymous.token is None
    assert anonymous.username == "public"

    # An undecodable token is reported as "public"; a valid one yields its user.
    explicit_cases = (
        ("invalid_value", "public"),
        (hub_cloud_dev_token, "testingacc2"),
    )
    for given_token, expected_user in explicit_cases:
        ds = make_dataset(token=given_token)
        assert ds.token == given_token
        assert ds.username == expected_user

    # The environment variable is used when no token is passed, but an
    # explicitly passed token wins over it.
    try:
        os.environ[DEEPLAKE_AUTH_TOKEN] = hub_cloud_dev_token

        from_env = make_dataset()
        assert from_env.token == hub_cloud_dev_token
        assert from_env.username == "testingacc2"

        overridden = make_dataset(token="invalid_value")
        assert overridden.token == "invalid_value"
        assert overridden.username == "public"
    finally:
        os.environ.pop(DEEPLAKE_AUTH_TOKEN)

    assert DEEPLAKE_AUTH_TOKEN not in os.environ
6 changes: 4 additions & 2 deletions deeplake/core/version_control/commit_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,14 @@ def copy(self):
node.total_samples_processed = self.total_samples_processed
return node

def add_successor(self, node: "CommitNode", message: Optional[str] = None):
def add_successor(
self, node: "CommitNode", author: str, message: Optional[str] = None
):
"""Adds a successor (a type of child) to the node, used for commits."""
node.parent = self
self.children.append(node)
self.commit_message = message
self.commit_user_name = get_user_name()
self.commit_user_name = author
self.commit_time = datetime.utcnow()

def merge_from(self, node: "CommitNode"):
Expand Down
8 changes: 4 additions & 4 deletions deeplake/util/tests/test_version_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ def test_merge_commit_node_map():
b = CommitNode("main", "b")
c = CommitNode("main", "c")
e = CommitNode("main", "e")
root.add_successor(a, "commit a")
root.add_successor(b, "commit b")
a.add_successor(c, "commit c")
c.add_successor(e, "commit e")
root.add_successor(a, "me", "commit a")
root.add_successor(b, "me", "commit b")
a.add_successor(c, "me", "commit c")
c.add_successor(e, "me", "commit e")
map1 = {
FIRST_COMMIT_ID: root,
"a": a,
Expand Down
2 changes: 1 addition & 1 deletion deeplake/util/version_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def commit(
hash = generate_hash()
version_state["commit_id"] = hash
new_node = CommitNode(version_state["branch"], hash)
stored_commit_node.add_successor(new_node, message)
stored_commit_node.add_successor(new_node, dataset.username, message)
stored_commit_node.is_checkpoint = is_checkpoint
stored_commit_node.total_samples_processed = total_samples_processed
version_state["commit_node"] = new_node
Expand Down

0 comments on commit d2e3f00

Please sign in to comment.