From 16e35a976c471454205423928644b72afae61cbe Mon Sep 17 00:00:00 2001
From: Nir Ozery <nir.ozery@treeverse.io>
Date: Sun, 3 Dec 2023 20:58:49 +0000
Subject: [PATCH 1/4] Python Wrapper: Objects generator

---
 .../docs/lakefs.object_manager.rst            |  7 ---
 clients/python-wrapper/docs/lakefs.rst        |  1 -
 clients/python-wrapper/lakefs/__init__.py     | 11 ++++-
 .../python-wrapper/lakefs/import_manager.py   |  5 +-
 clients/python-wrapper/lakefs/models.py       | 13 ++++-
 clients/python-wrapper/lakefs/object.py       | 12 ++---
 .../python-wrapper/lakefs/object_manager.py   | 10 ----
 clients/python-wrapper/lakefs/reference.py    | 35 +++++++++++---
 clients/python-wrapper/tests/.pylintrc        |  1 -
 .../tests/integration/test_branch.py          |  5 +-
 .../tests/integration/test_reference.py       | 21 ++++++++
 .../tests/utests/test_namedtuple.py           |  4 +-
 .../tests/utests/test_reference.py            | 48 ++++++++++++++++++-
 13 files changed, 127 insertions(+), 46 deletions(-)
 delete mode 100644 clients/python-wrapper/docs/lakefs.object_manager.rst
 delete mode 100644 clients/python-wrapper/lakefs/object_manager.py

diff --git a/clients/python-wrapper/docs/lakefs.object_manager.rst b/clients/python-wrapper/docs/lakefs.object_manager.rst
deleted file mode 100644
index 97ceb3df9e4..00000000000
--- a/clients/python-wrapper/docs/lakefs.object_manager.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-lakefs.object\_manager module
-=============================
-
-.. automodule:: lakefs.object_manager
-   :members:
-   :undoc-members:
-   :show-inheritance:
diff --git a/clients/python-wrapper/docs/lakefs.rst b/clients/python-wrapper/docs/lakefs.rst
index df999d43025..b75ad4c681b 100644
--- a/clients/python-wrapper/docs/lakefs.rst
+++ b/clients/python-wrapper/docs/lakefs.rst
@@ -15,7 +15,6 @@ Submodules
    lakefs.models
    lakefs.namedtuple
    lakefs.object
-   lakefs.object_manager
    lakefs.reference
    lakefs.repository
    lakefs.tag
diff --git a/clients/python-wrapper/lakefs/__init__.py b/clients/python-wrapper/lakefs/__init__.py
index 7515110a78e..4810f6bc594 100644
--- a/clients/python-wrapper/lakefs/__init__.py
+++ b/clients/python-wrapper/lakefs/__init__.py
@@ -4,8 +4,15 @@
 
 from lakefs.repository import Repository
 from lakefs.reference import Reference
-from lakefs.models import Commit, Change, RepositoryProperties
+from lakefs.models import (
+    Commit,
+    Change,
+    ImportStatus,
+    ServerStorageConfiguration,
+    ObjectInfo,
+    CommonPrefix,
+    RepositoryProperties
+)
 from lakefs.tag import Tag
 from lakefs.branch import Branch
 from lakefs.object import StoredObject, WriteableObject, ObjectReader
-from lakefs.object_manager import ObjectManager
diff --git a/clients/python-wrapper/lakefs/import_manager.py b/clients/python-wrapper/lakefs/import_manager.py
index 0ff4490864b..16a3a2d81c2 100644
--- a/clients/python-wrapper/lakefs/import_manager.py
+++ b/clients/python-wrapper/lakefs/import_manager.py
@@ -10,13 +10,10 @@
 
 import lakefs_sdk
 
-from lakefs.models import ImportStatus
+from lakefs.models import ImportStatus, _OBJECT, _PREFIX
 from lakefs.client import Client, DEFAULT_CLIENT
 from lakefs.exceptions import ImportManagerException, api_exception_handler
 
-_PREFIX = "common_prefix"
-_OBJECT = "object"
-
 
 class ImportManager:
     """
diff --git a/clients/python-wrapper/lakefs/models.py b/clients/python-wrapper/lakefs/models.py
index 11e93836415..3936aeac67f 100644
--- a/clients/python-wrapper/lakefs/models.py
+++ b/clients/python-wrapper/lakefs/models.py
@@ -9,6 +9,9 @@
 
 from lakefs.namedtuple import LenientNamedTuple
 
+_PREFIX = "common_prefix"
+_OBJECT = "object"
+
 
 class Commit(LenientNamedTuple):
     """
@@ -74,12 +77,11 @@ class ServerStorageConfiguration(LenientNamedTuple):
     default_namespace_prefix: Optional[str] = None
 
 
-class ObjectStats(LenientNamedTuple):
+class ObjectInfo(LenientNamedTuple):
     """
     Represent a lakeFS object's stats
     """
     path: str
-    path_type: str
     physical_address: str
     checksum: str
     mtime: int
@@ -89,6 +91,13 @@ class ObjectStats(LenientNamedTuple):
     content_type: Optional[str] = None
 
 
+class CommonPrefix(LenientNamedTuple):
+    """
+    Represents a common prefix in lakeFS
+    """
+    path: str
+
+
 class RepositoryProperties(LenientNamedTuple):
     """
     Represent a lakeFS repository's properties
diff --git a/clients/python-wrapper/lakefs/object.py b/clients/python-wrapper/lakefs/object.py
index a955ffd97aa..910e6990a1b 100644
--- a/clients/python-wrapper/lakefs/object.py
+++ b/clients/python-wrapper/lakefs/object.py
@@ -30,7 +30,7 @@
     PermissionException,
     ObjectExistsException,
 )
-from lakefs.models import ObjectStats
+from lakefs.models import ObjectInfo
 
 _LAKEFS_METADATA_PREFIX = "x-lakefs-meta-"
 # _BUFFER_SIZE - Writer buffer size. While buffer size not exceed, data will be maintained in memory and file will
@@ -302,7 +302,7 @@ class ObjectWriter(LakeFSIOBase):
     implicitly when using writer as a context.
     """
     _fd: tempfile.SpooledTemporaryFile
-    _obj_stats: ObjectStats = None
+    _obj_stats: ObjectInfo = None
 
     def __init__(self,
                  obj: StoredObject,
@@ -381,7 +381,7 @@ def close(self) -> None:
         Write the data to the lakeFS server and close open descriptors
         """
         stats = self._upload_presign() if self.pre_sign else self._upload_raw()
-        self._obj_stats = ObjectStats(**stats.dict())
+        self._obj_stats = ObjectInfo(**stats.dict())
 
         self._fd.close()
         super().close()
@@ -502,7 +502,7 @@ class StoredObject:
     _repo_id: str
     _ref_id: str
     _path: str
-    _stats: Optional[ObjectStats] = None
+    _stats: Optional[ObjectInfo] = None
 
     def __init__(self, repository: str, reference: str, path: str, client: Optional[Client] = DEFAULT_CLIENT):
         self._client = client
@@ -548,14 +548,14 @@ def reader(self, mode: ReadModes = 'rb', pre_sign: Optional[bool] = None) -> Obj
         """
         return ObjectReader(self, mode=mode, pre_sign=pre_sign, client=self._client)
 
-    def stat(self) -> ObjectStats:
+    def stat(self) -> ObjectInfo:
         """
         Return the Stat object representing this object
         """
         if self._stats is None:
             with api_exception_handler(_io_exception_handler):
                 stat = self._client.sdk_client.objects_api.stat_object(self._repo_id, self._ref_id, self._path)
-                self._stats = ObjectStats(**stat.dict())
+                self._stats = ObjectInfo(**stat.dict())
         return self._stats
 
     def exists(self) -> bool:
diff --git a/clients/python-wrapper/lakefs/object_manager.py b/clients/python-wrapper/lakefs/object_manager.py
deleted file mode 100644
index d1eee06c1a7..00000000000
--- a/clients/python-wrapper/lakefs/object_manager.py
+++ /dev/null
@@ -1,10 +0,0 @@
-"""
-Module implementing reference's object management logic
-"""
-
-
-class ObjectManager:
-    """
-    Manage objects listing for a given repository
-    """
-    # TODO: Implement
diff --git a/clients/python-wrapper/lakefs/reference.py b/clients/python-wrapper/lakefs/reference.py
index 36c77fd9ed7..23b2342cb5a 100644
--- a/clients/python-wrapper/lakefs/reference.py
+++ b/clients/python-wrapper/lakefs/reference.py
@@ -8,11 +8,10 @@
 
 import lakefs_sdk
 
-from lakefs.models import Commit, Change
+from lakefs.models import Commit, Change, CommonPrefix, ObjectInfo, _OBJECT
 from lakefs.client import Client, DEFAULT_CLIENT
 from lakefs.exceptions import api_exception_handler
 from lakefs.object import StoredObject
-from lakefs.object_manager import ObjectManager
 
 
 class Reference:
@@ -43,12 +42,36 @@ def id(self) -> str:
         """
         return self._id
 
-    @property
-    def objects(self) -> ObjectManager:
+    def objects(self,
+                max_amount: Optional[int] = None,
+                after: Optional[str] = None,
+                prefix: Optional[str] = None,
+                delimiter: Optional[str] = None,
+                **kwargs) -> Generator[StoredObject | CommonPrefix]:
         """
-        Returns a ObjectManager object for this reference
+        Returns an object generator for this reference
+
+        :param max_amount: Stop showing changes after this amount
+        :param after: Return items after this value
+        :param prefix: Return items prefixed with this value
+        :param delimiter: Group common prefixes by this delimiter
+        :param kwargs: Additional Keyword Arguments to send to the server
+        :raises:
+            NotFoundException if this reference or other_ref does not exist
+            NotAuthorizedException if user is not authorized to perform this operation
+            ServerException for any other errors
         """
-        # TODO: Implement
+
+        for res in generate_listing(self._client.sdk_client.objects_api.list_objects,
+                                    repository=self._repo_id,
+                                    ref=self._id,
+                                    max_amount=max_amount,
+                                    after=after,
+                                    prefix=prefix,
+                                    delimiter=delimiter,
+                                    **kwargs):
+            type_class = ObjectInfo if res.path_type == _OBJECT else CommonPrefix
+            yield type_class(**res.dict())
 
     def log(self, max_amount: Optional[int] = None, **kwargs) -> Generator[Commit]:
         """
diff --git a/clients/python-wrapper/tests/.pylintrc b/clients/python-wrapper/tests/.pylintrc
index 79a51c08326..278290b517f 100644
--- a/clients/python-wrapper/tests/.pylintrc
+++ b/clients/python-wrapper/tests/.pylintrc
@@ -12,7 +12,6 @@ disable=
     missing-function-docstring,
     missing-module-docstring,
     too-few-public-methods,
-    pointless-statement,
     no-member,
     fixme,
 
diff --git a/clients/python-wrapper/tests/integration/test_branch.py b/clients/python-wrapper/tests/integration/test_branch.py
index 8dcbfb65a4a..2fa763e8947 100644
--- a/clients/python-wrapper/tests/integration/test_branch.py
+++ b/clients/python-wrapper/tests/integration/test_branch.py
@@ -50,9 +50,8 @@ def test_reset_changes(setup_repo):
 def test_delete_object_changes(setup_repo):
     _, repo = setup_repo
     test_branch = repo.branch("main")
-    path_and_data = ["a", "b", "bar/a", "bar/b", "bar/c", "c", "foo/a", "foo/b", "foo/c", ]
-    for s in path_and_data:
-        test_branch.object(s).upload(s)
+    path_and_data = ["a", "b", "bar/a", "bar/b", "bar/c", "c", "foo/a", "foo/b", "foo/c"]
+    upload_data(test_branch, path_and_data)
     test_branch.commit("add some files", {"test_key": "test_value"})
 
     test_branch.delete_objects("foo/a")
diff --git a/clients/python-wrapper/tests/integration/test_reference.py b/clients/python-wrapper/tests/integration/test_reference.py
index e0a749b0058..82d2244382e 100644
--- a/clients/python-wrapper/tests/integration/test_reference.py
+++ b/clients/python-wrapper/tests/integration/test_reference.py
@@ -57,3 +57,24 @@ def test_reference_merge_into(setup_branch_with_commits):
     branch.merge_into(other_branch.id, message="Merge2")
     assert other_branch.commit_message() == "Merge2"
     assert list(other_branch.log(max_amount=3))[2].id == commits[0].id
+
+
+def test_reference_objects(setup_repo):
+    _, repo = setup_repo
+    test_branch = repo.branch("main")
+    path_and_data = ["a", "b", "bar/a", "bar/b", "bar/c", "c", "foo/a", "foo/b", "foo/c"]
+    for s in path_and_data:
+        test_branch.object(s).upload(s)
+
+    objects = list(test_branch.objects())
+    assert len(objects) == len(path_and_data)
+    for obj in objects:
+        assert obj.path in path_and_data
+
+    expected = ["a", "b", "bar/", "c", "foo/"]
+    i = 0
+    for obj in test_branch.objects(delimiter='/'):
+        i += 1
+        assert obj.path in expected
+
+    assert i == len(expected)
diff --git a/clients/python-wrapper/tests/utests/test_namedtuple.py b/clients/python-wrapper/tests/utests/test_namedtuple.py
index 265187208f2..ecf42fe8804 100644
--- a/clients/python-wrapper/tests/utests/test_namedtuple.py
+++ b/clients/python-wrapper/tests/utests/test_namedtuple.py
@@ -36,10 +36,10 @@ def test_namedtuple():
     assert not nt2.field3
 
     with expect_exception_context(AttributeError):
-        nt2.field4
+        nt2.field4  # pylint: disable=pointless-statement
 
     with expect_exception_context(AttributeError):
-        nt2.field5
+        nt2.field5  # pylint: disable=pointless-statement
 
     # Verify extra kwargs are in 'unknown' dict
     assert nt2.unknown['field4'] == "something"
diff --git a/clients/python-wrapper/tests/utests/test_reference.py b/clients/python-wrapper/tests/utests/test_reference.py
index 8ba925d9b8e..830511c3a95 100644
--- a/clients/python-wrapper/tests/utests/test_reference.py
+++ b/clients/python-wrapper/tests/utests/test_reference.py
@@ -1,8 +1,8 @@
 import lakefs_sdk
 
-from tests.utests.common import get_test_client
-
+from lakefs import ObjectInfo, CommonPrefix
 from lakefs.repository import Repository
+from tests.utests.common import get_test_client, expect_exception_context
 
 
 def get_test_ref():
@@ -118,3 +118,47 @@ def monkey_diff_refs(*_, **__):
         idx = 0
         max_amount = pages * items_per_page * 2
         assert len(list(ref.diff(other_ref="other_ref", max_amount=max_amount))) == pages * items_per_page
+
+
+def test_reference_objects(monkeypatch):
+    ref = get_test_ref()
+    with monkeypatch.context():
+        def monkey_list_objects(*_, **__):
+            results = []
+            for i in range(10):
+                if i % 2:
+                    results.append(lakefs_sdk.ObjectStats(
+                        path=f"path-{i}",
+                        path_type="object",
+                        physical_address=f"address-{i}",
+                        checksum=f"{i}",
+                        size_bytes=i,
+                        mtime=i,
+                    ))
+                else:
+                    results.append(lakefs_sdk.ObjectStats(
+                        path=f"path-{i}",
+                        path_type="common_prefix",
+                        physical_address="?",
+                        checksum="",
+                        mtime=i,
+                    ))
+            return lakefs_sdk.ObjectStatsList(pagination=lakefs_sdk.Pagination(
+                has_more=False,
+                next_offset="",
+                max_per_page=1,
+                results=1),
+                results=results)
+
+        monkeypatch.setattr(ref._client.sdk_client.objects_api, "list_objects", monkey_list_objects)
+
+        for i, item in enumerate(ref.objects()):
+            if i % 2:
+                assert isinstance(item, ObjectInfo)
+                assert item.size_bytes == i
+            else:
+                assert isinstance(item, CommonPrefix)
+                with expect_exception_context(AttributeError):
+                    item.checksum  # pylint: disable=pointless-statement
+
+            assert item.path == f"path{i}"

From ddb77468136eb6683595f4ce3166b3d702c5ee20 Mon Sep 17 00:00:00 2001
From: Nir Ozery <nir.ozery@treeverse.io>
Date: Mon, 4 Dec 2023 14:04:52 +0000
Subject: [PATCH 2/4] fix test

---
 clients/python-wrapper/tests/utests/test_reference.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clients/python-wrapper/tests/utests/test_reference.py b/clients/python-wrapper/tests/utests/test_reference.py
index 830511c3a95..1e36e261964 100644
--- a/clients/python-wrapper/tests/utests/test_reference.py
+++ b/clients/python-wrapper/tests/utests/test_reference.py
@@ -161,4 +161,4 @@ def monkey_list_objects(*_, **__):
                 with expect_exception_context(AttributeError):
                     item.checksum  # pylint: disable=pointless-statement
 
-            assert item.path == f"path{i}"
+            assert item.path == f"path-{i}"

From 448674d2a5165ea721f9752f7d08f11c9dcf0e26 Mon Sep 17 00:00:00 2001
From: Nir Ozery <nir.ozery@treeverse.io>
Date: Tue, 5 Dec 2023 08:32:15 +0000
Subject: [PATCH 3/4] fix test 2

---
 clients/python-wrapper/tests/integration/test_sanity.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clients/python-wrapper/tests/integration/test_sanity.py b/clients/python-wrapper/tests/integration/test_sanity.py
index bfe9efdfa58..a45ffbbe6fa 100644
--- a/clients/python-wrapper/tests/integration/test_sanity.py
+++ b/clients/python-wrapper/tests/integration/test_sanity.py
@@ -110,7 +110,6 @@ def test_object_sanity(setup_repo):
 
     stats = obj.stat()
     assert stats.path == path == obj.path
-    assert stats.path_type == "object"
     assert stats.mtime <= time.time()
     assert stats.size_bytes == len(data)
     assert stats.metadata == metadata

From 13869e6630b8ce19705439f4912e5849ec153ca3 Mon Sep 17 00:00:00 2001
From: Nir Ozery <nir.ozery@treeverse.io>
Date: Tue, 5 Dec 2023 20:51:32 +0000
Subject: [PATCH 4/4] CF Fixes

---
 clients/python-wrapper/lakefs/import_manager.py | 5 +++--
 clients/python-wrapper/lakefs/models.py         | 2 +-
 clients/python-wrapper/lakefs/reference.py      | 3 ++-
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/clients/python-wrapper/lakefs/import_manager.py b/clients/python-wrapper/lakefs/import_manager.py
index 16a3a2d81c2..e543cb7fdee 100644
--- a/clients/python-wrapper/lakefs/import_manager.py
+++ b/clients/python-wrapper/lakefs/import_manager.py
@@ -10,7 +10,7 @@
 
 import lakefs_sdk
 
-from lakefs.models import ImportStatus, _OBJECT, _PREFIX
+from lakefs.models import ImportStatus, _OBJECT, _COMMON_PREFIX
 from lakefs.client import Client, DEFAULT_CLIENT
 from lakefs.exceptions import ImportManagerException, api_exception_handler
 
@@ -54,7 +54,8 @@ def prefix(self, object_store_uri: str, destination: str) -> ImportManager:
         :param destination: The destination prefix relative to the branch
         :return: The ImportManager instance (self) after update, to allow operations chaining
         """
-        self.sources.append(lakefs_sdk.ImportLocation(type=_PREFIX, path=object_store_uri, destination=destination))
+        self.sources.append(
+            lakefs_sdk.ImportLocation(type=_COMMON_PREFIX, path=object_store_uri, destination=destination))
         return self
 
     def object(self, object_store_uri: str, destination: str) -> ImportManager:
diff --git a/clients/python-wrapper/lakefs/models.py b/clients/python-wrapper/lakefs/models.py
index 3936aeac67f..40a1f06345c 100644
--- a/clients/python-wrapper/lakefs/models.py
+++ b/clients/python-wrapper/lakefs/models.py
@@ -9,7 +9,7 @@
 
 from lakefs.namedtuple import LenientNamedTuple
 
-_PREFIX = "common_prefix"
+_COMMON_PREFIX = "common_prefix"
 _OBJECT = "object"
 
 
diff --git a/clients/python-wrapper/lakefs/reference.py b/clients/python-wrapper/lakefs/reference.py
index 23b2342cb5a..79d8aa504bc 100644
--- a/clients/python-wrapper/lakefs/reference.py
+++ b/clients/python-wrapper/lakefs/reference.py
@@ -49,7 +49,8 @@ def objects(self,
                 delimiter: Optional[str] = None,
                 **kwargs) -> Generator[StoredObject | CommonPrefix]:
         """
-        Returns an object generator for this reference
+        Returns an object generator for this reference, the generator can yield either a StoredObject or a CommonPrefix
+        object depending on the listing parameters provided.
 
         :param max_amount: Stop showing changes after this amount
         :param after: Return items after this value