diff --git a/clients/python-wrapper/lakefs/branch.py b/clients/python-wrapper/lakefs/branch.py
index f5246f778f4..e2efb094359 100644
--- a/clients/python-wrapper/lakefs/branch.py
+++ b/clients/python-wrapper/lakefs/branch.py
@@ -21,12 +21,12 @@ class Branch(Reference):
Class representing a branch in lakeFS.
"""
- def _get_commit(self):
+ def get_commit(self):
"""
For branches override the default _get_commit method to ensure we always fetch the latest head
"""
self._commit = None
- return super()._get_commit()
+ return super().get_commit()
def create(self, source_reference_id: str | Reference, exist_ok: bool = False) -> Branch:
"""
@@ -137,7 +137,7 @@ def object(self, path: str) -> WriteableObject:
return WriteableObject(self.repo_id, self._id, path, client=self._client)
- def uncommitted(self, max_amount: Optional[int], after: Optional[str] = None, prefix: Optional[str] = None,
+ def uncommitted(self, max_amount: Optional[int] = None, after: Optional[str] = None, prefix: Optional[str] = None,
**kwargs) -> Generator[Change]:
"""
Returns a diff generator of uncommitted changes on this branch
diff --git a/clients/python-wrapper/lakefs/client.py b/clients/python-wrapper/lakefs/client.py
index 9e52430a070..2d608671b3b 100644
--- a/clients/python-wrapper/lakefs/client.py
+++ b/clients/python-wrapper/lakefs/client.py
@@ -1,12 +1,7 @@
"""
lakeFS Client module
-
Handles authentication against the lakeFS server and wraps the underlying lakefs_sdk client.
-The client module holds a DefaultClient which will attempt to initialize on module loading using
-environment credentials.
-In case no credentials exist, a call to init() will be required or a Client object must be created explicitly
-
"""
from __future__ import annotations
diff --git a/clients/python-wrapper/lakefs/namedtuple.py b/clients/python-wrapper/lakefs/namedtuple.py
index aa9db5d20a2..b392d4a9c41 100644
--- a/clients/python-wrapper/lakefs/namedtuple.py
+++ b/clients/python-wrapper/lakefs/namedtuple.py
@@ -48,3 +48,10 @@ def __eq__(self, other):
return False
return True
+
+ def __str__(self):
+ fields = {}
+ for k, v in self.__dict__.items():
+ if k != "unknown" and k[0] != "_": # Filter internal and unknown fields
+ fields[k] = v
+ return str(fields)
diff --git a/clients/python-wrapper/lakefs/object.py b/clients/python-wrapper/lakefs/object.py
index 05146f5d48f..1fd66f6673a 100644
--- a/clients/python-wrapper/lakefs/object.py
+++ b/clients/python-wrapper/lakefs/object.py
@@ -660,7 +660,7 @@ def __repr__(self):
def upload(self,
data: str | bytes,
- mode: WriteModes = 'wb',
+ mode: WriteModes = 'w',
pre_sign: Optional[bool] = None,
content_type: Optional[str] = None,
metadata: Optional[dict[str, str]] = None) -> WriteableObject:
diff --git a/clients/python-wrapper/lakefs/reference.py b/clients/python-wrapper/lakefs/reference.py
index 901e1acc3f1..650d82ef822 100644
--- a/clients/python-wrapper/lakefs/reference.py
+++ b/clients/python-wrapper/lakefs/reference.py
@@ -86,31 +86,20 @@ def log(self, max_amount: Optional[int] = None, **kwargs) -> Generator[Commit]:
max_amount=max_amount, **kwargs):
yield Commit(**res.dict())
- def _get_commit(self):
+ def get_commit(self) -> Commit:
+ """
+ Returns the underlying commit referenced by this reference id
+
+ :raise NotFoundException: if this reference does not exist
+ :raise NotAuthorizedException: if user is not authorized to perform this operation
+ :raise ServerException: for any other errors
+ """
if self._commit is None:
with api_exception_handler():
commit = self._client.sdk_client.commits_api.get_commit(self._repo_id, self._id)
self._commit = Commit(**commit.dict())
return self._commit
- def metadata(self) -> dict[str, str]:
- """
- Return commit metadata for this reference id
- """
- return self._get_commit().metadata
-
- def commit_message(self) -> str:
- """
- Return commit message for this reference id
- """
- return self._get_commit().message
-
- def commit_id(self) -> str:
- """
- Return commit id for this reference id
- """
- return self._get_commit().id
-
def diff(self,
other_ref: str | Reference,
max_amount: Optional[int] = None,
diff --git a/clients/python-wrapper/lakefs/repository.py b/clients/python-wrapper/lakefs/repository.py
index 5a2f9459bb4..ce0ac52c489 100644
--- a/clients/python-wrapper/lakefs/repository.py
+++ b/clients/python-wrapper/lakefs/repository.py
@@ -171,6 +171,9 @@ def id(self) -> str:
def __repr__(self) -> str:
return f'Repository(id="{self.id}")'
+ def __str__(self):
+ return str(self.properties)
+
def repositories(client: Client = None,
prefix: Optional[str] = None,
diff --git a/clients/python-wrapper/tests/integration/test_import.py b/clients/python-wrapper/tests/integration/test_import.py
index 845ef5d9faa..47d4e53160c 100644
--- a/clients/python-wrapper/tests/integration/test_import.py
+++ b/clients/python-wrapper/tests/integration/test_import.py
@@ -36,7 +36,7 @@ def test_import_manager(setup_repo):
res = mgr.run()
assert res.error is None
assert res.completed
- assert res.commit.id == branch.commit_id()
+ assert res.commit.id == branch.get_commit().id
assert res.commit.message == "my imported data"
assert res.commit.metadata.get("foo") == "bar"
assert res.ingested_objects == 0
@@ -59,7 +59,7 @@ def test_import_manager(setup_repo):
assert res.error is None
assert res.completed
- assert res.commit.id == branch.commit_id()
+ assert res.commit.id == branch.get_commit().id
assert res.commit.message == mgr.commit_message
assert res.commit.metadata.get("foo") is None
assert res.ingested_objects == 4207
@@ -73,8 +73,8 @@ def test_import_manager_cancel(setup_repo):
clt, repo = setup_repo
skip_on_unsupported_blockstore(clt, "s3")
branch = repo.branch("import-branch").create("main")
- expected_commit_id = branch.commit_id()
- expected_commit_message = branch.commit_message()
+ expected_commit_id = branch.get_commit().id
+ expected_commit_message = branch.get_commit().message
mgr = branch.import_data(commit_message="my imported data", metadata={"foo": "bar"})
mgr.prefix(_IMPORT_PATH, "import/")
@@ -88,8 +88,8 @@ def test_import_manager_cancel(setup_repo):
mgr.cancel()
status = mgr.status()
- assert branch.commit_id() == expected_commit_id
- assert branch.commit_message() == expected_commit_message
+ assert branch.get_commit().id == expected_commit_id
+ assert branch.get_commit().message == expected_commit_message
assert not status.completed
assert "Canceled" in status.error.message
assert len(mgr.sources) == 1
diff --git a/clients/python-wrapper/tests/integration/test_reference.py b/clients/python-wrapper/tests/integration/test_reference.py
index 82d2244382e..332b5b4b5f0 100644
--- a/clients/python-wrapper/tests/integration/test_reference.py
+++ b/clients/python-wrapper/tests/integration/test_reference.py
@@ -19,7 +19,7 @@ def test_reference_diff(setup_branch_with_commits):
branch = setup_branch_with_commits
commits = list(branch.log(max_amount=2))
- assert len(list(branch.diff(branch.commit_id()))) == 0
+ assert len(list(branch.diff(branch.get_commit().id))) == 0
changes = list(branch.diff(commits[0].id, type="two_dot"))
assert len(changes) == 0
@@ -51,11 +51,11 @@ def test_reference_merge_into(setup_branch_with_commits):
other_branch = repo.branch("test_reference_merge_into").create(main)
ref = repo.ref(commits[1].id)
ref.merge_into(other_branch, message="Merge1")
- assert other_branch.commit_message() == "Merge1"
+ assert other_branch.get_commit().message == "Merge1"
assert list(other_branch.log(max_amount=2))[1].id == commits[1].id
branch.merge_into(other_branch.id, message="Merge2")
- assert other_branch.commit_message() == "Merge2"
+ assert other_branch.get_commit().message == "Merge2"
assert list(other_branch.log(max_amount=3))[2].id == commits[0].id
diff --git a/clients/python-wrapper/tests/integration/test_sanity.py b/clients/python-wrapper/tests/integration/test_sanity.py
index 920d264c2a5..f68a15af708 100644
--- a/clients/python-wrapper/tests/integration/test_sanity.py
+++ b/clients/python-wrapper/tests/integration/test_sanity.py
@@ -59,8 +59,8 @@ def test_ref_sanity(setup_repo):
ref = repo.ref(ref_id)
assert ref.repo_id == repo.properties.id
assert ref.id == ref_id
- assert ref.metadata() == {}
- assert ref.commit_message() == "Repository created"
+ assert ref.get_commit().metadata == {}
+ assert ref.get_commit().message == "Repository created"
def test_tag_sanity(setup_repo):
@@ -70,14 +70,14 @@ def test_tag_sanity(setup_repo):
# expect not found
with expect_exception_context(NotFoundException):
- tag.commit_message()
+ tag.get_commit()
commit = repo.commit("main")
res = tag.create(commit.id)
assert res == tag
assert tag.id == tag_name
- assert tag.metadata() == commit.metadata()
- assert tag.commit_message() == commit.commit_message()
+ assert tag.get_commit().metadata == commit.get_commit().metadata
+ assert tag.get_commit().message == commit.get_commit().message
# Create again
with expect_exception_context(ConflictException):
@@ -92,7 +92,7 @@ def test_tag_sanity(setup_repo):
# expect not found
with expect_exception_context(NotFoundException):
- tag.metadata()
+ tag.get_commit()
# Delete twice
with expect_exception_context(NotFoundException):
diff --git a/docs/integrations/python.md b/docs/integrations/python.md
index 044bb244213..4ba86adeef0 100644
--- a/docs/integrations/python.md
+++ b/docs/integrations/python.md
@@ -13,15 +13,19 @@ redirect_from:
{% include toc_2-3.html %}
-**Improved Python SDK** Newbeta
-Check-out the newly released [Python SDK library](https://pydocs-lakefs.lakefs.io/) - providing simpler interface and improved user experience!
-*OR* continue reading for the current Python SDK.
+**High Level Python SDK** New
+We've just released a new High Level Python SDK library, and we're super excited to tell you about it! Continue reading to get the
+full story!
+Though our previous SDK client is still supported and maintained, we highly recommend using the new High Level SDK.
+**For previous Python SDKs follow these links:**
+[lakefs-sdk](https://pydocs-sdk.lakefs.io)
+[legacy-sdk](https://pydocs.lakefs.io) (Deprecated)
{: .note }
There are two primary ways to work with lakeFS from Python:
-* [Use Boto](#using-boto) to perform **object operations** through the lakeFS S3 gateway.
-* [Use the lakeFS SDK](#using-the-lakefs-sdk) to perform **versioning** and other lakeFS-specific operations.
+* [Use Boto](#using-boto) to perform **object operations** through the **lakeFS S3 gateway**.
+* [Use the High Level lakeFS SDK](#using-the-lakefs-sdk) to perform **object operations**, **versioning** and other **lakeFS-specific operations**.
## Using the lakeFS SDK
@@ -31,29 +35,31 @@ Install the Python client using pip:
```shell
-pip install 'lakefs_sdk~=1.0'
+pip install lakefs
```
### Initializing
+The High Level SDK by default will try to collect authentication parameters from the environment and attempt to create a default client.
+When working in an environment where **lakectl** is configured it is not necessary to instantiate a lakeFS client or provide it for creating the lakeFS objects.
+In case no authentication parameters exist, it is also possible to explicitly create a lakeFS client
+
Here's how to instantiate a client:
```python
-import lakefs_sdk
-from lakefs_sdk.client import LakeFSClient
+from lakefs.client import Client
-configuration = lakefs_sdk.Configuration(
+clt = Client(
host="http://localhost:8000",
username="AKIAIOSFODNN7EXAMPLE",
password="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
)
-client = LakeFSClient(configuration)
-```
+```
For testing SSL endpoints you may wish to use a self-signed certificate. If you do this and receive an `SSL: CERTIFICATE_VERIFY_FAILED` error message you might add the following configuration to your client:
```python
-configuration.verify_ssl = False
+clt.config.verify_ssl = False
```
{: .warning }
@@ -64,62 +70,52 @@ production setting.
Optionally, to enable communication via proxies, simply set the proxy configuration:
```python
-configuration.ssl_ca_cert = # Set this to customize the certificate file to verify the peer
-configuration.proxy =
-```
+clt.config.ssl_ca_cert = # Set this to customize the certificate file to verify the peer
+clt.config.proxy =
+```
### Usage Examples
-Now that you have a client object, you can use it to interact with the API.
-
+Lets see how we can interact with lakeFS using the High Level SDK.
-To shorten the code example, let's create a helper function that will iterate over all results of a paginated API:
+#### Creating a repository
```python
-def pagination_helper(page_fetcher, **kwargs):
- """Helper function to iterate over paginated results"""
- while True:
- resp = page_fetcher(**kwargs)
- yield from resp.results
- if not resp.pagination.has_more:
- break
- kwargs['after'] = resp.pagination.next_offset
+import lakefs
+
+repo = lakefs.repository("example-repo").create(storage_namespace="s3://storage-bucket/repos/example-repo")
+print(repo)
```
-#### Creating a repository
+If using an explicit client, create the Repository object and pass the client to it (note the changed syntax).
```python
-import lakefs_sdk
-from lakefs_sdk.models import RepositoryCreation
-from lakefs_sdk.client import LakeFSClient
-
-# ... client ...
+import lakefs
+from lakefs.client import Client
-resp = client.repositories_api.create_repository(
- RepositoryCreation(
- name="example-repo",
- storage_namespace="s3://storage-bucket/repos/example-repo",
- )
+clt = Client(
+ host="http://localhost:8000",
+ username="AKIAIOSFODNN7EXAMPLE",
+ password="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
)
-print(resp)
+
+repo = lakefs.Repository("example-repo", client=clt).create(storage_namespace="s3://storage-bucket/repos/example-repo")
+print(repo)
```
#### Output
```
-id='example-repo' creation_date=1697815536 default_branch='main' storage_namespace='s3://storage-bucket/repos/example-repo'
+{id: 'example-repo', creation_date: 1697815536, default_branch: 'main', storage_namespace: 's3://storage-bucket/repos/example-repo'}
```
#### List repositories
```python
-import lakefs_sdk
-from lakefs_sdk.client import LakeFSClient
-
-# ... client and pagination_helper ...
+import lakefs
print("Listing repositories:")
-for repo in pagination_helper(client.repositories_api.list_repositories):
+for repo in lakefs.repositories():
print(repo)
```
@@ -127,23 +123,19 @@ for repo in pagination_helper(client.repositories_api.list_repositories):
#### Output
```
Listing repositories:
-id='example-repo' creation_date=1697815536 default_branch='main' storage_namespace='s3://storage-bucket/repos/example-repo'
+{id: 'example-repo', creation_date: 1697815536, default_branch: 'main', storage_namespace: 's3://storage-bucket/repos/example-repo'}
```
#### Creating a branch
```python
-import lakefs_sdk
-from lakefs_sdk.models import BranchCreation
-from lakefs_sdk.client import LakeFSClient
+import lakefs
-# ... client ...
+branch1 = lakefs.repository("example-repo").branch("experiment1").create(source_reference_id="main")
+print("experiment1 ref:", branch1.get_commit().id)
-ref1 = client.branches_api.create_branch('example-repo', BranchCreation(name='experiment1', source='main'))
-print("experiment1 ref:", ref1)
-
-ref2 = client.branches_api.create_branch('example-repo', BranchCreation(name='experiment2', source='main'))
-print("experiment2 ref:", ref2)
+branch2 = lakefs.repository("example-repo").branch("experiment2").create(source_reference_id="main")
+print("experiment2 ref:", branch2.get_commit().id)
```
#### Output
@@ -155,27 +147,51 @@ experiment2 ref: 7a300b41a8e1ca666c653171a364c08f640549c24d7e82b401bf077c646f885
### List branches
```python
-import lakefs_sdk
-from lakefs_sdk.client import LakeFSClient
-
-# .. client and pagination_helper
+import lakefs
-for branch in pagination_helper(client.branches_api.list_branches, repository='example-repo'):
+for branch in lakefs.repository("example-repo").branches():
print(branch)
```
#### Output
```
-id='experiment1' commit_id='7a300b41a8e1ca666c653171a364c08f640549c24d7e82b401bf077c646f8859'
-id='experiment2' commit_id='7a300b41a8e1ca666c653171a364c08f640549c24d7e82b401bf077c646f8859'
-id='main' commit_id='7a300b41a8e1ca666c653171a364c08f640549c24d7e82b401bf077c646f8859'
+experiment1
+experiment2
+main
```
+## IO
+
+Great, now lets see some IO operations in action!
+The new High Level SDK provides IO semantics which allow working with lakeFS objects as if they were files in your
+filesystem. This is extremely useful when working with data transformation packages that accept file descriptors and streams.
+
### Upload
-Great. Now, let's upload some data:
-Generate "sample_data.csv" or use your own data
+A simple way to upload data is to use the `upload` method which accepts contents as `str/bytes`
+
+```python
+obj = branch1.object(path="text/sample_data.txt").upload(content_type="text/plain", data="This is my object data")
+print(obj.stats())
+```
+
+#### Output
+```
+{'path': 'text/sample_data.txt', 'physical_address': 's3://storage-bucket/repos/example-repo/data/gke0ignnl531fa6k90p0/ckpfk4fnl531fa6k90pg', 'physical_address_expiry': None, 'checksum': '4a09d10820234a95bb548f14e4435bba', 'size_bytes': 15, 'mtime': 1701865289, 'metadata': {}, 'content_type': 'text/plain'}
+```
+
+Reading the data is just as simple:
+```python
+print(obj.reader(mode='r').read())
+```
+
+#### Output
+```
+This is my object data
+```
+
+Now let's generate a "sample_data.csv" file and write it directly to a lakeFS writer object
```python
import csv
@@ -186,106 +202,78 @@ sample_data = [
[3, "Carol", "carol@example.com"],
]
-with open("sample_data.csv", "w", newline="") as csvfile:
- writer = csv.writer(csvfile)
+obj = branch1.object(path="csv/sample_data.csv")
+
+with obj.writer(mode='w', pre_sign=True, content_type="text/csv") as fd:
+ writer = csv.writer(fd)
writer.writerow(["ID", "Name", "Email"])
for row in sample_data:
writer.writerow(row)
```
-Upload the data file by passing the filename as content:
+On context exit the object will be uploaded to lakeFS
```python
-import lakefs_sdk
-from lakefs_sdk.client import LakeFSClient
-
-# ... client ...
-
-resp = client.objects_api.upload_object(repository="example-repo", branch="experiment1", path="csv/sample_data.csv", content="sample_data.csv")
-print(resp)
+print(obj.stats())
```
#### Output
```
-path='csv/sample_data.csv' path_type='object' physical_address='s3://storage-bucket/repos/example-repo/data/gke0ignnl531fa6k90p0/ckpfk4fnl531fa6k90pg' physical_address_expiry=None checksum='b6b6a1a17ff85291376ae6a5d7fa69d0' size_bytes=92 mtime=1697839635 metadata=None content_type='text/csv'
+{'path': 'csv/sample_data.csv', 'physical_address': 's3://storage-bucket/repos/example-repo/data/gke0ignnl531fa6k90p0/ckpfk4fnl531fa6k90pg', 'physical_address_expiry': None, 'checksum': 'f181262c138901a74d47652d5ea72295', 'size_bytes': 88, 'mtime': 1701865939, 'metadata': {}, 'content_type': 'text/csv'}
```
-We can also upload content a bytes:
+We can also upload raw byte contents:
```python
-import lakefs_sdk
-from lakefs_sdk.client import LakeFSClient
-
-# ... client ...
-
-resp = client.objects_api.upload_object(repository="example-repo", branch="experiment1", path="raw/file1.data", content=b"Hello Object World")
-print(resp)
+obj = branch1.object(path="raw/file1.data").upload(data=b"Hello Object World", pre_sign=True)
+print(obj.stats())
```
#### Output
```
-path='rawv/file1.data' path_type='object' physical_address='s3://storage-bucket/repos/example-repo/data/gke0ignnl531fa6k90p0/ckpfltvnl531fa6k90q0' physical_address_expiry=None checksum='0ef432f8eb0305f730b0c57bbd7a6b08' size_bytes=18 mtime=1697839863 metadata=None content_type='application/octet-stream
+{'path': 'raw/file1.data', 'physical_address': 's3://storage-bucket/repos/example-repo/data/gke0ignnl531fa6k90p0/ckpfltvnl531fa6k90q0', 'physical_address_expiry': None, 'checksum': '0ef432f8eb0305f730b0c57bbd7a6b08', 'size_bytes': 18, 'mtime': 1701866323, 'metadata': {}, 'content_type': 'application/octet-stream'}
```
### Uncommitted changes
-Diffing a single branch will show all the uncommitted changes on that branch:
+Using the branch `uncommitted` method will show all the uncommitted changes on that branch:
```python
-import lakefs_sdk
-from lakefs_sdk.client import LakeFSClient
-
-# ... client and pagination_helper ...
-
-for diff in pagination_helper(client.branches_api.diff_branch, repository='example-repo', branch='experiment1'):
+for diff in branch1.uncommitted():
print(diff)
-
```
#### Output
```
-type='added' path='csv/sample_data.csv' path_type='object' size_bytes=92
-type='added' path='raw/file1.data' path_type='object' size_bytes=18
+{'type': 'added', 'path': 'text/sample_data.txt', 'path_type': 'object', 'size_bytes': 15}
+{'type': 'added', 'path': 'csv/sample_data.csv', 'path_type': 'object', 'size_bytes': 88}
+{'type': 'added', 'path': 'raw/file1.data', 'path_type': 'object', 'size_bytes': 18}
+
```
As expected, our change appears here. Let's commit it and attach some arbitrary metadata:
```python
-import lakefs_sdk
-from lakefs_sdk.models import CommitCreation
-from lakefs_sdk.client import LakeFSClient
-
-# ... client ...
-
-resp = client.commits_api.commit(
- repository='example-repo',
- branch='experiment1',
- commit_creation=CommitCreation(message='Add some data!', metadata={'using': 'python_api'})
-)
-print(resp)
+ref = branch1.commit(message='Add some data!', metadata={'using': 'python_sdk'})
+print(ref.get_commit())
```
#### Output
```
-id='d51b2428106921fcb893813b1eb668b46284067bb5264d89ed409ccb95676e3d' parents=['7a300b41a8e1ca666c653171a364c08f640549c24d7e82b401bf077c646f8859'] committer='barak' message='Add some data!' creation_date=1697884139 meta_range_id='' metadata={'using': 'python_api'}
+{'id': 'c4666db80d2a984b4eab8ce02b6a60830767eba53995c26350e0ad994e15fedb', 'parents': ['a7a092a5a32a2cd97f22abcc99414f6283d29f6b9dd2725ce89f90188c5901e5'], 'committer': 'admin', 'message': 'Add some data!', 'creation_date': 1701866838, 'meta_range_id': '999bedeab1b740f83d2cf8c52548d55446f9038c69724d399adc4438412cade2', 'metadata': {'using': 'python_sdk'}}
+
```
-Calling diff again on the same branch, this time there should be no uncommitted files:
+Calling `uncommitted` again on the same branch, this time there should be no uncommitted files:
```python
-import lakefs_sdk
-from lakefs_sdk.client import LakeFSClient
-
-# ... client ...
-
-resp = client.branches_api.diff_branch(repository='example-repo', branch='experiment1')
-print(resp)
+print(len(list(branch1.uncommitted())))
```
#### Output
```
-pagination=Pagination(has_more=False, next_offset='', results=0, max_per_page=1000) results=[]
+0
```
#### Merging changes from a branch into main
@@ -293,68 +281,46 @@ pagination=Pagination(has_more=False, next_offset='', results=0, max_per_page=10
Let's diff between your branch and the main branch:
```python
-import lakefs_sdk
-from lakefs_sdk.client import LakeFSClient
-
-# ... client and pagination_helper ...
-
-for diff in pagination_helper(client.refs_api.diff_refs, repository='example-repo', left_ref='main', right_ref='experiment1'):
- print(diff)
-
+main = repo.branch("main")
+for diff in main.diff(other_ref=branch1):
+ print(diff)
```
#### Output
```
-type='added' path='csv/sample_data.csv' path_type='object' size_bytes=92
-type='added' path='raw/file1.data' path_type='object' size_bytes=18
+{'type': 'added', 'path': 'text/sample_data.txt', 'path_type': 'object', 'size_bytes': 15}
+{'type': 'added', 'path': 'csv/sample_data.csv', 'path_type': 'object', 'size_bytes': 88}
+{'type': 'added', 'path': 'raw/file1.data', 'path_type': 'object', 'size_bytes': 18}
```
-Looks like you have a change. Let's merge it:
+Looks like we have some changes. Let's merge them:
```python
-client.refs_api.merge_into_branch(repository='example-repo', source_ref='experiment-aggregations1', destination_branch='main')
+res = branch1.merge_into(main)
+print(res)
# output:
-# {'reference': 'd0414a3311a8c1cef1ef355d6aca40db72abe545e216648fe853e25db788fa2e',
-# 'summary': {'added': 1, 'changed': 0, 'conflict': 0, 'removed': 0}}
+# cfddb68b7265ae0b17fafa1a2068f8414395e0a8b8bc0f8d741cbcce1e67e394
```
Let's diff again - there should be no changes as all changes are on our main branch already:
```python
-import lakefs_sdk
-from lakefs_sdk.client import LakeFSClient
-
-# ... client ...
-
-resp = client.refs_api.merge_into_branch(
- repository='example-repo',
- source_ref='experiment1',
- destination_branch='main')
-print(resp)
+print(len(list(main.diff(other_ref=branch1))))
```
#### Output
```
-reference='a3ea99167a25748cf1d33ba284bda9c1400a8acfae8477562032d2b2435fd37b'
+0
```
### Read data from main branch
```python
import csv
-from io import StringIO
-import lakefs_sdk
-from lakefs_sdk.client import LakeFSClient
-
-# ... client ...
-resp = client.objects_api.get_object(
- repository='example-repo',
- ref='main',
- path='csv/sample_data.csv')
+obj = main.object(path="csv/sample_data.csv")
-data = StringIO(resp.decode('utf-8'))
-for row in csv.reader(data):
+for row in csv.reader(obj.reader(mode='r')):
print(row)
```
@@ -366,14 +332,9 @@ for row in csv.reader(data):
['3', 'Carol', 'carol@example.com']
```
-### Python Client documentation
-
-For the documentation of lakeFS’s Python package, see [https://pydocs-sdk.lakefs.io](https://pydocs-sdk.lakefs.io)
-
-
-### Full API reference
+### Python SDK documentation and API reference
-For a full reference of the lakeFS API, see [lakeFS API]({% link reference/api.md %})
+For the documentation of lakeFS’s Python package and full api reference, see [https://pydocs-lakefs.lakefs.io](https://pydocs-lakefs.lakefs.io)
## Using Boto