diff --git a/clients/python-wrapper/lakefs/branch.py b/clients/python-wrapper/lakefs/branch.py index f5246f778f4..e2efb094359 100644 --- a/clients/python-wrapper/lakefs/branch.py +++ b/clients/python-wrapper/lakefs/branch.py @@ -21,12 +21,12 @@ class Branch(Reference): Class representing a branch in lakeFS. """ - def _get_commit(self): + def get_commit(self): """ For branches override the default _get_commit method to ensure we always fetch the latest head """ self._commit = None - return super()._get_commit() + return super().get_commit() def create(self, source_reference_id: str | Reference, exist_ok: bool = False) -> Branch: """ @@ -137,7 +137,7 @@ def object(self, path: str) -> WriteableObject: return WriteableObject(self.repo_id, self._id, path, client=self._client) - def uncommitted(self, max_amount: Optional[int], after: Optional[str] = None, prefix: Optional[str] = None, + def uncommitted(self, max_amount: Optional[int] = None, after: Optional[str] = None, prefix: Optional[str] = None, **kwargs) -> Generator[Change]: """ Returns a diff generator of uncommitted changes on this branch diff --git a/clients/python-wrapper/lakefs/client.py b/clients/python-wrapper/lakefs/client.py index 9e52430a070..2d608671b3b 100644 --- a/clients/python-wrapper/lakefs/client.py +++ b/clients/python-wrapper/lakefs/client.py @@ -1,12 +1,7 @@ """ lakeFS Client module - Handles authentication against the lakeFS server and wraps the underlying lakefs_sdk client. -The client module holds a DefaultClient which will attempt to initialize on module loading using -environment credentials. 
-In case no credentials exist, a call to init() will be required or a Client object must be created explicitly - """ from __future__ import annotations diff --git a/clients/python-wrapper/lakefs/namedtuple.py b/clients/python-wrapper/lakefs/namedtuple.py index aa9db5d20a2..b392d4a9c41 100644 --- a/clients/python-wrapper/lakefs/namedtuple.py +++ b/clients/python-wrapper/lakefs/namedtuple.py @@ -48,3 +48,10 @@ def __eq__(self, other): return False return True + + def __str__(self): + fields = {} + for k, v in self.__dict__.items(): + if k != "unknown" and k[0] != "_": # Filter internal and unknown fields + fields[k] = v + return str(fields) diff --git a/clients/python-wrapper/lakefs/object.py b/clients/python-wrapper/lakefs/object.py index 05146f5d48f..1fd66f6673a 100644 --- a/clients/python-wrapper/lakefs/object.py +++ b/clients/python-wrapper/lakefs/object.py @@ -660,7 +660,7 @@ def __repr__(self): def upload(self, data: str | bytes, - mode: WriteModes = 'wb', + mode: WriteModes = 'w', pre_sign: Optional[bool] = None, content_type: Optional[str] = None, metadata: Optional[dict[str, str]] = None) -> WriteableObject: diff --git a/clients/python-wrapper/lakefs/reference.py b/clients/python-wrapper/lakefs/reference.py index 901e1acc3f1..650d82ef822 100644 --- a/clients/python-wrapper/lakefs/reference.py +++ b/clients/python-wrapper/lakefs/reference.py @@ -86,31 +86,20 @@ def log(self, max_amount: Optional[int] = None, **kwargs) -> Generator[Commit]: max_amount=max_amount, **kwargs): yield Commit(**res.dict()) - def _get_commit(self): + def get_commit(self) -> Commit: + """ + Returns the underlying commit referenced by this reference id + + :raise NotFoundException: if this reference does not exist + :raise NotAuthorizedException: if user is not authorized to perform this operation + :raise ServerException: for any other errors + """ if self._commit is None: with api_exception_handler(): commit = self._client.sdk_client.commits_api.get_commit(self._repo_id, self._id) 
self._commit = Commit(**commit.dict()) return self._commit - def metadata(self) -> dict[str, str]: - """ - Return commit metadata for this reference id - """ - return self._get_commit().metadata - - def commit_message(self) -> str: - """ - Return commit message for this reference id - """ - return self._get_commit().message - - def commit_id(self) -> str: - """ - Return commit id for this reference id - """ - return self._get_commit().id - def diff(self, other_ref: str | Reference, max_amount: Optional[int] = None, diff --git a/clients/python-wrapper/lakefs/repository.py b/clients/python-wrapper/lakefs/repository.py index 5a2f9459bb4..ce0ac52c489 100644 --- a/clients/python-wrapper/lakefs/repository.py +++ b/clients/python-wrapper/lakefs/repository.py @@ -171,6 +171,9 @@ def id(self) -> str: def __repr__(self) -> str: return f'Repository(id="{self.id}")' + def __str__(self): + return str(self.properties) + def repositories(client: Client = None, prefix: Optional[str] = None, diff --git a/clients/python-wrapper/tests/integration/test_import.py b/clients/python-wrapper/tests/integration/test_import.py index 845ef5d9faa..47d4e53160c 100644 --- a/clients/python-wrapper/tests/integration/test_import.py +++ b/clients/python-wrapper/tests/integration/test_import.py @@ -36,7 +36,7 @@ def test_import_manager(setup_repo): res = mgr.run() assert res.error is None assert res.completed - assert res.commit.id == branch.commit_id() + assert res.commit.id == branch.get_commit().id assert res.commit.message == "my imported data" assert res.commit.metadata.get("foo") == "bar" assert res.ingested_objects == 0 @@ -59,7 +59,7 @@ def test_import_manager(setup_repo): assert res.error is None assert res.completed - assert res.commit.id == branch.commit_id() + assert res.commit.id == branch.get_commit().id assert res.commit.message == mgr.commit_message assert res.commit.metadata.get("foo") is None assert res.ingested_objects == 4207 @@ -73,8 +73,8 @@ def 
test_import_manager_cancel(setup_repo): clt, repo = setup_repo skip_on_unsupported_blockstore(clt, "s3") branch = repo.branch("import-branch").create("main") - expected_commit_id = branch.commit_id() - expected_commit_message = branch.commit_message() + expected_commit_id = branch.get_commit().id + expected_commit_message = branch.get_commit().message mgr = branch.import_data(commit_message="my imported data", metadata={"foo": "bar"}) mgr.prefix(_IMPORT_PATH, "import/") @@ -88,8 +88,8 @@ def test_import_manager_cancel(setup_repo): mgr.cancel() status = mgr.status() - assert branch.commit_id() == expected_commit_id - assert branch.commit_message() == expected_commit_message + assert branch.get_commit().id == expected_commit_id + assert branch.get_commit().message == expected_commit_message assert not status.completed assert "Canceled" in status.error.message assert len(mgr.sources) == 1 diff --git a/clients/python-wrapper/tests/integration/test_reference.py b/clients/python-wrapper/tests/integration/test_reference.py index 82d2244382e..332b5b4b5f0 100644 --- a/clients/python-wrapper/tests/integration/test_reference.py +++ b/clients/python-wrapper/tests/integration/test_reference.py @@ -19,7 +19,7 @@ def test_reference_diff(setup_branch_with_commits): branch = setup_branch_with_commits commits = list(branch.log(max_amount=2)) - assert len(list(branch.diff(branch.commit_id()))) == 0 + assert len(list(branch.diff(branch.get_commit().id))) == 0 changes = list(branch.diff(commits[0].id, type="two_dot")) assert len(changes) == 0 @@ -51,11 +51,11 @@ def test_reference_merge_into(setup_branch_with_commits): other_branch = repo.branch("test_reference_merge_into").create(main) ref = repo.ref(commits[1].id) ref.merge_into(other_branch, message="Merge1") - assert other_branch.commit_message() == "Merge1" + assert other_branch.get_commit().message == "Merge1" assert list(other_branch.log(max_amount=2))[1].id == commits[1].id branch.merge_into(other_branch.id, message="Merge2") - 
assert other_branch.commit_message() == "Merge2" + assert other_branch.get_commit().message == "Merge2" assert list(other_branch.log(max_amount=3))[2].id == commits[0].id diff --git a/clients/python-wrapper/tests/integration/test_sanity.py b/clients/python-wrapper/tests/integration/test_sanity.py index 920d264c2a5..f68a15af708 100644 --- a/clients/python-wrapper/tests/integration/test_sanity.py +++ b/clients/python-wrapper/tests/integration/test_sanity.py @@ -59,8 +59,8 @@ def test_ref_sanity(setup_repo): ref = repo.ref(ref_id) assert ref.repo_id == repo.properties.id assert ref.id == ref_id - assert ref.metadata() == {} - assert ref.commit_message() == "Repository created" + assert ref.get_commit().metadata == {} + assert ref.get_commit().message == "Repository created" def test_tag_sanity(setup_repo): @@ -70,14 +70,14 @@ def test_tag_sanity(setup_repo): # expect not found with expect_exception_context(NotFoundException): - tag.commit_message() + tag.get_commit() commit = repo.commit("main") res = tag.create(commit.id) assert res == tag assert tag.id == tag_name - assert tag.metadata() == commit.metadata() - assert tag.commit_message() == commit.commit_message() + assert tag.get_commit().metadata == commit.get_commit().metadata + assert tag.get_commit().message == commit.get_commit().message # Create again with expect_exception_context(ConflictException): @@ -92,7 +92,7 @@ def test_tag_sanity(setup_repo): # expect not found with expect_exception_context(NotFoundException): - tag.metadata() + tag.get_commit() # Delete twice with expect_exception_context(NotFoundException): diff --git a/docs/integrations/python.md b/docs/integrations/python.md index 044bb244213..4ba86adeef0 100644 --- a/docs/integrations/python.md +++ b/docs/integrations/python.md @@ -13,15 +13,19 @@ redirect_from: {% include toc_2-3.html %} -**Improved Python SDK** Newbeta -Check-out the newly released [Python SDK library](https://pydocs-lakefs.lakefs.io/) - providing simpler interface and improved 
user experience! -*OR* continue reading for the current Python SDK. +**High Level Python SDK** New +We've just released a new High Level Python SDK library, and we're super excited to tell you about it! Continue reading to get the +full story! +Though our previous SDK client is still supported and maintained, we highly recommend using the new High Level SDK. +**For previous Python SDKs follow these links:** +[lakefs-sdk](https://pydocs-sdk.lakefs.io) +[legacy-sdk](https://pydocs.lakefs.io) (Deprecated) {: .note } There are two primary ways to work with lakeFS from Python: -* [Use Boto](#using-boto) to perform **object operations** through the lakeFS S3 gateway. -* [Use the lakeFS SDK](#using-the-lakefs-sdk) to perform **versioning** and other lakeFS-specific operations. +* [Use Boto](#using-boto) to perform **object operations** through the **lakeFS S3 gateway**. +* [Use the High Level lakeFS SDK](#using-the-lakefs-sdk) to perform **object operations**, **versioning** and other **lakeFS-specific operations**. ## Using the lakeFS SDK @@ -31,29 +35,31 @@ Install the Python client using pip: ```shell -pip install 'lakefs_sdk~=1.0' +pip install lakefs ``` ### Initializing +The High Level SDK by default will try to collect authentication parameters from the environment and attempt to create a default client. +When working in an environment where **lakectl** is configured it is not necessary to instantiate a lakeFS client or provide it for creating the lakeFS objects. 
+In case no authentication parameters exist, it is also possible to explicitly create a lakeFS client + Here's how to instantiate a client: ```python -import lakefs_sdk -from lakefs_sdk.client import LakeFSClient +from lakefs.client import Client -configuration = lakefs_sdk.Configuration( +clt = Client( host="http://localhost:8000", username="AKIAIOSFODNN7EXAMPLE", password="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", ) -client = LakeFSClient(configuration) -``` +``` For testing SSL endpoints you may wish to use a self-signed certificate. If you do this and receive an `SSL: CERTIFICATE_VERIFY_FAILED` error message you might add the following configuration to your client: ```python -configuration.verify_ssl = False +clt.config.verify_ssl= False ``` {: .warning } @@ -64,62 +70,52 @@ production setting. Optionally, to enable communication via proxies, simply set the proxy configuration: ```python -configuration.ssl_ca_cert = # Set this to customize the certificate file to verify the peer -configuration.proxy = -``` +clt.config.ssl_ca_cert = # Set this to customize the certificate file to verify the peer +clt.config.proxy = +``` ### Usage Examples -Now that you have a client object, you can use it to interact with the API. - +Lets see how we can interact with lakeFS using the High Level SDK. 
-To shorten the code example, let's create a helper function that will iterate over all results of a paginated API: +#### Creating a repository ```python -def pagination_helper(page_fetcher, **kwargs): - """Helper function to iterate over paginated results""" - while True: - resp = page_fetcher(**kwargs) - yield from resp.results - if not resp.pagination.has_more: - break - kwargs['after'] = resp.pagination.next_offset +import lakefs + +repo = lakefs.repository("example-repo").create(storage_namespace="s3://storage-bucket/repos/example-repo") +print(repo) ``` -#### Creating a repository +If using an explicit client, create the Repository object and pass the client to it (note the changed syntax). ```python -import lakefs_sdk -from lakefs_sdk.models import RepositoryCreation -from lakefs_sdk.client import LakeFSClient - -# ... client ... +import lakefs +from lakefs.client import Client -resp = client.repositories_api.create_repository( - RepositoryCreation( - name="example-repo", - storage_namespace="s3://storage-bucket/repos/example-repo", - ) +clt = Client( + host="http://localhost:8000", + username="AKIAIOSFODNN7EXAMPLE", + password="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", ) -print(resp) + +repo = lakefs.Repository("example-repo", client=clt).create(storage_namespace="s3://storage-bucket/repos/example-repo") +print(repo) ``` #### Output ``` -id='example-repo' creation_date=1697815536 default_branch='main' storage_namespace='s3://storage-bucket/repos/example-repo' +{id: 'example-repo', creation_date: 1697815536, default_branch: 'main', storage_namespace: 's3://storage-bucket/repos/example-repo'} ``` #### List repositories ```python -import lakefs_sdk -from lakefs_sdk.client import LakeFSClient - -# ... client and pagination_helper ... 
+import lakefs print("Listing repositories:") -for repo in pagination_helper(client.repositories_api.list_repositories): +for repo in lakefs.repositories(): print(repo) ``` @@ -127,23 +123,19 @@ for repo in pagination_helper(client.repositories_api.list_repositories): #### Output ``` Listing repositories: -id='example-repo' creation_date=1697815536 default_branch='main' storage_namespace='s3://storage-bucket/repos/example-repo' +{id: 'example-repo', creation_date: 1697815536, default_branch: 'main', storage_namespace: 's3://storage-bucket/repos/example-repo'} ``` #### Creating a branch ```python -import lakefs_sdk -from lakefs_sdk.models import BranchCreation -from lakefs_sdk.client import LakeFSClient +import lakefs -# ... client ... +branch1 = lakefs.repository("example-repo").branch("experiment1").create(source_reference_id="main") +print("experiment1 ref:", branch1.get_commit().id) -ref1 = client.branches_api.create_branch('example-repo', BranchCreation(name='experiment1', source='main')) -print("experiment1 ref:", ref1) - -ref2 = client.branches_api.create_branch('example-repo', BranchCreation(name='experiment2', source='main')) -print("experiment2 ref:", ref2) +branch2 = lakefs.repository("example-repo").branch("experiment2").create(source_reference_id="main") +print("experiment2 ref:", branch2.get_commit().id) ``` #### Output @@ -155,27 +147,51 @@ experiment2 ref: 7a300b41a8e1ca666c653171a364c08f640549c24d7e82b401bf077c646f885 ### List branches ```python -import lakefs_sdk -from lakefs_sdk.client import LakeFSClient - -# .. 
client and pagination_helper +import lakefs -for branch in pagination_helper(client.branches_api.list_branches, repository='example-repo'): +for branch in lakefs.repository("example-repo").branches(): print(branch) ``` #### Output ``` -id='experiment1' commit_id='7a300b41a8e1ca666c653171a364c08f640549c24d7e82b401bf077c646f8859' -id='experiment2' commit_id='7a300b41a8e1ca666c653171a364c08f640549c24d7e82b401bf077c646f8859' -id='main' commit_id='7a300b41a8e1ca666c653171a364c08f640549c24d7e82b401bf077c646f8859' +experiment1 +experiment2 +main ``` +## IO + +Great, now lets see some IO operations in action! +The new High Level SDK provide IO semantics which allow to work with lakeFS objects as if they were files in your +filesystem. This is extremely useful when working with data transformation packages that accept file descriptors and streams. + ### Upload -Great. Now, let's upload some data: -Generate "sample_data.csv" or use your own data +A simple way to upload data is to use the `upload` method which accepts contents as `str/bytes` + +```python +obj = branch1.object(path="text/sample_data.txt").upload(content_type="text/plain", data="This is my object data") +print(obj.stats()) +``` + +#### Output +``` +{'path': 'text/sample_data.txt', 'physical_address': 's3://storage-bucket/repos/example-repo/data/gke0ignnl531fa6k90p0/ckpfk4fnl531fa6k90pg', 'physical_address_expiry': None, 'checksum': '4a09d10820234a95bb548f14e4435bba', 'size_bytes': 15, 'mtime': 1701865289, 'metadata': {}, 'content_type': 'text/plain'} +``` + +Reading the data is just as simple: +```python +print(obj.reader(mode='r').read()) +``` + +#### Output +``` +This is my object data +``` + +Now let's generate a "sample_data.csv" file and write it directly to a lakeFS writer object ```python import csv @@ -186,106 +202,78 @@ sample_data = [ [3, "Carol", "carol@example.com"], ] -with open("sample_data.csv", "w", newline="") as csvfile: - writer = csv.writer(csvfile) +obj = 
branch1.object(path="csv/sample_data.csv") + +with obj.writer(mode='w', pre_sign=True, content_type="text/csv") as fd: + writer = csv.writer(fd) writer.writerow(["ID", "Name", "Email"]) for row in sample_data: writer.writerow(row) ``` -Upload the data file by passing the filename as content: +On context exit the object will be uploaded to lakeFS ```python -import lakefs_sdk -from lakefs_sdk.client import LakeFSClient - -# ... client ... - -resp = client.objects_api.upload_object(repository="example-repo", branch="experiment1", path="csv/sample_data.csv", content="sample_data.csv") -print(resp) +print(obj.stats()) ``` #### Output ``` -path='csv/sample_data.csv' path_type='object' physical_address='s3://storage-bucket/repos/example-repo/data/gke0ignnl531fa6k90p0/ckpfk4fnl531fa6k90pg' physical_address_expiry=None checksum='b6b6a1a17ff85291376ae6a5d7fa69d0' size_bytes=92 mtime=1697839635 metadata=None content_type='text/csv' +{'path': 'csv/sample_data.csv', 'physical_address': 's3://storage-bucket/repos/example-repo/data/gke0ignnl531fa6k90p0/ckpfk4fnl531fa6k90pg', 'physical_address_expiry': None, 'checksum': 'f181262c138901a74d47652d5ea72295', 'size_bytes': 88, 'mtime': 1701865939, 'metadata': {}, 'content_type': 'text/csv'} ``` -We can also upload content a bytes: +We can also upload raw byte contents: ```python -import lakefs_sdk -from lakefs_sdk.client import LakeFSClient - -# ... client ... 
- -resp = client.objects_api.upload_object(repository="example-repo", branch="experiment1", path="raw/file1.data", content=b"Hello Object World") -print(resp) +obj = branch1.object(path="raw/file1.data").upload(data=b"Hello Object World", pre_sign=True) +print(obj.stats()) ``` #### Output ``` -path='rawv/file1.data' path_type='object' physical_address='s3://storage-bucket/repos/example-repo/data/gke0ignnl531fa6k90p0/ckpfltvnl531fa6k90q0' physical_address_expiry=None checksum='0ef432f8eb0305f730b0c57bbd7a6b08' size_bytes=18 mtime=1697839863 metadata=None content_type='application/octet-stream +{'path': 'raw/file1.data', 'physical_address': 's3://storage-bucket/repos/example-repo/data/gke0ignnl531fa6k90p0/ckpfltvnl531fa6k90q0', 'physical_address_expiry': None, 'checksum': '0ef432f8eb0305f730b0c57bbd7a6b08', 'size_bytes': 18, 'mtime': 1701866323, 'metadata': {}, 'content_type': 'application/octet-stream'} ``` ### Uncommitted changes -Diffing a single branch will show all the uncommitted changes on that branch: +Using the branch `uncommitted` method will show all the uncommitted changes on that branch: ```python -import lakefs_sdk -from lakefs_sdk.client import LakeFSClient - -# ... client and pagination_helper ... - -for diff in pagination_helper(client.branches_api.diff_branch, repository='example-repo', branch='experiment1'): +for diff in branch1.uncommitted(): print(diff) - ``` #### Output ``` -type='added' path='csv/sample_data.csv' path_type='object' size_bytes=92 -type='added' path='raw/file1.data' path_type='object' size_bytes=18 +{'type': 'added', 'path': 'text/sample_data.txt', 'path_type': 'object', 'size_bytes': 15} +{'type': 'added', 'path': 'csv/sample_data.csv', 'path_type': 'object', 'size_bytes': 88} +{'type': 'added', 'path': 'raw/file1.data', 'path_type': 'object', 'size_bytes': 18} + ``` As expected, our change appears here. 
Let's commit it and attach some arbitrary metadata: ```python -import lakefs_sdk -from lakefs_sdk.models import CommitCreation -from lakefs_sdk.client import LakeFSClient - -# ... client ... - -resp = client.commits_api.commit( - repository='example-repo', - branch='experiment1', - commit_creation=CommitCreation(message='Add some data!', metadata={'using': 'python_api'}) -) -print(resp) +ref = branch1.commit(message='Add some data!', metadata={'using': 'python_sdk'}) +print(ref.get_commit()) ``` #### Output ``` -id='d51b2428106921fcb893813b1eb668b46284067bb5264d89ed409ccb95676e3d' parents=['7a300b41a8e1ca666c653171a364c08f640549c24d7e82b401bf077c646f8859'] committer='barak' message='Add some data!' creation_date=1697884139 meta_range_id='' metadata={'using': 'python_api'} +{'id': 'c4666db80d2a984b4eab8ce02b6a60830767eba53995c26350e0ad994e15fedb', 'parents': ['a7a092a5a32a2cd97f22abcc99414f6283d29f6b9dd2725ce89f90188c5901e5'], 'committer': 'admin', 'message': 'Add some data!', 'creation_date': 1701866838, 'meta_range_id': '999bedeab1b740f83d2cf8c52548d55446f9038c69724d399adc4438412cade2', 'metadata': {'using': 'python_sdk'}} + ``` -Calling diff again on the same branch, this time there should be no uncommitted files: +Calling `uncommitted` again on the same branch, this time there should be no uncommitted files: ```python -import lakefs_sdk -from lakefs_sdk.client import LakeFSClient - -# ... client ... - -resp = client.branches_api.diff_branch(repository='example-repo', branch='experiment1') -print(resp) +print(len(list(branch1.uncommitted()))) ``` #### Output ``` -pagination=Pagination(has_more=False, next_offset='', results=0, max_per_page=1000) results=[] +0 ``` #### Merging changes from a branch into main @@ -293,68 +281,46 @@ pagination=Pagination(has_more=False, next_offset='', results=0, max_per_page=10 Let's diff between your branch and the main branch: ```python -import lakefs_sdk -from lakefs_sdk.client import LakeFSClient - -# ... 
client and pagination_helper ... - -for diff in pagination_helper(client.refs_api.diff_refs, repository='example-repo', left_ref='main', right_ref='experiment1'): - print(diff) - +main = repo.branch("main") +for diff in main.diff(other_ref=branch1): + print(diff) ``` #### Output ``` -type='added' path='csv/sample_data.csv' path_type='object' size_bytes=92 -type='added' path='raw/file1.data' path_type='object' size_bytes=18 +{'type': 'added', 'path': 'text/sample_data.txt', 'path_type': 'object', 'size_bytes': 15} +{'type': 'added', 'path': 'csv/sample_data.csv', 'path_type': 'object', 'size_bytes': 88} +{'type': 'added', 'path': 'raw/file1.data', 'path_type': 'object', 'size_bytes': 18} ``` -Looks like you have a change. Let's merge it: +Looks like we have some changes. Let's merge them: ```python -client.refs_api.merge_into_branch(repository='example-repo', source_ref='experiment-aggregations1', destination_branch='main') +res = branch1.merge_into(main) +print(res) # output: -# {'reference': 'd0414a3311a8c1cef1ef355d6aca40db72abe545e216648fe853e25db788fa2e', -# 'summary': {'added': 1, 'changed': 0, 'conflict': 0, 'removed': 0}} +# cfddb68b7265ae0b17fafa1a2068f8414395e0a8b8bc0f8d741cbcce1e67e394 ``` Let's diff again - there should be no changes as all changes are on our main branch already: ```python -import lakefs_sdk -from lakefs_sdk.client import LakeFSClient - -# ... client ... - -resp = client.refs_api.merge_into_branch( - repository='example-repo', - source_ref='experiment1', - destination_branch='main') -print(resp) +print(len(list(main.diff(other_ref=branch1)))) ``` #### Output ``` -reference='a3ea99167a25748cf1d33ba284bda9c1400a8acfae8477562032d2b2435fd37b' +0 ``` ### Read data from main branch ```python import csv -from io import StringIO -import lakefs_sdk -from lakefs_sdk.client import LakeFSClient - -# ... client ... 
-resp = client.objects_api.get_object( - repository='example-repo', - ref='main', - path='csv/sample_data.csv') +obj = main.object(path="csv/sample_data.csv") -data = StringIO(resp.decode('utf-8')) -for row in csv.reader(data): +for row in csv.reader(obj.reader(mode='r')): print(row) ``` @@ -366,14 +332,9 @@ for row in csv.reader(data): ['3', 'Carol', 'carol@example.com'] ``` -### Python Client documentation - -For the documentation of lakeFS’s Python package, see [https://pydocs-sdk.lakefs.io](https://pydocs-sdk.lakefs.io) - - -### Full API reference +### Python SDK documentation and API reference -For a full reference of the lakeFS API, see [lakeFS API]({% link reference/api.md %}) +For the documentation of lakeFS’s Python package and full api reference, see [https://pydocs-lakefs.lakefs.io](https://pydocs-lakefs.lakefs.io) ## Using Boto