Skip to content

Commit

Permalink
Merge pull request #386 from martindurant/version_caching
Browse files (browse the repository at this point in the history)
Fixed cached directory info when versioned
  • Loading branch information
martindurant authored Oct 19, 2020
2 parents 86e8dbd + 9b44f0d commit 327e3fc
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 12 deletions.
22 changes: 10 additions & 12 deletions s3fs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1365,26 +1365,24 @@ def __init__(self, s3, path, mode='rb', block_size=5 * 2 ** 20, acl="",
self.fill_cache = fill_cache
self.s3_additional_kwargs = s3_additional_kwargs or {}
self.req_kw = {'RequestPayer': 'requester'} if requester_pays else {}
super().__init__(s3, path, mode, block_size, autocommit=autocommit,
cache_type=cache_type)
self.s3 = self.fs # compatibility
if self.writable():
if 'r' not in mode:
if block_size < 5 * 2 ** 20:
raise ValueError('Block size must be >=5MB')
else:
if version_id and self.fs.version_aware:
if version_id and s3.version_aware:
self.version_id = version_id
self.details = self.fs.info(self.path, version_id=version_id)
self.details = s3.info(path, version_id=version_id)
self.size = self.details['size']
elif self.fs.version_aware:
self.version_id = self.details.get('VersionId')
elif s3.version_aware:
# In this case we have not managed to get the VersionId out of details and
# we should invalidate the cache and perform a full head_object since it
# has likely been partially populated by ls.
if self.version_id is None:
self.fs.invalidate_cache(self.path)
self.details = self.fs.info(self.path)
self.version_id = self.details.get('VersionId')
s3.invalidate_cache(path)
self.details = s3.info(path)
self.version_id = self.details.get('VersionId')
super().__init__(s3, path, mode, block_size, autocommit=autocommit,
cache_type=cache_type)
self.s3 = self.fs # compatibility

# when not using autocommit we want to have transactional state to manage
self.append_block = False
Expand Down
22 changes: 22 additions & 0 deletions s3fs/tests/test_s3fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1691,3 +1691,25 @@ def test_shallow_find(s3):

assert s3.ls(test_bucket_name) == s3.find(test_bucket_name, maxdepth=1, withdirs=True)
assert s3.ls(test_bucket_name) == s3.glob(test_bucket_name + "/*")


def test_version_sizes(s3):
    """Protect against caching of incorrect version details.

    Several gzip payloads of differing lengths are written to the same key
    in the versioned bucket.  Each stored version is then re-opened by its
    ``VersionId`` and decompressed.  Besides requiring that decompression
    succeeds, the test checks that the bytes read back are exactly the
    payload written for that version, so reading the wrong version (or a
    version with wrongly cached details) is detected rather than passing
    silently.
    """
    import gzip

    # The ``s3`` argument is kept so the test signature is unchanged
    # (presumably a pytest fixture that prepares the buckets -- confirm
    # against the fixture definition).  It is rebound here to a
    # version-aware filesystem, which is what the test actually exercises.
    s3 = S3FileSystem(anon=False, version_aware=True,
                      client_kwargs={'endpoint_url': endpoint_uri})
    path = f"s3://{versioned_bucket_name}/test.txt.gz"

    # The payloads deliberately differ in length, and one value is repeated.
    payloads = (
        b'good morning!',
        b'hello!',
        b'hi!',
        b'hello!',
    )
    # Each write yields a record from which the new object's VersionId is read.
    versions = [s3.pipe_file(path, gzip.compress(text)) for text in payloads]

    for text, version in zip(payloads, versions):
        version_id = version['VersionId']
        with s3.open(path, version_id=version_id) as f:
            with gzip.open(f) as zfp:
                # Decompressing alone only shows that *some* well-formed gzip
                # stream was read; compare against the payload written for
                # this specific version.
                assert zfp.read() == text

0 comments on commit 327e3fc

Please sign in to comment.