Skip to content

Commit

Permalink
Fix media contents error in arrow format (#986)
Browse files Browse the repository at this point in the history
  • Loading branch information
cih9088 authored May 4, 2023
1 parent 9fe8a6d commit b4d5d7e
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 23 deletions.
7 changes: 5 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Bug fixes
- Fix project level CVAT for images format import
(<https://github.com/openvinotoolkit/datumaro/pull/980>)
- Fix media contents not returning bytes in arrow format
(<https://github.com/openvinotoolkit/datumaro/pull/986>)

## 20/04/2023 - Release 1.2.0
### New features
Expand Down Expand Up @@ -49,8 +54,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
(<https://github.com/openvinotoolkit/datumaro/pull/918>)
- Fix log issue when importing celeba and align celeba dataset
(<https://github.com/openvinotoolkit/datumaro/pull/919>)
- Fix project level CVAT for images format import
(<https://github.com/openvinotoolkit/datumaro/pull/980>)

## 28/03/2023 - Release 1.1.1
### Bug fixes
Expand Down
4 changes: 1 addition & 3 deletions datumaro/components/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,7 @@ def data(self) -> Optional[AnyData]:
@property
def bytes(self) -> Optional[bytes]:
if self.has_data:
if callable(self._data):
_bytes = self._data()
_bytes = self._data
_bytes = self._data() if callable(self._data) else self._data
if isinstance(_bytes, bytes):
return _bytes
return None
Expand Down
23 changes: 8 additions & 15 deletions datumaro/plugins/data_formats/arrow/mapper/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,21 +188,16 @@ def backward_from_batches(
attributes_ = [DictMapper.backward(attributes)[0] for attributes in attributes_]

images = []

def data_loader(path, idx):
options = {
"path": path if os.path.exists(path) else None,
"data": pa_batches_decoder(batches, f"{parent}.bytes" if parent else "bytes")[idx],
}
return cls.decode(**options)

for idx, (path, attributes) in enumerate(zip(paths, attributes_)):
if os.path.exists(path):
images.append(Image.from_file(path=path, size=attributes["size"]))
else:
images.append(
Image.from_bytes(
data=partial(data_loader, idx=idx, path=path), size=attributes["size"]
data=lambda: pa_batches_decoder(
batches, f"{parent}.bytes" if parent else "bytes"
)[idx],
size=attributes["size"],
)
)
return images
Expand All @@ -222,7 +217,7 @@ def forward(
_bytes = None
if isinstance(encoder, Callable):
_bytes = encoder(obj)
else:
elif encoder != "NONE":
_bytes = obj.data
out["bytes"] = _bytes

Expand Down Expand Up @@ -269,10 +264,6 @@ def backward_from_batches(
) -> List[PointCloud]:
paths = pa_batches_decoder(batches, f"{parent}.path" if parent else "path")

def data_loader(idx):
data = pa_batches_decoder(batches, f"{parent}.bytes" if parent else "bytes")[idx]
return data

def extra_images(idx):
offset = 0
attributes = pa_batches_decoder(
Expand All @@ -298,7 +289,9 @@ def extra_images(idx):
else:
point_clouds.append(
PointCloud.from_bytes(
data=partial(data_loader, idx=idx),
data=lambda: pa_batches_decoder(
batches, f"{parent}.bytes" if parent else "bytes"
)[idx],
extra_images=partial(extra_images, idx=idx),
)
)
Expand Down
47 changes: 46 additions & 1 deletion tests/unit/data_formats/arrow/test_arrow_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from datumaro.components.dataset_base import DatasetItem
from datumaro.components.environment import Environment
from datumaro.components.importer import DatasetImportError
from datumaro.components.media import Image
from datumaro.components.media import FromFileMixin, Image
from datumaro.components.project import Dataset
from datumaro.plugins.data_formats.arrow import ArrowExporter, ArrowImporter
from datumaro.plugins.data_formats.arrow.arrow_dataset import ArrowDataset
Expand Down Expand Up @@ -343,6 +343,51 @@ def test_can_detect(self, fxt_test_datumaro_format_dataset, test_dir):
detected_formats = Environment().detect_dataset(test_dir)
assert [self.importer.NAME] == detected_formats

@pytest.mark.parametrize(
["fxt_dataset", "save_media"],
[
pytest.param(
"fxt_image",
True,
id="image_with_media",
),
pytest.param(
"fxt_point_cloud",
True,
id="point_cloud_with_media",
),
pytest.param(
"fxt_image",
False,
id="image_without_media",
),
pytest.param(
"fxt_point_cloud",
False,
id="point_cloud_without_media",
),
],
)
def test_media_contents(self, fxt_dataset, save_media, test_dir, request):
fxt_dataset = request.getfixturevalue(fxt_dataset)

fxt_dataset.export(test_dir, format=self.format, save_media=save_media)
imported_dataset = Dataset.import_from(test_dir)
for item_a, item_b in zip(fxt_dataset, imported_dataset):
if isinstance(item_a.media, FromFileMixin):
assert item_a.media.bytes is not None
assert item_a.media.data is not None
if save_media:
assert item_b.media.bytes is not None
assert item_b.media.data is not None
else:
if isinstance(item_a.media, FromFileMixin):
assert item_b.media.bytes is not None
assert item_b.media.data is not None
else:
assert item_b.media.bytes is None
assert item_b.media.data is None

# Below is testing special cases...
@mark_requirement(Requirements.DATUM_GENERAL_REQ)
def test_inplace_save_writes_only_updated_data_with_direct_changes(self, test_dir, helper_tc):
Expand Down
3 changes: 1 addition & 2 deletions tests/unit/test_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,7 @@ def test_ctors(self):
img = Image.from_bytes(**args)
self.assertTrue(img.has_data)
np.testing.assert_array_equal(img.data, image)
if img.bytes:
self.assertEqual(img.bytes, image_bytes)
self.assertEqual(img.bytes, image_bytes)
self.assertEqual(img.size, tuple(image.shape[:2]))

with self.subTest():
Expand Down

0 comments on commit b4d5d7e

Please sign in to comment.