Skip to content

Commit

Permalink
Fix an info message when using the convert CLI command with no args.i…
Browse files Browse the repository at this point in the history
…nput_format (#982)

- Ticket no. 109749
- Current behavior: **Source dataset format detected as `'None'` <-
`'None'` should be replaced with the detected data format.**
```console
(datumaro) vinnamki@vinnamki:~/datumaro$ datum convert -i notebooks/d6-dice -f cvat -o ./ws_test/d6-dice-cvat --overwrite -- --save-media
2023-05-02 16:07:35,594 INFO: Source dataset format detected as 'None'
2023-05-02 16:07:49,679 INFO: Exporting the dataset
2023-05-02 16:07:51,706 INFO: Dataset exported to '/home/vinnamki/datumaro/ws_test/d6-dice-cvat' as 'cvat' 
```
- Expected behavior
```console
(datumaro) vinnamki@vinnamki:~/datumaro$ datum convert -i notebooks/d6-dice -f cvat -o ./ws_test/d6-dice-cvat --overwrite -- --save-media
2023-05-02 16:07:35,594 INFO: Source dataset format detected as yolo
2023-05-02 16:07:49,679 INFO: Exporting the dataset
2023-05-02 16:07:51,706 INFO: Dataset exported to '/home/vinnamki/datumaro/ws_test/d6-dice-cvat' as 'cvat' 
```
 - Add `test_convert.py` as well

Signed-off-by: Kim, Vinnam <[email protected]>
  • Loading branch information
vinnamkim authored May 4, 2023
1 parent 320ea82 commit 9fe8a6d
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 1 deletion.
6 changes: 5 additions & 1 deletion datumaro/cli/commands/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,11 @@ def convert_command(args):
return 2

fmt = matches[0]
log.info("Source dataset format detected as '%s'", args.input_format)
log.info(f"Source dataset format detected as {fmt}")

if fmt == args.output_format:
log.error("The source data format and the output data format is same as {fmt}.")
return 3

source = osp.abspath(args.source)

Expand Down
125 changes: 125 additions & 0 deletions tests/integration/cli/test_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# Copyright (C) 2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
import contextlib
import io
import os.path as osp

import numpy as np
import pytest

from datumaro.components.annotation import AnnotationType, Bbox, LabelCategories
from datumaro.components.dataset import Dataset
from datumaro.components.dataset_base import DatasetItem
from datumaro.components.media import Image

from tests.requirements import Requirements, mark_requirement
from tests.utils.test_utils import IGNORE_ALL, TestCaseHelper, compare_datasets
from tests.utils.test_utils import run_datum as run


@pytest.fixture()
def fxt_dataset():
    """Build a small in-memory detection dataset filled with random content.

    Each of the subsets "Test", "Train" and "Validation" receives five items.
    Every item holds an 8x8 three-channel ``uint8`` image and a single ``Bbox``
    whose four coordinates are drawn from ``[0, 8)`` and whose label index is
    drawn from ``[0, 5)``.
    """
    side = 8  # images are square: height == width
    label_count = 5
    items_per_subset = 5

    def _random_item(subset_name, index):
        # The draw order (pixels, then box coordinates, then label) is kept
        # stable so the global NumPy random state is consumed identically.
        media = Image.from_numpy(
            data=np.random.randint(0, 255, size=(side, side, 3), dtype=np.uint8), ext=".png"
        )
        box = Bbox(
            *np.random.randint(0, side, size=(4,)).tolist(),
            label=np.random.randint(0, label_count),
            z_order=0,
            attributes={},
        )
        return DatasetItem(
            id=f"img_{index}",
            subset=subset_name,
            media=media,
            annotations=[box],
        )

    items = []
    for subset_name in ["Test", "Train", "Validation"]:
        for index in range(items_per_subset):
            items.append(_random_item(subset_name, index))

    label_names = [f"label_{idx}" for idx in range(label_count)]
    return Dataset.from_iterable(
        items,
        categories={AnnotationType.label: LabelCategories.from_iterable(label_names)},
    )


@mark_requirement(Requirements.DATUM_GENERAL_REQ)
@pytest.mark.parametrize(
    "input_format", ["coco", "yolo", "datumaro", "datumaro_binary"], ids=lambda x: f"[if:{x}]"
)
@pytest.mark.parametrize(
    "output_format", ["coco", "yolo", "datumaro", "datumaro_binary"], ids=lambda x: f"[of:{x}]"
)
@pytest.mark.parametrize("give_input_format", [True, False])
def test_convert_object_detection(
    fxt_dataset: Dataset,
    input_format: str,
    output_format: str,
    give_input_format: bool,
    test_dir: str,
    helper_tc: TestCaseHelper,
    caplog: pytest.LogCaptureFixture,
):
    """Run ``convert`` for every input/output format pair and check the result.

    The fixture dataset is exported to ``<test_dir>/src`` in ``input_format``
    and then converted to ``output_format`` in ``<test_dir>/dst``. The command
    is expected to exit with code 3 when both formats are equal and with 0
    otherwise. When ``-if`` is not passed, exactly one log message reporting
    the detected source format must have been captured. After a successful
    conversion the output is re-imported and compared with the fixture dataset.
    """
    # TODO: Fix xfail cases...
    to_coco = output_format == "coco"
    coco_to_datumaro = input_format == "coco" and output_format in ("datumaro", "datumaro_binary")
    if to_coco or coco_to_datumaro:
        pytest.xfail()

    src_dir = osp.join(test_dir, "src")
    dst_dir = osp.join(test_dir, "dst")
    fxt_dataset.export(src_dir, format=input_format, save_media=True)

    # Converting into the very same format is rejected with exit code 3.
    expected_code = 3 if input_format == output_format else 0

    cmd = ["convert", "-f", output_format, "-i", src_dir, "-o", dst_dir]
    if give_input_format:
        cmd.extend(["-if", input_format])
    # Everything after "--" is passed as extra arguments following the
    # command's own options.
    cmd.extend(["--", "--save-media"])

    run(helper_tc, *cmd, expected_code=expected_code)

    if not give_input_format:
        # If no input_format => the detection message must be logged once.
        detect_msg = f"Source dataset format detected as {input_format}"
        assert caplog.messages.count(detect_msg) == 1

    if expected_code != 0:
        return

    converted = Dataset.import_from(dst_dir, format=output_format)
    compare_datasets(
        helper_tc, fxt_dataset, converted, require_media=True, ignored_attrs=IGNORE_ALL
    )
4 changes: 4 additions & 0 deletions tests/utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from tempfile import TemporaryDirectory
from typing import Any, Collection, List, Optional, Union

import pytest
from typing_extensions import Literal

from datumaro.components.annotation import AnnotationType
Expand Down Expand Up @@ -429,3 +430,6 @@ def assertListEqual(self, list1: List[Any], list2: List[Any], err_msg: str = "")
assert len(list1) == len(list2), err_msg
for item1, item2 in zip(list1, list2):
self.assertEqual(item1, item2, err_msg)

def fail(self, msg):
    """Fail the running test immediately, reporting ``msg`` as the reason.

    Args:
        msg: Explanation handed over to ``pytest.fail``.
    """
    # Positional on purpose: the first parameter of ``pytest.fail`` was named
    # ``msg`` before pytest 7.0 and ``reason`` from 7.0 on, so the ``reason=``
    # keyword is rejected by older pytest releases.
    pytest.fail(msg)

0 comments on commit 9fe8a6d

Please sign in to comment.