From 9fe8a6d76fff135b1ff0ce883e1888abf845ce34 Mon Sep 17 00:00:00 2001
From: Vinnam Kim <vinnam.kim@intel.com>
Date: Thu, 4 May 2023 10:16:38 +0900
Subject: [PATCH] Fix an info message when using the convert CLI command with
 no args.input_format (#982)

- Ticket no. 109749
- Current behavior: the log prints **Source dataset format detected as `'None'`**;
`'None'` should be replaced with the detected data format.
```console
(datumaro) vinnamki@vinnamki:~/datumaro$ datum convert -i notebooks/d6-dice -f cvat -o ./ws_test/d6-dice-cvat --overwrite -- --save-media
2023-05-02 16:07:35,594 INFO: Source dataset format detected as 'None'
2023-05-02 16:07:49,679 INFO: Exporting the dataset
2023-05-02 16:07:51,706 INFO: Dataset exported to '/home/vinnamki/datumaro/ws_test/d6-dice-cvat' as 'cvat'
```
- Expected behavior
```console
(datumaro) vinnamki@vinnamki:~/datumaro$ datum convert -i notebooks/d6-dice -f cvat -o ./ws_test/d6-dice-cvat --overwrite -- --save-media
2023-05-02 16:07:35,594 INFO: Source dataset format detected as yolo
2023-05-02 16:07:49,679 INFO: Exporting the dataset
2023-05-02 16:07:51,706 INFO: Dataset exported to '/home/vinnamki/datumaro/ws_test/d6-dice-cvat' as 'cvat'
```
- Add `test_convert.py` as well

Signed-off-by: Kim, Vinnam <vinnam.kim@intel.com>
---
 datumaro/cli/commands/convert.py      |   6 +-
 tests/integration/cli/test_convert.py | 125 ++++++++++++++++++++++++++
 tests/utils/test_utils.py             |   4 +
 3 files changed, 134 insertions(+), 1 deletion(-)
 create mode 100644 tests/integration/cli/test_convert.py

diff --git a/datumaro/cli/commands/convert.py b/datumaro/cli/commands/convert.py
index 2d19b00c73..172e200759 100644
--- a/datumaro/cli/commands/convert.py
+++ b/datumaro/cli/commands/convert.py
@@ -126,7 +126,11 @@ def convert_command(args):
             return 2
 
         fmt = matches[0]
-        log.info("Source dataset format detected as '%s'", args.input_format)
+        log.info(f"Source dataset format detected as {fmt}")
+
+    if fmt == args.output_format:  # converting a dataset to its own format is rejected
+        log.error(f"The source data format and the output data format is same as {fmt}.")
+        return 3  # exit code 3: source and output formats are identical
 
     source = osp.abspath(args.source)
 
diff --git a/tests/integration/cli/test_convert.py b/tests/integration/cli/test_convert.py
new file mode 100644
index 0000000000..e6585e8c7f
--- /dev/null
+++ b/tests/integration/cli/test_convert.py
@@ -0,0 +1,125 @@
+# Copyright (C) 2023 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+import contextlib
+import io
+import os.path as osp
+
+import numpy as np
+import pytest
+
+from datumaro.components.annotation import AnnotationType, Bbox, LabelCategories
+from datumaro.components.dataset import Dataset
+from datumaro.components.dataset_base import DatasetItem
+from datumaro.components.media import Image
+
+from tests.requirements import Requirements, mark_requirement
+from tests.utils.test_utils import IGNORE_ALL, TestCaseHelper, compare_datasets
+from tests.utils.test_utils import run_datum as run
+
+
+@pytest.fixture()
+def fxt_dataset():  # random detection dataset: one Bbox per item, three subsets
+    h = w = 8  # image height and width
+    n_labels = 5  # number of label categories
+    n_items = 5  # items generated per subset
+
+    return Dataset.from_iterable(
+        [
+            DatasetItem(
+                id=f"img_{item_id}",
+                subset=subset,
+                media=Image.from_numpy(
+                    data=np.random.randint(0, 255, size=(h, w, 3), dtype=np.uint8), ext=".png"
+                ),
+                annotations=[
+                    Bbox(
+                        *np.random.randint(0, h, size=(4,)).tolist(),  # 4 random ints in [0, h)
+                        label=np.random.randint(0, n_labels),  # index in [0, n_labels)
+                        z_order=0,
+                        attributes={},
+                    )
+                ],
+            )
+            for subset in ["Test", "Train", "Validation"]
+            for item_id in range(n_items)  # ids repeat across subsets (img_0..img_4)
+        ],
+        categories={
+            AnnotationType.label: LabelCategories.from_iterable(
+                [f"label_{idx}" for idx in range(n_labels)]  # label_0 .. label_4
+            )
+        },
+    )
+
+
+@mark_requirement(Requirements.DATUM_GENERAL_REQ)
+@pytest.mark.parametrize(
+    "input_format", ["coco", "yolo", "datumaro", "datumaro_binary"], ids=lambda x: f"[if:{x}]"
+)
+@pytest.mark.parametrize(
+    "output_format", ["coco", "yolo", "datumaro", "datumaro_binary"], ids=lambda x: f"[of:{x}]"
+)
+@pytest.mark.parametrize("give_input_format", [True, False])  # with / without the -if option
+def test_convert_object_detection(
+    fxt_dataset: Dataset,
+    input_format: str,
+    output_format: str,
+    give_input_format: bool,
+    test_dir: str,
+    helper_tc: TestCaseHelper,
+    caplog: pytest.LogCaptureFixture,
+):
+    # TODO: Fix xfail cases: any export to coco, and coco -> datumaro/datumaro_binary.
+    if output_format == "coco":
+        pytest.xfail()
+    elif (
+        output_format == "datumaro_binary" or output_format == "datumaro"
+    ) and input_format == "coco":
+        pytest.xfail()
+
+    src_dir = osp.join(test_dir, "src")
+    dst_dir = osp.join(test_dir, "dst")
+
+    fxt_dataset.export(src_dir, format=input_format, save_media=True)  # source data on disk
+
+    expected_code = 0 if input_format != output_format else 3  # 3: same format is rejected
+
+    cmd = [
+        "convert",
+        "-f",
+        output_format,
+        "-i",
+        src_dir,
+        "-o",
+        dst_dir,
+    ]
+    if give_input_format:
+        cmd += [
+            "-if",
+            input_format,
+        ]
+    cmd += [
+        "--",
+        "--save-media",
+    ]
+
+    run(
+        helper_tc,
+        *cmd,
+        expected_code=expected_code,
+    )
+
+    if not give_input_format:
+        # Without -if the format is auto-detected; the detection message must be logged once.
+        matched = [
+            msg
+            for msg in caplog.messages
+            if msg == f"Source dataset format detected as {input_format}"
+        ]
+        assert len(matched) == 1
+
+    if expected_code == 0:  # on success, re-import the output and compare with the source
+        actual = Dataset.import_from(dst_dir, format=output_format)
+        compare_datasets(
+            helper_tc, fxt_dataset, actual, require_media=True, ignored_attrs=IGNORE_ALL
+        )
diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py
index ecb5c6bb3d..62aa42e68d 100644
--- a/tests/utils/test_utils.py
+++ b/tests/utils/test_utils.py
@@ -16,6 +16,7 @@
 from tempfile import TemporaryDirectory
 from typing import Any, Collection, List, Optional, Union
 
+import pytest
 from typing_extensions import Literal
 
 from datumaro.components.annotation import AnnotationType
@@ -429,3 +430,6 @@ def assertListEqual(self, list1: List[Any], list2: List[Any], err_msg: str = "")
         assert len(list1) == len(list2), err_msg
         for item1, item2 in zip(list1, list2):
             self.assertEqual(item1, item2, err_msg)
+
+    def fail(self, msg):  # fail the running test, reporting `msg`
+        pytest.fail(reason=msg)  # delegates to pytest, passing msg as the `reason` keyword