Skip to content

Commit

Permalink
Fix datumaro export with media (#34)
Browse files: browse the repository at this point in the history
* Fix broken type inference for media_as

* Remove duplicated 'media' field in dm export

* Add PointCloud to datumaro import namespace

* Format code

* Add tests for the issue and datumaro dataset examples

* Apply black v2024

* Apply black v2024

* Ignore some bandit warnings

* Fix paths in test

* Update changelog
  • Loading branch information
zhiltsov-max committed Mar 28, 2024
1 parent 446bad5 commit 8ba237c
Show file tree
Hide file tree
Showing 16 changed files with 637 additions and 14 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
(<https://github.com/cvat-ai/datumaro/pull/28>)
- Image stats when no image info available for some images in the dataset
(<https://github.com/cvat-ai/datumaro/pull/29>)
- Incorrect writing of the `media` field in the Datumaro format when specific media fields are present
(<https://github.com/cvat-ai/datumaro/pull/34>)

### Security
- TBD
Expand Down
4 changes: 2 additions & 2 deletions datumaro/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
# Copyright (C) 2022-2024 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT

Expand Down Expand Up @@ -64,7 +64,7 @@
validate,
)
from .components.launcher import Launcher, ModelTransform
from .components.media import ByteImage, Image, MediaElement, Video, VideoFrame
from .components.media import ByteImage, Image, MediaElement, PointCloud, Video, VideoFrame
from .components.media_manager import MediaManager
from .components.progress_reporting import NullProgressReporter, ProgressReporter
from .components.validator import Validator
Expand Down
8 changes: 3 additions & 5 deletions datumaro/components/extractor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2022-2024 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT

Expand Down Expand Up @@ -44,7 +45,7 @@

DEFAULT_SUBSET_NAME = "default"

T = TypeVar("T", bound=MediaElement)
MediaType = TypeVar("MediaType", bound=MediaElement)


@attrs(order=False, init=False, slots=True)
Expand All @@ -64,7 +65,7 @@ class DatasetItem:
def wrap(item, **kwargs):
return attr.evolve(item, **kwargs)

def media_as(self, t: Type[T]) -> T:
def media_as(self, t: Type[MediaType]) -> MediaType:
assert issubclass(t, MediaElement)
return cast(t, self.media)

Expand Down Expand Up @@ -294,9 +295,6 @@ def get(self, id, subset=None):
return None


T = TypeVar("T")


class _ImportFail(DatumaroError):
pass

Expand Down
7 changes: 7 additions & 0 deletions datumaro/components/media.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,13 @@ def __init__(self, path: str, extra_images: Optional[List[Image]] = None):

self.extra_images: List[Image] = extra_images or []

def __eq__(self, other: object) -> bool:
return (
isinstance(other, __class__)
and self.path == other.path
and set(self.extra_images) == set(other.extra_images)
)


class MultiframeImage(MediaElement):
def __init__(
Expand Down
15 changes: 8 additions & 7 deletions datumaro/plugins/datumaro_format/converter.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2022-2024 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT

# pylint: disable=no-self-use

from __future__ import annotations

import os
import os.path as osp
import shutil
Expand Down Expand Up @@ -37,8 +40,8 @@


class _SubsetWriter:
def __init__(self, context):
self._context = context
def __init__(self, context: DatumaroConverter):
self._context: DatumaroConverter = context

self._data = {
"info": {},
Expand Down Expand Up @@ -76,9 +79,8 @@ def add_item(self, item: DatasetItem):
item, osp.join(self._context._images_dir, item.subset, path)
)

item_desc["image"] = {
"path": path,
}
item_desc["image"] = {"path": path}

if item.media.has_size: # avoid occasional loading
item_desc["image"]["size"] = item.media.size
elif isinstance(item.media, PointCloud):
Expand Down Expand Up @@ -119,8 +121,7 @@ def add_item(self, item: DatasetItem):

if related_images:
item_desc["related_images"] = related_images

if isinstance(item.media, MediaElement):
elif isinstance(item.media, MediaElement):
item_desc["media"] = {"path": item.media.path}

self.items.append(item_desc)
Expand Down
41 changes: 41 additions & 0 deletions tests/assets/datumaro_dataset/diverse/annotations/default.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{
"info": {},
"categories": {
"label": {
"labels": [
{ "name": "cat0", "parent": "", "attributes": ["x", "y"] },
{ "name": "cat1", "parent": "", "attributes": ["x", "y"] },
{ "name": "cat2", "parent": "", "attributes": ["x", "y"] },
{ "name": "cat3", "parent": "", "attributes": ["x", "y"] },
{ "name": "cat4", "parent": "", "attributes": ["x", "y"] }
],
"attributes": ["a", "b", "score"]
},
"mask": {
"colormap": [
{ "label_id": 0, "r": 0, "g": 0, "b": 0 },
{ "label_id": 1, "r": 128, "g": 0, "b": 0 },
{ "label_id": 2, "r": 0, "g": 128, "b": 0 },
{ "label_id": 3, "r": 128, "g": 128, "b": 0 },
{ "label_id": 4, "r": 0, "g": 0, "b": 128 }
]
},
"points": {
"items": [
{ "label_id": 0, "labels": ["cat1", "cat2"], "joints": [[0, 1]] },
{ "label_id": 1, "labels": ["cat1", "cat2"], "joints": [[0, 1]] },
{ "label_id": 2, "labels": ["cat1", "cat2"], "joints": [[0, 1]] },
{ "label_id": 3, "labels": ["cat1", "cat2"], "joints": [[0, 1]] },
{ "label_id": 4, "labels": ["cat1", "cat2"], "joints": [[0, 1]] }
]
}
},
"items": [
{ "id": "42", "annotations": [] },
{
"id": "43",
"annotations": [],
"image": { "path": "43.qq", "size": [2, 4] }
}
]
}
51 changes: 51 additions & 0 deletions tests/assets/datumaro_dataset/diverse/annotations/test.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
"info": {},
"categories": {
"label": {
"labels": [
{ "name": "cat0", "parent": "", "attributes": ["x", "y"] },
{ "name": "cat1", "parent": "", "attributes": ["x", "y"] },
{ "name": "cat2", "parent": "", "attributes": ["x", "y"] },
{ "name": "cat3", "parent": "", "attributes": ["x", "y"] },
{ "name": "cat4", "parent": "", "attributes": ["x", "y"] }
],
"attributes": ["a", "b", "score"]
},
"mask": {
"colormap": [
{ "label_id": 0, "r": 0, "g": 0, "b": 0 },
{ "label_id": 1, "r": 128, "g": 0, "b": 0 },
{ "label_id": 2, "r": 0, "g": 128, "b": 0 },
{ "label_id": 3, "r": 128, "g": 128, "b": 0 },
{ "label_id": 4, "r": 0, "g": 0, "b": 128 }
]
},
"points": {
"items": [
{ "label_id": 0, "labels": ["cat1", "cat2"], "joints": [[0, 1]] },
{ "label_id": 1, "labels": ["cat1", "cat2"], "joints": [[0, 1]] },
{ "label_id": 2, "labels": ["cat1", "cat2"], "joints": [[0, 1]] },
{ "label_id": 3, "labels": ["cat1", "cat2"], "joints": [[0, 1]] },
{ "label_id": 4, "labels": ["cat1", "cat2"], "joints": [[0, 1]] }
]
}
},
"items": [
{
"id": "1",
"annotations": [
{
"id": 6,
"type": "cuboid_3d",
"attributes": { "occluded": true },
"group": 6,
"label_id": 0,
"position": [1.0, 2.0, 3.0],
"rotation": [2.0, 2.0, 4.0],
"scale": [1.0, 3.0, 4.0]
}
]
},
{ "id": "42", "annotations": [], "attr": { "a1": 5, "a2": "42" } }
]
}
127 changes: 127 additions & 0 deletions tests/assets/datumaro_dataset/diverse/annotations/train.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
{
"info": {},
"categories": {
"label": {
"labels": [
{ "name": "cat0", "parent": "", "attributes": ["x", "y"] },
{ "name": "cat1", "parent": "", "attributes": ["x", "y"] },
{ "name": "cat2", "parent": "", "attributes": ["x", "y"] },
{ "name": "cat3", "parent": "", "attributes": ["x", "y"] },
{ "name": "cat4", "parent": "", "attributes": ["x", "y"] }
],
"attributes": ["a", "b", "score"]
},
"mask": {
"colormap": [
{ "label_id": 0, "r": 0, "g": 0, "b": 0 },
{ "label_id": 1, "r": 128, "g": 0, "b": 0 },
{ "label_id": 2, "r": 0, "g": 128, "b": 0 },
{ "label_id": 3, "r": 128, "g": 128, "b": 0 },
{ "label_id": 4, "r": 0, "g": 0, "b": 128 }
]
},
"points": {
"items": [
{ "label_id": 0, "labels": ["cat1", "cat2"], "joints": [[0, 1]] },
{ "label_id": 1, "labels": ["cat1", "cat2"], "joints": [[0, 1]] },
{ "label_id": 2, "labels": ["cat1", "cat2"], "joints": [[0, 1]] },
{ "label_id": 3, "labels": ["cat1", "cat2"], "joints": [[0, 1]] },
{ "label_id": 4, "labels": ["cat1", "cat2"], "joints": [[0, 1]] }
]
}
},
"items": [
{
"id": "100",
"annotations": [
{
"id": 1,
"type": "caption",
"attributes": {},
"group": 0,
"caption": "hello"
},
{
"id": 2,
"type": "caption",
"attributes": {},
"group": 5,
"caption": "world"
},
{
"id": 3,
"type": "label",
"attributes": { "x": 1, "y": "2" },
"group": 0,
"label_id": 2
},
{
"id": 4,
"type": "bbox",
"attributes": { "score": 1.0 },
"group": 0,
"label_id": 4,
"z_order": 1,
"bbox": [1.0, 2.0, 3.0, 4.0]
},
{
"id": 5,
"type": "bbox",
"attributes": { "a": 1.5, "b": "text" },
"group": 5,
"label_id": null,
"z_order": 0,
"bbox": [5.0, 6.0, 7.0, 8.0]
},
{
"id": 5,
"type": "points",
"attributes": { "x": 1, "y": "2" },
"group": 0,
"label_id": 0,
"points": [1.0, 2.0, 2.0, 0.0, 1.0, 1.0],
"z_order": 4,
"visibility": [2, 2, 2]
},
{
"id": 5,
"type": "mask",
"attributes": { "x": 1, "y": "2" },
"group": 0,
"label_id": 3,
"rle": { "counts": "06", "size": [2, 3] },
"z_order": 2
}
],
"image": { "path": "100.jpg", "size": [10, 6] }
},
{
"id": "21",
"annotations": [
{
"id": 0,
"type": "caption",
"attributes": {},
"group": 0,
"caption": "test"
},
{
"id": 0,
"type": "label",
"attributes": {},
"group": 0,
"label_id": 2
},
{
"id": 42,
"type": "bbox",
"attributes": {},
"group": 42,
"label_id": 5,
"z_order": 0,
"bbox": [1.0, 2.0, 3.0, 4.0]
}
]
}
]
}
58 changes: 58 additions & 0 deletions tests/assets/datumaro_dataset/diverse/annotations/val.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{
"info": {},
"categories": {
"label": {
"labels": [
{ "name": "cat0", "parent": "", "attributes": ["x", "y"] },
{ "name": "cat1", "parent": "", "attributes": ["x", "y"] },
{ "name": "cat2", "parent": "", "attributes": ["x", "y"] },
{ "name": "cat3", "parent": "", "attributes": ["x", "y"] },
{ "name": "cat4", "parent": "", "attributes": ["x", "y"] }
],
"attributes": ["a", "b", "score"]
},
"mask": {
"colormap": [
{ "label_id": 0, "r": 0, "g": 0, "b": 0 },
{ "label_id": 1, "r": 128, "g": 0, "b": 0 },
{ "label_id": 2, "r": 0, "g": 128, "b": 0 },
{ "label_id": 3, "r": 128, "g": 128, "b": 0 },
{ "label_id": 4, "r": 0, "g": 0, "b": 128 }
]
},
"points": {
"items": [
{ "label_id": 0, "labels": ["cat1", "cat2"], "joints": [[0, 1]] },
{ "label_id": 1, "labels": ["cat1", "cat2"], "joints": [[0, 1]] },
{ "label_id": 2, "labels": ["cat1", "cat2"], "joints": [[0, 1]] },
{ "label_id": 3, "labels": ["cat1", "cat2"], "joints": [[0, 1]] },
{ "label_id": 4, "labels": ["cat1", "cat2"], "joints": [[0, 1]] }
]
}
},
"items": [
{
"id": "2",
"annotations": [
{
"id": 11,
"type": "polyline",
"attributes": {},
"group": 0,
"label_id": null,
"points": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
"z_order": 1
},
{
"id": 12,
"type": "polygon",
"attributes": {},
"group": 0,
"label_id": null,
"points": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
"z_order": 4
}
]
}
]
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 8ba237c

Please sign in to comment.