Source code for datumaro.plugins.data_formats.vott_json

# Copyright (C) 2022-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT

import errno
import os.path as osp
from typing import List, Optional

from datumaro.components.annotation import AnnotationType, Bbox, LabelCategories
from datumaro.components.dataset_base import DatasetItem, SubsetBase
from datumaro.components.format_detection import FormatDetectionContext
from datumaro.components.importer import ImportContext, Importer
from datumaro.components.media import Image
from datumaro.util import parse_json_file
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file



[docs]
class VottJsonPath:
    ANNO_FILE_SUFFIX = "-export.json"




[docs]
class VottJsonBase(SubsetBase):
    def __init__(
        self,
        path: str,
        *,
        subset: Optional[str] = None,
        ctx: Optional[ImportContext] = None,
    ):
        if not osp.isfile(path):
            raise FileNotFoundError(errno.ENOENT, "Can't find annotations file", path)

        if not subset:
            subset = osp.splitext(osp.basename(path))[0].rsplit("-", maxsplit=1)[0]

        super().__init__(subset=subset, ctx=ctx)

        if has_meta_file(path):
            self._categories = {
                AnnotationType.label: LabelCategories.from_iterable(parse_meta_file(path).keys())
            }
        else:
            self._categories = {AnnotationType.label: LabelCategories()}

        self._items = list(self._load_items(path).values())

    def _load_items(self, path):
        anno_dict = parse_json_file(path)

        label_categories = self._categories[AnnotationType.label]
        tags = anno_dict.get("tags", [])
        for label in tags:
            label_name = label.get("name")
            label_idx = label_categories.find(label_name)[0]
            if label_idx is None:
                label_idx = label_categories.add(label_name)

        items = {}
        for id, asset in anno_dict.get("assets", {}).items():
            item_id = osp.splitext(asset.get("asset", {}).get("name"))[0]
            annotations = []
            for region in asset.get("regions", []):
                tags = region.get("tags", [])
                if not tags:
                    bbox = region.get("boundingBox", {})
                    if bbox:
                        annotations.append(
                            Bbox(
                                float(bbox["left"]),
                                float(bbox["top"]),
                                float(bbox["width"]),
                                float(bbox["height"]),
                                attributes={"id": region.get("id")},
                            )
                        )

                for tag in region.get("tags", []):
                    label_idx = label_categories.find(tag)[0]
                    if label_idx is None:
                        label_idx = label_categories.add(tag)

                    bbox = region.get("boundingBox", {})
                    if bbox:
                        annotations.append(
                            Bbox(
                                float(bbox["left"]),
                                float(bbox["top"]),
                                float(bbox["width"]),
                                float(bbox["height"]),
                                label=label_idx,
                                attributes={"id": region.get("id")},
                            )
                        )
                        self._ann_types.add(AnnotationType.bbox)

            items[item_id] = DatasetItem(
                id=item_id,
                subset=self._subset,
                attributes={"id": id},
                media=Image.from_file(
                    path=osp.join(osp.dirname(path), asset.get("asset", {}).get("path"))
                ),
                annotations=annotations,
            )

        return items




[docs]
class VottJsonImporter(Importer):

[docs]
    @classmethod
    def find_sources(cls, path):
        return cls._find_sources_recursive(path, ".json", "vott_json")



[docs]
    @classmethod
    def detect(cls, context: FormatDetectionContext) -> None:
        context.require_file("*" + VottJsonPath.ANNO_FILE_SUFFIX)



[docs]
    @classmethod
    def get_file_extensions(cls) -> List[str]:
        return [osp.splitext(VottJsonPath.ANNO_FILE_SUFFIX)[1]]