Source code for datumaro.plugins.data_formats.datumaro.importer

# Copyright (C) 2023 Intel Corporation
#
# SPDX-License-Identifier: MIT

import os.path as osp
from typing import Dict, List, Optional, Type

from datumaro.components.format_detection import FormatDetectionConfidence, FormatDetectionContext
from datumaro.components.importer import Importer
from datumaro.components.merge.extractor_merger import ExtractorMerger
from datumaro.rust_api import JsonSectionPageMapper

from .format import DatumaroPath


[docs] class DatumaroImporter(Importer): PATH_CLS = DatumaroPath
[docs] @classmethod def detect( cls, context: FormatDetectionContext, ) -> Optional[FormatDetectionConfidence]: annot_file = context.require_file( osp.join(cls.PATH_CLS.ANNOTATIONS_DIR, "*" + cls.PATH_CLS.ANNOTATION_EXT) ) with context.probe_text_file( annot_file, 'must be a JSON object with "categories" ' 'and "items" keys', ): fpath = osp.join(context.root_path, annot_file) page_mapper = JsonSectionPageMapper(fpath) sections = page_mapper.sections() if not {"categories", "items"} <= sections.keys(): raise Exception
[docs] @classmethod def find_sources(cls, path) -> List[Dict]: return cls._find_sources_recursive( path, cls.PATH_CLS.ANNOTATION_EXT, cls.NAME, dirname=cls.PATH_CLS.ANNOTATIONS_DIR, )
[docs] @classmethod def get_file_extensions(cls) -> List[str]: return [cls.PATH_CLS.ANNOTATION_EXT]
@property def can_stream(self) -> bool: return True
[docs] def get_extractor_merger(self) -> Type[ExtractorMerger]: return ExtractorMerger