Source code for datumaro.plugins.data_formats.datumaro.importer
# Copyright (C) 2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
import os.path as osp
from typing import Dict, List, Optional, Type
from datumaro.components.format_detection import FormatDetectionConfidence, FormatDetectionContext
from datumaro.components.importer import Importer
from datumaro.components.merge.extractor_merger import ExtractorMerger
from datumaro.rust_api import JsonSectionPageMapper
from .format import DatumaroPath
[docs]
class DatumaroImporter(Importer):
    """Importer for datasets stored in the Datumaro annotation layout.

    Path conventions (annotation directory name and annotation file
    extension) are read from ``PATH_CLS`` rather than hard-coded in the
    methods below.
    """

    # Holder of the ANNOTATIONS_DIR / ANNOTATION_EXT constants used below.
    PATH_CLS = DatumaroPath

    @classmethod
    def detect(
        cls,
        context: FormatDetectionContext,
    ) -> Optional[FormatDetectionConfidence]:
        """Check whether the dataset under ``context`` matches this format.

        An annotation file matching ``<ANNOTATIONS_DIR>/*<ANNOTATION_EXT>``
        is required, and its sections must include both ``"categories"``
        and ``"items"``.

        Args:
            context: Format detection context used to locate and probe
                the candidate annotation file.

        Returns:
            Nothing is returned explicitly, so the result is ``None``.

        Raises:
            Exception: Raised inside the ``probe_text_file`` block when a
                required section is missing.
                NOTE(review): presumably the probe context turns this into
                a failed requirement carrying the message below - confirm.
        """
        annotation_pattern = osp.join(
            cls.PATH_CLS.ANNOTATIONS_DIR,
            "*" + cls.PATH_CLS.ANNOTATION_EXT,
        )
        annot_file = context.require_file(annotation_pattern)

        requirement_desc = 'must be a JSON object with "categories" and "items" keys'
        required_sections = {"categories", "items"}

        with context.probe_text_file(annot_file, requirement_desc):
            # The matched path is joined onto the context root before being
            # handed to the section mapper.
            full_path = osp.join(context.root_path, annot_file)
            found_sections = JsonSectionPageMapper(full_path).sections()

            has_required = required_sections <= found_sections.keys()
            if not has_required:
                raise Exception

    @classmethod
    def find_sources(cls, path) -> List[Dict]:
        """Collect the sources of this format found under ``path``.

        Delegates to ``_find_sources_recursive`` with this format's
        annotation extension, its ``NAME`` and its annotation directory.

        Args:
            path: Location to search.

        Returns:
            Whatever ``_find_sources_recursive`` returns for these arguments.
        """
        path_cls = cls.PATH_CLS
        sources = cls._find_sources_recursive(
            path,
            path_cls.ANNOTATION_EXT,
            cls.NAME,
            dirname=path_cls.ANNOTATIONS_DIR,
        )
        return sources

    @classmethod
    def get_file_extensions(cls) -> List[str]:
        """Return a one-element list holding the annotation file extension."""
        annotation_ext = cls.PATH_CLS.ANNOTATION_EXT
        return [annotation_ext]

    @property
    def can_stream(self) -> bool:
        """Always ``True`` for this importer."""
        return True