Source code for datumaro.plugins.data_formats.image_dir

# Copyright (C) 2019-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT

import logging as log
import os
import os.path as osp
from typing import List, Optional

from datumaro.components.dataset_base import DatasetItem, SubsetBase
from datumaro.components.exporter import Exporter
from datumaro.components.format_detection import FormatDetectionConfidence
from datumaro.components.importer import ImportContext, Importer
from datumaro.components.media import Image
from datumaro.util.image import IMAGE_EXTENSIONS, find_images


[docs] class ImageDirImporter(Importer): """ Reads images from a directory as a dataset. """ DETECT_CONFIDENCE = FormatDetectionConfidence.EXTREME_LOW
[docs] @classmethod def build_cmdline_parser(cls, **kwargs): parser = super().build_cmdline_parser(**kwargs) parser.add_argument( "--subset", help="The name of the subset for the produced dataset items " "(default: none)", ) return parser
[docs] @classmethod def find_sources(cls, path): if not osp.isdir(path): return [] return [{"url": path, "format": ImageDirBase.NAME}]
[docs] @classmethod def get_file_extensions(cls) -> List[str]: return list(IMAGE_EXTENSIONS)
[docs] class ImageDirBase(SubsetBase): def __init__( self, url: str, *, subset: Optional[str] = None, ctx: Optional[ImportContext] = None, ): super().__init__(subset=subset, ctx=ctx) assert osp.isdir(url), url for path in find_images(url, recursive=True): item_id = osp.relpath(osp.splitext(path)[0], url) self._items.append( DatasetItem(id=item_id, subset=self._subset, media=Image.from_file(path=path)) ) self._ann_types = set()
[docs] class ImageDirExporter(Exporter): DEFAULT_IMAGE_EXT = ".jpg" def _apply_impl(self): os.makedirs(self._save_dir, exist_ok=True) for item in self._extractor: if item.media: self._save_image(item) else: log.debug("Item '%s' has no image info", item.id)