Source code for datumaro.plugins.data_formats.image_dir

# Copyright (C) 2019-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT

import logging as log
import os
from pathlib import Path
from typing import List, Optional

from datumaro.components.dataset_base import DatasetItem, SubsetBase
from datumaro.components.exporter import Exporter
from datumaro.components.format_detection import FormatDetectionConfidence, FormatDetectionContext
from datumaro.components.importer import ImportContext, Importer
from datumaro.components.media import Image
from datumaro.util.image import IMAGE_EXTENSIONS, find_images


[docs] class ImageDirImporter(Importer): """ Reads images from a directory as a dataset. """ DETECT_CONFIDENCE = FormatDetectionConfidence.EXTREME_LOW
[docs] @classmethod def build_cmdline_parser(cls, **kwargs): parser = super().build_cmdline_parser(**kwargs) parser.add_argument( "--subset", help="The name of the subset for the produced dataset items " "(default: none)", ) return parser
[docs] @classmethod def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence: path = Path(context.root_path) for item in path.iterdir(): if item.is_dir(): context.fail("Only flat image directories are supported") elif item.suffix.lower() not in IMAGE_EXTENSIONS: context.fail(f"File {item} is not an image.") return super().detect(context)
[docs] @classmethod def find_sources(cls, path): path = Path(path) if not path.is_dir(): return [] return [{"url": str(path), "format": ImageDirBase.NAME}]
[docs] @classmethod def get_file_extensions(cls) -> List[str]: return list(IMAGE_EXTENSIONS)
[docs] class ImageDirBase(SubsetBase): def __init__( self, url: str, *, subset: Optional[str] = None, ctx: Optional[ImportContext] = None, ): super().__init__(subset=subset, ctx=ctx) url = Path(url) assert url.is_dir(), url for path in find_images(str(url)): item_id = Path(path).stem self._items.append( DatasetItem(id=item_id, subset=self._subset, media=Image.from_file(path=path)) ) self._ann_types = set() @property def is_stream(self) -> bool: return True
[docs] class ImageDirExporter(Exporter): DEFAULT_IMAGE_EXT = ".jpg" def _apply_impl(self): os.makedirs(self._save_dir, exist_ok=True) for item in self._extractor: if item.media: self._save_image(item) else: log.debug("Item '%s' has no image info", item.id)