# Source code for datumaro.plugins.data_formats.mars

# Copyright (C) 2020-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT

import fnmatch
import glob
import logging as log
import os
import os.path as osp
from typing import List, Optional

from datumaro.components.annotation import AnnotationType, Label, LabelCategories
from datumaro.components.dataset import DatasetItem
from datumaro.components.dataset_base import DatasetBase
from datumaro.components.format_detection import FormatDetectionContext
from datumaro.components.importer import ImportContext, Importer
from datumaro.components.media import Image
from datumaro.util.image import find_images



class MarsPath:
    """Glob-style (``fnmatch``) patterns describing the MARS directory layout.

    The patterns are combined elsewhere in this module as
    ``<SUBSET_DIR_PATTERN>/<image dir>/<image dir><IMAGE_NAME_POSTFIX>``.
    """

    # Subset directories; the part after the first "_" is used as the subset name.
    SUBSET_DIR_PATTERN = "bbox_*"
    # Per-person image directories: either exactly four digits or the literal "00-1".
    IMAGE_DIR_PATTERNS = ["[0-9]" * 4, "00-1"]
    # Part of an image file name following the 4-character directory prefix:
    # "C<d>T<dddd>F<ddd>.jpg" (camera, track and frame numbers).
    IMAGE_NAME_POSTFIX = "C[0-9]" + "T" + "[0-9]" * 4 + "F" + "[0-9]" * 3 + ".jpg"




class MarsBase(DatasetBase):
    """Reads a MARS-format directory tree into dataset items.

    The layout follows the patterns in ``MarsPath``: ``path`` contains subset
    directories named ``bbox_<subset>``; each of them contains one directory
    per person (four digits, or ``00-1``) holding that person's images. The
    person directory names become the label categories.
    """

    def __init__(self, path: str, *, ctx: Optional[ImportContext] = None):
        """Scan ``path`` and eagerly load the categories and all items.

        Args:
            path: Root directory of the dataset; must be an existing directory.
            ctx: Optional import context, forwarded to ``DatasetBase``.

        Raises:
            AssertionError: If ``path`` is not a directory.
        """
        assert osp.isdir(path), path
        super().__init__(ctx=ctx)

        self._dataset_dir = path
        # Map "<subset>" (the text after "bbox_") to the subset directory path.
        self._subsets = {
            subset_dir.split("_", maxsplit=1)[1]: osp.join(path, subset_dir)
            for subset_dir in os.listdir(path)
            if (
                osp.isdir(osp.join(path, subset_dir))
                and fnmatch.fnmatch(subset_dir, MarsPath.SUBSET_DIR_PATTERN)
            )
        }

        # Categories must be loaded first: _load_items() reads self._categories.
        self._categories = self._load_categories()
        self._items = []
        for subset, subset_path in self._subsets.items():
            self._items.extend(self._load_items(subset, subset_path))

    def __iter__(self):
        """Yield the items loaded in ``__init__``, in load order."""
        yield from self._items

    def categories(self):
        """Return the categories dict built by ``_load_categories``."""
        return self._categories

    def _load_categories(self):
        """Build label categories from the person directory names.

        Returns:
            A dict mapping ``AnnotationType.label`` to a ``LabelCategories``
            created from the sorted, de-duplicated names of all directories,
            across all subsets, that match ``MarsPath.IMAGE_DIR_PATTERNS``.
        """
        # A set is used so that a person directory present in several subsets
        # produces a single label name.
        dir_names = {
            dir_name
            for subset_path in self._subsets.values()
            for dir_name in os.listdir(subset_path)
            if (
                # subset_path already includes the dataset directory, so it
                # must not be joined with self._dataset_dir a second time
                # (that produced a wrong path whenever the root was relative).
                osp.isdir(osp.join(subset_path, dir_name))
                and any(
                    fnmatch.fnmatch(dir_name, image_dir)
                    for image_dir in MarsPath.IMAGE_DIR_PATTERNS
                )
            )
        }
        return {AnnotationType.label: LabelCategories.from_iterable(sorted(dir_names))}

    def _load_items(self, subset, path):
        """Create the dataset items of one subset.

        Args:
            subset: Subset name assigned to the created items.
            path: Path of the subset directory.

        Returns:
            A list of ``DatasetItem``. Images whose name matches
            ``<label><MarsPath.IMAGE_NAME_POSTFIX>`` get a ``Label`` annotation
            and ``person_id``/``camera_id``/``track_id``/``frame_id``
            attributes parsed from the file name; other images found in a
            label directory are added without annotations.
        """
        label_categories = self._categories[AnnotationType.label]

        items = []
        for label_cat in label_categories:
            label = label_cat.name
            label_id = label_categories.find(label)[0]
            for image_path in find_images(osp.join(path, label)):
                image_name = osp.basename(image_path)
                item_id = osp.splitext(image_name)[0]
                pedestrian_id = image_name[0:4]

                if not fnmatch.fnmatch(image_name, label + MarsPath.IMAGE_NAME_POSTFIX):
                    # The name does not follow the MARS naming scheme: keep the
                    # image, but without annotations. The item is built the
                    # same way as the annotated one below (media + subset).
                    items.append(
                        DatasetItem(
                            id=item_id,
                            subset=subset,
                            media=Image.from_file(path=image_path),
                        )
                    )
                    continue

                # NOTE(review): the pattern match above already requires the
                # name to start with `label`, so this branch looks unreachable;
                # it is kept as a defensive check.
                if pedestrian_id != label:
                    log.warning(
                        f"The image {image_path} will be skipped because "
                        "the pedestrian id for it does not match "
                        f"the directory name: {label}"
                    )
                    continue

                # File name layout: <4-char id>C<d>T<dddd>F<ddd>.jpg, so the
                # fixed character offsets below select the numeric fields.
                items.append(
                    DatasetItem(
                        id=item_id,
                        media=Image.from_file(path=image_path),
                        subset=subset,
                        annotations=[Label(label=label_id)],
                        attributes={
                            "person_id": pedestrian_id,
                            "camera_id": int(image_name[5]),
                            "track_id": int(image_name[7:11]),
                            "frame_id": int(image_name[12:15]),
                        },
                    )
                )
                self._ann_types.add(AnnotationType.label)

        return items




class MarsImporter(Importer):
    """Importer that detects and locates datasets in the MARS directory layout."""

    @classmethod
    def detect(cls, context: FormatDetectionContext):
        """Declare the files that identify a MARS dataset.

        One alternative is registered per pattern in
        ``MarsPath.IMAGE_DIR_PATTERNS``; each requires a file matching
        ``bbox_*/<image dir>/<image dir><MarsPath.IMAGE_NAME_POSTFIX>``.

        Args:
            context: Format detection context the requirements are added to.
        """
        with context.require_any():
            for image_dir in MarsPath.IMAGE_DIR_PATTERNS:
                with context.alternative():
                    context.require_file(
                        "/".join(
                            [
                                MarsPath.SUBSET_DIR_PATTERN,
                                image_dir,
                                image_dir + MarsPath.IMAGE_NAME_POSTFIX,
                            ]
                        )
                    )

    @classmethod
    def find_sources(cls, path):
        """Check whether ``path`` contains at least one MARS image.

        Args:
            path: Candidate dataset root directory.

        Returns:
            ``[{"url": path, "format": "mars"}]`` if some subset directory
            contains an image matching the MARS naming patterns, otherwise an
            empty list (also when ``path`` is not a directory).
        """
        if not osp.isdir(path):
            return []

        # Scan the root once instead of once per image directory pattern.
        subset_dirs = [
            subset_dir
            for subset_dir in os.listdir(path)
            if (
                osp.isdir(osp.join(path, subset_dir))
                and fnmatch.fnmatch(subset_dir, MarsPath.SUBSET_DIR_PATTERN)
            )
        ]

        for image_dir in MarsPath.IMAGE_DIR_PATTERNS:
            for subset_dir in subset_dirs:
                pattern = "/".join(
                    (
                        # The concrete part of the path is escaped so that glob
                        # metacharacters in real directory names are matched
                        # literally; only the MARS patterns act as wildcards.
                        glob.escape(osp.join(path, subset_dir)),
                        image_dir,
                        image_dir + MarsPath.IMAGE_NAME_POSTFIX,
                    )
                )
                # A single match is enough; iglob avoids listing every image.
                if next(glob.iglob(pattern), None) is not None:
                    return [{"url": path, "format": "mars"}]

        return []

    @classmethod
    def get_file_extensions(cls) -> List[str]:
        """Return the image file extension taken from ``MarsPath.IMAGE_NAME_POSTFIX``."""
        return [osp.splitext(MarsPath.IMAGE_NAME_POSTFIX)[1]]