# Source code for datumaro.plugins.data_formats.synthia.base

# Copyright (C) 2021-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT

import errno
import os.path as osp
from collections import OrderedDict
from glob import glob
from typing import Optional

import numpy as np

from datumaro.components.annotation import (
    AnnotationType,
    ExtractedMask,
    LabelCategories,
    MaskCategories,
)
from datumaro.components.dataset_base import DatasetItem, SubsetBase
from datumaro.components.errors import InvalidAnnotationError
from datumaro.components.importer import ImportContext
from datumaro.components.media import Image
from datumaro.util.image import find_images
from datumaro.util.mask_tools import generate_colormap, load_mask
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file

from .format import (
    SynthiaAlLabelMap,
    SynthiaAlPath,
    SynthiaRandLabelMap,
    SynthiaRandPath,
    SynthiaSfLabelMap,
    SynthiaSfPath,
)



[docs]
def make_categories(label_map):
    """Build the label and mask categories described by ``label_map``.

    Args:
        label_map: Mapping of label name to its color, or to ``None`` when
            the label has no color. A color is any sequence whose first
            three items are the color components. Labels are registered in
            the mapping's iteration order, and colormap keys are the
            positions of the labels in that same order.

    Returns:
        A dict with two entries: ``AnnotationType.label`` mapped to the
        ``LabelCategories`` holding every label name, and
        ``AnnotationType.mask`` mapped to the ``MaskCategories`` built from
        the colormap.
    """
    categories = {}
    label_categories = LabelCategories()
    for label in label_map:
        label_categories.add(label)
    categories[AnnotationType.label] = label_categories

    has_colors = any(v is not None for v in label_map.values())
    if not has_colors:  # generate new colors
        colormap = generate_colormap(len(label_map))
    else:  # only copy defined colors
        # A map may mix colored and colorless labels (e.g. a label file where
        # only some lines carry a color). Entries without a color are left out
        # of the colormap instead of being indexed, which would raise TypeError.
        colormap = {
            label_id: (desc[0], desc[1], desc[2])
            for label_id, desc in enumerate(label_map.values())
            if desc is not None
        }
    mask_categories = MaskCategories(colormap)
    # Accessed for its side effect only; presumably this precomputes and caches
    # the inverse colormap - confirm against MaskCategories.
    mask_categories.inverse_colormap  # pylint: disable=pointless-statement
    categories[AnnotationType.mask] = mask_categories
    return categories




[docs]
def parse_label_map(path):
    """Read a label map from a text file.

    Blank lines and lines whose first non-blank character is ``#`` are
    skipped. Every other line is split on whitespace and interpreted as:

    * four or more fields - ``<c0> <c1> <c2> <name>``: the first three
      fields are the integer color components, the fourth is the label
      name; any further fields are ignored;
    * one or two fields - the first field is the label name and the label
      has no color (``None``);
    * exactly three fields - rejected, because the line is neither a
      complete color/name record nor a plain name.

    Args:
        path: Path to the UTF-8 encoded label map file.

    Returns:
        An ``OrderedDict`` mapping label name to a 3-tuple of ints, or to
        ``None`` for labels without a color, in file order.

    Raises:
        InvalidAnnotationError: If a label name is defined more than once
            or a line has exactly three fields.
        ValueError: If a color component is not an integer.
    """
    label_map = OrderedDict()
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            # skip empty and commented lines
            line = line.strip()
            if not line or line[0] == "#":
                continue

            # color, name
            label_desc = line.split()

            if len(label_desc) == 3:
                # Previously this case fell into the color branch and failed
                # with a bare IndexError on the missing name field.
                raise InvalidAnnotationError(
                    "Label description '%s' is malformed: "
                    "expected '<c0> <c1> <c2> <name>' or '<name>'" % line
                )

            if 3 < len(label_desc):
                name = label_desc[3]
                color = tuple(int(c) for c in label_desc[:3])
            else:
                name = label_desc[0]
                color = None

            if name in label_map:
                raise InvalidAnnotationError("Label '%s' is already defined" % name)

            label_map[name] = color
    return label_map



class _SynthiaBase(SubsetBase):
    """Shared loader for the SYNTHIA dataset layouts defined in this module.

    Subclasses supply a ``path_formats`` class, whose own attributes name
    the sub-directories to read, and a default ``label_map``. All items are
    loaded eagerly in the constructor.
    """

    def __init__(
        self,
        path: str,
        path_formats,
        label_map,
        *,
        subset: Optional[str] = None,
        ctx: Optional[ImportContext] = None,
    ):
        """Load categories and items from the dataset rooted at ``path``.

        Args:
            path: Dataset root directory.
            path_formats: Class whose own attributes ``IMAGES_DIR``,
                ``LABELS_SEGM_DIR`` and ``SEMANTIC_SEGM_DIR`` (each optional)
                give directory paths relative to ``path``.
            label_map: Label map used when the dataset directory provides
                neither a meta file nor a ``label_colors.txt`` file.
            subset: Forwarded to ``SubsetBase``.
            ctx: Forwarded to ``SubsetBase``.

        Raises:
            NotADirectoryError: If ``path`` is not an existing directory.
        """
        if not osp.isdir(path):
            raise NotADirectoryError(errno.ENOTDIR, "Can't find dataset directory", path)

        super().__init__(subset=subset, ctx=ctx)

        self._path_formats = path_formats
        self._label_map = label_map
        self._ann_types = set()

        # Only attributes declared directly on ``path_formats`` count
        # (``vars`` does not see inherited ones); a missing attribute leaves
        # the matching directory unset.
        declared = vars(path_formats)
        self._img_dir = None
        self._inst_dir = None
        self._seg_dir = None
        if "IMAGES_DIR" in declared:
            self._img_dir = osp.join(path, path_formats.IMAGES_DIR)
        if "LABELS_SEGM_DIR" in declared:
            self._inst_dir = osp.join(path, path_formats.LABELS_SEGM_DIR)
        if "SEMANTIC_SEGM_DIR" in declared:
            self._seg_dir = osp.join(path, path_formats.SEMANTIC_SEGM_DIR)

        # Categories must exist before items: the color-mask branch of
        # _load_items reads the inverse colormap from them.
        self._categories = self._load_categories(path)
        self._items = list(self._load_items().values())

    def _load_categories(self, path):
        """Return the dataset categories.

        Sources are tried in order: a meta file in ``path``, then
        ``label_colors.txt`` in ``path``, then the default label map given
        to the constructor.
        """
        if has_meta_file(path):
            return make_categories(parse_meta_file(path))

        label_map_path = osp.join(path, "label_colors.txt")
        if osp.isfile(label_map_path):
            label_map = parse_label_map(label_map_path)
        else:
            label_map = self._label_map

        return make_categories(label_map)

    @staticmethod
    def _relative_item_id(file_path, root_dir):
        """Return ``file_path`` relative to ``root_dir``, without extension, using ``/``."""
        return osp.splitext(osp.relpath(file_path, root_dir))[0].replace("\\", "/")

    def _build_mask_item(self, item_id, index_mask, images):
        """Create the item for ``item_id`` with one mask per value found in ``index_mask``.

        Args:
            item_id: Identifier of the item.
            index_mask: Array of per-pixel class indices.
            images: Mapping of item id to image file path; the item gets no
                media when ``item_id`` is absent from it.
        """
        anno = [
            ExtractedMask(index_mask=index_mask, index=label_id, label=label_id)
            for label_id in np.unique(index_mask)
        ]
        if anno:
            self._ann_types.add(AnnotationType.mask)

        image = images.get(item_id)
        if image:
            image = Image.from_file(path=image)

        return DatasetItem(id=item_id, media=image, annotations=anno)

    def _load_items(self):
        """Build the items of the dataset, keyed by item id.

        Annotations come from the text label directory when it exists
        (``*.txt`` files directly inside it); otherwise from the color mask
        directory (searched recursively). Images are attached to the items
        whose id matches an image found under the image directory.
        """
        images = {}
        if self._img_dir and osp.isdir(self._img_dir):
            images = {
                self._relative_item_id(p, self._img_dir): p
                for p in find_images(self._img_dir, recursive=True)
            }

        items = {}
        if self._inst_dir and osp.isdir(self._inst_dir):
            for gt_label in glob(self._inst_dir + "/*.txt"):
                item_id = self._relative_item_id(gt_label, self._inst_dir)
                labels_mask = np.loadtxt(gt_label)
                items[item_id] = self._build_mask_item(item_id, labels_mask, images)
        elif self._seg_dir and osp.isdir(self._seg_dir):
            # Loop-invariant: the categories do not change while loading.
            inverse_cls_colormap = self._categories[AnnotationType.mask].inverse_colormap
            for seg_img_path in find_images(self._seg_dir, recursive=True):
                item_id = self._relative_item_id(seg_img_path, self._seg_dir)
                color_mask = load_mask(seg_img_path, inverse_cls_colormap, default_id=0)
                items[item_id] = self._build_mask_item(item_id, color_mask, images)

        return items



[docs]
class SynthiaRandBase(_SynthiaBase):
    """Loader that configures ``_SynthiaBase`` with ``SynthiaRandPath`` and ``SynthiaRandLabelMap``."""

    def __init__(self, path: str, **kwargs):
        # Remaining keyword arguments (e.g. ``subset``, ``ctx``) are passed through unchanged.
        super().__init__(
            path=path, path_formats=SynthiaRandPath, label_map=SynthiaRandLabelMap, **kwargs
        )




[docs]
class SynthiaSfBase(_SynthiaBase):
    """Loader that configures ``_SynthiaBase`` with ``SynthiaSfPath`` and ``SynthiaSfLabelMap``."""

    def __init__(self, path: str, **kwargs):
        # Remaining keyword arguments (e.g. ``subset``, ``ctx``) are passed through unchanged.
        super().__init__(
            path=path, path_formats=SynthiaSfPath, label_map=SynthiaSfLabelMap, **kwargs
        )




[docs]
class SynthiaAlBase(_SynthiaBase):
    """Loader that configures ``_SynthiaBase`` with ``SynthiaAlPath`` and ``SynthiaAlLabelMap``."""

    def __init__(self, path: str, **kwargs):
        # Remaining keyword arguments (e.g. ``subset``, ``ctx``) are passed through unchanged.
        super().__init__(
            path=path, path_formats=SynthiaAlPath, label_map=SynthiaAlLabelMap, **kwargs
        )