Source code for datumaro.plugins.data_formats.celeba.align_celeba

# Copyright (C) 2023 Intel Corporation
#
# SPDX-License-Identifier: MIT

import errno
import os
import os.path as osp
from typing import Optional

from datumaro.components.annotation import (
    AnnotationType,
    Label,
    LabelCategories,
    Points,
    PointsCategories,
)
from datumaro.components.dataset_base import DatasetItem, SubsetBase
from datumaro.components.errors import DatasetImportError, InvalidAnnotationError
from datumaro.components.importer import ImportContext
from datumaro.components.media import Image
from datumaro.util.image import find_images
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file

from .celeba import CelebaImporter, CelebaPath



[docs]
class AlignCelebaPath(CelebaPath):
    IMAGES_DIR = osp.join("Img", "img_align_celeba")
    LANDMARKS_FILE = osp.join("Anno", "list_landmarks_align_celeba.txt")
    LANDMARKS_HEADER = (
        "lefteye_x lefteye_y righteye_x righteye_y "
        "nose_x nose_y leftmouth_x leftmouth_y rightmouth_x rightmouth_y"
    )




[docs]
class AlignCelebaBase(SubsetBase):
    def __init__(
        self,
        path: str,
        *,
        subset: Optional[str] = None,
        ctx: Optional[ImportContext] = None,
    ):
        if not osp.isdir(path):
            raise NotADirectoryError(errno.ENOTDIR, "Can't find dataset directory", path)

        super().__init__(subset=subset, ctx=ctx)
        self._anno_dir = osp.dirname(path)

        self._categories = {AnnotationType.label: LabelCategories()}
        if has_meta_file(path):
            self._categories = {
                AnnotationType.label: LabelCategories.from_iterable(parse_meta_file(path).keys())
            }

        self._items = list(self._load_items(path).values())

    def _load_items(self, root_dir):
        items = {}

        image_dir = osp.join(root_dir, AlignCelebaPath.IMAGES_DIR)

        if osp.isdir(image_dir):
            images = {
                osp.splitext(osp.relpath(p, image_dir))[0].replace("\\", "/"): p
                for p in find_images(image_dir, recursive=True)
            }
        else:
            images = {}

        label_categories = self._categories[AnnotationType.label]

        labels_path = osp.join(root_dir, AlignCelebaPath.LABELS_FILE)
        if not osp.isfile(labels_path):
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), labels_path)

        with open(labels_path, encoding="utf-8") as f:
            for line in f:
                item_id, item_ann = self.split_annotation(line)
                label_ids = [int(id) for id in item_ann]
                anno = []
                for label in label_ids:
                    while len(label_categories) <= label:
                        label_categories.add("class-%d" % len(label_categories))
                    anno.append(Label(label))
                    self._ann_types.add(AnnotationType.label)

                image = images.get(item_id)
                if image:
                    image = Image.from_file(path=image)

                items[item_id] = DatasetItem(id=item_id, media=image, annotations=anno)

        landmark_path = osp.join(root_dir, AlignCelebaPath.LANDMARKS_FILE)
        if osp.isfile(landmark_path):
            with open(landmark_path, encoding="utf-8") as f:
                landmarks_number = int(f.readline().strip())

                point_cat = PointsCategories()
                for i, point_name in enumerate(f.readline().strip().split()):
                    point_cat.add(i, [point_name])
                self._categories[AnnotationType.points] = point_cat

                counter = 0
                for counter, line in enumerate(f):
                    item_id, item_ann = self.split_annotation(line)
                    landmarks = [float(id) for id in item_ann]

                    if len(landmarks) != len(point_cat):
                        raise InvalidAnnotationError(
                            "File '%s', line %s: "
                            "points do not match the header of this file" % (landmark_path, line)
                        )

                    if item_id not in items:
                        raise InvalidAnnotationError(
                            "File '%s', line %s: "
                            "for this item are not label in %s "
                            % (landmark_path, line, AlignCelebaPath.LABELS_FILE)
                        )

                    anno = items[item_id].annotations
                    label = anno[0].label
                    anno.append(Points(landmarks, label=label))
                    self._ann_types.add(AnnotationType.points)

                if landmarks_number - 1 != counter:
                    raise InvalidAnnotationError(
                        "File '%s': the number of "
                        "landmarks does not match the specified number "
                        "at the beginning of the file " % landmark_path
                    )

        attr_path = osp.join(root_dir, AlignCelebaPath.ATTRS_FILE)
        if osp.isfile(attr_path):
            with open(attr_path, encoding="utf-8") as f:
                attr_number = int(f.readline().strip())
                attr_names = f.readline().split()

                counter = 0
                for counter, line in enumerate(f):
                    item_id, item_ann = self.split_annotation(line)
                    if len(attr_names) != len(item_ann):
                        raise DatasetImportError(
                            "File '%s', line %s: "
                            "the number of attributes "
                            "in the line does not match the number at the "
                            "beginning of the file " % (attr_path, line)
                        )

                    attrs = {name: 0 < int(ann) for name, ann in zip(attr_names, item_ann)}

                    if item_id not in items:
                        image = images.get(item_id)
                        if image:
                            image = Image.from_file(path=image)

                        items[item_id] = DatasetItem(id=item_id, media=image)

                    items[item_id].attributes = attrs

                if attr_number - 1 != counter:
                    raise DatasetImportError(
                        "File %s: the number of items "
                        "with attributes does not match the specified number "
                        "at the beginning of the file " % attr_path
                    )

        subset_path = osp.join(root_dir, AlignCelebaPath.SUBSETS_FILE)
        if osp.isfile(subset_path):
            with open(subset_path, encoding="utf-8") as f:
                for line in f:
                    item_id, item_ann = self.split_annotation(line)
                    subset_id = item_ann[0]
                    subset = AlignCelebaPath.SUBSETS[subset_id]

                    if item_id not in items:
                        image = images.get(item_id)
                        if image:
                            image = Image.from_file(path=image)
                        items[item_id] = DatasetItem(id=item_id, media=image)

                    items[item_id].subset = subset

                    if "default" in self._subsets:
                        self._subsets.pop()
                    self._subsets.append(subset)

        return items


[docs]
    def split_annotation(self, line):
        item = line.split('"')
        if 1 < len(item):
            if len(item) == 3:
                item_id = osp.splitext(item[1])[0]
                item = item[2].split()
            else:
                raise InvalidAnnotationError(
                    "Line %s: unexpected number " "of quotes in filename" % line
                )
        else:
            item = line.split()
            item_id = osp.splitext(item[0])[0]
        return item_id, item[1:]





[docs]
class AlignCelebaImporter(CelebaImporter):
    PATH_CLS = AlignCelebaPath