Source code for datumaro.plugins.data_formats.labelme

# Copyright (C) 2020-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT

import errno
import logging as log
import os
import os.path as osp
from collections import defaultdict
from glob import glob, iglob
from typing import List, Optional

import numpy as np
from defusedxml import ElementTree

from datumaro.components.annotation import AnnotationType, Bbox, LabelCategories, Mask, Polygon
from datumaro.components.dataset_base import DatasetBase, DatasetItem
from datumaro.components.errors import MediaTypeError
from datumaro.components.exporter import Exporter
from datumaro.components.format_detection import FormatDetectionContext
from datumaro.components.importer import ImportContext, Importer
from datumaro.components.media import Image
from datumaro.util import cast, escape, unescape
from datumaro.util.image import save_image
from datumaro.util.mask_tools import find_mask_bbox, load_mask
from datumaro.util.meta_file_util import has_meta_file, parse_meta_file


class LabelMePath:
    """Directory names, file extension and attribute escape tables of the format."""

    # Sub-directories of a dataset root.
    IMAGES_DIR = "Images"
    ANNOTATIONS_DIR = "Annotations"
    MASKS_DIR = "Masks"

    # Extension used for images by default.
    IMAGE_EXT = ".jpg"

    # (escape sequence, placeholder) pairs. The order matters: the backslash
    # entry has to stay at the end of the list.
    ATTR_IMPORT_ESCAPES = [
        ("\\=", r"%%{eq}%%"),
        ('\\"', r"%%{doublequote}%%"),
        ("\\,", r"%%{comma}%%"),
        ("\\\\", r"%%{backslash}%%"),  # keep last
    ]

    # (raw character, escape sequence) pairs. The order matters: the backslash
    # entry has to stay at the beginning of the list.
    ATTR_EXPORT_ESCAPES = [
        ("\\", "\\\\"),  # keep first
        ("=", "\\="),
        ('"', '\\"'),
        (",", "\\,"),
    ]
class LabelMeBase(DatasetBase):
    """Reads a directory of LabelMe XML annotation files into dataset items.

    Every ``*.xml`` file found recursively under ``path`` becomes one
    ``DatasetItem``. The ``<folder>`` element gives the subset name and the
    ``<filename>`` element gives the item id (without extension) and the
    image file name.
    """

    def __init__(self, path: str, *, ctx: Optional[ImportContext] = None):
        """
        Args:
            path: Dataset root directory; must exist.
            ctx: Optional import context forwarded to the base class.
        """
        assert osp.isdir(path), path
        super().__init__(ctx=ctx)

        self._items, self._categories, self._subsets = self._parse(path)
        self._length = len(self._items)

    def _parse(self, path):
        """Parse all annotation files under ``path``.

        Returns:
            A tuple ``(items, categories, subsets)``: the list of parsed
            ``DatasetItem`` objects, the categories dict (labels found in the
            annotations are added to it while parsing), and the set of subset
            names encountered.
        """
        items = []
        subsets = set()

        if has_meta_file(path):
            # NOTE(review): if ``from_iterable`` builds a new instance, the
            # ``attributes`` passed to the constructor here are not carried
            # over to the result - confirm against LabelCategories.
            categories = {
                AnnotationType.label: LabelCategories(
                    attributes={"occluded", "username"}
                ).from_iterable(parse_meta_file(path).keys())
            }
        else:
            categories = {
                AnnotationType.label: LabelCategories(attributes={"occluded", "username"})
            }

        for xml_path in sorted(glob(osp.join(path, "**", "*.xml"), recursive=True)):
            root = ElementTree.parse(xml_path)

            # A missing or empty <folder> means "no subset".
            subset = root.findtext("folder") or ""
            filename = root.find("filename").text
            item_id = osp.splitext(filename)[0]
            image_path = osp.join(path, LabelMePath.IMAGES_DIR, subset, filename)

            # The size is optional: it is used only when both dimensions are present.
            image_size = None
            imagesize_elem = root.find("imagesize")
            if imagesize_elem is not None:
                width_text = imagesize_elem.findtext("ncols")
                height_text = imagesize_elem.findtext("nrows")
                if height_text and width_text:
                    image_size = (int(height_text), int(width_text))
            image = Image.from_file(path=image_path, size=image_size)

            annotations = self._parse_annotations(root, path, subset, categories)

            items.append(
                DatasetItem(id=item_id, subset=subset, media=image, annotations=annotations)
            )
            subsets.add(items[-1].subset)
        return items, categories, subsets

    @staticmethod
    def _escape(s):
        """Replace escaped special characters in ``s`` with placeholders."""
        return escape(s, LabelMePath.ATTR_IMPORT_ESCAPES)

    @staticmethod
    def _unescape(s):
        """Undo ``_escape`` and then the export-side escaping of ``s``."""
        s = unescape(s, LabelMePath.ATTR_IMPORT_ESCAPES)
        s = unescape(s, LabelMePath.ATTR_EXPORT_ESCAPES)
        return s

    @classmethod
    def _parse_annotations(cls, xml_root, path, subset, categories):
        """Convert the ``<object>`` elements of one annotation file.

        Args:
            xml_root: Parsed XML tree of the annotation file.
            path: Dataset root directory (used to locate mask files).
            subset: Subset name of the item (used to locate mask files).
            categories: Categories dict; unknown labels are added to its
                ``AnnotationType.label`` entry.

        Returns:
            A list of ``Bbox``, ``Polygon`` and ``Mask`` annotations. Objects
            marked as deleted are skipped. Objects linked through
            ``<parts>`` share a non-zero ``group``.

        Raises:
            FileNotFoundError: A referenced mask file does not exist.
        """

        def _parse_attributes(attr_str):
            # Parses a "name=value, flag, ..." string into (name, value) pairs.
            parsed = []
            if not attr_str:
                return parsed

            # Escaped separators are hidden behind placeholders before splitting.
            for attr in [a.strip() for a in cls._escape(attr_str).split(",")]:
                if not attr:
                    continue

                if "=" in attr:
                    name, value = attr.split("=", maxsplit=1)
                    if value.lower() in {"true", "false"}:
                        value = value.lower() == "true"
                    elif 1 < len(value) and value[0] == '"' and value[-1] == '"':
                        # Quoted values are always strings
                        value = value[1:-1]
                    else:
                        # Convert to a number only if it round-trips exactly
                        for t in [int, float]:
                            casted = cast(value, t)
                            if casted is not None and str(casted) == value:
                                value = casted
                                break
                    if isinstance(value, str):
                        value = cls._unescape(value)
                    parsed.append((cls._unescape(name), value))
                else:
                    # A bare name is a flag
                    parsed.append((cls._unescape(attr), True))

            return parsed

        def _parse_id_list(elem):
            # Parses a comma-separated list of object IDs, ignoring empty entries.
            if elem is None or not elem.text:
                return []
            return [int(c) for c in elem.text.split(",") if c.strip()]

        label_cat = categories[AnnotationType.label]

        def _get_label_id(label):
            # Returns the label index, registering the label if it is new.
            if not label:
                return None
            idx, _ = label_cat.find(label)
            if idx is None:
                idx = label_cat.add(label)
            return idx

        image_annotations = []

        parsed_annotations = dict()  # object id -> list of annotations
        group_assignments = dict()  # object id -> [group id, children ids]
        root_annotations = set()  # ids of objects with children and no parents
        for obj_elem in xml_root.iter("object"):
            obj_id = int(obj_elem.find("id").text)

            ann_items = []

            label = _get_label_id(obj_elem.find("name").text)

            attributes = []
            attributes_elem = obj_elem.find("attributes")
            if attributes_elem is not None and attributes_elem.text:
                attributes = _parse_attributes(attributes_elem.text)

            occluded = False
            occluded_elem = obj_elem.find("occluded")
            if occluded_elem is not None and occluded_elem.text:
                occluded = occluded_elem.text == "yes"
            attributes.append(("occluded", occluded))

            deleted = False
            deleted_elem = obj_elem.find("deleted")
            if deleted_elem is not None and deleted_elem.text:
                deleted = bool(int(deleted_elem.text))

            user = ""

            poly_elem = obj_elem.find("polygon")
            segm_elem = obj_elem.find("segm")
            type_elem = obj_elem.find("type")  # the only value is 'bounding_box'
            if poly_elem is not None:
                user_elem = poly_elem.find("username")
                if user_elem is not None and user_elem.text:
                    user = user_elem.text
                attributes.append(("username", user))

                points = []
                for point_elem in poly_elem.iter("pt"):
                    x = float(point_elem.find("x").text)
                    y = float(point_elem.find("y").text)
                    points.append(x)
                    points.append(y)

                if type_elem is not None and type_elem.text == "bounding_box":
                    # A box is stored as a polygon; take its extents
                    xmin = min(points[::2])
                    xmax = max(points[::2])
                    ymin = min(points[1::2])
                    ymax = max(points[1::2])
                    ann_items.append(
                        Bbox(
                            xmin,
                            ymin,
                            xmax - xmin,
                            ymax - ymin,
                            label=label,
                            attributes=attributes,
                            id=obj_id,
                        )
                    )
                else:
                    ann_items.append(
                        Polygon(
                            points,
                            label=label,
                            attributes=attributes,
                            id=obj_id,
                        )
                    )
            elif segm_elem is not None:
                user_elem = segm_elem.find("username")
                if user_elem is not None and user_elem.text:
                    user = user_elem.text
                attributes.append(("username", user))

                mask_path = osp.join(
                    path, LabelMePath.MASKS_DIR, subset, segm_elem.find("mask").text
                )
                if not osp.isfile(mask_path):
                    raise FileNotFoundError(errno.ENOENT, "Can't find mask", mask_path)
                mask = load_mask(mask_path)
                # Any non-zero channel marks the pixel as part of the mask
                mask = np.any(mask, axis=2)
                ann_items.append(Mask(image=mask, label=label, id=obj_id, attributes=attributes))

            if not deleted:
                parsed_annotations[obj_id] = ann_items

            # Find parents and children
            parts_elem = obj_elem.find("parts")
            if parts_elem is not None:
                children_ids = _parse_id_list(parts_elem.find("hasparts"))
                parent_ids = _parse_id_list(parts_elem.find("ispartof"))

                if children_ids and not parent_ids:
                    root_annotations.add(obj_id)
                group_assignments[obj_id] = [None, children_ids]

        # assign single group to all grouped annotations
        current_group_id = 0
        annotations_to_visit = list(root_annotations)
        while annotations_to_visit:
            ann_id = annotations_to_visit.pop()
            # A referenced child may have no <parts> element of its own; it
            # still belongs to the group of the object that lists it.
            ann_assignment = group_assignments.setdefault(ann_id, [None, []])
            group_id, children_ids = ann_assignment
            if group_id:
                continue

            if ann_id in root_annotations:
                current_group_id += 1  # start a new group

            group_id = current_group_id
            ann_assignment[0] = group_id

            # continue with children
            annotations_to_visit.extend(children_ids)

        assert current_group_id == len(root_annotations)

        for ann_id, ann_items in parsed_annotations.items():
            group_id = 0
            if ann_id in group_assignments:
                ann_assignment = group_assignments[ann_id]
                group_id = ann_assignment[0]

            for ann_item in ann_items:
                if group_id:
                    ann_item.group = group_id

                image_annotations.append(ann_item)

        return image_annotations

    def categories(self):
        """Return the categories dict built while parsing."""
        return self._categories

    def __iter__(self):
        """Yield the parsed dataset items in parsing order."""
        yield from self._items
class LabelMeImporter(Importer):
    """Detects LabelMe datasets and locates their sources."""

    _ANNO_EXT = ".xml"

    @classmethod
    def detect(cls, context: FormatDetectionContext) -> None:
        """Check whether the annotation files look like LabelMe XML.

        Returns as soon as one file contains an ``<object>`` with a
        ``<polygon>`` or ``<segm>`` child directly under ``<annotation>``.
        Raises inside the probing context when the root element is not
        ``<annotation>`` or when an object has neither of these children.
        """
        shape_tags = {"polygon", "segm"}

        for annot_path in context.require_files(f"**/*{cls._ANNO_EXT}"):
            with context.probe_text_file(
                annot_path,
                "must be a LabelMe annotation file",
            ) as f:
                # Tags of the elements that are currently open, outermost first
                open_tags = []

                for event, elem in ElementTree.iterparse(f, events=("start", "end")):
                    if event == "start":
                        if not open_tags and elem.tag != "annotation":
                            # NOTE(review): presumably the probing context
                            # turns this into a failed requirement - confirm.
                            raise Exception

                        if open_tags == ["annotation", "object"] and elem.tag in shape_tags:
                            return

                        open_tags.append(elem.tag)
                    elif event == "end":
                        open_tags.pop()

                        if open_tags == ["annotation"] and elem.tag == "object":
                            # If we got here, then we found an object with no
                            # polygon and no mask, so it's probably the wrong
                            # format.
                            raise Exception

            # If we got here, then the current file has no objects and is thus
            # ambiguous - it could be ours or it could be from the VOC format.
            # We'll proceed to test the next one.

        # If we got here, then every file was ambiguous. We'll have to
        # (implicitly) return a match.

    @classmethod
    def find_sources(cls, path):
        """Return one source entry for ``path`` if it is a directory holding XML files.

        The search for ``*.xml`` files is recursive. An empty list is returned
        when ``path`` is not a directory or contains no such files.
        """
        if not osp.isdir(path):
            return []

        xml_files = iglob(osp.join(path, "**", "*.xml"), recursive=True)
        if next(xml_files, None) is None:
            return []

        return [
            {
                "url": osp.normpath(path),
                "format": LabelMeBase.NAME,
            }
        ]

    @classmethod
    def get_file_extensions(cls) -> List[str]:
        """Return the annotation file extensions handled by this importer."""
        return [cls._ANNO_EXT]
class LabelMeExporter(Exporter):
    """Writes a dataset as LabelMe XML annotations, images and mask files.

    Only polygon, bbox and mask annotations are exported; other annotation
    types are skipped.
    """

    DEFAULT_IMAGE_EXT = LabelMePath.IMAGE_EXT

    def _apply_impl(self):
        """Export every item of every subset into the save directory.

        Raises:
            MediaTypeError: The dataset media type is set and is not an image type.
        """
        media_type = self._extractor.media_type()
        if media_type and not issubclass(media_type, Image):
            raise MediaTypeError("Media type is not an image")

        os.makedirs(self._save_dir, exist_ok=True)

        if self._save_dataset_meta:
            self._save_meta_file(self._save_dir)

        for subset_name, subset in self._extractor.subsets().items():
            for item in subset:
                self._save_item(item, subset_name)

    def _get_label(self, label_id):
        """Return the label name for ``label_id``, or ``""`` for ``None``."""
        if label_id is None:
            return ""
        return self._extractor.categories()[AnnotationType.label][label_id].name

    @staticmethod
    def _escape(s: str):
        """Escape the attribute special characters in ``s``."""
        return escape(s, escapes=LabelMePath.ATTR_EXPORT_ESCAPES)

    def _format_attributes(self, attributes):
        """Serialize annotation attributes into a ``"k=v, k=v"`` string.

        ``username`` and ``occluded`` are skipped because they are written as
        separate elements. String values that the attribute parser in this
        file would read back as a number or a boolean are wrapped in double
        quotes so that they stay strings; other strings are escaped.
        """
        attrs = []
        for k, v in attributes.items():
            if k in {"username", "occluded"}:
                continue

            if isinstance(v, str):
                if (
                    cast(v, float) is not None
                    and str(float(v)) == v
                    or cast(v, int) is not None
                    and str(int(v)) == v
                    or v.lower() in {"true", "false"}
                ):
                    v = f'"{v}"'  # add escaping for string values
                else:
                    v = self._escape(v)
            attrs.append("%s=%s" % (self._escape(k), v))
        return ", ".join(attrs)

    def _save_item(self, item, subset_dir):
        """Write the image (if requested), the masks and the XML file of ``item``.

        Args:
            item: Dataset item to export.
            subset_dir: Subset name; used as the sub-directory name and as
                the ``<folder>`` value.
        """
        # Disable B410: import_lxml - the library is used for writing here
        from lxml import etree as ET  # nosec

        def add_polygon(parent_elem, vertices, username):
            # Writes a <polygon> with one <pt> per (x, y) vertex.
            poly_elem = ET.SubElement(parent_elem, "polygon")
            for x, y in vertices:
                point_elem = ET.SubElement(poly_elem, "pt")
                ET.SubElement(point_elem, "x").text = "%.2f" % x
                ET.SubElement(point_elem, "y").text = "%.2f" % y
            ET.SubElement(poly_elem, "username").text = username

        log.debug("Converting item '%s'", item.id)

        image_filename = self._make_image_filename(item)
        if self._save_media:
            if item.media and item.media.has_data:
                image_dir = osp.join(self._save_dir, LabelMePath.IMAGES_DIR, subset_dir)
                os.makedirs(image_dir, exist_ok=True)
                self._save_image(item, osp.join(image_dir, image_filename))
            else:
                log.debug("Item '%s' has no image", item.id)

        root_elem = ET.Element("annotation")
        ET.SubElement(root_elem, "filename").text = image_filename
        ET.SubElement(root_elem, "folder").text = subset_dir

        source_elem = ET.SubElement(root_elem, "source")
        ET.SubElement(source_elem, "sourceImage").text = ""
        ET.SubElement(source_elem, "sourceAnnotation").text = "Datumaro"

        # The size may be unknown; in that case <imagesize> is omitted
        # instead of failing on a missing value.
        image_size = item.media.size if item.media else None
        if image_size is not None:
            image_elem = ET.SubElement(root_elem, "imagesize")
            ET.SubElement(image_elem, "nrows").text = str(image_size[0])
            ET.SubElement(image_elem, "ncols").text = str(image_size[1])

        groups = defaultdict(list)  # group id -> [(object id, <parts> element)]

        obj_id = 0
        for ann in item.annotations:
            if ann.type not in {AnnotationType.polygon, AnnotationType.bbox, AnnotationType.mask}:
                continue

            username = str(ann.attributes.get("username", ""))

            obj_elem = ET.SubElement(root_elem, "object")
            ET.SubElement(obj_elem, "name").text = self._get_label(ann.label)
            ET.SubElement(obj_elem, "deleted").text = "0"
            ET.SubElement(obj_elem, "verified").text = "0"
            ET.SubElement(obj_elem, "occluded").text = (
                "yes" if ann.attributes.get("occluded") is True else "no"
            )
            ET.SubElement(obj_elem, "date").text = ""
            ET.SubElement(obj_elem, "id").text = str(obj_id)

            parts_elem = ET.SubElement(obj_elem, "parts")
            if ann.group:
                # Filled in after all objects are written
                groups[ann.group].append((obj_id, parts_elem))
            else:
                ET.SubElement(parts_elem, "hasparts").text = ""
                ET.SubElement(parts_elem, "ispartof").text = ""

            if ann.type == AnnotationType.bbox:
                ET.SubElement(obj_elem, "type").text = "bounding_box"

                x0, y0, x1, y1 = ann.points
                add_polygon(obj_elem, [(x0, y0), (x1, y0), (x1, y1), (x0, y1)], username)
            elif ann.type == AnnotationType.polygon:
                add_polygon(obj_elem, zip(ann.points[::2], ann.points[1::2]), username)
            elif ann.type == AnnotationType.mask:
                mask_filename = "%s_mask_%s.png" % (item.id, obj_id)
                save_image(
                    osp.join(self._save_dir, LabelMePath.MASKS_DIR, subset_dir, mask_filename),
                    self._paint_mask(ann.image),
                    create_dir=True,
                )

                segm_elem = ET.SubElement(obj_elem, "segm")
                ET.SubElement(segm_elem, "mask").text = mask_filename

                bbox = find_mask_bbox(ann.image)
                box_elem = ET.SubElement(segm_elem, "box")
                ET.SubElement(box_elem, "xmin").text = "%.2f" % bbox[0]
                ET.SubElement(box_elem, "ymin").text = "%.2f" % bbox[1]
                ET.SubElement(box_elem, "xmax").text = "%.2f" % (bbox[0] + bbox[2])
                ET.SubElement(box_elem, "ymax").text = "%.2f" % (bbox[1] + bbox[3])

                ET.SubElement(segm_elem, "username").text = username
            else:
                raise NotImplementedError("Unknown shape type '%s'" % ann.type)

            ET.SubElement(obj_elem, "attributes").text = self._format_attributes(ann.attributes)

            obj_id += 1

        # The first object of a group becomes the parent of the others
        for group in groups.values():
            leader_id, leader_parts_elem = group[0]
            member_ids = [str(o_id) for o_id, _ in group[1:]]
            ET.SubElement(leader_parts_elem, "hasparts").text = ",".join(member_ids)
            ET.SubElement(leader_parts_elem, "ispartof").text = ""

            for _, member_parts_elem in group[1:]:
                ET.SubElement(member_parts_elem, "hasparts").text = ""
                ET.SubElement(member_parts_elem, "ispartof").text = str(leader_id)

        ann_path = osp.join(self._save_dir, LabelMePath.ANNOTATIONS_DIR, subset_dir)
        os.makedirs(osp.join(ann_path, osp.dirname(image_filename)), exist_ok=True)
        xml_path = osp.join(ann_path, osp.splitext(image_filename)[0] + ".xml")
        if osp.exists(xml_path):
            # Keep the image extension in the name to avoid overwriting an existing file
            xml_path = osp.join(ann_path, image_filename + ".xml")

        with open(xml_path, "w", encoding="utf-8") as f:
            xml_data = ET.tostring(root_elem, encoding="unicode", pretty_print=True)
            f.write(xml_data)

    @staticmethod
    def _paint_mask(mask):
        """Map a binary mask to 4-channel colors via a two-entry lookup table.

        Zero pixels get ``[0, 0, 0, 0]``, pixels equal to one get
        ``[255, 203, 0, 153]``.
        """
        # TODO: check if mask colors are random
        return np.array([[0, 0, 0, 0], [255, 203, 0, 153]], dtype=np.uint8)[mask.astype(np.uint8)]