Source code for datumaro.plugins.data_formats.yolo.exporter

# Copyright (C) 2019-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
import logging as log
import os
import os.path as osp
from collections import OrderedDict, defaultdict

import yaml

from datumaro.components.annotation import AnnotationType, Bbox
from datumaro.components.dataset_base import DEFAULT_SUBSET_NAME, DatasetItem, IDataset
from datumaro.components.dataset_item_storage import ItemStatus
from datumaro.components.errors import DatasetExportError, DatumaroError, MediaTypeError
from datumaro.components.exporter import Exporter
from datumaro.components.media import Image
from datumaro.util import str_to_bool
from datumaro.util.definitions import SUBSET_NAME_WHITELIST

from .format import YoloPath


def _make_yolo_bbox(img_size, box):
    # https://github.com/pjreddie/darknet/blob/master/scripts/voc_label.py
    # <x> <y> <width> <height> - values relative to width and height of image
    # <x> <y> - are center of rectangle
    x = (box[0] + box[2]) / 2 / img_size[0]
    y = (box[1] + box[3]) / 2 / img_size[1]
    w = (box[2] - box[0]) / img_size[0]
    h = (box[3] - box[1]) / img_size[1]
    return x, y, w, h


class YoloExporter(Exporter):
    """Export a dataset in the Darknet YOLO directory layout.

    Files written under ``save_dir``:

    * ``obj.names`` - one label name per line, ordered by label index;
    * ``obj_<subset>_data/`` - per-subset images and ``<item id>.txt``
      annotation files;
    * ``<subset>.txt`` - list of image paths of the subset;
    * ``obj.data`` - class count, subset list paths, names path and the
      ``backup/`` entry.
    """

    # https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects
    DEFAULT_IMAGE_EXT = ".jpg"

    @classmethod
    def build_cmdline_parser(cls, **kwargs):
        """Extend the parent command line parser with ``--add-path-prefix``."""
        parser = super().build_cmdline_parser(**kwargs)
        parser.add_argument(
            "--add-path-prefix",
            default=True,
            type=str_to_bool,
            help="Add the 'data/' prefix for paths in the dataset info (default: %(default)s)",
        )
        return parser

    def __init__(
        self, extractor: IDataset, save_dir: str, *, add_path_prefix: bool = True, **kwargs
    ) -> None:
        """Create the exporter.

        Args:
            extractor: Dataset to export.
            save_dir: Output directory.
            add_path_prefix: When true, paths written to the list files and
                to ``obj.data`` are prefixed with ``data``.
            **kwargs: Forwarded to the parent constructor.
        """
        super().__init__(extractor, save_dir, **kwargs)

        self._prefix = "data" if add_path_prefix else ""

    def _apply_impl(self):
        """Write the whole dataset to ``self._save_dir``.

        Raises:
            MediaTypeError: If the dataset declares a media type that is not
                an ``Image`` subclass.
            DatasetExportError: If a subset name is one of
                ``YoloPath.RESERVED_CONFIG_KEYS``.
        """
        extractor = self._extractor
        save_dir = self._save_dir

        if self._extractor.media_type() and not issubclass(self._extractor.media_type(), Image):
            raise MediaTypeError("Media type is not an image")

        os.makedirs(save_dir, exist_ok=True)

        if self._save_dataset_meta:
            self._save_meta_file(self._save_dir)

        label_categories = extractor.categories()[AnnotationType.label]
        label_ids = {label.name: idx for idx, label in enumerate(label_categories.items)}
        with open(osp.join(save_dir, "obj.names"), "w", encoding="utf-8") as f:
            f.writelines("%s\n" % l[0] for l in sorted(label_ids.items(), key=lambda x: x[1]))

        subset_lists = OrderedDict()

        subsets = self._extractor.subsets()
        pbars = self._ctx.progress_reporter.split(len(subsets))
        for (subset_name, subset), pbar in zip(subsets.items(), pbars):
            if not subset_name or subset_name == DEFAULT_SUBSET_NAME:
                subset_name = YoloPath.DEFAULT_SUBSET_NAME
            elif subset_name in YoloPath.RESERVED_CONFIG_KEYS:
                raise DatasetExportError(
                    f"Can't export '{subset_name}' subset in YOLO format, this word is reserved."
                )

            subset_dir = osp.join(save_dir, "obj_%s_data" % subset_name)
            os.makedirs(subset_dir, exist_ok=True)

            image_paths = OrderedDict()
            for item in pbar.iter(subset, desc=f"Exporting '{subset_name}'"):
                try:
                    image_fpath = self._export_media(item, subset_dir)
                    if image_fpath is None:
                        # _export_media has already reported the failure;
                        # skip the item instead of feeding None to
                        # osp.relpath and reporting a second, misleading
                        # TypeError for it.
                        continue

                    image_name = osp.relpath(image_fpath, subset_dir)
                    image_paths[item.id] = osp.join(
                        self._prefix, osp.basename(subset_dir), image_name
                    )

                    self._export_item_annotation(item, subset_dir)
                except Exception as e:
                    self._ctx.error_policy.report_item_error(e, item_id=(item.id, item.subset))

            subset_list_name = f"{subset_name}.txt"
            subset_list_path = osp.join(save_dir, subset_list_name)
            if self._patch and subset_name in self._patch.updated_subsets and not image_paths:
                # The patched subset became empty: drop its stale list file
                # and do not mention the subset in obj.data.
                if osp.isfile(subset_list_path):
                    os.remove(subset_list_path)
                continue

            subset_lists[subset_name] = subset_list_name
            with open(subset_list_path, "w", encoding="utf-8") as f:
                f.writelines("%s\n" % s.replace("\\", "/") for s in image_paths.values())

        with open(osp.join(save_dir, "obj.data"), "w", encoding="utf-8") as f:
            f.write(f"classes = {len(label_ids)}\n")

            for subset_name, subset_list_name in subset_lists.items():
                f.write(
                    "%s = %s\n"
                    % (subset_name, osp.join(self._prefix, subset_list_name).replace("\\", "/"))
                )

            # Normalize separators the same way as the subset entries above,
            # so the file does not mix "\" and "/" on Windows.
            f.write("names = %s\n" % osp.join(self._prefix, "obj.names").replace("\\", "/"))
            f.write("backup = backup/\n")

    def _export_media(self, item: DatasetItem, subset_img_dir: str) -> str:
        """Compute the image path of ``item`` and save the image if requested.

        Args:
            item: Dataset item whose media is exported.
            subset_img_dir: Directory the image file belongs to.

        Returns:
            The image file path inside ``subset_img_dir``. If anything fails,
            the error is handed to the error policy and, when the policy
            does not raise, ``None`` is returned; callers must handle that.
        """
        try:
            if not item.media or not (item.media.has_data or item.media.has_size):
                raise DatasetExportError(
                    "Failed to export item '%s': " "item has no image info" % item.id
                )

            image_name = self._make_image_filename(item)
            image_fpath = osp.join(subset_img_dir, image_name)

            if self._save_media:
                self._save_image(item, image_fpath)

            return image_fpath
        except Exception as e:
            self._ctx.error_policy.report_item_error(e, item_id=(item.id, item.subset))
            return None

    def _export_item_annotation(self, item: DatasetItem, subset_dir: str) -> None:
        """Write ``<subset_dir>/<item.id>.txt`` with the labeled boxes of ``item``.

        Each labeled ``Bbox`` becomes one line ``<label> <x> <y> <w> <h>``
        with the coordinates formatted with six decimals. Other annotations
        and boxes without a label are skipped. The file is written even when
        no line is produced. Any failure is handed to the error policy.
        """
        try:
            height, width = item.media.size

            lines = []
            for bbox in item.annotations:
                if not isinstance(bbox, Bbox) or bbox.label is None:
                    continue

                yolo_bb = _make_yolo_bbox((width, height), bbox.points)
                yolo_bb = " ".join("%.6f" % p for p in yolo_bb)
                lines.append("%s %s\n" % (bbox.label, yolo_bb))

            annotation_path = osp.join(subset_dir, "%s.txt" % item.id)
            # The item id may contain directory components.
            os.makedirs(osp.dirname(annotation_path), exist_ok=True)

            with open(annotation_path, "w", encoding="utf-8") as f:
                f.writelines(lines)
        except Exception as e:
            self._ctx.error_policy.report_item_error(e, item_id=(item.id, item.subset))

    @classmethod
    def patch(cls, dataset, patch, save_dir, **kwargs):
        """Re-export ``dataset`` and delete the files of stale items.

        After the export, the image and annotation files are removed for
        every updated item that was either removed or has no media.
        """
        conv = cls(dataset, save_dir=save_dir, **kwargs)
        conv._patch = patch
        conv.apply()

        for (item_id, subset), status in patch.updated_items.items():
            if status != ItemStatus.removed:
                item = patch.data.get(item_id, subset)
            else:
                item = DatasetItem(item_id, subset=subset)

            if not (status == ItemStatus.removed or not item.media):
                continue

            # Use the same subset-name mapping as _apply_impl so that the
            # cleanup looks into the directory the export actually wrote to.
            if not subset or subset == DEFAULT_SUBSET_NAME:
                subset = YoloPath.DEFAULT_SUBSET_NAME
            subset_dir = osp.join(save_dir, "obj_%s_data" % subset)

            image_path = osp.join(subset_dir, conv._make_image_filename(item))
            if osp.isfile(image_path):
                os.remove(image_path)

            ann_path = osp.join(subset_dir, "%s.txt" % item.id)
            if osp.isfile(ann_path):
                os.remove(ann_path)

    @property
    def can_stream(self) -> bool:
        """Always ``True`` for this exporter."""
        return True
class YoloUltralyticsExporter(YoloExporter):
    """Export a dataset in the layout consumed by Ultralytics YOLO.

    Files written under ``save_dir``:

    * ``images/<subset>/`` - image files;
    * ``labels/<subset>/`` - ``<item id>.txt`` annotation files;
    * ``<subset>.txt`` - list of image paths of the subset;
    * ``data.yaml`` - subset list file names and the ``names`` mapping
      (label index to label name).
    """

    # Subsets that must be present in the exported dataset.
    must_subset_names = {"train", "val"}

    def __init__(self, extractor: IDataset, save_dir: str, **kwargs) -> None:
        """Create the exporter and warn when media saving is switched off."""
        super().__init__(extractor, save_dir, **kwargs)

        if self._save_media is False:
            log.warning(
                "It is recommended to turn on `save_media=True` when export to `yolo_ultralytics` format. "
                "If not, you will need to copy your image files and paste them into the appropriate directories."
            )

    def _check_dataset(self):
        """Validate the media type and the subset names of the dataset.

        Raises:
            MediaTypeError: If the dataset declares a media type that is not
                an ``Image`` subclass.
            DatasetExportError: If a subset name is not in
                ``SUBSET_NAME_WHITELIST`` or a subset from
                ``must_subset_names`` is missing.
        """
        if self._extractor.media_type() and not issubclass(self._extractor.media_type(), Image):
            raise MediaTypeError("Media type is not an image")

        subset_names = set(self._extractor.subsets().keys())

        for subset in subset_names:
            if subset not in SUBSET_NAME_WHITELIST:
                raise DatasetExportError(
                    f"The allowed subset name should be in {SUBSET_NAME_WHITELIST}, "
                    f'so that subset "{subset}" is not allowed.'
                )

        for must_name in self.must_subset_names:
            if must_name not in subset_names:
                raise DatasetExportError(
                    f'Subset "{must_name}" is not in {subset_names}, '
                    "but YoloUltralytics requires both of them."
                )

    def _apply_impl(self):
        """Write the whole dataset to ``self._save_dir``.

        Dataset validation errors are passed to the error policy's ``fail``.
        Items whose media could not be exported are skipped; their error has
        already been reported by ``_export_media``.
        """
        extractor = self._extractor
        save_dir = self._save_dir

        os.makedirs(save_dir, exist_ok=True)

        try:
            self._check_dataset()
        except DatumaroError as e:
            self._ctx.error_policy.fail(e)

        if self._save_dataset_meta:
            self._save_meta_file(self._save_dir)

        yaml_dict = {}

        subsets = self._extractor.subsets()
        pbars = self._ctx.progress_reporter.split(len(subsets))

        image_fpaths = defaultdict(list)

        for (subset_name, subset), pbar in zip(subsets.items(), pbars):
            subset_fpath = osp.join(save_dir, subset_name + ".txt")

            subset_img_dir = osp.join(save_dir, "images", subset_name)
            os.makedirs(subset_img_dir, exist_ok=True)

            subset_label_dir = osp.join(save_dir, "labels", subset_name)
            os.makedirs(subset_label_dir, exist_ok=True)

            # Registers the subset key (and its position) in the YAML; the
            # value is replaced below once the list file has been written.
            # NOTE(review): a subset without any exported item keeps this
            # save_dir-joined path although no list file is created for it.
            yaml_dict[subset_name] = subset_fpath

            for item in pbar.iter(subset, desc=f"Exporting '{subset_name}'"):
                image_fpath = self._export_media(item, subset_img_dir)
                if image_fpath is None:
                    # The failure was reported inside _export_media. Skip
                    # the item like the parent exporter does instead of
                    # crashing in osp.relpath(None, ...).
                    continue

                self._export_item_annotation(item, subset_label_dir)
                image_fpaths[subset_name].append(osp.relpath(image_fpath, save_dir))

        for subset_name, img_fpath_list in image_fpaths.items():
            subset_fname = subset_name + ".txt"
            # Explicit UTF-8, as in the parent exporter, so that non-ASCII
            # item ids do not depend on the locale encoding.
            with open(osp.join(save_dir, subset_fname), "w", encoding="utf-8") as fp:
                # Prefix (os.curdir + os.sep) is required by Ultralytics
                # Please see https://github.com/ultralytics/ultralytics/blob/30fc4b537ff1d9b115bc1558884f6bc2696a282c/ultralytics/yolo/data/utils.py#L40-L43
                fp.writelines(
                    [os.curdir + os.sep + img_fpath + "\n" for img_fpath in img_fpath_list]
                )
            yaml_dict[subset_name] = subset_fname

        label_categories = extractor.categories()[AnnotationType.label]
        label_ids = {idx: label.name for idx, label in enumerate(label_categories.items)}
        yaml_dict["names"] = label_ids

        # allow_unicode=True emits non-ASCII label names verbatim, so the
        # file must be opened with an encoding able to represent them.
        with open(osp.join(save_dir, "data.yaml"), "w", encoding="utf-8") as fp:
            yaml.safe_dump(yaml_dict, fp, sort_keys=False, allow_unicode=True)

    @classmethod
    def patch(cls, dataset, patch, save_dir, **kwargs):
        """Re-export ``dataset`` and delete the files of stale items.

        After the export, the image under ``images/<subset>/`` and the
        annotation under ``labels/<subset>/`` are removed for every updated
        item that was either removed or has no media. These are the
        directories ``_apply_impl`` of this class writes to.
        """
        conv = cls(dataset, save_dir=save_dir, **kwargs)
        conv._patch = patch
        conv.apply()

        for (item_id, subset), status in patch.updated_items.items():
            if status != ItemStatus.removed:
                item = patch.data.get(item_id, subset)
            else:
                item = DatasetItem(item_id, subset=subset)

            if not (status == ItemStatus.removed or not item.media):
                continue

            # Fall back to the default subset name for a falsy subset so the
            # path join below never receives None.
            subset_name = subset or DEFAULT_SUBSET_NAME

            image_path = osp.join(
                save_dir, "images", subset_name, conv._make_image_filename(item)
            )
            if osp.isfile(image_path):
                os.remove(image_path)

            ann_path = osp.join(save_dir, "labels", subset_name, "%s.txt" % item.id)
            if osp.isfile(ann_path):
                os.remove(ann_path)