Source code for datumaro.cli.commands.generate

# Copyright (C) 2022 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse
import logging as log
import os
import os.path as osp
from shutil import rmtree

from datumaro.cli.util.errors import CliException
from datumaro.util.definitions import get_datumaro_cache_dir

from ..util import MultilineFormatter


def build_parser(parser_ctor=argparse.ArgumentParser):
    """Create the argument parser for the ``generate`` command.

    Args:
        parser_ctor: Callable used to construct the parser. It is called with
            ``help``, ``description`` and ``formatter_class`` keyword
            arguments; defaults to ``argparse.ArgumentParser``.

    Returns:
        The constructed parser, with all command options registered and its
        ``command`` default set to ``generate_command``.
    """
    # NOTE(review): the '|n' and '|s' markers in the description are presumably
    # line-break / space markers interpreted by MultilineFormatter - confirm.
    cmd_parser = parser_ctor(
        help="Generate synthetic dataset",
        description="""
        Creates a synthetic dataset with elements of the specified type and shape,
        and saves it in the provided directory.|n
        |n
        Currently, can only generate fractal images, useful for network compression.|n
        To create 3-channel images, you should provide the number of images, height and width.|n
        The images are colorized with a model, which will be downloaded automatically.|n
        Uses the algorithm from the article: https://arxiv.org/abs/2103.13023 |n
        |n
        Examples:|n
        - Generate 300 3-channel images with H=224, W=256 and store to data_dir:|n
        |s|s%(prog)s -o data_dir -k 300 --shape 224 256
        """,
        formatter_class=MultilineFormatter,
    )

    add_option = cmd_parser.add_argument

    # Mandatory options: destination, number of items and item dimensions.
    add_option(
        "-o",
        "--output-dir",
        required=True,
        help="Output directory to store generated dataset",
    )
    add_option(
        "-k",
        "--count",
        type=int,
        required=True,
        help="Number of images to be generated",
    )
    add_option(
        "--shape",
        nargs=2,
        metavar="DIM",
        type=int,
        required=True,
        help="Dimensions of data to be generated (height, width)",
    )

    # Optional settings.
    add_option(
        "-t",
        "--type",
        default="image",
        choices=["image"],
        help="Specify type of data to generate (default: %(default)s)",
    )
    add_option(
        "--model-dir",
        type=str,
        default=get_datumaro_cache_dir(),
        help="Path to load the colorization model from. "
        "If no model is found, the model will be downloaded (default: %(default)s)",
    )
    add_option(
        "--overwrite",
        action="store_true",
        help="Overwrite existing files in the save directory",
    )

    # The handler to run is stored on the parsed namespace as ``command``.
    cmd_parser.set_defaults(command=generate_command)

    return cmd_parser
def get_sensitive_args():
    """Return the sensitive argument names of this module's commands.

    Returns:
        A dict mapping the ``generate_command`` handler to the list of its
        argument names flagged as sensitive (``output_dir`` and
        ``model_dir``).
    """
    # NOTE(review): both entries hold file-system paths; presumably the caller
    # masks them in logs/telemetry - confirm against the consumer of this dict.
    sensitive_names = ["output_dir", "model_dir"]
    return {generate_command: sensitive_names}
def generate_command(args):
    """Generate a synthetic dataset into ``args.output_dir``.

    Args:
        args: Parsed command-line namespace. The attributes read are
            ``output_dir``, ``overwrite``, ``type``, ``count``, ``shape``
            and ``model_dir``.

    Returns:
        ``0`` on success.

    Raises:
        NotImplementedError: If ``args.type`` is not ``"image"``. This is
            checked before the output directory is touched.
        CliException: If the output directory exists, is not empty and
            ``args.overwrite`` is not set.
    """
    # Validate the request before any destructive file-system operation:
    # an unsupported data type must never wipe an existing output directory.
    if args.type != "image":
        raise NotImplementedError(f"Data type: {args.type} is not supported")

    # Imported lazily, but still ahead of the directory clean-up below, so a
    # failing import cannot leave the user with an emptied output directory.
    from datumaro.plugins.synthetic_data import FractalImageGenerator

    log.info("Generating dataset...")
    output_dir = args.output_dir

    if osp.isdir(output_dir) and os.listdir(output_dir):
        if not args.overwrite:
            raise CliException(
                f"Directory '{output_dir}' already exists (pass --overwrite to overwrite)"
            )
        # Start from a clean, empty directory.
        rmtree(output_dir)
        os.mkdir(output_dir)

    FractalImageGenerator(
        count=args.count,
        output_dir=output_dir,
        shape=args.shape,
        model_path=args.model_dir,
    ).generate_dataset()

    log.info("Results have been saved to '%s'", output_dir)

    return 0