Source code for datumaro.cli.commands.convert

# Copyright (C) 2019-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse
import logging as log
import os
import os.path as osp

from datumaro.components.dataset import Dataset
from datumaro.components.environment import DEFAULT_ENVIRONMENT
from datumaro.util.os_util import make_file_name

from ..util import MultilineFormatter
from ..util.errors import CliException
from ..util.project import FilterModes, generate_next_file_name


def build_parser(parser_ctor=argparse.ArgumentParser):
    """
    Create the argument parser for the "convert" command.

    Parameters:
        parser_ctor: callable used to instantiate the parser. It receives
            the ``help``, ``description`` and ``formatter_class`` keywords.
            Defaults to ``argparse.ArgumentParser``.

    Returns:
        The configured parser. Its ``command`` default is set to
        ``convert_command``.
    """
    env = DEFAULT_ENVIRONMENT
    input_formats = ", ".join(sorted(set(env.importers).union(env.extractors)))
    output_formats = ", ".join(sorted(env.exporters))

    # NOTE(review): "|n" and "|s" look like the line-break / space markers
    # consumed by MultilineFormatter - confirm against its implementation.
    description = """
        Converts a dataset from one format to another.
        You can add your own formats and do many more by creating a
        Datumaro project.|n
        |n
        This command serves as an alias for the "create", "import", and
        "export" commands, allowing to obtain the same results simpler
        and faster. Check descriptions of these commands for more info.|n
        |n
        Supported input formats: {}|n
        |n
        Supported output formats: {}|n
        |n
        Examples:|n
        - Export a dataset as a PASCAL VOC dataset, include images:|n
        |s|s%(prog)s -i src/path -f voc -- --save-media|n
        |n
        - Export a dataset as a COCO dataset to a specific directory:|n
        |s|s%(prog)s -i src/path -f coco -o path/I/like/
        """.format(
        input_formats, output_formats
    )

    parser = parser_ctor(
        help="Convert an existing dataset to another format",
        description=description,
        formatter_class=MultilineFormatter,
    )

    filter_mode_choices = ", ".join(FilterModes.list_options())

    # (flags, keyword options) pairs, registered in declaration order so the
    # generated help lists the options in the same sequence as before.
    option_table = [
        (
            ("-i", "--input-path"),
            dict(
                default=".",
                dest="source",
                help="Input dataset path (default: current dir)",
            ),
        ),
        (
            ("-if", "--input-format"),
            dict(help="Input dataset format. Will try to detect, if not specified."),
        ),
        (
            ("-f", "--output-format"),
            dict(required=True, help="Output format"),
        ),
        (
            ("-o", "--output-dir"),
            dict(
                dest="dst_dir",
                help="Directory to save output (default: a subdir in the current one)",
            ),
        ),
        (
            ("--overwrite",),
            dict(
                action="store_true",
                help="Overwrite existing files in the save directory",
            ),
        ),
        (
            ("-e", "--filter"),
            dict(
                help='XML XPath filter expression for dataset items. Read "filter" '
                "command docs for more info",
            ),
        ),
        (
            ("--filter-mode",),
            dict(
                default=FilterModes.i.name,
                type=FilterModes.parse,
                help="Filter mode, one of %s (default: %s)"
                % (filter_mode_choices, "%(default)s"),
            ),
        ),
        (
            ("--encryption-key",),
            dict(
                help="Secret key. It is required only if the input dataset is encrypted.",
            ),
        ),
        (
            # Everything left on the command line is handed to the exporter.
            ("extra_args",),
            dict(
                nargs=argparse.REMAINDER,
                help="Additional arguments for output format (pass '-- -h' for help). "
                "Must be specified after the main command arguments",
            ),
        ),
    ]
    for flags, options in option_table:
        parser.add_argument(*flags, **options)

    parser.set_defaults(command=convert_command)

    return parser
def get_sensitive_args():
    """
    Return the argument names of this command that are treated as sensitive.

    Returns:
        A dict mapping the ``convert_command`` handler to the list of its
        argument names (as stored on the parsed namespace) that are
        considered sensitive.
    """
    sensitive_names = ["source", "dst_dir", "extra_args"]
    return {convert_command: sensitive_names}
def convert_command(args):
    """
    Convert a dataset from one format to another.

    Parameters:
        args: parsed command-line namespace. The attributes read here are
            ``source``, ``input_format``, ``output_format``, ``dst_dir``,
            ``overwrite``, ``filter``, ``filter_mode``, ``encryption_key``
            and ``extra_args``.

    Returns:
        int: 0 on success;
        1 if the input format was not given and could not be detected;
        2 if the input format was not given and several formats match;
        3 if the input and output formats are the same.

    Raises:
        CliException: if no exporter is registered for the requested output
            format, or if the requested output directory already exists,
            is not empty and ``--overwrite`` was not passed.
    """
    env = DEFAULT_ENVIRONMENT

    try:
        exporter = env.exporters[args.output_format]
    except KeyError as err:
        # Chain explicitly so the lookup failure is recorded as the cause.
        raise CliException(
            "Exporter for format '%s' is not found" % args.output_format
        ) from err
    extra_args = exporter.parse_cmdline(args.extra_args)

    filter_args = FilterModes.make_filter_args(args.filter_mode)

    fmt = args.input_format
    if not fmt:
        # No explicit input format: ask the environment to detect it and
        # accept the answer only when it is unambiguous.
        matches = env.detect_dataset(args.source)
        if not matches:
            log.error(
                "Failed to detect dataset format. "
                "Try to specify format with '-if/--input-format' parameter."
            )
            return 1
        if len(matches) > 1:
            log.error(
                "Multiple formats match the dataset: %s. "
                "Try to specify format with '-if/--input-format' parameter.",
                ", ".join(matches),
            )
            return 2

        fmt = matches[0]
        log.info("Source dataset format detected as %s", fmt)

    if fmt == args.output_format:
        log.error(
            "The source data format and the output data format is same as %s.", fmt
        )
        return 3

    source = osp.abspath(args.source)

    dst_dir = args.dst_dir
    if dst_dir:
        # Refuse to write into a populated directory unless explicitly allowed.
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException(
                "Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir
            )
    else:
        # Derive a directory name from the source name and the output format.
        dst_dir = generate_next_file_name(
            "%s-%s" % (osp.basename(source), make_file_name(args.output_format))
        )
    dst_dir = osp.abspath(dst_dir)

    # The key is forwarded to the importer only when one was supplied.
    import_kwargs = {}
    if args.encryption_key:
        import_kwargs["encryption_key"] = args.encryption_key
    dataset = Dataset.import_from(source, fmt, **import_kwargs)

    log.info("Exporting the dataset")
    if args.filter:
        dataset = dataset.filter(args.filter, **filter_args)
    dataset.export(format=args.output_format, save_dir=dst_dir, **extra_args)

    log.info("Dataset exported to '%s' as '%s'", dst_dir, args.output_format)

    return 0