# Source code for datumaro.cli.commands.require_project.modification.add

# Copyright (C) 2021 Intel Corporation
#
# SPDX-License-Identifier: MIT


import argparse
import logging as log

from datumaro.components.environment import DEFAULT_ENVIRONMENT
from datumaro.components.errors import ProjectNotFoundError
from datumaro.util.scope import on_error_do, scope_add, scoped

from ....util import MultilineFormatter, join_cli_args, show_video_import_warning
from ....util.errors import CliException
from ....util.project import load_project

# Names exported by a star-import of this module. The command handler
# `add_command` is not listed here; `build_parser` attaches it to the parser
# through `parser.set_defaults(command=add_command)`.
__all__ = [
    "build_parser",
    "get_sensitive_args",
]


def build_parser(parser_ctor=argparse.ArgumentParser):
    """Create the argument parser for the "add" command.

    Args:
        parser_ctor: Callable used to construct the parser. It receives the
            ``help``, ``description`` and ``formatter_class`` keyword
            arguments. Defaults to ``argparse.ArgumentParser``.

    Returns:
        The configured parser. Its ``command`` default is set to
        ``add_command``.
    """
    env = DEFAULT_ENVIRONMENT
    # Every format name known to the default environment, as extractor or importer.
    builtin_formats = sorted(set(env.extractors).union(env.importers))

    # '|n' and '|s' are markup left in the text for the formatter class;
    # '%(prog)s' is expanded by argparse, '{}' by str.format() below.
    description_template = """
        Adds a data source to a project. A data source is a dataset
        in a supported format (check 'formats' section below).|n
        |n
        The command adds a project-local directory as a data source in the project.
        Unlike the "import" command, it does not copy datasets and only works
        with local directories. The source name is defined by the directory name.|n
        |n
        Formats:|n
        Datasets come in a wide variety of formats. Each dataset
        format defines its own data structure and rules on how to
        interpret the data. Check the user manual for the list of
        supported formats, examples and documentation.
        |n
        The list of supported formats can be extended by plugins.
        Check the "plugins" section of the developer guide for information
        about plugin implementation.|n
        |n
        Each dataset format has its own import options, which are passed
        after the '--' separator (see examples), pass '-- -h' for more info.|n
        |n
        Builtin formats: {}|n
        |n
        Examples:|n
        - Add a local directory with a VOC-like dataset:|n
        |s|s%(prog)s -f voc my_dataset/|n
        |n
        - Add a directory with a COCO dataset, use only a specific file:|n
        |s|s%(prog)s -f coco_instances -r anns/train.json my_source/
        """

    parser = parser_ctor(
        help="Add data source to project",
        description=description_template.format(", ".join(builtin_formats)),
        formatter_class=MultilineFormatter,
    )

    # Hidden catch-all declared first: workaround for '--' being eaten by
    # positionals. `add_command` re-splits its content into path / extra_args.
    parser.add_argument(
        "_positionals",
        nargs=argparse.REMAINDER,
        help=argparse.SUPPRESS,
    )
    parser.add_argument("path", help="A path to a dataset directory")
    parser.add_argument(
        "-f",
        "--format",
        required=True,
        help="Source dataset format",
    )
    parser.add_argument(
        "-r",
        "--path",
        dest="rpath",
        help=(
            "A path relative to source root to the source data. Useful to "
            "specify a path to subset, subtask, or a specific file in dataset."
        ),
    )
    parser.add_argument(
        "--no-check",
        action="store_true",
        help="Don't try to read the source",
    )
    parser.add_argument(
        "-p",
        "--project",
        dest="project_dir",
        default=".",
        help="Directory of the project to operate on (default: current dir)",
    )
    parser.add_argument(
        "extra_args",
        nargs=argparse.REMAINDER,
        help=(
            "Additional arguments for extractor (pass '-- -h' for help). "
            "Must be specified after the main command arguments and after "
            "the '--' separator"
        ),
    )

    parser.set_defaults(command=add_command)
    return parser
def get_sensitive_args():
    """Return the argument names flagged as sensitive, keyed by command handler.

    Returns:
        A dict with a single entry mapping ``add_command`` to a list of
        argument names.
    """
    # NOTE(review): "url" does not match any argument declared by
    # build_parser() in this file, and the positional "path" is not listed.
    # Confirm against the consumer of this mapping whether that is intended.
    sensitive_names = ["url", "project_dir", "rpath", "extra_args"]
    return {add_command: sensitive_names}
@scoped
def add_command(args):
    """Handle the "add" command: register a directory as a project data source.

    Args:
        args: Parsed command-line namespace produced by the parser from
            ``build_parser``. ``args.path`` and ``args.extra_args`` are
            overwritten here after the positionals are re-split.

    Returns:
        0 on success.

    Raises:
        ProjectNotFoundError: Re-raised when the project cannot be loaded and
            the extra arguments contain no help flag.
        CliException: If ``args.format`` is neither a known importer nor a
            known extractor.
    """
    # Workaround: required positionals consume positionals from the end, so
    # gather everything back into one list and split it manually.
    args._positionals += join_cli_args(args, "path", "extra_args")
    tokens = args._positionals

    if "--" in tokens:
        # Everything before the separator is the path part; the separator
        # itself is dropped from the extra arguments.
        split_at = tokens.index("--")
        extras_start = split_at + 1
    else:
        # No separator: the first token is the path, the rest are extras.
        split_at = 1
        extras_start = 1

    leading = tokens[:split_at]
    args.path = leading[0] if leading else ""
    args.extra_args = tokens[extras_start:]

    wants_plugin_help = any(flag in args.extra_args for flag in ("-h", "--help"))

    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        # A missing project is tolerated only when format help was requested.
        if not wants_plugin_help:
            raise
        project = None

    env = DEFAULT_ENVIRONMENT if project is None else project.env

    fmt = args.format
    # Importers take precedence over extractors; registries are looked up
    # lazily, one at a time.
    for registry_name in ("importers", "extractors"):
        registry = getattr(env, registry_name)
        if fmt in registry:
            arg_parser = registry[fmt]
            break
    else:
        raise CliException(
            "Unknown format '%s'. A format can be added"
            " by providing an Extractor and Importer plugins" % fmt
        )

    # NOTE(review): when `project` is None (help requested without a project),
    # this call presumably prints the format help and exits; otherwise the
    # `project.add_source` call below would fail on None. Confirm.
    parsed_options = arg_parser.parse_cmdline(args.extra_args)

    if fmt == "video_frames":
        show_video_import_warning()

    source_name, _ = project.add_source(
        args.path,
        format=args.format,
        options=parsed_options,
        rpath=args.rpath,
    )
    # Roll the registration back if anything below fails within this scope.
    on_error_do(
        project.remove_source,
        source_name,
        ignore_errors=True,
        kwargs={"force": True, "keep_data": True},
    )

    if not args.no_check:
        log.info("Checking the source...")
        project.working_tree.make_dataset(source_name)

    project.working_tree.save()

    log.info(
        "Source '%s' with format '%s' has been added to the project",
        source_name,
        args.format,
    )

    return 0