# Copyright (C) 2020-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
import argparse
import logging as log
import os
import os.path as osp
from datumaro.components.environment import DEFAULT_ENVIRONMENT
from datumaro.components.errors import ProjectNotFoundError
from datumaro.components.project import ProjectBuildTargets
from datumaro.util import str_to_bool
from datumaro.util.scope import scope_add, scoped
from ..util import MultilineFormatter
from ..util.errors import CliException
from ..util.project import load_project, parse_full_revpath
[docs]
def build_parser(parser_ctor=argparse.ArgumentParser):
    """
    Creates the command-line parser of the "transform" command.

    Parameters:
        parser_ctor: a callable that creates the parser. It is called with
            the 'help', 'description' and 'formatter_class' keyword
            arguments.
            NOTE(review): plain argparse.ArgumentParser does not accept
            a 'help' keyword, so callers presumably pass a subparser
            factory (e.g. 'subparsers.add_parser') - confirm.

    Returns:
        The parser with all the command arguments registered and
        the 'command' default set to transform_command.
    """

    transform_names = ", ".join(sorted(DEFAULT_ENVIRONMENT.transforms))

    # '|n' and '|s' are layout markers, presumably interpreted by
    # MultilineFormatter as line breaks and spaces (confirm).
    # '{}' is filled with the builtin transform names below, while
    # the '%(prog)s' placeholders are not touched by str.format().
    description_template = """
        Applies a batch operation to a dataset and produces a new dataset.|n
        |n
        By default, datasets are updated in-place. The '-o/--output-dir'
        option can be used to specify another output directory. When
        updating in-place, use the '--overwrite' parameter (in-place
        updates fail by default to prevent data loss), unless a project
        target is modified.|n
        |n
        Builtin transforms: {}|n
        |n
        This command has the following invocation syntax:
        - %(prog)s <target dataset revpath>|n
        |n
        <revpath> - either a dataset path or a revision path. The full
        syntax is:|n
        - Dataset paths:|n
        |s|s- <dataset path>[ :<format> ]|n
        - Revision paths:|n
        |s|s- <project path> [ @<rev> ] [ :<target> ]|n
        |s|s- <rev> [ :<target> ]|n
        |s|s- <target>|n
        |n
        The current project (-p/--project) is also used as a context for
        plugins, so it can be useful for dataset paths having custom formats.
        When not specified, the current project's working tree is used.|n
        |n
        The command can be applied to a dataset or a project build target,
        a stage or the combined 'project' target, in which case all the
        targets will be affected. A build tree stage will be recorded
        if '--stage' is enabled, and the resulting dataset(-s) will be
        saved if '--apply' is enabled.|n
        |n
        Examples:|n
        - Convert instance polygons to masks:|n |n
        |s|s%(prog)s -t polygons_to_masks|n
        |n
        - Rename dataset items by a regular expression:|n |n
        |s|s- Replace 'pattern' with 'replacement':|n |n
        |s|s|s|s%(prog)s -t rename -- -e '|pattern|replacement|'|n
        |n
        |s|s- Remove 'frame_' from item ids:|n |n
        |s|s|s|s%(prog)s -t rename -- -e '|frame_(\\d+)|\\1|'|n
        |n
        - Split a dataset randomly:|n |n
        |s|s%(prog)s -t random_split --overwrite path/to/dataset:voc
        """

    parser = parser_ctor(
        formatter_class=MultilineFormatter,
        description=description_template.format(transform_names),
        help="Transform project",
    )

    # Each entry is (names or flags, add_argument() options).
    # The order matters: positionals are consumed in registration order.
    argument_table = [
        # workaround for -- eaten by positionals
        (
            ["_positionals"],
            {"nargs": argparse.REMAINDER, "help": argparse.SUPPRESS},
        ),
        (
            ["target"],
            {
                "nargs": "?",
                "default": "project",
                "help": "Target dataset revpath (default: project)",
            },
        ),
        (
            ["-t", "--transform"],
            {"required": True, "help": "Transform to apply to the dataset"},
        ),
        (
            ["-o", "--output-dir"],
            {
                "dest": "dst_dir",
                "help": """
                    Output directory. Can be omitted for main project targets
                    (i.e. data sources and the 'project' target, but not
                    intermediate stages) and dataset targets.
                    If not specified, the results will be saved inplace.
                    """,
            },
        ),
        (
            ["--overwrite"],
            {
                "action": "store_true",
                "help": "Overwrite existing files in the save directory",
            },
        ),
        (
            ["-p", "--project"],
            {
                "dest": "project_dir",
                "help": "Directory of the project to operate on (default: current dir)",
            },
        ),
        (
            ["--stage"],
            {
                "type": str_to_bool,
                "default": True,
                "help": """
                    Include this action as a project build step.
                    If true, this operation will be saved in the project
                    build tree, allowing to reproduce the resulting dataset later.
                    Applicable only to main project targets (i.e. data sources
                    and the 'project' target, but not intermediate stages)
                    (default: %(default)s)
                    """,
            },
        ),
        (
            ["--apply"],
            {
                "type": str_to_bool,
                "default": True,
                "help": "Run this command immediately. If disabled, only the "
                "build tree stage will be written (default: %(default)s)",
            },
        ),
        (
            ["extra_args"],
            {
                "nargs": argparse.REMAINDER,
                "help": "Additional arguments for transformation (pass '-- -h' for help). "
                "Must be specified after the main command arguments and after "
                "the '--' separator",
            },
        ),
    ]
    for names_or_flags, options in argument_table:
        parser.add_argument(*names_or_flags, **options)

    # Bind the handler of this command to the parsed arguments
    parser.set_defaults(command=transform_command)

    return parser
[docs]
def get_sensitive_args():
    """
    Lists the sensitive arguments of the commands defined in this module.

    Returns:
        A dict mapping the command handler (transform_command) to the list
        of its argument destination names treated as sensitive.
        NOTE(review): these are the path-like and free-form arguments;
        presumably the caller hides their values in logs or reports - confirm.
    """

    sensitive_names = ["dst_dir", "project_dir", "extra_args", "target"]
    return {transform_command: sensitive_names}