Source code for datumaro.cli.commands.download

# Copyright (C) 2021-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT

import argparse
from typing import Dict

from ..util import MultilineFormatter
from .downloaders import IDatasetDownloader, KaggleDatasetDownloader, TfdsDatasetDownloader


def build_parser(parser_ctor=argparse.ArgumentParser):
    """Build the argument parser for the ``download`` command.

    One sub-parser is registered per entry of ``DOWNLOADERS``; each of those
    receives a ``get`` and a ``describe`` sub-command.

    Args:
        parser_ctor: Callable used to create the top-level parser. It is
            invoked with ``help``, ``description`` and ``formatter_class``
            keyword arguments.

    Returns:
        The parser produced by ``parser_ctor`` with all sub-commands attached.
    """
    parser = parser_ctor(
        help="Download a publicly available dataset",
        description=f"""
        Downloads a publicly available dataset and saves it in a given format.|n
        |n
        To download the dataset, run "datum download <dataset_type> get".
        On the other hand, for information about the datasets,
        run "datum download <dataset_type> describe".
        Supported dataset types are: {list(DOWNLOADERS.keys())}
        """,
        formatter_class=MultilineFormatter,
    )

    subparsers = parser.add_subparsers(title="Dataset types")
    for name, downloader in DOWNLOADERS.items():
        dataset_type_parser = subparsers.add_parser(
            name=name,
            help=f"Download {name} dataset",
            formatter_class=MultilineFormatter,
        )

        # Every dataset type exposes the same pair of commands.
        _subparsers = dataset_type_parser.add_subparsers(title="Commands")
        build_get_subparser(_subparsers, name, downloader)
        build_describe_subparser(_subparsers, name, downloader)

    # Hand the configured parser back to the caller, exactly as
    # build_get_subparser() and build_describe_subparser() do.
    return parser
def build_get_subparser(
    subparsers: argparse._SubParsersAction, name: str, downloader: IDatasetDownloader
):
    """Register the ``get`` sub-command on ``subparsers``.

    Args:
        subparsers: Sub-parser collection the ``get`` parser is added to.
        name: Dataset type name. Not used by this builder.
        downloader: Supplies the command description through
            ``get_command_description()`` and is stored as the ``downloader``
            default of the created parser.

    Returns:
        The newly created ``get`` parser.
    """
    get_parser = subparsers.add_parser(
        name="get",
        help="Download a publicly available dataset",
        description=downloader.get_command_description(),
        formatter_class=MultilineFormatter,
    )

    # (option strings, add_argument keyword options), registered in order.
    argument_specs = (
        (
            ("-i", "--dataset-id"),
            {"required": True, "help": "Which dataset to download"},
        ),
        (
            ("-f", "--output-format"),
            {"help": "Output format (default: original format of the dataset)"},
        ),
        (
            ("-o", "--output-dir"),
            {
                "dest": "dst_dir",
                "help": "Directory to save output (default: a subdir in the current one)",
            },
        ),
        (
            ("--overwrite",),
            {
                "action": "store_true",
                "help": "Overwrite existing files in the save directory",
            },
        ),
        (
            ("-s", "--subset"),
            {"help": "Save only the specified subset"},
        ),
        (
            ("extra_args",),
            {
                "nargs": argparse.REMAINDER,
                "help": "Additional arguments for output format (pass '-- -h' for help). "
                "Must be specified after the main command arguments",
            },
        ),
    )
    for option_strings, options in argument_specs:
        get_parser.add_argument(*option_strings, **options)

    # The dispatcher reads ``command`` and ``downloader`` from the parsed args.
    get_parser.set_defaults(command=download_command, downloader=downloader)
    return get_parser
def build_describe_subparser(
    subparsers: argparse._SubParsersAction, name: str, downloader: IDatasetDownloader
):
    """Register the ``describe`` sub-command on ``subparsers``.

    Args:
        subparsers: Sub-parser collection the ``describe`` parser is added to.
        name: Dataset type name, interpolated into the command description.
        downloader: Supplies the trailing part of the description through
            ``describe_command_description()`` and is stored as the
            ``downloader`` default of the created parser.

    Returns:
        The newly created ``describe`` parser.
    """
    # Common introduction followed by the downloader-specific text.
    description = f"""
        Reports information about datasets that can be downloaded with the
        "datum download {name}" command. The information is reported either as
        human-readable text (the default) or as a JSON object."""
    description += downloader.describe_command_description()

    describe_parser = subparsers.add_parser(
        name="describe",
        help="Print information about downloadable datasets",
        description=description,
        formatter_class=MultilineFormatter,
    )

    describe_parser.add_argument(
        "--report-format",
        choices=("text", "json"),
        default="text",
        help="Format in which to report the information (default: %(default)s)",
    )
    describe_parser.add_argument(
        "--report-file",
        help="File to which to write the report (default: standard output)",
    )

    describe_parser.set_defaults(command=describe_downloads_command, downloader=downloader)
    return describe_parser
def get_sensitive_args():
    """Return the sensitive argument names of each command in this module.

    The names are spelled as parsed-argument attribute names (argparse
    ``dest`` values), i.e. the way the command functions read them from
    ``args``: ``--output-dir`` is stored as ``dst_dir`` and ``--report-file``
    as ``report_file``.

    Returns:
        A dict mapping each command function to a list of argument names.
    """
    # NOTE(review): presumably the consumer matches these names against the
    # attributes of the parsed namespace -- confirm against the caller. The
    # hyphenated option spelling "report-file" can never match one.
    return {
        download_command: ["dst_dir", "extra_args"],
        describe_downloads_command: ["report_file"],
    }
# Registry of the available downloaders, keyed by the dataset type name that
# is used as the sub-command name ("datum download <dataset_type> ...").
DOWNLOADERS: Dict[str, IDatasetDownloader] = dict(
    tfds=TfdsDatasetDownloader,
    kaggle=KaggleDatasetDownloader,
)
def download_command(args):
    """Run the ``get`` command: forward the parsed options to the downloader.

    Args:
        args: Parsed arguments. ``downloader``, ``dataset_id``, ``dst_dir``,
            ``overwrite``, ``output_format``, ``subset`` and ``extra_args``
            are read from it.

    Returns:
        None. The result of ``download()`` is not propagated.
    """
    download = args.downloader.download

    # Passed positionally, in the order expected by ``download()``.
    positional = (
        args.dataset_id,
        args.dst_dir,
        args.overwrite,
        args.output_format,
        args.subset,
        args.extra_args,
    )
    download(*positional)
def describe_downloads_command(args):
    """Run the ``describe`` command through the selected downloader.

    Args:
        args: Parsed arguments. ``downloader``, ``report_format`` and
            ``report_file`` are read from it.

    Returns:
        Whatever the downloader's ``describe()`` call returns.
    """
    describe = args.downloader.describe
    report_format, report_file = args.report_format, args.report_file
    return describe(report_format, report_file)