datumaro.components.project#
Classes
|
An enumeration. |
|
An enumeration. |
|
|
|
|
|
An enumeration. |
|
|
|
|
|
|
|
|
|
|
|
|
|
- class datumaro.components.project.ProjectSourceDataset(path: str, tree: Tree, source: str, readonly: bool = False)[source]#
Bases:
IDataset
- property readonly#
- property config#
- class datumaro.components.project.IgnoreMode(value)[source]#
Bases:
Enum
An enumeration.
- rewrite = 1#
- append = 2#
- remove = 3#
- class datumaro.components.project.BuildStageType(value)[source]#
Bases:
Enum
An enumeration.
- source = 1#
- project = 2#
- transform = 3#
- filter = 4#
- convert = 5#
- inference = 6#
- explore = 7#
- class datumaro.components.project.Pipeline(config: PipelineConfig | None = None)[source]#
Bases:
object
- property head_node#
- class datumaro.components.project.ProjectBuilder(project: Project, tree: Tree)[source]#
Bases:
object
- class datumaro.components.project.ProjectBuildTargets(tree: Tree)[source]#
Bases:
CrudProxy[BuildTarget]
- MAIN_TARGET = 'project'#
- BASE_STAGE = 'root'#
- add_target(name) BuildTarget [source]#
- add_transform_stage(target: str, transform: str, params: Dict | None = None, name: str | None = None)[source]#
- add_inference_stage(target: str, model: str, params: Dict | None = None, name: str | None = None)[source]#
- add_filter_stage(target: str, expr: str, params: Dict | None = None, name: str | None = None)[source]#
- class datumaro.components.project.GitWrapper(project_dir, repo=None)[source]#
Bases:
object
- property initialized#
- GitTree = datumaro.components.project.GitTree#
- GitStatus = datumaro.components.project.GitStatus#
- status(paths: str | GitTree | Iterable[str] = None, base_dir: str = None) Dict[str, GitStatus] [source]#
Compares working directory and index.
- Parameters:
paths – an iterable of paths to compare, a git.Tree, or None. When None, uses all the paths from HEAD.
base_dir – a base path for paths. It is prepended to each path. When None or ‘’, the repo root is used. This can be useful if the index contains displaced paths that need to be mapped to real paths.
- The statuses are:
“A” for added paths
“D” for deleted paths
“R” for renamed paths
“M” for paths with modified data
“T” for paths whose type has changed
Returns: { abspath(base_dir + path): status }
- rev_parse(ref: str) Tuple[str, str] [source]#
Expands named refs and tags.
Returns: object type, object hash
- ignore(paths: str | List[str], mode: None | str | IgnoreMode = None, gitignore: str | None = None)[source]#
- HASH_LEN = 40#
- class datumaro.components.project.DvcWrapper(project_dir)[source]#
Bases:
object
- property initialized#
- ignore(paths: str | List[str], mode: None | str | IgnoreMode = None, dvcignore: str | None = None)[source]#
- yaml = <module 'ruamel.yaml' from '/home/runner/work/datumaro/datumaro/.tox/build-docs/lib/python3.10/site-packages/ruamel/yaml/__init__.py'>#
- yaml_parser = <ruamel.yaml.main.YAML object>#
- FILE_HASH_LEN = 32#
- DIR_HASH_SUFFIX = '.dir'#
- DIR_HASH_LEN = 36#
- class datumaro.components.project.Tree(project: Project, config: None | Dict | Config | TreeConfig = None, rev: None | Revision = None)[source]#
Bases:
object
- property sources: ProjectSources#
- property build_targets: ProjectBuildTargets#
- property env: Environment#
- class datumaro.components.project.DiffStatus(value)[source]#
Bases:
Enum
An enumeration.
- added = 1#
- modified = 2#
- removed = 3#
- missing = 4#
- foreign_modified = 5#
- class datumaro.components.project.Project(path: str | None = None, readonly=False)[source]#
Bases:
object
- property head_rev: Revision#
- property env: Environment#
- get_rev(rev: None | Revision) Tree [source]#
- Reference conventions:
None or “” - working dir
“<40 symbols>” - revision hash
- compute_source_hash(data_dir: str, dvcfile: str | None = None, no_cache: bool = True) ObjectId [source]#
- refresh_source_hash(source: str, no_cache: bool = True) ObjectId [source]#
Computes and updates the source hash in the working directory.
Returns: hash
- import_source(name: str, url: str | None, format: str, options: Dict | None = None, *, no_cache: bool = True, no_hash: bool = True, rpath: str | None = None) Source [source]#
Adds a new source (dataset) to the working directory of the project.
When ‘rpath’ is specified, will copy all the data from URL, but read only the specified file. Required to support subtasks and subsets in datasets.
- Parameters:
name (str) – Name of the new source
url (str) – URL of the new source. A path to a file or directory
format (str) – Dataset format
options (dict) – Options for the format Extractor
no_cache (bool) – Don’t put a copy of files into the project cache. Can be used to reduce project cache size.
no_hash (bool) – Don’t compute source data hash. Implies “no_cache”. Useful to reduce import time at the cost of disabled data integrity checks.
rpath (str) – Used to specify a relative path to the dataset inside of the directory pointed by URL.
Returns: the new source config
- add_source(path: str, format: str, options: Dict | None = None, *, rpath: str | None = None) Tuple[str, Source] [source]#
Adds a new source (dataset) from the working directory of the project.
Only directories from the project root can be added. This command is useful after a source was removed and you need to re-add it, or when the dataset was copied or downloaded manually.
When ‘rpath’ is specified, will copy all the data from URL, but read only the specified file. Required to support subtasks and subsets in datasets.
- Parameters:
Returns: the name and the config of the new source
- remove_source(name: str, *, force: bool = False, keep_data: bool = True)[source]#
- Options:
force (bool) - ignores errors and tries to wipe remaining data
keep_data (bool) - leaves source data untouched
- commit(message: str, *, no_cache: bool = False, allow_empty: bool = False, allow_foreign: bool = False) Revision [source]#
Copies tree and objects from the working dir to the cache. Creates a new commit. Moves the HEAD pointer to the new commit.
- Options:
no_cache (bool) - don’t put added dataset data into cache, store only metainfo. Can be used to reduce storage size.
allow_empty (bool) - allow commits with no changes.
allow_foreign (bool) - allow commits with changes made not by Datumaro.
Returns: the new commit hash
- checkout(rev: None | Revision = None, sources: None | str | Iterable[str] = None, *, force: bool = False)[source]#
Copies tree and objects from the cache to the working tree.
Sets HEAD to the specified revision, unless sources are specified. When sources are specified, only copies objects from the cache to the working tree. When neither a revision nor sources are specified, restores the sources from the current revision.
By default, uses the current (HEAD) revision.
- Options:
force (bool) - ignore unsaved changes. By default, an error is raised
- status() Dict[str, DiffStatus] [source]#