datumaro.plugins.configurable_validator

Classes

ClsStats(label_categories, warnings, ...)

ClsStatsData(categories, ...)

ConfigurableValidator(tasks, ...[, ...])

DetStats(label_categories, warnings, ...)

DetStatsData(categories, ...)

SegStats(label_categories, warnings, ...)

SegStatsData(categories, ...)

StatsData(categories, undefined_attribute, ...)

TblStats(categories, warnings, ...)

TblStatsData(categories, empty_label, ...)

class datumaro.plugins.configurable_validator.StatsData(categories: Dict[str, Dict[str, Set[str]]], undefined_attribute: Set[Tuple[str, str, str]], undefined_label: Set[Tuple[str, str, str]], missing_attribute: Set[Tuple[str, str, str, str]], missing_label: Set[Tuple[str, str]])[source]#

Bases: object

categories: Dict[str, Dict[str, Set[str]]]#
undefined_attribute: Set[Tuple[str, str, str]]#
undefined_label: Set[Tuple[str, str, str]]#
missing_attribute: Set[Tuple[str, str, str, str]]#
missing_label: Set[Tuple[str, str]]#
class datumaro.plugins.configurable_validator.ClsStatsData(categories: Dict[str, Dict[str, Set[str]]], undefined_attribute: Set[Tuple[str, str, str]], undefined_label: Set[Tuple[str, str, str]], missing_attribute: Set[Tuple[str, str, str, str]], missing_label: Set[Tuple[str, str]], multiple_label: Set[Tuple[str, str]])[source]#

Bases: StatsData

multiple_label: Set[Tuple[str, str]]#
class datumaro.plugins.configurable_validator.DetStatsData(categories: Dict[str, Dict[str, Set[str]]], undefined_attribute: Set[Tuple[str, str, str]], undefined_label: Set[Tuple[str, str, str]], missing_attribute: Set[Tuple[str, str, str, str]], missing_label: Set[Tuple[str, str]], invalid_value: Set[Tuple[str, str]], negative_length: Set[Tuple[str, str]])[source]#

Bases: StatsData

invalid_value: Set[Tuple[str, str]]#
negative_length: Set[Tuple[str, str]]#
class datumaro.plugins.configurable_validator.SegStatsData(categories: Dict[str, Dict[str, Set[str]]], undefined_attribute: Set[Tuple[str, str, str]], undefined_label: Set[Tuple[str, str, str]], missing_attribute: Set[Tuple[str, str, str, str]], missing_label: Set[Tuple[str, str]], invalid_value: Set[Tuple[str, str]])[source]#

Bases: StatsData

invalid_value: Set[Tuple[str, str]]#
class datumaro.plugins.configurable_validator.TblStatsData(categories: Dict[str, Dict[str, Set[str]]], empty_label: Set[Tuple[str, str]], empty_caption: Set[Tuple[str, str]], missing_annotations: Set[Tuple[str, str]], broken_annotations: Set[Tuple[str, str]])[source]#

Bases: object

categories: Dict[str, Dict[str, Set[str]]]#
empty_label: Set[Tuple[str, str]]#
empty_caption: Set[Tuple[str, str]]#
missing_annotations: Set[Tuple[str, str]]#
broken_annotations: Set[Tuple[str, str]]#
class datumaro.plugins.configurable_validator.ClsStats(label_categories: LabelCategories, warnings: set, few_samples_thr: None, imbalance_ratio_thr: None, far_from_mean_thr: None, dominance_thr: None, topk_bins_ratio: None)[source]#

Bases: _BaseAnnStats

class datumaro.plugins.configurable_validator.DetStats(label_categories: LabelCategories, warnings: set, few_samples_thr: None, imbalance_ratio_thr: None, far_from_mean_thr: None, dominance_thr: None, topk_bins_ratio: None)[source]#

Bases: _BaseAnnStats

class datumaro.plugins.configurable_validator.SegStats(label_categories: LabelCategories, warnings: set, few_samples_thr: None, imbalance_ratio_thr: None, far_from_mean_thr: None, dominance_thr: None, topk_bins_ratio: None)[source]#

Bases: _BaseAnnStats

class datumaro.plugins.configurable_validator.TblStats(categories: dict, warnings: set, few_samples_thr: None, imbalance_ratio_thr: None, far_from_mean_thr: None, dominance_thr: None, topk_bins_ratio: None)[source]#

Bases: _BaseAnnStats

class datumaro.plugins.configurable_validator.ConfigurableValidator(tasks: List[TaskType] = [TaskType.classification, TaskType.detection, TaskType.segmentation, TaskType.tabular], warnings: Set[DatasetValidationError] = {<class 'datumaro.components.errors.AttributeDefinedButNotFound'>, <class 'datumaro.components.errors.FarFromAttrMean'>, <class 'datumaro.components.errors.FarFromLabelMean'>, <class 'datumaro.components.errors.FewSamplesInAttribute'>, <class 'datumaro.components.errors.FewSamplesInLabel'>, <class 'datumaro.components.errors.ImbalancedAttribute'>, <class 'datumaro.components.errors.ImbalancedDistInAttribute'>, <class 'datumaro.components.errors.ImbalancedDistInLabel'>, <class 'datumaro.components.errors.ImbalancedLabels'>, <class 'datumaro.components.errors.InvalidValue'>, <class 'datumaro.components.errors.LabelDefinedButNotFound'>, <class 'datumaro.components.errors.MissingAnnotation'>, <class 'datumaro.components.errors.MissingAttribute'>, <class 'datumaro.components.errors.MissingLabelCategories'>, <class 'datumaro.components.errors.MultiLabelAnnotations'>, <class 'datumaro.components.errors.NegativeLength'>, <class 'datumaro.components.errors.OnlyOneAttributeValue'>, <class 'datumaro.components.errors.OnlyOneLabel'>, <class 'datumaro.components.errors.UndefinedAttribute'>, <class 'datumaro.components.errors.UndefinedLabel'>}, few_samples_thr=None, imbalance_ratio_thr=None, far_from_mean_thr=None, dominance_ratio_thr=None, topk_bins=None)[source]#

Bases: Validator, CliPlugin

DEFAULT_FEW_SAMPLES_THR = 1#
DEFAULT_IMBALANCE_RATIO_THR = 50#
DEFAULT_FAR_FROM_MEAN_THR = 5#
DEFAULT_DOMINANCE_RATIO_THR = 0.8#
DEFAULT_TOPK_BINS = 0.1#
ALL_WARNINGS = {<class 'datumaro.components.errors.AttributeDefinedButNotFound'>, <class 'datumaro.components.errors.FarFromAttrMean'>, <class 'datumaro.components.errors.FarFromLabelMean'>, <class 'datumaro.components.errors.FewSamplesInAttribute'>, <class 'datumaro.components.errors.FewSamplesInLabel'>, <class 'datumaro.components.errors.ImbalancedAttribute'>, <class 'datumaro.components.errors.ImbalancedDistInAttribute'>, <class 'datumaro.components.errors.ImbalancedDistInLabel'>, <class 'datumaro.components.errors.ImbalancedLabels'>, <class 'datumaro.components.errors.InvalidValue'>, <class 'datumaro.components.errors.LabelDefinedButNotFound'>, <class 'datumaro.components.errors.MissingAnnotation'>, <class 'datumaro.components.errors.MissingAttribute'>, <class 'datumaro.components.errors.MissingLabelCategories'>, <class 'datumaro.components.errors.MultiLabelAnnotations'>, <class 'datumaro.components.errors.NegativeLength'>, <class 'datumaro.components.errors.OnlyOneAttributeValue'>, <class 'datumaro.components.errors.OnlyOneLabel'>, <class 'datumaro.components.errors.UndefinedAttribute'>, <class 'datumaro.components.errors.UndefinedLabel'>}#
classmethod build_cmdline_parser(**kwargs)[source]#
compute_statistics(dataset)[source]#

Computes statistics of the dataset based on task type.

Parameters:

dataset (IDataset) – a dataset to be validated

Returns:

stats – a dict object containing statistics of the dataset

Return type:

dict

generate_reports(task_stats)[source]#