# Copyright (C) 2020-2024 Intel Corporation
#
# SPDX-License-Identifier: MIT
from typing import Any, Optional, Tuple
from attrs import define, field, validators
from datumaro.util.attrs_util import has_length, not_empty
[docs]
class ImmutableObjectError(Exception):
    """Exception whose message says that an immutable object cannot be changed."""

    def __str__(self) -> str:
        message = "Cannot set value of immutable object"
        return message
[docs]
class DatumaroError(Exception):
    """Base exception type for most of the error classes declared in this module."""
[docs]
class VcsError(DatumaroError):
    """Base type for the version-control (VCS) related errors of this module."""
[docs]
class ReadonlyDatasetError(VcsError):
    """VCS error whose message says a read-only dataset cannot be updated."""

    def __str__(self) -> str:
        text = "Can't update a read-only dataset"
        return text
[docs]
class ReadonlyProjectError(VcsError):
    """VCS error whose message says a read-only project cannot be changed."""

    def __str__(self) -> str:
        text = "Can't change a read-only project"
        return text
[docs]
@define(auto_exc=False)
class UnknownRefError(VcsError):
    """VCS error for a ref that could not be parsed; the ref is stored in ``ref``."""

    ref = field()

    def __str__(self) -> str:
        return "Can't parse ref '{}'".format(self.ref)
[docs]
class MissingObjectError(VcsError):
    """
    Marker VCS error; adds nothing to ``VcsError``.

    Presumably reports an object that cannot be found (inferred from the
    name; confirm at the raise sites).
    """
[docs]
class MismatchingObjectError(VcsError):
    """
    Marker VCS error; adds nothing to ``VcsError``.

    Presumably reports an object that does not match what was expected
    (inferred from the name; confirm at the raise sites).
    """
[docs]
@define(auto_exc=False)
class UnsavedChangesError(VcsError):
    """
    VCS error listing paths that have uncommitted changes.

    Attributes:
        paths: iterable of the changed paths. Each entry is converted with
            ``str()`` when the message is built, so path-like objects are
            accepted as well as plain strings.
    """

    paths = field()

    def __str__(self) -> str:
        # str.join() raises TypeError on non-str items (e.g. pathlib.Path),
        # so every entry is stringified before joining.
        listed = ", ".join(str(path) for path in self.paths)
        return "There are some uncommitted changes: %s" % listed
[docs]
class ForeignChangesError(VcsError):
    """
    Marker VCS error; adds nothing to ``VcsError``.

    Presumably reports changes made outside of the tool's control
    (inferred from the name; confirm at the raise sites).
    """
[docs]
class EmptyCommitError(VcsError):
    """
    Marker VCS error; adds nothing to ``VcsError``.

    Presumably reports an attempt to make a commit without changes
    (inferred from the name; confirm at the raise sites).
    """
[docs]
class PathOutsideSourceError(VcsError):
    """
    Marker VCS error; adds nothing to ``VcsError``.

    Presumably reports a path located outside of a source
    (inferred from the name; confirm at the raise sites).
    """
[docs]
class SourceUrlInsideProjectError(VcsError):
    """VCS error whose message says a source URL must not point inside the project."""

    def __str__(self) -> str:
        text = "Source URL cannot point inside the project"
        return text
[docs]
class UnexpectedUrlError(VcsError):
    """
    Marker VCS error; adds nothing to ``VcsError``.

    Presumably reports a URL that was not expected
    (inferred from the name; confirm at the raise sites).
    """
[docs]
class MissingSourceHashError(VcsError):
    """
    Marker VCS error; adds nothing to ``VcsError``.

    Presumably reports a source without a recorded hash
    (inferred from the name; confirm at the raise sites).
    """
[docs]
class PipelineError(DatumaroError):
    """Base type for the pipeline-related errors of this module."""
[docs]
class InvalidPipelineError(PipelineError):
    """Base type for the errors of this module that describe an invalid pipeline."""
[docs]
class EmptyPipelineError(InvalidPipelineError):
    """
    Marker error; adds nothing to ``InvalidPipelineError``.

    Presumably reports a pipeline without stages
    (inferred from the name; confirm at the raise sites).
    """
[docs]
class MultiplePipelineHeadsError(InvalidPipelineError):
    """
    Marker error; adds nothing to ``InvalidPipelineError``.

    Presumably reports a pipeline that has more than one head
    (inferred from the name; confirm at the raise sites).
    """
[docs]
class InvalidStageError(InvalidPipelineError):
    """
    Marker error; adds nothing to ``InvalidPipelineError``.

    Presumably reports an invalid pipeline stage
    (inferred from the name; confirm at the raise sites).
    """
[docs]
class UnknownStageError(InvalidStageError):
    """
    Marker error; adds nothing to ``InvalidStageError``.

    Presumably reports a reference to a stage that is not known
    (inferred from the name; confirm at the raise sites).
    """
[docs]
class MigrationError(DatumaroError):
    """
    Marker error; adds nothing to ``DatumaroError``.

    Presumably reports a failed migration
    (inferred from the name; confirm at the raise sites).
    """
[docs]
class OldProjectError(DatumaroError):
    """Error telling the user that the project was created by an old Datumaro version."""

    def __str__(self) -> str:
        # The message starts and ends with a newline and is split over
        # three text lines.
        return (
            "\n"
            "The project you're trying to load was\n"
            "created by the old Datumaro version. Try to migrate the\n"
            "project with 'datum project migrate' and then reload.\n"
        )
[docs]
class NotAvailableError(DatumaroError):
    """
    Marker error; adds nothing to ``DatumaroError``.

    Presumably reports something that is not available
    (inferred from the name; confirm at the raise sites).
    """
[docs]
@define(auto_exc=False)
class ProjectNotFoundError(DatumaroError):
    """Error for a project that cannot be found; the location is stored in ``path``."""

    path = field()

    def __str__(self) -> str:
        return "Can't find project at '{}'".format(self.path)
[docs]
@define(auto_exc=False)
class ProjectAlreadyExists(DatumaroError):
    """Error for project creation at a ``path`` that already holds a project."""

    path = field()

    def __str__(self) -> str:
        template = "Can't create project: a project already exists at '{}'"
        return template.format(self.path)
[docs]
@define(auto_exc=False)
class VcsAlreadyExists(DatumaroError):
    """Error for project creation at a ``path`` that already holds a version control system."""

    path = field()

    def __str__(self) -> str:
        template = "Can't create project: a version control system already exists at '{}'"
        return template.format(self.path)
[docs]
@define(auto_exc=False)
class UnknownSourceError(DatumaroError):
    """Error for an unknown source; the source name is stored in ``name``."""

    name = field()

    def __str__(self) -> str:
        return "Unknown source '{}'".format(self.name)
[docs]
@define(auto_exc=False)
class UnknownTargetError(DatumaroError):
    """Error for an unknown target; the target name is stored in ``name``."""

    name = field()

    def __str__(self) -> str:
        return "Unknown target '{}'".format(self.name)
[docs]
@define(auto_exc=False)
class SourceExistsError(DatumaroError):
    """Error for a source that already exists; the source name is stored in ``name``."""

    name = field()

    def __str__(self) -> str:
        return "Source '{}' already exists".format(self.name)
[docs]
class DatasetExportError(DatumaroError):
    """Base type for the dataset export errors of this module."""
[docs]
@define(auto_exc=False)
class ItemExportError(DatasetExportError):
    """
    Carries extra information about an item that failed to export.
    The underlying error is expected to be available through `__cause__`.
    """

    item_id: Tuple[str, str]

    def __str__(self) -> str:
        # str() of the whole value, exactly like "%s" applied to a 1-tuple
        return "Failed to export item " + str(self.item_id)
[docs]
class AnnotationExportError(ItemExportError):
    """
    Subclass of ``ItemExportError`` that adds no fields or behavior.

    Presumably used when the failure concerns an item's annotation
    (inferred from the name; confirm at the raise sites).
    """
[docs]
class DatasetImportError(DatumaroError):
    """Base type for the dataset import errors of this module."""
[docs]
class InvalidAnnotationError(DatasetImportError):
    """
    The basic error type for dataset parsing problems. The message is
    expected to contain a description of the problem.
    """
[docs]
@define(auto_exc=False)
class InvalidFieldError(InvalidAnnotationError):
    """Parsing error about an annotation field whose value is invalid."""

    name: str = field(validator=[validators.instance_of(str), not_empty])
    """Field name"""

    def __str__(self) -> str:
        return "Invalid annotation field '{}' value".format(self.name)
[docs]
@define(auto_exc=False)
class InvalidFieldTypeError(InvalidFieldError):
    """
    Parsing error about an annotation field whose value has the wrong type.

    The message names the field, the actual type and either the single
    expected type or the list of acceptable types.
    """

    actual: str = field(validator=[validators.instance_of(str), not_empty])
    """Actual type of the field"""

    # Variable-length tuple: __str__ handles one entry as well as several,
    # so the annotation must not pin the length to exactly one element.
    expected: Tuple[str, ...] = field(validator=[validators.instance_of(tuple), not_empty])
    """The list of expected types of the field"""

    def __str__(self) -> str:
        if len(self.expected) == 1:
            expected = self.expected[0]
        else:
            expected = "one of " + ", ".join(self.expected)
        return f"Invalid annotation field '{self.name}' type '{self.actual}'. Expected {expected}"
[docs]
@define(auto_exc=False)
class MissingFieldError(InvalidFieldError):
    """Parsing error about an annotation field that is missing."""

    def __str__(self) -> str:
        return "Missing annotation field '{}'".format(self.name)
[docs]
@define(auto_exc=False)
class UndeclaredLabelError(InvalidAnnotationError):
    """Parsing error about a label that has not been declared."""

    id: str = field(validator=validators.instance_of(str))
    """Index or name"""

    def __str__(self) -> str:
        return "Undeclared label '{}'".format(self.id)
[docs]
@define(auto_exc=False)
class ItemImportError(DatasetImportError):
    """
    A wrapper around a dataset parsing error that adds error context.
    The wrapped error is expected to be available through `__cause__`.
    """

    item_id: Tuple[Optional[str], Optional[str]] = field(
        validator=[validators.instance_of(tuple), has_length(2)]
    )
    """
    (id, subset) of the item with problem.
    If id or subset cannot be reported, such field is set to None.
    """

    def __str__(self) -> str:
        return "Failed to import item {}: {}".format(self.item_id, self.__cause__)
[docs]
class AnnotationImportError(ItemImportError):
    """Item import error whose message refers to the item's annotation."""

    def __str__(self) -> str:
        template = "Failed to import item {} annotation: {}"
        return template.format(self.item_id, self.__cause__)
[docs]
@define(auto_exc=False)
class DatasetNotFoundError(DatasetImportError):
    """
    Import error for a dataset that could not be found.

    The message is produced by filling ``template`` with the ``path`` and
    ``format`` fields as keyword arguments.
    """

    path: str = field()
    format: str = field()
    template: str = field(default="Failed to find dataset '{format}' at '{path}'")

    def __str__(self) -> str:
        substitutions = {"path": self.path, "format": self.format}
        return self.template.format(**substitutions)
[docs]
class DatasetError(DatumaroError):
    """Base type for the dataset-level errors of this module."""
[docs]
class AnnotationTypeError(DatumaroError):
    """
    Marker error; adds nothing to ``DatumaroError``.

    Presumably reports a problem with an annotation type
    (inferred from the name; confirm at the raise sites).
    """
[docs]
class DatasetInfosRedefinedError(DatasetError):
    """Dataset error whose message says infos may be set only once."""

    def __str__(self) -> str:
        text = "Infos can only be set once for a dataset"
        return text
[docs]
class CategoriesRedefinedError(DatasetError):
    """Dataset error whose message says categories may be set only once."""

    def __str__(self) -> str:
        text = "Categories can only be set once for a dataset"
        return text
[docs]
@define(auto_exc=False)
class RepeatedItemError(DatasetError):
    """Dataset error for an item that occurs more than once in the source sequence."""

    item_id = field()

    def __str__(self) -> str:
        return "Item {} is repeated in the source sequence.".format(self.item_id)
[docs]
class DatasetQualityError(DatasetError):
    """Base type for the dataset quality errors of this module."""
[docs]
@define(auto_exc=False)
class AnnotationsTooCloseError(DatasetQualityError):
    """
    Quality error for two annotations of an item that are too close.

    Stores the item id, both annotations (``a``, ``b``) and their distance.
    """

    item_id = field()
    a = field()
    b = field()
    distance = field()

    def __str__(self) -> str:
        # "!s" forces str() conversion, matching "%s" formatting
        return (
            f"Item {self.item_id!s}: annotations are too close: "
            f"{self.a!s}, {self.b!s}, distance = {self.distance!s}"
        )
[docs]
@define(auto_exc=False)
class WrongGroupError(DatasetQualityError):
    """
    Quality error for an annotation group with wrong labels.

    ``found`` and ``expected`` are converted to sets and ``group`` to a list
    when the error is created.
    """

    item_id = field()
    found = field(converter=set)
    expected = field(converter=set)
    group = field(converter=list)

    def __str__(self) -> str:
        # "!s" forces str() conversion, matching "%s" formatting
        return (
            f"Item {self.item_id!s}: annotation group has wrong labels: "
            f"found {self.found!s}, expected {self.expected!s}, group {self.group!s}"
        )
[docs]
@define(auto_exc=False, init=False)
class DatasetMergeError(DatasetError):
    """
    Dataset error type used as the base of the merge errors in this module.

    The class is declared with ``init=False`` and its initializer is
    attached after the class body (see the ``setattr`` call that follows).

    Args:
        msg: optional message passed on to the base class initializer.
        sources: keyword-only; stored in the ``sources`` field as a set.
            A falsy value (``None`` or an empty collection) results in a
            new empty set.
    """

    # Keyword-only field: the given value is converted to a set, and the
    # default is a new empty set.
    sources = field(converter=set, factory=set, kw_only=True)

    def _my__init__(self, msg=None, *, sources=None):
        # Initialize the base exception with the message first ...
        super().__init__(msg)
        # ... then populate the declared field through __attrs_init__
        # (presumably injected by attrs because of init=False - see attrs docs).
        self.__attrs_init__(sources=sources or set())


# Pylint will raise false positive warnings for derived classes,
# when __init__ is defined directly
setattr(DatasetMergeError, "__init__", DatasetMergeError._my__init__)
[docs]
@define(auto_exc=False)
class MismatchingImageInfoError(DatasetMergeError):
    """Merge error for an item whose image size info (``a`` vs ``b``) does not match."""

    item_id: Tuple[str, str]
    a: Tuple[int, int]
    b: Tuple[int, int]

    def __str__(self) -> str:
        # "!s" forces str() conversion, matching "%s" formatting
        return f"Item {self.item_id!s}: mismatching image size info: {self.a!s} vs {self.b!s}"
[docs]
@define(auto_exc=False)
class MismatchingAttributesError(DatasetMergeError):
    """Merge error for an item whose image attribute ``key`` has differing values ``a`` and ``b``."""

    item_id: Tuple[str, str]
    key: str
    a: Any
    b: Any

    def __str__(self) -> str:
        # "!s" forces str() conversion, matching "%s" formatting
        return (
            f"Item {self.item_id!s}: mismatching image attribute "
            f"{self.key!s}: {self.a!s} vs {self.b!s}"
        )
[docs]
class ConflictingCategoriesError(DatasetMergeError):
    """
    Subclass of ``DatasetMergeError`` that adds no fields or behavior.

    Presumably reports categories that conflict during a merge
    (inferred from the name; confirm at the raise sites).
    """
[docs]
@define(auto_exc=False)
class NoMatchingAnnError(DatasetMergeError):
    """Merge error for an annotation without a matching annotation in the listed sources."""

    item_id = field()
    ann = field()

    def __str__(self) -> str:
        # "!s" forces str() conversion, matching "%s" formatting
        return (
            f"Item {self.item_id!s}: can't find matching annotation "
            f"in sources {self.sources!s}, annotation is {self.ann!s}"
        )
[docs]
@define(auto_exc=False)
class NoMatchingItemError(DatasetMergeError):
    """Merge error for an item without a matching item in the listed sources."""

    item_id = field()

    def __str__(self) -> str:
        # "!s" forces str() conversion, matching "%s" formatting
        return f"Item {self.item_id!s}: can't find matching item in sources {self.sources!s}"
[docs]
@define(auto_exc=False)
class FailedLabelVotingError(DatasetMergeError):
    """
    Merge error for a label voting that failed for an item.

    Attributes:
        item_id: id of the item the voting was performed for.
        votes: the collected votes, included in the message as-is.
        ann: optional annotation; when truthy it is mentioned in the message.
    """

    item_id = field()
    votes = field()
    ann = field(default=None)

    def __str__(self) -> str:
        # The leading space keeps the optional clause from fusing with
        # "failed"; the 1-tuple keeps %-formatting correct even when
        # ``ann`` is itself a tuple.
        ann_clause = " for ann %s" % (self.ann,) if self.ann else ""
        return "Item %s: label voting failed%s, votes %s, sources %s" % (
            self.item_id,
            ann_clause,
            self.votes,
            self.sources,
        )
[docs]
@define(auto_exc=False)
class FailedAttrVotingError(DatasetMergeError):
    """
    Merge error for an attribute voting that failed for an annotation.

    Attributes:
        item_id: id of the item the voting was performed for.
        attr: the attribute the voting was about; named in the message.
        votes: the collected votes, included in the message as-is.
        ann: the annotation the attribute belongs to.
    """

    item_id = field()
    attr = field()
    votes = field()
    ann = field()

    def __str__(self) -> str:
        # ``attr`` is a required field, so it is reported too; without it
        # the message does not say which attribute failed.
        return "Item %s: attribute '%s' voting failed for ann %s, votes %s, sources %s" % (
            self.item_id,
            self.attr,
            self.ann,
            self.votes,
            self.sources,
        )
[docs]
@define(auto_exc=False)
class VideoMergeError(DatasetMergeError):
    """Merge error whose message says video merging is not possible for an item."""

    item_id = field()

    def __str__(self) -> str:
        # str() of the whole value, exactly like "%s" applied to a 1-tuple
        return "Item " + str(self.item_id) + ": video merging is not possible"
[docs]
@define(auto_exc=False)
class DatasetValidationError(DatumaroError):
    """Base type for validation anomalies; every instance carries a ``severity``."""

    severity = field()

    def to_dict(self):
        """
        Build a dictionary report with the class name ("anomaly_type"), the
        message ("description") and the name of the severity ("severity").
        """
        report = {}
        report["anomaly_type"] = type(self).__name__
        report["description"] = str(self)
        report["severity"] = self.severity.name
        return report
[docs]
@define(auto_exc=False)
class DatasetItemValidationError(DatasetValidationError):
    """Validation anomaly bound to one item, identified by ``item_id`` and ``subset``."""

    item_id = field()
    subset = field()

    def to_dict(self):
        """Extend the parent's dictionary report with "item_id" and "subset"."""
        report = super().to_dict()
        report.update(item_id=self.item_id, subset=self.subset)
        return report
[docs]
@define(auto_exc=False)
class MissingLabelCategories(DatasetValidationError):
    """Validation anomaly: metadata required for validation is not defined."""

    def __str__(self) -> str:
        text = "Metadata (ex. LabelCategories) should be defined to validate a dataset."
        return text
[docs]
@define(auto_exc=False)
class MissingAnnotation(DatasetItemValidationError):
    """Item anomaly: annotation(s) of type ``ann_type`` are needed but not found."""

    ann_type = field()

    def __str__(self) -> str:
        template = "Item needs '{}' annotation(s), but not found."
        return template.format(self.ann_type)
[docs]
@define(auto_exc=False)
class BrokenAnnotation(DatasetItemValidationError):
    """Item anomaly: some of the needed ``ann_type`` annotation(s) are missed."""

    ann_type = field()

    def __str__(self) -> str:
        template = "Item needs whole '{}' annotation(s), but missed some."
        return template.format(self.ann_type)
[docs]
@define(auto_exc=False)
class EmptyLabel(DatasetItemValidationError):
    """Item anomaly: annotation(s) for the label ``label_name`` are not found."""

    label_name = field()

    def __str__(self) -> str:
        template = "Item should have the label '{}' annotation(s), but not found."
        return template.format(self.label_name)
[docs]
@define(auto_exc=False)
class EmptyCaption(DatasetItemValidationError):
    """Item anomaly: annotation(s) for the caption ``caption_name`` are not found."""

    caption_name = field()

    def __str__(self) -> str:
        template = "Item should have the caption '{}' annotation(s), but not found."
        return template.format(self.caption_name)
[docs]
@define(auto_exc=False)
class MultiLabelAnnotations(DatasetItemValidationError):
    """Item anomaly: several labels are found where a single one is needed."""

    def __str__(self) -> str:
        text = "Item needs a single label but multiple labels are found."
        return text
[docs]
@define(auto_exc=False)
class MissingAttribute(DatasetItemValidationError):
    """Item anomaly: the attribute ``attr_name`` is needed for the label ``label_name``."""

    label_name = field()
    attr_name = field()

    def __str__(self) -> str:
        template = "Item needs the attribute '{attr}' for the label '{label}'."
        return template.format(attr=self.attr_name, label=self.label_name)
[docs]
@define(auto_exc=False)
class UndefinedLabel(DatasetItemValidationError):
    """Item anomaly: the label ``label_name`` is not defined in metadata."""

    label_name = field()

    def __str__(self) -> str:
        template = "Item has the label '{}' which is not defined in metadata."
        return template.format(self.label_name)
[docs]
@define(auto_exc=False)
class UndefinedAttribute(DatasetItemValidationError):
    """Item anomaly: the attribute ``attr_name`` of the label ``label_name`` is not defined in metadata."""

    label_name = field()
    attr_name = field()

    def __str__(self) -> str:
        template = (
            "Item has the attribute '{attr}' for the label '{label}' "
            "which is not defined in metadata."
        )
        return template.format(attr=self.attr_name, label=self.label_name)
[docs]
@define(auto_exc=False)
class LabelDefinedButNotFound(DatasetValidationError):
    """Anomaly: the label ``label_name`` is defined in metadata but not found in the dataset."""

    label_name = field()

    def __str__(self) -> str:
        template = "The label '{}' is defined in metadata, but not found in the dataset."
        return template.format(self.label_name)
[docs]
@define(auto_exc=False)
class AttributeDefinedButNotFound(DatasetValidationError):
    """Anomaly: an attribute of a label is defined in metadata but not found in the dataset."""

    label_name = field()
    attr_name = field()

    def __str__(self) -> str:
        template = (
            "The attribute '{attr}' for the label '{label}' is defined "
            "in metadata, but not found in the dataset."
        )
        return template.format(attr=self.attr_name, label=self.label_name)
[docs]
@define(auto_exc=False)
class OnlyOneLabel(DatasetValidationError):
    """Anomaly: the dataset has only one label, ``label_name``."""

    label_name = field()

    def __str__(self) -> str:
        return "The dataset has only one label '{}'.".format(self.label_name)
[docs]
@define(auto_exc=False)
class OnlyOneAttributeValue(DatasetValidationError):
    """Anomaly: an attribute of a label has only one value in the dataset."""

    label_name = field()
    attr_name = field()
    value = field()

    def __str__(self) -> str:
        template = (
            "The dataset has the only attribute value '{value}' "
            "for the attribute '{attr}' for the label '{label}'."
        )
        return template.format(value=self.value, attr=self.attr_name, label=self.label_name)
[docs]
@define(auto_exc=False)
class FewSamplesInLabel(DatasetValidationError):
    """Anomaly: the number of samples (``count``) in the label ``label_name`` might be too low."""

    label_name = field()
    count = field()

    def __str__(self) -> str:
        template = (
            "The number of samples in the label '{label}' might be too low. "
            "Found '{count}' samples."
        )
        return template.format(label=self.label_name, count=self.count)
[docs]
@define(auto_exc=False)
class FewSamplesInCaption(DatasetValidationError):
    """Anomaly: the number of samples (``count``) in the caption ``caption_name`` might be too low."""

    caption_name = field()
    count = field()

    def __str__(self) -> str:
        template = (
            "The number of samples in the caption '{caption}' might be too low. "
            "Found '{count}' samples."
        )
        return template.format(caption=self.caption_name, count=self.count)
[docs]
@define(auto_exc=False)
class RedundanciesInCaption(DatasetValidationError):
    """Anomaly: reports the ``count`` of ``redundancy_type`` redundancies in a caption."""

    caption_name = field()
    redundancy_type = field()
    count = field()

    def __str__(self) -> str:
        template = (
            "The number of '{kind}' redundancy in the caption '{caption}' "
            "have found '{count}'."
        )
        return template.format(
            kind=self.redundancy_type, caption=self.caption_name, count=self.count
        )
[docs]
@define(auto_exc=False)
class FewSamplesInAttribute(DatasetValidationError):
    """Anomaly: the number of samples for an attribute value of a label might be too low."""

    label_name = field()
    attr_name = field()
    attr_value = field()
    count = field()

    def __str__(self) -> str:
        template = (
            "The number of samples for attribute = value '{attr} = {value}' "
            "for the label '{label}' might be too low. Found '{count}' samples."
        )
        return template.format(
            attr=self.attr_name,
            value=self.attr_value,
            label=self.label_name,
            count=self.count,
        )
[docs]
@define(auto_exc=False)
class ImbalancedLabels(DatasetValidationError):
    """Anomaly: the label distribution is imbalanced."""

    def __str__(self) -> str:
        text = "There is an imbalance in the label distribution."
        return text
[docs]
@define(auto_exc=False)
class ImbalancedCaptions(DatasetValidationError):
    """Anomaly: the caption distribution is imbalanced."""

    def __str__(self) -> str:
        text = "There is an imbalance in the caption distribution."
        return text
[docs]
@define(auto_exc=False)
class ImbalancedAttribute(DatasetValidationError):
    """Anomaly: the distribution of the attribute ``attr_name`` of a label is imbalanced."""

    label_name = field()
    attr_name = field()

    def __str__(self) -> str:
        template = (
            "There is an imbalance in the distribution of attribute "
            "'{attr}' for the label '{label}'."
        )
        return template.format(attr=self.attr_name, label=self.label_name)
[docs]
@define(auto_exc=False)
class ImbalancedDistInLabel(DatasetValidationError):
    """Anomaly: values of ``prop`` are not evenly distributed for the label ``label_name``."""

    label_name = field()
    prop = field()

    def __str__(self) -> str:
        template = "Values of '{prop}' are not evenly distributed for '{label}' label."
        return template.format(prop=self.prop, label=self.label_name)
[docs]
@define(auto_exc=False)
class ImbalancedDistInCaption(DatasetValidationError):
    """Anomaly: values are not evenly distributed for the caption ``caption_name``."""

    caption_name = field()

    def __str__(self) -> str:
        template = "Values are not evenly distributed for '{}' caption."
        return template.format(self.caption_name)
[docs]
@define(auto_exc=False)
class ImbalancedDistInAttribute(DatasetValidationError):
    """Anomaly: values of ``prop`` are not evenly distributed for an attribute value of a label."""

    label_name = field()
    attr_name = field()
    attr_value = field()
    prop = field()

    def __str__(self) -> str:
        template = (
            "Values of '{prop}' are not evenly distributed for "
            "'{attr}' = '{value}' for the '{label}' label."
        )
        return template.format(
            prop=self.prop,
            attr=self.attr_name,
            value=self.attr_value,
            label=self.label_name,
        )
[docs]
@define(auto_exc=False)
class NegativeLength(DatasetItemValidationError):
    """Item anomaly: the property ``prop`` of an annotation should be positive but is ``val``."""

    ann_id = field()
    prop = field()
    val = field()

    def __str__(self) -> str:
        template = (
            "Annotation '{ann_id}' in the item should have a positive "
            "value of '{prop}' but got '{val}'."
        )
        return template.format(ann_id=self.ann_id, prop=self.prop, val=self.val)
[docs]
@define(auto_exc=False)
class InvalidValue(DatasetItemValidationError):
    """Item anomaly: the property ``prop`` of an annotation has an inf or a NaN value."""

    ann_id = field()
    prop = field()

    def __str__(self) -> str:
        template = "Annotation '{ann_id}' in the item has an inf or a NaN value of '{prop}'."
        return template.format(ann_id=self.ann_id, prop=self.prop)
[docs]
@define(auto_exc=False)
class FarFromLabelMean(DatasetItemValidationError):
    """Item anomaly: an annotation's ``prop`` value is too far from the label average."""

    label_name = field()
    ann_id = field()
    prop = field()
    mean = field()
    val = field()

    def __str__(self) -> str:
        template = (
            "Annotation '{ann_id}' in the item has a value of '{prop}' "
            "that is too far from the label average. "
            "(mean of '{label}' label: {mean}, got '{val}')."
        )
        return template.format(
            ann_id=self.ann_id,
            prop=self.prop,
            label=self.label_name,
            mean=self.mean,
            val=self.val,
        )
[docs]
@define(auto_exc=False)
class FarFromCaptionMean(DatasetItemValidationError):
    """
    Item anomaly: a caption value is too far from the caption average.

    Attributes:
        caption_name: name of the caption the value belongs to.
        mean: the caption mean reported in the message.
        upper_bound: upper limit of the accepted range.
        lower_bound: lower limit of the accepted range.
        val: the offending value.
    """

    caption_name = field()
    mean = field()
    upper_bound = field()
    lower_bound = field()
    val = field()

    def __str__(self) -> str:
        return (
            f"Annotation '{self.caption_name}' in "
            "the item is too far from the caption average. (mean of "
            # The trailing space separates this sentence from the next one.
            f"'{self.caption_name}' caption: '{self.mean}', got '{self.val}'). "
            f"It should be between '{self.lower_bound}' and '{self.upper_bound}'."
        )
[docs]
@define(auto_exc=False)
class OutlierInCaption(DatasetItemValidationError):
    """Item anomaly: a caption value is estimated as an outlier based on IQR bounds."""

    caption_name = field()
    lower_bound = field()
    upper_bound = field()
    val = field()

    def __str__(self) -> str:
        template = (
            "Annotation '{caption}' in the item is estimated as outlier "
            "based on IQR. (lower and upper bound of '{caption}' caption: "
            "'{lower}' and '{upper}', got '{val}')."
        )
        return template.format(
            caption=self.caption_name,
            lower=self.lower_bound,
            upper=self.upper_bound,
            val=self.val,
        )
[docs]
@define(auto_exc=False)
class FarFromAttrMean(DatasetItemValidationError):
    """Item anomaly: an annotation's ``prop`` value is too far from the attribute average."""

    label_name = field()
    ann_id = field()
    attr_name = field()
    attr_value = field()
    prop = field()
    mean = field()
    val = field()

    def __str__(self) -> str:
        template = (
            "Annotation '{ann_id}' in the item has a value of '{prop}' "
            "that is too far from the attribute average. "
            "(mean of '{attr}' = '{value}' for the '{label}' label: "
            "{mean}, got '{val}')."
        )
        return template.format(
            ann_id=self.ann_id,
            prop=self.prop,
            attr=self.attr_name,
            value=self.attr_value,
            label=self.label_name,
            mean=self.mean,
            val=self.val,
        )
[docs]
@define(auto_exc=False)
class StreamedItemError(DatasetError):
    """Dataset error whose message says ``__getitem__`` is disabled for StreamDataset."""

    def __str__(self) -> str:
        text = "__getitem__ is disabled for StreamDataset. Use Dataset instead."
        return text