Source code for datumaro.components.errors

# Copyright (C) 2020-2024 Intel Corporation
#
# SPDX-License-Identifier: MIT

from typing import Any, Optional, Tuple

from attrs import define, field, validators

from datumaro.util.attrs_util import has_length, not_empty


class ImmutableObjectError(Exception):
    """Exception whose message says an immutable object cannot be assigned to."""

    def __str__(self):
        message = "Cannot set value of immutable object"
        return message
class DatumaroError(Exception):
    """Base class that most exceptions in this module derive from."""
class VcsError(DatumaroError):
    """Base class for the version-control related errors in this module."""
class ReadonlyDatasetError(VcsError):
    """VCS error whose message reports an update of a read-only dataset."""

    def __str__(self):
        message = "Can't update a read-only dataset"
        return message
class ReadonlyProjectError(VcsError):
    """VCS error whose message reports a change to a read-only project."""

    def __str__(self):
        message = "Can't change a read-only project"
        return message
@define(auto_exc=False)
class UnknownRefError(VcsError):
    """VCS error carrying the ref that could not be parsed."""

    ref = field()

    def __str__(self):
        message = f"Can't parse ref '{self.ref}'"
        return message
class MissingObjectError(VcsError):
    """Marker subclass of VcsError; defines no behaviour of its own."""
class MismatchingObjectError(VcsError):
    """Marker subclass of VcsError; defines no behaviour of its own."""
@define(auto_exc=False)
class UnsavedChangesError(VcsError):
    """VCS error listing the paths that have uncommitted changes."""

    paths = field()

    def __str__(self):
        # The paths are joined with str.join, so they are expected to be strings.
        listed = ", ".join(self.paths)
        return "There are some uncommitted changes: %s" % listed
class ForeignChangesError(VcsError):
    """Marker subclass of VcsError; defines no behaviour of its own."""
class EmptyCommitError(VcsError):
    """Marker subclass of VcsError; defines no behaviour of its own."""
class PathOutsideSourceError(VcsError):
    """Marker subclass of VcsError; defines no behaviour of its own."""
class SourceUrlInsideProjectError(VcsError):
    """VCS error whose message says a source URL points inside the project."""

    def __str__(self):
        message = "Source URL cannot point inside the project"
        return message
class UnexpectedUrlError(VcsError):
    """Marker subclass of VcsError; defines no behaviour of its own."""
class MissingSourceHashError(VcsError):
    """Marker subclass of VcsError; defines no behaviour of its own."""
class PipelineError(DatumaroError):
    """Base class for the pipeline errors in this module."""
class InvalidPipelineError(PipelineError):
    """Base class for the invalid-pipeline errors in this module."""
class EmptyPipelineError(InvalidPipelineError):
    """Marker subclass of InvalidPipelineError; defines no behaviour of its own."""
class MultiplePipelineHeadsError(InvalidPipelineError):
    """Marker subclass of InvalidPipelineError; defines no behaviour of its own."""
class MissingPipelineHeadError(InvalidPipelineError):
    """Marker subclass of InvalidPipelineError; defines no behaviour of its own."""
class InvalidStageError(InvalidPipelineError):
    """Base class for the invalid-stage errors in this module."""
class UnknownStageError(InvalidStageError):
    """Marker subclass of InvalidStageError; defines no behaviour of its own."""
class MigrationError(DatumaroError):
    """Marker subclass of DatumaroError; defines no behaviour of its own."""
class OldProjectError(DatumaroError):
    """Error whose message asks the user to migrate a project made by an old version."""

    def __str__(self):
        # NOTE(review): the message is a triple-quoted literal, so the whitespace
        # around the text is part of the returned string.
        return """ The project you're trying to load was created by the old Datumaro version. Try to migrate the project with 'datum project migrate' and then reload. """
class NotAvailableError(DatumaroError):
    """Marker subclass of DatumaroError; defines no behaviour of its own."""
@define(auto_exc=False)
class ProjectNotFoundError(DatumaroError):
    """Error carrying the path at which no project could be found."""

    path = field()

    def __str__(self):
        message = f"Can't find project at '{self.path}'"
        return message
@define(auto_exc=False)
class ProjectAlreadyExists(DatumaroError):
    """Error carrying the path at which a project already exists."""

    path = field()

    def __str__(self):
        return f"Can't create project: a project already exists at '{self.path}'"
@define(auto_exc=False)
class VcsAlreadyExists(DatumaroError):
    """Error carrying the path at which a version control system already exists."""

    path = field()

    def __str__(self):
        message = (
            f"Can't create project: a version control system already exists at '{self.path}'"
        )
        return message
@define(auto_exc=False)
class UnknownSourceError(DatumaroError):
    """Error carrying the name of a source that is not known."""

    name = field()

    def __str__(self):
        message = f"Unknown source '{self.name}'"
        return message
@define(auto_exc=False)
class UnknownTargetError(DatumaroError):
    """Error carrying the name of a target that is not known."""

    name = field()

    def __str__(self):
        message = f"Unknown target '{self.name}'"
        return message
@define(auto_exc=False)
class UnknownFormatError(DatumaroError):
    """Error carrying the name of a source format that is not known."""

    format = field()

    def __str__(self):
        problem = f"Unknown source format '{self.format}'. "
        advice = (
            "To make it available, add the corresponding Extractor implementation "
            "to the environment"
        )
        return problem + advice
@define(auto_exc=False)
class SourceExistsError(DatumaroError):
    """Error carrying the name of a source that already exists."""

    name = field()

    def __str__(self):
        message = f"Source '{self.name}' already exists"
        return message
class DatasetExportError(DatumaroError):
    """Base class for the dataset export errors in this module."""
@define(auto_exc=False)
class ItemExportError(DatasetExportError):
    """
    Adds item context to an export failure.

    The underlying error is expected to be attached as the `__cause__` member.
    """

    item_id: Tuple[str, str]

    def __str__(self):
        # Wrap in a 1-tuple so that a tuple item_id is formatted as one value.
        args = (self.item_id,)
        return "Failed to export item %s" % args
class AnnotationExportError(ItemExportError):
    """Marker subclass of ItemExportError; defines no behaviour of its own."""
class DatasetImportError(DatumaroError):
    """Base class for the dataset import errors in this module."""
class InvalidAnnotationError(DatasetImportError):
    """
    Basic error for dataset parsing problems.

    The message is expected to describe the problem.
    """
@define(auto_exc=False)
class InvalidFieldError(InvalidAnnotationError):
    """Parsing error naming an annotation field whose value is invalid."""

    name: str = field(validator=[validators.instance_of(str), not_empty])
    """Field name"""

    def __str__(self) -> str:
        message = f"Invalid annotation field '{self.name}' value"
        return message
@define(auto_exc=False)
class InvalidFieldTypeError(InvalidFieldError):
    """Parsing error naming an annotation field whose type is not an expected one."""

    actual: str = field(validator=[validators.instance_of(str), not_empty])
    """Actual type of the field"""

    # Variable-length tuple: __str__ handles both a single and several entries,
    # so the annotation is Tuple[str, ...] rather than the 1-tuple Tuple[str].
    expected: Tuple[str, ...] = field(validator=[validators.instance_of(tuple), not_empty])
    """The list of expected types of the field"""

    def __str__(self) -> str:
        # A single expected type is printed as is; several are listed after "one of".
        if len(self.expected) == 1:
            expected = self.expected[0]
        else:
            expected = "one of " + ", ".join(self.expected)

        return f"Invalid annotation field '{self.name}' type '{self.actual}'. Expected {expected}"
@define(auto_exc=False)
class MissingFieldError(InvalidFieldError):
    """Parsing error naming an annotation field that is missing."""

    def __str__(self) -> str:
        message = f"Missing annotation field '{self.name}'"
        return message
@define(auto_exc=False)
class UndeclaredLabelError(InvalidAnnotationError):
    """Parsing error identifying a label that was not declared."""

    id: str = field(validator=validators.instance_of(str))
    """Index or name"""

    def __str__(self) -> str:
        message = f"Undeclared label '{self.id}'"
        return message
@define(auto_exc=False)
class ItemImportError(DatasetImportError):
    """
    Wrapper around a dataset parsing error that adds item context.

    The wrapped error is expected to be attached as the `__cause__` member.
    """

    item_id: Tuple[Optional[str], Optional[str]] = field(
        validator=[validators.instance_of(tuple), has_length(2)]
    )
    """
    (id, subset) of the item with problem. If id or subset cannot be reported,
    such field is set to None.
    """

    def __str__(self):
        cause = self.__cause__
        return f"Failed to import item {self.item_id}: {cause}"
class AnnotationImportError(ItemImportError):
    """Item import error whose message refers to the item's annotation."""

    def __str__(self):
        cause = self.__cause__
        return f"Failed to import item {self.item_id} annotation: {cause}"
@define(auto_exc=False)
class DatasetNotFoundError(DatasetImportError):
    """Import error carrying the path and format of a dataset that was not found."""

    path: str = field()
    format: str = field()
    # Message template; filled with the "path" and "format" keys in __str__.
    template: str = field(default="Failed to find dataset '{format}' at '{path}'")

    def __str__(self):
        values = {"path": self.path, "format": self.format}
        return self.template.format(**values)
@define(auto_exc=False)
class MultipleFormatsMatchError(DatasetImportError):
    """Import error listing the several formats that the data matches."""

    formats = field()

    def __str__(self):
        # The formats are joined with str.join, so they are expected to be strings.
        listed = ", ".join(self.formats)
        template = (
            "Failed to detect dataset format automatically:"
            " data matches more than one format: %s"
        )
        return template % listed
class NoMatchingFormatsError(DatasetImportError):
    """Import error whose message says no format matched during auto-detection."""

    def __str__(self):
        message = "Failed to detect dataset format automatically: no matching formats found"
        return message
class DatasetError(DatumaroError):
    """Base class for the dataset errors in this module."""
class AnnotationTypeError(DatumaroError):
    """Marker subclass of DatumaroError; defines no behaviour of its own."""
class MediaTypeError(DatumaroError):
    """Marker subclass of DatumaroError; defines no behaviour of its own."""
class MediaShapeError(DatumaroError):
    """Marker subclass of DatumaroError; defines no behaviour of its own."""
class DatasetInfosRedefinedError(DatasetError):
    """Dataset error whose message says infos can be set only once."""

    def __str__(self):
        message = "Infos can only be set once for a dataset"
        return message
class CategoriesRedefinedError(DatasetError):
    """Dataset error whose message says categories can be set only once."""

    def __str__(self):
        message = "Categories can only be set once for a dataset"
        return message
@define(auto_exc=False)
class RepeatedItemError(DatasetError):
    """Dataset error identifying an item that is repeated in the source sequence."""

    item_id = field()

    def __str__(self):
        message = f"Item {self.item_id} is repeated in the source sequence."
        return message
class DatasetQualityError(DatasetError):
    """Base class for the dataset quality errors in this module."""
@define(auto_exc=False)
class AnnotationsTooCloseError(DatasetQualityError):
    """Quality error reporting two annotations of an item and their distance."""

    item_id = field()
    a = field()
    b = field()
    distance = field()

    def __str__(self):
        details = (self.item_id, self.a, self.b, self.distance)
        return "Item %s: annotations are too close: %s, %s, distance = %s" % details
@define(auto_exc=False)
class WrongGroupError(DatasetQualityError):
    """Quality error reporting found and expected labels of an annotation group."""

    item_id = field()
    # The converters turn whatever is passed in into a set / set / list.
    found = field(converter=set)
    expected = field(converter=set)
    group = field(converter=list)

    def __str__(self):
        template = (
            "Item %s: annotation group has wrong labels: "
            "found %s, expected %s, group %s"
        )
        details = (self.item_id, self.found, self.expected, self.group)
        return template % details
@define(auto_exc=False, init=False)
class DatasetMergeError(DatasetError):
    """
    Base class for the dataset merge errors in this module.

    Takes an optional message plus a keyword-only `sources` argument. The
    initializer is `_my__init__`, attached as `__init__` right after the
    class definition.
    """

    # Keyword-only; the converter turns any given value into a set.
    sources = field(converter=set, factory=set, kw_only=True)

    def _my__init__(self, msg=None, *, sources=None):
        # Pass the message to the parent initializer, then let the attrs
        # initializer set `sources`.
        super().__init__(msg)
        # A falsy `sources` (None or empty) is replaced with a new empty set.
        self.__attrs_init__(sources=sources or set())


# Pylint will raise false positive warnings for derived classes,
# when __init__ is defined directly
setattr(DatasetMergeError, "__init__", DatasetMergeError._my__init__)
@define(auto_exc=False)
class MismatchingImageInfoError(DatasetMergeError):
    """Merge error reporting two differing image size values for an item."""

    item_id: Tuple[str, str]
    a: Tuple[int, int]
    b: Tuple[int, int]

    def __str__(self):
        details = (self.item_id, self.a, self.b)
        return "Item %s: mismatching image size info: %s vs %s" % details
@define(auto_exc=False)
class MismatchingMediaPathError(DatasetMergeError):
    """Merge error reporting two differing media path values for an item."""

    item_id: Tuple[str, str]
    a: str
    b: str

    def __str__(self):
        details = (self.item_id, self.a, self.b)
        return "Item %s: mismatching media path info: %s vs %s" % details
@define(auto_exc=False)
class MismatchingMediaError(DatasetMergeError):
    """Merge error reporting two differing media values for an item."""

    item_id: Tuple[str, str]
    a: Any
    b: Any

    def __str__(self):
        details = (self.item_id, self.a, self.b)
        return "Item %s: mismatching media info: %s vs %s" % details
@define(auto_exc=False)
class MismatchingAttributesError(DatasetMergeError):
    """Merge error reporting two differing values of one item attribute."""

    item_id: Tuple[str, str]
    key: str
    a: Any
    b: Any

    def __str__(self):
        details = (self.item_id, self.key, self.a, self.b)
        return "Item %s: mismatching image attribute %s: %s vs %s" % details
class ConflictingCategoriesError(DatasetMergeError):
    """Marker subclass of DatasetMergeError; defines no behaviour of its own."""
@define(auto_exc=False)
class NoMatchingAnnError(DatasetMergeError):
    """Merge error reporting an annotation with no match in the sources."""

    item_id = field()
    ann = field()

    def __str__(self):
        template = (
            "Item %s: can't find matching annotation "
            "in sources %s, annotation is %s"
        )
        details = (self.item_id, self.sources, self.ann)
        return template % details
@define(auto_exc=False)
class NoMatchingItemError(DatasetMergeError):
    """Merge error reporting an item with no match in the sources."""

    item_id = field()

    def __str__(self):
        details = (self.item_id, self.sources)
        return "Item %s: can't find matching item in sources %s" % details
@define(auto_exc=False)
class FailedLabelVotingError(DatasetMergeError):
    """Merge error reporting a failed label vote, with the votes and sources."""

    item_id = field()
    votes = field()
    ann = field(default=None)

    def __str__(self):
        # The annotation part is optional. It carries its own leading space so
        # the message reads "... failed for ann X" instead of "... failedfor ann X".
        ann_part = " for ann %s" % self.ann if self.ann else ""
        return "Item %s: label voting failed%s, votes %s, sources %s" % (
            self.item_id,
            ann_part,
            self.votes,
            self.sources,
        )
@define(auto_exc=False)
class FailedAttrVotingError(DatasetMergeError):
    """Merge error reporting a failed attribute vote for an annotation."""

    item_id = field()
    attr = field()  # stored, but not included in the message
    votes = field()
    ann = field()

    def __str__(self):
        template = (
            "Item %s: attribute voting failed "
            "for ann %s, votes %s, sources %s"
        )
        details = (self.item_id, self.ann, self.votes, self.sources)
        return template % details
@define(auto_exc=False)
class VideoMergeError(DatasetMergeError):
    """Merge error whose message says video merging is not possible for an item."""

    item_id = field()

    def __str__(self):
        # Wrap in a 1-tuple so that a tuple item_id is formatted as one value.
        args = (self.item_id,)
        return "Item %s: video merging is not possible" % args
@define(auto_exc=False)
class DatasetValidationError(DatumaroError):
    """Base class for the validation anomalies in this module."""

    severity = field()

    def to_dict(self):
        """
        Return a dict with the class name, the message and the severity name.

        Reads `self.severity.name`, so `severity` is presumably an Enum-like
        value - TODO confirm against callers.
        """
        report = {}
        report["anomaly_type"] = self.__class__.__name__
        report["description"] = str(self)
        report["severity"] = self.severity.name
        return report
@define(auto_exc=False)
class DatasetItemValidationError(DatasetValidationError):
    """Base class for the validation anomalies that refer to one dataset item."""

    item_id = field()
    subset = field()

    def to_dict(self):
        """Return the parent's dict with "item_id" and "subset" entries added."""
        report = super().to_dict()
        report.update(item_id=self.item_id, subset=self.subset)
        return report
@define(auto_exc=False)
class MissingLabelCategories(DatasetValidationError):
    """Validation anomaly: metadata needed for validation is not defined."""

    def __str__(self):
        message = "Metadata (ex. LabelCategories) should be defined to validate a dataset."
        return message
@define(auto_exc=False)
class MissingAnnotation(DatasetItemValidationError):
    """Validation anomaly: an item has no annotation of the needed type."""

    ann_type = field()

    def __str__(self):
        message = f"Item needs '{self.ann_type}' annotation(s), but not found."
        return message
@define(auto_exc=False)
class BrokenAnnotation(DatasetItemValidationError):
    """Validation anomaly: an item misses some annotations of the needed type."""

    ann_type = field()

    def __str__(self):
        message = f"Item needs whole '{self.ann_type}' annotation(s), but missed some."
        return message
@define(auto_exc=False)
class EmptyLabel(DatasetItemValidationError):
    """Validation anomaly: an item has no annotation for the given label."""

    label_name = field()

    def __str__(self):
        message = f"Item should have the label '{self.label_name}' annotation(s), but not found."
        return message
@define(auto_exc=False)
class EmptyCaption(DatasetItemValidationError):
    """Validation anomaly: an item has no annotation for the given caption."""

    caption_name = field()

    def __str__(self):
        requirement = f"Item should have the caption '{self.caption_name}' annotation(s), "
        return requirement + "but not found."
@define(auto_exc=False)
class MultiLabelAnnotations(DatasetItemValidationError):
    """Validation anomaly: an item has several labels where one is needed."""

    def __str__(self):
        message = "Item needs a single label but multiple labels are found."
        return message
@define(auto_exc=False)
class MissingAttribute(DatasetItemValidationError):
    """Validation anomaly: an item lacks an attribute needed for a label."""

    label_name = field()
    attr_name = field()

    def __str__(self):
        message = (
            f"Item needs the attribute '{self.attr_name}' for the label '{self.label_name}'."
        )
        return message
@define(auto_exc=False)
class UndefinedLabel(DatasetItemValidationError):
    """Validation anomaly: an item has a label that the metadata does not define."""

    label_name = field()

    def __str__(self):
        message = f"Item has the label '{self.label_name}' which is not defined in metadata."
        return message
@define(auto_exc=False)
class UndefinedAttribute(DatasetItemValidationError):
    """Validation anomaly: an item has an attribute that the metadata does not define."""

    label_name = field()
    attr_name = field()

    def __str__(self):
        subject = f"Item has the attribute '{self.attr_name}' for the "
        rest = f"label '{self.label_name}' which is not defined in metadata."
        return subject + rest
@define(auto_exc=False)
class LabelDefinedButNotFound(DatasetValidationError):
    """Validation anomaly: a label is defined in metadata but absent from the dataset."""

    label_name = field()

    def __str__(self):
        subject = f"The label '{self.label_name}' is defined in "
        return subject + "metadata, but not found in the dataset."
@define(auto_exc=False)
class AttributeDefinedButNotFound(DatasetValidationError):
    """Validation anomaly: an attribute is defined in metadata but absent from the dataset."""

    label_name = field()
    attr_name = field()

    def __str__(self):
        parts = [
            f"The attribute '{self.attr_name}' for the label ",
            f"'{self.label_name}' is defined in metadata, but not ",
            "found in the dataset.",
        ]
        return "".join(parts)
@define(auto_exc=False)
class OnlyOneLabel(DatasetValidationError):
    """Validation anomaly: the dataset contains only one label."""

    label_name = field()

    def __str__(self):
        message = f"The dataset has only one label '{self.label_name}'."
        return message
@define(auto_exc=False)
class OnlyOneAttributeValue(DatasetValidationError):
    """Validation anomaly: an attribute of a label has only one value in the dataset."""

    label_name = field()
    attr_name = field()
    value = field()

    def __str__(self):
        parts = [
            "The dataset has the only attribute value ",
            f"'{self.value}' for the attribute '{self.attr_name}' for the ",
            f"label '{self.label_name}'.",
        ]
        return "".join(parts)
@define(auto_exc=False)
class FewSamplesInLabel(DatasetValidationError):
    """Validation anomaly: a label may have too few samples."""

    label_name = field()
    count = field()

    def __str__(self):
        subject = f"The number of samples in the label '{self.label_name}'"
        finding = f" might be too low. Found '{self.count}' samples."
        return subject + finding
@define(auto_exc=False)
class FewSamplesInCaption(DatasetValidationError):
    """Validation anomaly: a caption may have too few samples."""

    caption_name = field()
    count = field()

    def __str__(self):
        subject = f"The number of samples in the caption '{self.caption_name}'"
        finding = f" might be too low. Found '{self.count}' samples."
        return subject + finding
@define(auto_exc=False)
class RedundanciesInCaption(DatasetValidationError):
    """Validation anomaly: counts redundancies of a given type in a caption."""

    caption_name = field()
    redundancy_type = field()
    count = field()

    def __str__(self):
        subject = (
            f"The number of '{self.redundancy_type}' redundancy in the caption '{self.caption_name}'"
        )
        finding = f" have found '{self.count}'."
        return subject + finding
@define(auto_exc=False)
class FewSamplesInAttribute(DatasetValidationError):
    """Validation anomaly: an attribute value of a label may have too few samples."""

    label_name = field()
    attr_name = field()
    attr_value = field()
    count = field()

    def __str__(self):
        parts = [
            "The number of samples for attribute = value ",
            f"'{self.attr_name} = {self.attr_value}' for the label ",
            f"'{self.label_name}' might be too low. ",
            f"Found '{self.count}' samples.",
        ]
        return "".join(parts)
@define(auto_exc=False)
class ImbalancedLabels(DatasetValidationError):
    """Validation anomaly: the label distribution is imbalanced."""

    def __str__(self):
        message = "There is an imbalance in the label distribution."
        return message
@define(auto_exc=False)
class ImbalancedCaptions(DatasetValidationError):
    """Validation anomaly: the caption distribution is imbalanced."""

    def __str__(self):
        message = "There is an imbalance in the caption distribution."
        return message
@define(auto_exc=False)
class ImbalancedAttribute(DatasetValidationError):
    """Validation anomaly: the distribution of a label's attribute is imbalanced."""

    label_name = field()
    attr_name = field()

    def __str__(self):
        subject = "There is an imbalance in the distribution of attribute"
        target = f" '{self.attr_name}' for the label '{self.label_name}'."
        return subject + target
@define(auto_exc=False)
class ImbalancedDistInLabel(DatasetValidationError):
    """Validation anomaly: values of a property are unevenly distributed for a label."""

    label_name = field()
    prop = field()

    def __str__(self):
        subject = f"Values of '{self.prop}' are not evenly "
        target = f"distributed for '{self.label_name}' label."
        return subject + target
@define(auto_exc=False)
class ImbalancedDistInCaption(DatasetValidationError):
    """Validation anomaly: values are unevenly distributed for a caption."""

    caption_name = field()

    def __str__(self):
        message = f"Values are not evenly distributed for '{self.caption_name}' caption."
        return message
@define(auto_exc=False)
class ImbalancedDistInAttribute(DatasetValidationError):
    """Validation anomaly: values of a property are unevenly distributed for an attribute value."""

    label_name = field()
    attr_name = field()
    attr_value = field()
    prop = field()

    def __str__(self):
        parts = [
            f"Values of '{self.prop}' are not evenly ",
            f"distributed for '{self.attr_name}' = '{self.attr_value}' for ",
            f"the '{self.label_name}' label.",
        ]
        return "".join(parts)
@define(auto_exc=False)
class NegativeLength(DatasetItemValidationError):
    """Validation anomaly: an annotation property that should be positive is not."""

    ann_id = field()
    prop = field()
    val = field()

    def __str__(self):
        parts = [
            f"Annotation '{self.ann_id}' in ",
            "the item should have a positive value of ",
            f"'{self.prop}' but got '{self.val}'.",
        ]
        return "".join(parts)
@define(auto_exc=False)
class InvalidValue(DatasetItemValidationError):
    """Validation anomaly: an annotation property holds an inf or NaN value."""

    ann_id = field()
    prop = field()

    def __str__(self):
        parts = [
            f"Annotation '{self.ann_id}' in ",
            "the item has an inf or a NaN value of ",
            f"'{self.prop}'.",
        ]
        return "".join(parts)
@define(auto_exc=False)
class FarFromLabelMean(DatasetItemValidationError):
    """Validation anomaly: an annotation property is too far from the label average."""

    label_name = field()
    ann_id = field()
    prop = field()
    mean = field()
    val = field()

    def __str__(self):
        parts = [
            f"Annotation '{self.ann_id}' in ",
            f"the item has a value of '{self.prop}' that ",
            "is too far from the label average. (mean of ",
            f"'{self.label_name}' label: {self.mean}, got '{self.val}').",
        ]
        return "".join(parts)
@define(auto_exc=False)
class FarFromCaptionMean(DatasetItemValidationError):
    """Validation anomaly: a caption value is too far from the caption average."""

    caption_name = field()
    mean = field()
    upper_bound = field()
    lower_bound = field()
    val = field()

    def __str__(self):
        return (
            f"Annotation '{self.caption_name}' in "
            "the item is too far from the caption average. (mean of "
            f"'{self.caption_name}' caption: '{self.mean}', got '{self.val}')."
            # Leading space separates this sentence from the previous one;
            # the pieces are concatenated without any separator.
            f" It should be between '{self.lower_bound}' and '{self.upper_bound}'."
        )
@define(auto_exc=False)
class OutlierInCaption(DatasetItemValidationError):
    """Validation anomaly: a caption value falls outside the reported IQR bounds."""

    caption_name = field()
    lower_bound = field()
    upper_bound = field()
    val = field()

    def __str__(self):
        parts = [
            f"Annotation '{self.caption_name}' in ",
            "the item is estimated as outlier based on IQR. (lower and upper bound of ",
            f"'{self.caption_name}' caption: '{self.lower_bound}' and '{self.upper_bound}', got '{self.val}').",
        ]
        return "".join(parts)
@define(auto_exc=False)
class FarFromAttrMean(DatasetItemValidationError):
    """Validation anomaly: an annotation property is too far from the attribute average."""

    label_name = field()
    ann_id = field()
    attr_name = field()
    attr_value = field()
    prop = field()
    mean = field()
    val = field()

    def __str__(self):
        parts = [
            f"Annotation '{self.ann_id}' in the ",
            f"item has a value of '{self.prop}' that ",
            "is too far from the attribute average. (mean of ",
            f"'{self.attr_name}' = '{self.attr_value}' for the ",
            f"'{self.label_name}' label: {self.mean}, got '{self.val}').",
        ]
        return "".join(parts)
@define(auto_exc=False)
class StreamedItemError(DatasetError):
    """Dataset error whose message says __getitem__ is disabled for StreamDataset."""

    def __str__(self):
        message = "__getitem__ is disabled for StreamDataset. Use Dataset instead."
        return message