datumaro.components.abstracts.merger#

Classes

IMatcherContext()

IMergerContext()

class datumaro.components.abstracts.merger.IMatcherContext[source]#

Bases: ABC

abstract get_any_label_name(ann: Annotation, label_id: int) → str[source]#
class datumaro.components.abstracts.merger.IMergerContext[source]#

Bases: IMatcherContext

abstract merge_infos(sources: Sequence[IDataset]) → Dict[source]#
abstract merge_categories(sources: Sequence[IDataset]) → Dict[source]#
abstract merge_media_types(sources: Sequence[IDataset]) → Type[MediaElement] | None[source]#
abstract merge(sources: Sequence[IDataset]) → DatasetItemStorage[source]#
class datumaro.components.abstracts.merger.ABC[source]#

Bases: object

Helper class that provides a standard way to create an ABC using inheritance.

class datumaro.components.abstracts.merger.Annotation(*, id: int = 0, attributes: Dict[str, Any] = _Nothing.NOTHING, group: int = 0, object_id: int = -1)[source]#

Bases: object

A base annotation class.

Derived classes must define the ‘_type’ class variable with a value from the AnnotationType enum.

Method generated by attrs for class Annotation.

id: int#
attributes: Dict[str, Any]#
group: int#
object_id: int#
property type: AnnotationType#
as_dict() → Dict[str, Any][source]#

Returns a dictionary { field_name: value }

wrap(**kwargs)[source]#

Returns a modified copy of the object

class datumaro.components.abstracts.merger.DatasetItemStorage[source]#

Bases: object

is_empty() → bool[source]#
put(item: DatasetItem) → bool[source]#
get(id: str | DatasetItem, subset: str | None = None, dummy: Any | None = None) → DatasetItem | None[source]#
remove(id: str | DatasetItem, subset: str | None = None) → bool[source]#
get_subset(name)[source]#
subsets()[source]#
get_annotated_items()[source]#
get_datasetitem_by_path(path)[source]#
get_annotations()[source]#
class datumaro.components.abstracts.merger.DatasetItemStorageDatasetView(parent: DatasetItemStorage, infos: Dict[str, Any], categories: Dict[AnnotationType, Categories], media_type: Type[MediaElement] | None, ann_types: Set[AnnotationType] | None)[source]#

Bases: IDataset

class Subset(parent: DatasetItemStorageDatasetView, name: str)[source]#

Bases: IDataset

put(item)[source]#
get(id, subset=None)[source]#

Provides random access to dataset items.

remove(id, subset=None)[source]#
get_subset(name)[source]#
subsets()[source]#

Enumerates subsets in the dataset. Each subset can be a dataset itself.

infos()[source]#

Returns meta-info of dataset.

categories()[source]#

Returns metainfo about dataset labels.

media_type()[source]#

Returns media type of the dataset items.

All items are expected to have the same media type. The media type is expected to be constant and known immediately after object construction (i.e. it doesn’t require dataset iteration).

ann_types()[source]#

Returns available task type from dataset annotation types.

infos()[source]#

Returns meta-info of dataset.

categories()[source]#

Returns metainfo about dataset labels.

get_subset(name)[source]#
subsets()[source]#

Enumerates subsets in the dataset. Each subset can be a dataset itself.

get(id, subset=None)[source]#

Provides random access to dataset items.

media_type()[source]#

Returns media type of the dataset items.

All items are expected to have the same media type. The media type is expected to be constant and known immediately after object construction (i.e. it doesn’t require dataset iteration).

ann_types()[source]#

Returns available task type from dataset annotation types.

class datumaro.components.abstracts.merger.IDataset[source]#

Bases: object

subsets() → Dict[str, IDataset][source]#

Enumerates subsets in the dataset. Each subset can be a dataset itself.

get_subset(name) → IDataset[source]#
infos() → Dict[str, Any][source]#

Returns meta-info of dataset.

categories() → Dict[AnnotationType, Categories][source]#

Returns metainfo about dataset labels.

get(id: str, subset: str | None = None) → DatasetItem | None[source]#

Provides random access to dataset items.

media_type() → Type[MediaElement][source]#

Returns media type of the dataset items.

All items are expected to have the same media type. The media type is expected to be constant and known immediately after object construction (i.e. it doesn’t require dataset iteration).

ann_types() → List[AnnotationType][source]#

Returns available task type from dataset annotation types.

property is_stream: bool#

Boolean indicating whether the dataset is a stream.

If the dataset is a stream, its items are generated on demand from its iterator.

class datumaro.components.abstracts.merger.MediaElement(crypter: ~datumaro.components.crypter.Crypter = <datumaro.components.crypter.NullCrypter object>)[source]#

Bases: Generic[AnyData]

as_dict() → Dict[str, Any][source]#
from_self(**kwargs)[source]#
property is_encrypted: bool#
set_crypter(crypter: Crypter)[source]#
property type: MediaType#
property data: AnyData | None#
property has_data: bool#
property bytes: bytes | None#
save(fp: str | ~io.IOBase, crypter: ~datumaro.components.crypter.Crypter = <datumaro.components.crypter.NullCrypter object>)[source]#
datumaro.components.abstracts.merger.abstractmethod(funcobj)[source]#

A decorator indicating abstract methods.

Requires that the metaclass is ABCMeta or derived from it. A class that has a metaclass derived from ABCMeta cannot be instantiated unless all of its abstract methods are overridden. The abstract methods can be called using any of the normal ‘super’ call mechanisms. abstractmethod() may be used to declare abstract methods for properties and descriptors.

Usage:

    class C(metaclass=ABCMeta):
        @abstractmethod
        def my_abstract_method(self, ...):