datumaro.components.dataset_item_storage#

Classes

DatasetItemStorage()

DatasetItemStorageDatasetView(parent, infos, ...)

ItemStatus(value)

An enumeration.

class datumaro.components.dataset_item_storage.ItemStatus(value)[source]#

Bases: Enum

An enumeration.

added = 1#
modified = 2#
removed = 3#
class datumaro.components.dataset_item_storage.DatasetItemStorage[source]#

Bases: object

is_empty() → bool[source]#
put(item: DatasetItem) → bool[source]#
get(id: str | DatasetItem, subset: str | None = None, dummy: Any | None = None) → DatasetItem | None[source]#
remove(id: str | DatasetItem, subset: str | None = None) → bool[source]#
get_subset(name)[source]#
subsets()[source]#
get_annotated_items()[source]#
get_datasetitem_by_path(path)[source]#
get_annotations()[source]#
class datumaro.components.dataset_item_storage.DatasetItemStorageDatasetView(parent: DatasetItemStorage, infos: Dict[str, Any], categories: Dict[AnnotationType, Categories], media_type: Type[MediaElement] | None, ann_types: Set[AnnotationType] | None)[source]#

Bases: IDataset

class Subset(parent: DatasetItemStorageDatasetView, name: str)[source]#

Bases: IDataset

put(item)[source]#
get(id, subset=None)[source]#

Provides random access to dataset items.

remove(id, subset=None)[source]#
get_subset(name)[source]#
subsets()[source]#

Enumerates subsets in the dataset. Each subset can be a dataset itself.

infos()[source]#

Returns the meta-info of the dataset.

categories()[source]#

Returns meta-info about the dataset labels.

media_type()[source]#

Returns media type of the dataset items.

All items are expected to have the same media type. The media type is expected to be constant and known immediately after object construction (i.e. it doesn’t require dataset iteration).

ann_types()[source]#

Returns the available task type, determined from the dataset’s annotation types.

infos()[source]#

Returns the meta-info of the dataset.

categories()[source]#

Returns meta-info about the dataset labels.

get_subset(name)[source]#
subsets()[source]#

Enumerates subsets in the dataset. Each subset can be a dataset itself.

get(id, subset=None)[source]#

Provides random access to dataset items.

media_type()[source]#

Returns media type of the dataset items.

All items are expected to have the same media type. The media type is expected to be constant and known immediately after object construction (i.e. it doesn’t require dataset iteration).

ann_types()[source]#

Returns the available task type, determined from the dataset’s annotation types.

class datumaro.components.dataset_item_storage.AnnotationType(value)[source]#

Bases: IntEnum

An enumeration.

unknown = 0#
label = 1#
mask = 2#
points = 3#
polygon = 4#
polyline = 5#
bbox = 6#
caption = 7#
cuboid_3d = 8#
super_resolution_annotation = 9#
depth_annotation = 10#
ellipse = 11#
hash_key = 12#
feature_vector = 13#
tabular = 14#
rotated_bbox = 15#
cuboid_2d = 16#
class datumaro.components.dataset_item_storage.DatasetItem(id: str, *, subset: str | None = None, media: str | MediaElement | None = None, annotations: List[Annotation] | None = None, attributes: Dict[str, Any] | None = None)[source]#

Bases: object

id: str#
subset: str#
media: MediaElement | None#
annotations: Annotations#
attributes: Dict[str, Any]#
wrap(**kwargs)[source]#
media_as(t: Type[T]) → T[source]#
class datumaro.components.dataset_item_storage.Enum(value)[source]#

Bases: object

Generic enumeration.

Derive from this class to define new enumerations.

name#

The name of the Enum member.

value#

The value of the Enum member.

class datumaro.components.dataset_item_storage.IDataset[source]#

Bases: object

subsets() → Dict[str, IDataset][source]#

Enumerates subsets in the dataset. Each subset can be a dataset itself.

get_subset(name) → IDataset[source]#
infos() → Dict[str, Any][source]#

Returns the meta-info of the dataset.

categories() → Dict[AnnotationType, Categories][source]#

Returns meta-info about the dataset labels.

get(id: str, subset: str | None = None) → DatasetItem | None[source]#

Provides random access to dataset items.

media_type() → Type[MediaElement][source]#

Returns media type of the dataset items.

All items are expected to have the same media type. The media type is expected to be constant and known immediately after object construction (i.e. it doesn’t require dataset iteration).

ann_types() → List[AnnotationType][source]#

Returns the available task type, determined from the dataset’s annotation types.

property is_stream: bool#

Boolean indicating whether the dataset is a stream.

If the dataset is a stream, the dataset item is generated on demand from its iterator.

class datumaro.components.dataset_item_storage.MediaElement(crypter: Crypter = <datumaro.components.crypter.NullCrypter object>, *args, **kwargs)[source]#

Bases: Generic[AnyData]

as_dict() → Dict[str, Any][source]#
from_self(**kwargs)[source]#
property is_encrypted: bool#
set_crypter(crypter: Crypter)[source]#
property type: MediaType#
property data: AnyData | None#
property has_data: bool#
property bytes: bytes | None#
save(fp: str | IOBase, crypter: Crypter = <datumaro.components.crypter.NullCrypter object>)[source]#
class datumaro.components.dataset_item_storage.auto[source]#

Bases: object

Instances are replaced with an appropriate value in Enum class suites.

value = <object object>#
datumaro.components.dataset_item_storage.copy(x)[source]#

Shallow copy operation on arbitrary Python objects.

See the module’s __doc__ string for more info.