datumaro.components.merge.exact_merge#

Classes

ExactMerge(**options)

Merges several datasets using the "simple" algorithm.

class datumaro.components.merge.exact_merge.ExactMerge(**options)[source]#

Bases: Merger

Merges several datasets using the “simple” algorithm:
  • all datasets should have the same categories

  • items are matched by (id, subset) pairs

  • matching items share the media info available:
    • nothing + nothing = nothing

    • nothing + something = something

    • something A + something B = conflict

  • annotations are matched by value and shared

  • in case of conflicts, throws an error

classmethod merge(sources: Sequence[IDataset]) DatasetItemStorage[source]#
classmethod merge_items(existing_item: DatasetItem, current_item: DatasetItem) DatasetItem[source]#
class datumaro.components.merge.exact_merge.Annotation(*, id: int = 0, attributes: Dict[str, Any] = _Nothing.NOTHING, group: int = 0, object_id: int = -1)[source]#

Bases: object

A base annotation class.

Derived classes must define the ‘_type’ class variable with a value from the AnnotationType enum.

Method generated by attrs for class Annotation.

id: int#
attributes: Dict[str, Any]#
group: int#
object_id: int#
property type: AnnotationType#
as_dict() Dict[str, Any][source]#

Returns a dictionary { field_name: value }

wrap(**kwargs)[source]#

Returns a modified copy of the object

class datumaro.components.merge.exact_merge.DatasetItem(id: str, *, subset: str | None = None, media: str | MediaElement | None = None, annotations: List[Annotation] | None = None, attributes: Dict[str, Any] | None = None)[source]#

Bases: object

id: str#
subset: str#
media: MediaElement | None#
annotations: List[Annotation]#
attributes: Dict[str, Any]#
wrap(**kwargs)[source]#
media_as(t: Type[T]) T[source]#
class datumaro.components.merge.exact_merge.DatasetItemStorage[source]#

Bases: object

is_empty() bool[source]#
put(item: DatasetItem) bool[source]#
get(id: str | DatasetItem, subset: str | None = None, dummy: Any | None = None) DatasetItem | None[source]#
remove(id: str | DatasetItem, subset: str | None = None) bool[source]#
get_subset(name)[source]#
subsets()[source]#
get_annotated_items()[source]#
get_datasetitem_by_path(path)[source]#
get_annotations()[source]#
exception datumaro.components.merge.exact_merge.DatasetMergeError(msg=None, *, sources=None)[source]#

Bases: DatasetError

sources#
class datumaro.components.merge.exact_merge.IDataset[source]#

Bases: object

subsets() Dict[str, IDataset][source]#

Enumerates subsets in the dataset. Each subset can be a dataset itself.

get_subset(name) IDataset[source]#
infos() Dict[str, Any][source]#

Returns meta-info of dataset.

categories() Dict[AnnotationType, Categories][source]#

Returns metainfo about dataset labels.

get(id: str, subset: str | None = None) DatasetItem | None[source]#

Provides random access to dataset items.

media_type() Type[MediaElement][source]#

Returns media type of the dataset items.

All the items are supposed to have the same media type. The media type is supposed to be constant and known immediately after the object construction (i.e. it doesn’t require dataset iteration).

property is_stream: bool#

Boolean indicating whether the dataset is a stream

If the dataset is a stream, the dataset item is generated on demand from its iterator.

class datumaro.components.merge.exact_merge.Image(size: Tuple[int, int] | None = None, ext: str | None = None, *args, **kwargs)[source]#

Bases: MediaElement[ndarray]

classmethod from_file(path: str, *args, **kwargs)[source]#
classmethod from_numpy(data: ndarray | Callable[[], ndarray], *args, **kwargs)[source]#
classmethod from_bytes(data: bytes | Callable[[], bytes], *args, **kwargs)[source]#
property has_size: bool#

Indicates that size info is cached and won’t require image loading

property size: Tuple[int, int] | None#

Returns (H, W)

property ext: str | None#

Media file extension (with the leading dot)

set_crypter(crypter: Crypter)[source]#
class datumaro.components.merge.exact_merge.MediaElement(crypter: ~datumaro.components.crypter.Crypter = <datumaro.components.crypter.NullCrypter object>)[source]#

Bases: Generic[AnyData]

as_dict() Dict[str, Any][source]#
from_self(**kwargs)[source]#
property is_encrypted: bool#
set_crypter(crypter: Crypter)[source]#
property type: MediaType#
property data: AnyData | None#
property has_data: bool#
property bytes: bytes | None#
save(fp: str | ~io.IOBase, crypter: ~datumaro.components.crypter.Crypter = <datumaro.components.crypter.NullCrypter object>)[source]#
class datumaro.components.merge.exact_merge.Merger(**options)[source]#

Bases: IMergerContext, CliPlugin

Merge multiple datasets into one dataset

static merge_infos(sources: Sequence[Dict[str, Any]]) Dict[source]#

Merge the meta-info dictionaries of several datasets into a single dictionary

static merge_categories(sources: Sequence[Dict[AnnotationType, Categories]]) Dict[source]#
static merge_media_types(sources: Sequence[IDataset]) Type[MediaElement] | None[source]#
save_merge_report(path: str) None[source]#
get_any_label_name(ann, label_id)[source]#
exception datumaro.components.merge.exact_merge.MismatchingAttributesError(item_id: Tuple[str, str], key: str, a: Any, b: Any, *, sources=_Nothing.NOTHING)[source]#

Bases: DatasetMergeError

Method generated by attrs for class MismatchingAttributesError.

item_id: Tuple[str, str]#
key: str#
a: Any#
b: Any#
exception datumaro.components.merge.exact_merge.MismatchingImageInfoError(item_id: Tuple[str, str], a: Tuple[int, int], b: Tuple[int, int], *, sources=_Nothing.NOTHING)[source]#

Bases: DatasetMergeError

Method generated by attrs for class MismatchingImageInfoError.

item_id: Tuple[str, str]#
a: Tuple[int, int]#
b: Tuple[int, int]#
exception datumaro.components.merge.exact_merge.MismatchingMediaError(item_id: Tuple[str, str], a: Any, b: Any, *, sources=_Nothing.NOTHING)[source]#

Bases: DatasetMergeError

Method generated by attrs for class MismatchingMediaError.

item_id: Tuple[str, str]#
a: Any#
b: Any#
exception datumaro.components.merge.exact_merge.MismatchingMediaPathError(item_id: Tuple[str, str], a: str, b: str, *, sources=_Nothing.NOTHING)[source]#

Bases: DatasetMergeError

Method generated by attrs for class MismatchingMediaPathError.

item_id: Tuple[str, str]#
a: str#
b: str#
class datumaro.components.merge.exact_merge.MultiframeImage(images: Iterable[str | Image | ndarray | Callable[[str], ndarray]] | None, *, path: str | None = None)[source]#

Bases: MediaElement

property data: List[Image]#
property path: str#

Path to the media file

property ext: str#

Media file extension (with the leading dot)

class datumaro.components.merge.exact_merge.PointCloud(extra_images: List[Image] | Callable[[], List[Image]] | None = None, *args, **kwargs)[source]#

Bases: MediaElement[bytes]

classmethod from_file(path: str, *args, **kwargs)[source]#
classmethod from_bytes(data: bytes | Callable[[], bytes], *args, **kwargs)[source]#
property extra_images: List[Image]#
class datumaro.components.merge.exact_merge.Video(path: str, *, step: int = 1, start_frame: int = 0, end_frame: int | None = None)[source]#

Bases: MediaElement, Iterable[VideoFrame]

close()[source]#
get_frame_data(idx: int) VideoFrame[source]#
property length: int | None#

Returns frame count, if video provides such information.

Note that not all videos provide length / duration metainfo, so the result may be undefined.

Also note that this information may be inaccurate because of variable FPS in the video or incorrect metainfo. The count is only guaranteed to be valid after the video has been completely read once.

The count is affected by the frame filtering options of the object, i.e. start frame, end frame and frame step.

property frame_size: Tuple[int, int]#

Returns (H, W)

save(fp: str | ~io.IOBase, crypter: ~datumaro.components.crypter.Crypter = <datumaro.components.crypter.NullCrypter object>)[source]#
property path: str#

Path to the media file

property ext: str#

Media file extension (with the leading dot)

exception datumaro.components.merge.exact_merge.VideoMergeError(item_id, *, sources=_Nothing.NOTHING)[source]#

Bases: DatasetMergeError

Method generated by attrs for class VideoMergeError.

item_id#