datumaro.plugins.data_formats.datumaro.base#

Classes

`DatumaroBase`(path, *[, subset, stream, ctx])
`JsonReader`(path, subset, rootpath, ...)
`StreamJsonReader`(path, subset, rootpath, ...)

class datumaro.plugins.data_formats.datumaro.base.DatumaroBase(path: str, *, subset: str | None = None, stream: bool = False, ctx: ImportContext | None = None)[source]#

Bases: SubsetBase

LEGACY_VERSION = 'legacy'#

CURRENT_DATUMARO_FORMAT_VERSION = '1.0'#

ALLOWED_VERSIONS = {'1.0', 'legacy'}#

property is_stream: bool#

Boolean indicating whether the dataset is a stream

If the dataset is a stream, the dataset item is generated on demand from its iterator.

infos()[source]#: Returns meta-info of dataset.

categories()[source]#: Returns metainfo about dataset labels.

media_type()[source]#

Returns media type of the dataset items.

All the items are supposed to have the same media type. Supposed to be constant and known immediately after the object construction (i.e. doesn’t require dataset iteration).

ann_types()[source]#: Returns available task type from dataset annotation types.

class datumaro.plugins.data_formats.datumaro.base.AnnotationType(value)[source]#

Bases: IntEnum

An enumeration.

unknown = 0#

label = 1#

mask = 2#

points = 3#

polygon = 4#

polyline = 5#

bbox = 6#

caption = 7#

cuboid_3d = 8#

super_resolution_annotation = 9#

depth_annotation = 10#

ellipse = 11#

hash_key = 12#

feature_vector = 13#

tabular = 14#

rotated_bbox = 15#

cuboid_2d = 16#

class datumaro.plugins.data_formats.datumaro.base.Bbox(x, y, w, h, *args, **kwargs)[source]#

Bases: Shape

Bbox annotation class. This class represents a bounding box defined by its top-left corner (x, y) and its width and height (w, h).

_type#

The type of annotation, set to AnnotationType.bbox.

Type:: AnnotationType

__init__()[source]#: Initializes the Bbox with its coordinates and dimensions.

x()#: Property to get the x-coordinate of the bounding box.

y()#: Property to get the y-coordinate of the bounding box.

w()#: Property to get the width of the bounding box.

h()#: Property to get the height of the bounding box.

get_area()[source]#: Calculates the area of the bounding box.

get_bbox()[source]#: Returns the bounding box coordinates and dimensions.

as_polygon()[source]#: Returns the bounding box as a list of points forming a polygon.

iou()[source]#: Calculates the Intersection over Union (IoU) with another shape.

wrap()[source]#: Creates a new Bbox instance with updated attributes.

Initialize the Bbox with its top-left corner (x, y) and its width and height (w, h).

Parameters:

x (float) – The x-coordinate of the top-left corner.
y (float) – The y-coordinate of the top-left corner.
w (float) – The width of the bounding box.
h (float) – The height of the bounding box.

property x#

Get the x-coordinate of the top-left corner of the bounding box.

Returns:: The x-coordinate of the bounding box.
Return type:: float

property y#

Get the y-coordinate of the top-left corner of the bounding box.

Returns:: The y-coordinate of the bounding box.
Return type:: float

property w#

Get the width of the bounding box.

Returns:: The width of the bounding box.
Return type:: float

property h#

Get the height of the bounding box.

Returns:: The height of the bounding box.
Return type:: float

get_area()[source]#

Calculate the area of the bounding box.

Returns:: The area of the bounding box.
Return type:: float

get_bbox()[source]#

Get the bounding box coordinates and dimensions.

Returns:: The bounding box as [x, y, w, h].
Return type:: List[float]

as_polygon() → List[float][source]#

Convert the bounding box into a polygon representation.

Returns:: The bounding box as a polygon.
Return type:: List[float]

iou(other: Shape) → float | ~typing.Literal[-1][source]#

Calculate the Intersection over Union (IoU) with another shape.

Parameters:: other (Shape) – The other shape to compare with.
Returns:: The IoU value or -1 if not applicable.
Return type:: Union[float, Literal[-1]]

wrap(**kwargs)[source]#

Create a new Bbox instance with updated attributes.

Parameters:

item (Bbox) – The original Bbox instance.
kwargs – Additional attributes to update.

Returns:

A new Bbox instance with updated attributes.

Return type:

Bbox

class datumaro.plugins.data_formats.datumaro.base.Caption(caption, *, id: int = 0, attributes: Dict[str, Any] = _Nothing.NOTHING, group: int = 0, object_id: int = -1)[source]#

Bases: Annotation

Represents arbitrary text annotations.

Method generated by attrs for class Caption.

caption: str#

class datumaro.plugins.data_formats.datumaro.base.Cuboid2D(_points: Iterable[Tuple[float, float]], *args, **kwargs)[source]#

Bases: Annotation

Cuboid2D annotation class. This class represents a 3D bounding box defined by its point coordinates in the following way: [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x5, y5), (x6, y6), (x7, y7), (x8, y8)].

      2---3
     /|  /|
    1-+-4 |
    | 5 + 6
    |/  |/
    8---7

_type#

The type of annotation, set to AnnotationType.cuboid_2d.

Type:: AnnotationType

__init__()[source]#: Initializes the Cuboid2D with its coordinates.

wrap()#: Creates a new Cuboid2D instance with updated attributes.

Method generated by attrs for class Annotation.

points#

label: int | None#

z_order: int#

y_3d: float#

classmethod from_3d(dim: ndarray, location: ndarray, rotation_y: float, P: ndarray, Tr_velo_to_cam: ndarray) → Cuboid2D[source]#

Creates an instance of Cuboid2D class from 3D bounding box parameters.

Parameters:

dim (np.ndarray) – 3 scalars describing length, width and height of a 3D bounding box
location (np.ndarray) – (x, y, z) coordinates of the middle of the top face.
rotation_y (float) – rotation along the Y-axis (from -pi to pi)
P (np.ndarray) – Camera-to-Image transformation matrix (3x4)
Tr_velo_to_cam (np.ndarray) – LiDAR-to-Camera transformation matrix (3x4)

Returns:

Projection points for the given bounding box

Return type:

Cuboid2D

to_3d(P_inv: ndarray) → tuple[ndarray, ndarray, float][source]#

Reconstructs 3D object Velodyne coordinates (dimensions, location and rotation along the Y-axis) from the given Cuboid2D instance.

Parameters:: P_inv (np.ndarray) – Pseudo-inverse of Camera-to-Image projection matrix
Returns:: dimensions, location and rotation along the Y-axis
Return type:: tuple

class datumaro.plugins.data_formats.datumaro.base.Cuboid3d(position, rotation=None, scale=None, **kwargs)[source]#

Bases: Annotation

Cuboid3d annotation class. This class represents a 3D cuboid annotation with position, rotation, and scale.

_type#

The type of annotation, set to AnnotationType.cuboid_3d.

Type:: AnnotationType

_points#

List of float values representing the position, rotation, and scale of the cuboid.

Type:: List[float]

label#

Optional label ID for the cuboid. Default is None.

Type:: Optional[int]

__init__()[source]#: Initializes the Cuboid3d with position, rotation, and scale.

position()#: Property to get and set the position of the cuboid.

rotation()#: Property to get and set the rotation of the cuboid.

scale()#: Property to get and set the scale of the cuboid.

Initialize the Cuboid3d with position, rotation, and scale.

Parameters:

position (List[float]) – List of 3 float values representing the position [x, y, z].
rotation (List[float], optional) – List of 3 float values representing the rotation [rx, ry, rz].
scale (List[float], optional) – List of 3 float values representing the scale [sx, sy, sz].

label: int | None#

property position#

Get the position of the cuboid.

Returns:: The position [x, y, z] of the cuboid.
Return type:: List[float]

property rotation#

Get the rotation of the cuboid.

Returns:: The rotation [rx, ry, rz] of the cuboid.
Return type:: List[float]

property scale#

Get the scale of the cuboid.

Returns:: The scale [sx, sy, sz] of the cuboid.
Return type:: List[float]

exception datumaro.plugins.data_formats.datumaro.base.DatasetImportError[source]#: Bases: DatumaroError

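class datumaro.plugins.data_formats.datumaro.base.DatasetItem(id, *, subset=None, media=None, annotations=None, attributes=None)[source]#

Bases: object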

id: str#

subset: str#

media: MediaElement | None#

annotations: Annotations#

attributes: Dict[str, Any]#

wrap(**kwargs)[source]#

media_as(t: Type[T]) → T[source]#

class datumaro.plugins.data_formats.datumaro.base.DatumPageMapper(path: str)[source]#

Bases: object

Construct page maps for items and annotations from the JSON file, which are used for the stream importer.

It also provides __iter__() to produce item and annotation dictionaries in stream manner after constructing the page map.

get_item_dict(item_key: str) → Dict | None[source]#

iter_item_ids() → Iterator[str][source]#

property dm_format_version: str | None#: Parse “dm_format_version” section from the given JSON file using the stream json parser

property media_type: MediaType | None#: Parse “media_type” section from the given JSON file using the stream json parser

property ann_types: Set[AnnotationType] | None#: Parse “ann_types” section from the given JSON file using the stream json parser

property infos: Dict[str, Any]#: Parse “infos” section from the given JSON file using the stream json parser

property categories: Dict[str, Any]#: Parse “categories” section from the given JSON file using the stream json parser

class datumaro.plugins.data_formats.datumaro.base.DatumaroPath[source]#

Bases: object

IMAGES_DIR = 'images'#

ANNOTATIONS_DIR = 'annotations'#

PCD_DIR = 'point_clouds'#

VIDEO_DIR = 'videos'#

MASKS_DIR = 'masks'#

ANNOTATION_EXT = '.json'#

IMAGE_EXT = '.jpg'#

MASK_EXT = '.png'#

class datumaro.plugins.data_formats.datumaro.base.Ellipse(x1: float, y1: float, x2: float, y2: float, *args, **kwargs)[source]#

Bases: Shape

Ellipse represents an ellipse that is encapsulated by a rectangle.

x1 and y1 represent the top-left coordinate of the encapsulating rectangle
x2 and y2 represent the bottom-right coordinate of the encapsulating rectangle

Parameters:

x1 (float) – left x coordinate of encapsulating rectangle
y1 (float) – top y coordinate of encapsulating rectangle
x2 (float) – right x coordinate of encapsulating rectangle
y2 (float) – bottom y coordinate of encapsulating rectangle

Method generated by attrs for class Shape.

property x1#

property y1#

property x2#

property y2#

property w#

property h#

property c_x#

property c_y#

get_area()[source]#: Calculate the area of the shape.

get_bbox()[source]#

Calculate and return the bounding box of the shape.

Returns:: The bounding box as [x, y, w, h].
Return type:: Tuple[float, float, float, float]

get_points(num_points: int = 720) → List[Tuple[float, float]][source]#

Return points as a list of tuples, e.g. [(x0, y0), (x1, y1), …].

Parameters:: num_points (int) – The number of boundary points of the ellipse. By default, one point is created for every 0.5 degree of interior angle (num_points=720).

as_polygon(num_points: int = 720) → List[float][source]#

Return a polygon as a flat list of coordinates, e.g. [x0, y0, x1, y1, …].

Parameters:: num_points (int) – The number of boundary points of the ellipse. By default, one point is created for every 0.5 degree of interior angle (num_points=720).

iou(other: Shape) → float | ~typing.Literal[-1][source]#

wrap(**kwargs) → Ellipse[source]#: Returns a modified copy of the object

class datumaro.plugins.data_formats.datumaro.base.GroupType(value)[source]#

Bases: IntEnum

An enumeration.

EXCLUSIVE = 0#

INCLUSIVE = 1#

RESTRICTED = 2#

to_str() → str[source]#

classmethod from_str(text: str) → GroupType[source]#

class datumaro.plugins.data_formats.datumaro.base.Image(size: Tuple[int, int] | None = None, ext: str | None = None, *args, **kwargs)[source]#

Bases: MediaElement[ndarray]

classmethod from_file(path: str, *args, **kwargs)[source]#

classmethod from_numpy(data: ndarray | Callable[[], ndarray], *args, **kwargs)[source]#

classmethod from_bytes(data: bytes | Callable[[], bytes], *args, **kwargs)[source]#

property has_size: bool#: Indicates that size info is cached and won’t require image loading

property size: Tuple[int, int] | None#: Returns (H, W)

property ext: str | None#: Media file extension (with the leading dot)

set_crypter(crypter: Crypter)[source]#

class datumaro.plugins.data_formats.datumaro.base.ImportContext(progress_reporter=None, error_policy=None)[source]#

Bases: object

Method generated by attrs for class ImportContext.

progress_reporter: ProgressReporter#

error_policy: ImportErrorPolicy#

class datumaro.plugins.data_formats.datumaro.base.JsonReader(path: str, subset: str, rootpath: str, images_dir: str, pcd_dir: str, video_dir: str, ctx: ImportContext)[source]#: Bases: object

class datumaro.plugins.data_formats.datumaro.base.Label(label, *, id: int = 0, attributes: Dict[str, Any] = _Nothing.NOTHING, group: int = 0, object_id: int = -1)[source]#

Bases: Annotation

Method generated by attrs for class Label.

label: int#

class datumaro.plugins.data_formats.datumaro.base.LabelCategories(items: List[str] = _Nothing.NOTHING, label_groups: List[LabelGroup] = _Nothing.NOTHING, *, attributes: Set[str] = _Nothing.NOTHING)[source]#

Bases: Categories

Method generated by attrs for class LabelCategories.

class Category(name, parent: str = '', attributes: Set[str] = _Nothing.NOTHING)[source]#

Bases: object

Method generated by attrs for class LabelCategories.Category.

name: str#

parent: str#

attributes: Set[str]#

class LabelGroup(name, labels: List[str] = [], group_type: GroupType = GroupType.EXCLUSIVE)[source]#

Bases: object

Method generated by attrs for class LabelCategories.LabelGroup.

name: str#

labels: List[str]#

group_type: GroupType#

items: List[str]#

label_groups: List[LabelGroup]#

classmethod from_iterable(iterable: Iterable[str | Tuple[str] | Tuple[str, str] | Tuple[str, str, List[str]]]) → LabelCategories[source]#

Creates a LabelCategories from iterable.

Parameters:

iterable –

This iterable object can be:

a list of str - will be interpreted as list of Category names
a list of positional arguments - will generate Categories with these arguments

Returns: a LabelCategories object

add(name: str, parent: str | None = None, attributes: Set[str] | None = None) → int[source]#

add_label_group(name: str, labels: List[str], group_type: GroupType) → int[source]#

find(name: str) → Tuple[int | None, Category | None][source]#

class datumaro.plugins.data_formats.datumaro.base.MaskCategories(colormap: Dict[int, Tuple[int, int, int]] = _Nothing.NOTHING, inverse_colormap: Dict[Tuple[int, int, int], int] | None = None, *, attributes: Set[str] = _Nothing.NOTHING)[source]#

Bases: Categories

Describes a color map for segmentation masks.

Method generated by attrs for class MaskCategories.

classmethod generate(size: int = 255, include_background: bool = True) → MaskCategories[source]#

Generates MaskCategories with the specified size.

If include_background is True, the result will include the item: “0: (0, 0, 0)”, which is typically used as a background color.

colormap: Dict[int, Tuple[int, int, int]]#

property inverse_colormap: Dict[Tuple[int, int, int], int]#

class datumaro.plugins.data_formats.datumaro.base.MediaElement(crypter: ~datumaro.components.crypter.Crypter = <datumaro.components.crypter.NullCrypter object>, *args, **kwargs)[source]#

Bases: Generic[AnyData]

as_dict() → Dict[str, Any][source]#

from_self(**kwargs)[source]#

property is_encrypted: bool#

set_crypter(crypter: Crypter)[source]#

property type: MediaType#

property data: AnyData | None#

property has_data: bool#

property bytes: bytes | None#

save(fp: str | ~io.IOBase, crypter: ~datumaro.components.crypter.Crypter = <datumaro.components.crypter.NullCrypter object>)[source]#

class datumaro.plugins.data_formats.datumaro.base.MediaType(value)[source]#

Bases: IntEnum

An enumeration.

NONE = 0#

MEDIA_ELEMENT = 1#

IMAGE = 2#

BYTE_IMAGE = 3#

VIDEO_FRAME = 4#

VIDEO = 5#

POINT_CLOUD = 6#

MULTIFRAME_IMAGE = 7#

ROI_IMAGE = 8#

MOSAIC_IMAGE = 9#

TABLE_ROW = 10#

property media: Type[MediaElement] | None#

exception datumaro.plugins.data_formats.datumaro.base.MediaTypeError[source]#: Bases: DatumaroError

class datumaro.plugins.data_formats.datumaro.base.PointCloud(extra_images: List[Image] | Callable[[], List[Image]] | None = None, *args, **kwargs)[source]#

Bases: MediaElement[bytes]

classmethod from_file(path: str, *args, **kwargs)[source]#

classmethod from_bytes(data: bytes | Callable[[], bytes], *args, **kwargs)[source]#

property extra_images: List[Image]#

class datumaro.plugins.data_formats.datumaro.base.Points(points, visibility: List[IntEnum] | None = None, *, id: int = 0, attributes: Dict[str, Any] = _Nothing.NOTHING, group: int = 0, object_id: int = -1, label=None, z_order: int = 0)[source]#

Bases: Shape

Represents an ordered set of points.

_type#

The type of annotation, set to AnnotationType.points.

Type:: AnnotationType

visibility#

A list indicating the visibility status of each point.

Type:: List[IntEnum]

Nested Class:

Visibility (IntEnum): Enum representing the visibility state of points. It has three states:

absent: Point is absent (0).
hidden: Point is hidden (1).
visible: Point is visible (2).

__attrs_post_init__()[source]#: Validates that the number of points is even.

get_area()[source]#: Returns the area covered by the points, always zero.

get_bbox()[source]#: Returns the bounding box containing all visible or hidden points.

Method generated by attrs for class Points.

class Visibility(value)[source]#

Bases: IntEnum

Enum representing the visibility state of points.

absent#

Point is absent (0).

Type:: int

hidden#

Point is hidden (1).

Type:: int

visible#

Point is visible (2).

Type:: int

absent = 0#

hidden = 1#

visible = 2#

visibility: List[IntEnum]#

get_area()[source]#

Returns the area covered by the points.

Returns:: Always returns 0.
Return type:: int

get_bbox()[source]#

Returns the bounding box containing all visible or hidden points.

Returns:: The bounding box as [x0, y0, width, height].
Return type:: List[float]

class datumaro.plugins.data_formats.datumaro.base.PointsCategories(items: Dict[int, Category] = _Nothing.NOTHING, *, attributes: Set[str] = _Nothing.NOTHING)[source]#

Bases: Categories

Describes (key-)point metainfo such as point names and joints.

Method generated by attrs for class PointsCategories.

class Category(labels: List[str] = _Nothing.NOTHING, joints: Set[Tuple[int, int]] = _Nothing.NOTHING)[source]#

Bases: object

Method generated by attrs for class PointsCategories.Category.

labels: List[str]#

joints: Set[Tuple[int, int]]#

items: Dict[int, Category]#

classmethod from_iterable(iterable: Tuple[int, List[str]] | Tuple[int, List[str], Set[Tuple[int, int]]]) → PointsCategories[source]#

Create PointsCategories from an iterable.

Parameters:

iterable –

An Iterable with the following elements:

a label id
a list of positional arguments for Categories

Returns:

PointsCategories object

Return type:

PointsCategories

add(label_id: int, labels: Iterable[str] | None = None, joints: Iterable[Tuple[int, int]] | None = None)[source]#

class datumaro.plugins.data_formats.datumaro.base.PolyLine(points, *, id: int = 0, attributes: Dict[str, Any] = _Nothing.NOTHING, group: int = 0, object_id: int = -1, label=None, z_order: int = 0)[source]#

Bases: Shape

PolyLine annotation class. This class represents a polyline shape, which is a series of connected line segments.

_type#

The type of annotation, set to AnnotationType.polyline.

Type:: AnnotationType

as_polygon()[source]#: Returns the points of the polyline as a polygon.

get_area()[source]#: Returns the area of the polyline, which is always 0.

Method generated by attrs for class PolyLine.

as_polygon()[source]#: Convert the shape into a polygon representation.

get_area()[source]#: Calculate the area of the shape.

class datumaro.plugins.data_formats.datumaro.base.Polygon(points, *, id: int = 0, attributes: Dict[str, Any] = _Nothing.NOTHING, group: int = 0, object_id: int = -1, label=None, z_order: int = 0)[source]#

Bases: Shape

Polygon annotation class. This class represents a polygon shape defined by a series of points.

_type#

The type of annotation, set to AnnotationType.polygon.

Type:: AnnotationType

__attrs_post_init__()[source]#: Validates the points to ensure they form a valid polygon.

get_area()[source]#: Calculates the area of the polygon using the shoelace formula.

as_polygon()[source]#: Returns the points of the polygon.

__eq__()[source]#: Compares this polygon with another for equality.

_get_shoelace_area()[source]#: Helper method to calculate the area of the polygon using the shoelace formula.

Method generated by attrs for class Polygon.

get_area()[source]#

Calculate the area of the polygon using the shoelace formula.

Returns:: The area of the polygon.
Return type:: float

as_polygon() → List[float][source]#

Return the points of the polygon.

Returns:: The points of the polygon.
Return type:: List[float]

class datumaro.plugins.data_formats.datumaro.base.RleMask(rle, *, id: int = 0, attributes: Dict[str, Any] = _Nothing.NOTHING, group: int = 0, object_id: int = -1, label=None, z_order: int = 0)[source]#

Bases: Mask

An RLE-encoded instance segmentation mask.

Method generated by attrs for class RleMask.

property image: ndarray#

property rle#

get_area() → int[source]#

get_bbox() → Tuple[int, int, int, int][source]#

Computes the bounding box of the mask.

Returns: [x, y, w, h]

class datumaro.plugins.data_formats.datumaro.base.StreamJsonReader(path: str, subset: str, rootpath: str, images_dir: str, pcd_dir: str, video_dir: str, ctx: ImportContext)[source]#: Bases: JsonReader

class datumaro.plugins.data_formats.datumaro.base.SubsetBase(*, length: int | None = None, subset: str | None = None, media_type: ~typing.Type[~datumaro.components.media.MediaElement] = <class 'datumaro.components.media.Image'>, ann_types: ~typing.List[~datumaro.components.annotation.AnnotationType] | None = None, ctx: ~datumaro.components.contexts.importer.ImportContext | None = None)[source]#

Bases: DatasetBase

A base class for simple, single-subset extractors. Should be used by default for user-defined extractors.

infos()[source]#: Returns meta-info of dataset.

categories()[source]#: Returns metainfo about dataset labels.

get(id, subset=None)[source]#: Provides random access to dataset items.

property subset: str#: Subset name of this instance.

class datumaro.plugins.data_formats.datumaro.base.Video(path: str, step: int = 1, start_frame: int = 0, end_frame: int | None = None, *args, **kwargs)[source]#

Bases: MediaElement, Iterable[VideoFrame]

close()[source]#

get_frame_data(idx: int) → VideoFrame[source]#

property length: int | None#

Returns frame count of the closed interval [start_frame, end_frame], if video provides such information.

Note that not all videos provide length / duration metainfo, so the result may be undefined.

Also note that this information may be inaccurate because of variable FPS in the video or incorrect metainfo. The count is only guaranteed to be valid after the video has been completely read once.

The count is affected by the frame filtering options of the object, i.e. start frame, end frame and frame step.

property frame_size: Tuple[int, int]#: Returns (H, W)

save(fp: str | ~io.IOBase, crypter: ~datumaro.components.crypter.Crypter = <datumaro.components.crypter.NullCrypter object>)[source]#

property path: str#: Path to the media file

property ext: str#: Media file extension (with the leading dot)

class datumaro.plugins.data_formats.datumaro.base.VideoFrame(video: Video, index: int)[source]#

Bases: ImageFromNumpy

as_dict() → Dict[str, Any][source]#

property size: Tuple[int, int]#: Returns (H, W)

property index: int#

property video: Video#

property path: str#

from_self(**kwargs)[source]#

datumaro.plugins.data_formats.datumaro.base.parse_json_file(path: str)[source]#