Removes the "data/" prefix from the path


YoloLooseBase(config_path[, image_info, ...])

YoloStrictBase(config_path[, image_info, ...])

YoloUltralyticsBase(config_path[, ...])

class datumaro.plugins.data_formats.yolo.base.YoloStrictBase(config_path: str, image_info: None | str | Dict[str, Tuple[int, int]] = None, *, subset: str | None = None, ctx: ImportContext | None = None, **kwargs)[source]#

Bases: SubsetBase

static parse_image_info(rootpath: str, image_info: None | str | Dict[str, Tuple[int, int]] = None) Dict[str, Tuple[int, int]][source]#
classmethod name_from_path(path: str) str[source]#

Obtains <image_name> from a path like [data/]<subset>_obj/<image_name>.ext

<image_name> can be <a/b/c/filename>, so it is more involved than just calling “basename()”.

property is_stream: bool#

Boolean indicating whether the dataset is a stream

If the dataset is a stream, the dataset item is generated on demand from its iterator.

class datumaro.plugins.data_formats.yolo.base.YoloLooseBase(config_path: str, image_info: None | str | Dict[str, Tuple[int, int]] = None, urls: List[str] | None = None, *, subset: str | None = None, ctx: ImportContext | None = None)[source]#

Bases: SubsetBase

META_FILE = 'obj.names'#
property is_stream: bool#

Boolean indicating whether the dataset is a stream

If the dataset is a stream, the dataset item is generated on demand from its iterator.

class datumaro.plugins.data_formats.yolo.base.YoloUltralyticsBase(config_path: str, image_info: None | str | Dict[str, Tuple[int, int]] = None, urls: List[str] | None = None, **kwargs)[source]#

Bases: YoloLooseBase

META_FILE = 'data.yaml'#
class datumaro.plugins.data_formats.yolo.base.Annotation(*, id: int = 0, attributes: Dict[str, Any] = _Nothing.NOTHING, group: int = 0, object_id: int = -1)[source]#

Bases: object

A base annotation class.

Derived classes must define the ‘_type’ class variable with a value from the AnnotationType enum.

Method generated by attrs for class Annotation.

id: int#
attributes: Dict[str, Any]#
group: int#
object_id: int#
property type: AnnotationType#
as_dict() Dict[str, Any][source]#

Returns a dictionary { field_name: value }


Returns a modified copy of the object

class datumaro.plugins.data_formats.yolo.base.AnnotationType(value)[source]#

Bases: IntEnum

An enumeration.

unknown = 0#
label = 1#
mask = 2#
points = 3#
polygon = 4#
polyline = 5#
bbox = 6#
caption = 7#
cuboid_3d = 8#
super_resolution_annotation = 9#
depth_annotation = 10#
ellipse = 11#
hash_key = 12#
feature_vector = 13#
tabular = 14#
class datumaro.plugins.data_formats.yolo.base.Bbox(x, y, w, h, *args, **kwargs)[source]#

Bases: _Shape

Method generated by attrs for class _Shape.

property x#
property y#
property w#
property h#

Returns [x, y, w, h]

as_polygon() List[float][source]#
iou(other: _Shape) float | ~typing.Literal[-1][source]#

Returns a modified copy of the object

class datumaro.plugins.data_formats.yolo.base.DatasetBase(*, length: int | None = None, subsets: ~typing.Sequence[str] | None = None, media_type: ~typing.Type[~datumaro.components.media.MediaElement] = <class 'datumaro.components.media.Image'>, ctx: ~datumaro.components.contexts.importer.ImportContext | None = None)[source]#

Bases: _DatasetBase, CliPlugin

A base class for user-defined and built-in extractors. Should be used in cases where SubsetBase is not enough, or where its use causes problems with performance, implementation, etc.


Returns media type of the dataset items.

All items are expected to have the same media type. The media type is expected to be constant and known immediately after object construction (i.e. it doesn’t require dataset iteration).

exception datumaro.plugins.data_formats.yolo.base.DatasetImportError[source]#

Bases: DatumaroError

class datumaro.plugins.data_formats.yolo.base.DatasetItem(id: str, *, subset: str | None = None, media: str | MediaElement | None = None, annotations: List[Annotation] | None = None, attributes: Dict[str, Any] | None = None)[source]#

Bases: object

id: str#
subset: str#
media: MediaElement | None#
annotations: List[Annotation]#
attributes: Dict[str, Any]#
media_as(t: Type[T]) T[source]#
class datumaro.plugins.data_formats.yolo.base.Image(size: Tuple[int, int] | None = None, ext: str | None = None, *args, **kwargs)[source]#

Bases: MediaElement[ndarray]

classmethod from_file(path: str, *args, **kwargs)[source]#
classmethod from_numpy(data: ndarray | Callable[[], ndarray], *args, **kwargs)[source]#
classmethod from_bytes(data: bytes | Callable[[], bytes], *args, **kwargs)[source]#
property has_size: bool#

Indicates that size info is cached and won’t require image loading

property size: Tuple[int, int] | None#

Returns (H, W)

property ext: str | None#

Media file extension (with the leading dot)

set_crypter(crypter: Crypter)[source]#
class datumaro.plugins.data_formats.yolo.base.ImageFromFile(path: str, *args, **kwargs)[source]#

Bases: FromFileMixin, Image

property data: ndarray | None#

Image data in BGRA HWC [0; 255] (uint8) format

property size: Tuple[int, int] | None#

Returns (H, W)

save(fp: str | ~io.IOBase, ext: str | None = None, crypter: ~datumaro.components.crypter.Crypter = <datumaro.components.crypter.NullCrypter object>)[source]#
set_crypter(crypter: Crypter)[source]#
class datumaro.plugins.data_formats.yolo.base.ImportContext(progress_reporter=None, error_policy=None)[source]#

Bases: object

Method generated by attrs for class ImportContext.

progress_reporter: ProgressReporter#
error_policy: ImportErrorPolicy#
exception datumaro.plugins.data_formats.yolo.base.InvalidAnnotationError[source]#

Bases: DatasetImportError

A basic dataset parsing error. Should include the problem description in the message.

class datumaro.plugins.data_formats.yolo.base.LabelCategories(items: List[str] = _Nothing.NOTHING, label_groups: List[LabelGroup] = _Nothing.NOTHING, *, attributes: Set[str] = _Nothing.NOTHING)[source]#

Bases: Categories

Method generated by attrs for class LabelCategories.

class Category(name, parent: str = '', attributes: Set[str] = _Nothing.NOTHING)[source]#

Bases: object

Method generated by attrs for class LabelCategories.Category.

name: str#
parent: str#
attributes: Set[str]#
class LabelGroup(name, labels: List[str] = [], group_type: GroupType = GroupType.EXCLUSIVE)[source]#

Bases: object

Method generated by attrs for class LabelCategories.LabelGroup.

name: str#
labels: List[str]#
group_type: GroupType#
items: List[str]#
label_groups: List[LabelGroup]#
classmethod from_iterable(iterable: Iterable[str | Tuple[str] | Tuple[str, str] | Tuple[str, str, List[str]]]) LabelCategories[source]#

Creates a LabelCategories from iterable.



Parameters: iterable – This iterable object can be:

  • a list of str - will be interpreted as list of Category names

  • a list of positional arguments - will generate Categories with these arguments

Returns: a LabelCategories object

add(name: str, parent: str | None = None, attributes: Set[str] | None = None) int[source]#
add_label_group(name: str, labels: List[str], group_type: GroupType) int[source]#
find(name: str) Tuple[int | None, Category | None][source]#
class datumaro.plugins.data_formats.yolo.base.OrderedDict[source]#

Bases: dict

Dictionary that remembers insertion order

clear() None.  Remove all items from od.#

popitem(last=True)#

Remove and return a (key, value) pair from the dictionary.

Pairs are returned in LIFO order if last is true or FIFO order if false.

move_to_end(key, last=True)#

Move an existing element to the end (or beginning if last is false).

Raise KeyError if the element does not exist.

update([E, ]**F) None.  Update D from dict/iterable E and F.#

If E is present and has a .keys() method, then does: for k in E: D[k] = E[k]. If E is present and lacks a .keys() method, then does: for k, v in E: D[k] = v. In either case, this is followed by: for k in F: D[k] = F[k].

keys() a set-like object providing a view on D's keys#
items() a set-like object providing a view on D's items#
values() an object providing a view on D's values#
pop(key[, default]) v, remove specified key and return the corresponding value.#

If the key is not found, return the default if given; otherwise, raise a KeyError.

setdefault(key, default=None)#

Insert key with a value of default if key is not in the dictionary.

Return the value for key if key is in the dictionary, else default.

copy() a shallow copy of od#

fromkeys(iterable, value=None)#

Create a new ordered dictionary with keys from iterable and values set to value.

class datumaro.plugins.data_formats.yolo.base.SubsetBase(*, length: int | None = None, subset: str | None = None, media_type: ~typing.Type[~datumaro.components.media.MediaElement] = <class 'datumaro.components.media.Image'>, ctx: ~datumaro.components.contexts.importer.ImportContext | None = None)[source]#

Bases: DatasetBase

A base class for simple, single-subset extractors. Should be used by default for user-defined extractors.


Returns meta-info of dataset.


Returns metainfo about dataset labels.

get(id, subset=None)[source]#

Provides random access to dataset items.

property subset: str#

Subset name of this instance.

class datumaro.plugins.data_formats.yolo.base.TypeVar(name, *constraints, bound=None, covariant=False, contravariant=False)[source]#

Bases: _Final, _Immutable, _TypeVarLike

Type variable.


T = TypeVar('T')  # Can be anything
A = TypeVar('A', str, bytes)  # Must be str or bytes

Type variables exist primarily for the benefit of static type checkers. They serve as the parameters for generic types as well as for generic function definitions. See class Generic for more information on generic types. Generic functions work as follows:

def repeat(x: T, n: int) -> List[T]:
    '''Return a list containing n references to x.'''
    return [x]*n

def longest(x: A, y: A) -> A:
    '''Return the longest of two strings.'''
    return x if len(x) >= len(y) else y

The latter example’s signature is essentially the overloading of (str, str) -> str and (bytes, bytes) -> bytes. Also note that if the arguments are instances of some subclass of str, the return type is still plain str.

At runtime, isinstance(x, T) and issubclass(C, T) will raise TypeError.

Type variables defined with covariant=True or contravariant=True can be used to declare covariant or contravariant generic types. See PEP 484 for more details. By default generic types are invariant in all type variables.

Type variables can be introspected. e.g.:

T.__name__ == 'T'
T.__constraints__ == ()
T.__covariant__ == False
T.__contravariant__ == False
A.__constraints__ == (str, bytes)

Note that only type variables defined in global scope can be pickled.

exception datumaro.plugins.data_formats.yolo.base.UndeclaredLabelError(id: str)[source]#

Bases: InvalidAnnotationError

Method generated by attrs for class UndeclaredLabelError.

id: str#

Index or name

class datumaro.plugins.data_formats.yolo.base.YoloLoosePath[source]#

Bases: object

NAMES_FILE = 'obj.names'#
class datumaro.plugins.data_formats.yolo.base.YoloPath[source]#

Bases: object

SUBSET_NAMES = ['train', 'valid']#
RESERVED_CONFIG_KEYS = {'backup', 'classes', 'names'}#
class datumaro.plugins.data_formats.yolo.base.YoloUltralyticsPath[source]#

Bases: object

META_FILE = 'data.yaml'#
datumaro.plugins.data_formats.yolo.base.extract_subset_name_from_parent(url: str, start: str) str[source]#

Extract subset name from the given url.

For example, if url = “/a/b/images/train/img.jpg” and start = “/a/b”, it will return “train”. On the other hand, if url = “/a/b/images/img.jpg” and start = “/a/b”, it will return DEFAULT_SUBSET_NAME.

Parameters:

  • url (str) – Given url to extract subset

  • start (str) – The head path of url to obtain the relative path from the url

Returns:

Subset name

Return type:

str

datumaro.plugins.data_formats.yolo.base.find_files(dirpath: str, exts: str | Iterable[str], recursive: bool = False, max_depth: int | None = None, min_depth: int | None = None) Iterator[str][source]#
datumaro.plugins.data_formats.yolo.base.load_image_meta_file(image_meta_path: str) Dict[str, Tuple[int, int]][source]#

Loads image metadata from a file with the following format:

<image name 1> <height 1> <width 1>

<image name 2> <height 2> <width 2>

Shell-like comments and quoted fields are allowed.

This can be useful to support datasets in which image dimensions are required to interpret annotations.

datumaro.plugins.data_formats.yolo.base.localize_path(path: str) str[source]#

Removes the “data/” prefix from the path