# Source code for datumaro.components.media

# Copyright (C) 2021-2024 Intel Corporation
#
# SPDX-License-Identifier: MIT

from __future__ import annotations

import errno
import io
import os
import os.path as osp
import shutil
from copy import deepcopy
from enum import IntEnum
from functools import partial
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Generic,
    Iterable,
    Iterator,
    List,
    Optional,
    Tuple,
    Type,
    TypeVar,
    Union,
)

import cv2
import imagesize
import numpy as np

from datumaro.components.crypter import NULL_CRYPTER, Crypter
from datumaro.components.errors import DatumaroError, MediaShapeError
from datumaro.util.definitions import BboxIntCoords
from datumaro.util.image import (
    _image_loading_errors,
    copyto_image,
    decode_image,
    lazy_image,
    load_image,
    save_image,
)

if TYPE_CHECKING:
    import pandas as pd
else:
    from datumaro.util.import_util import lazy_import

    pd = lazy_import("pandas")


AnyData = TypeVar("AnyData", bytes, np.ndarray)



[docs]
class MediaType(IntEnum):
    """Kinds of media, each optionally associated with a media class."""

    NONE = 0
    MEDIA_ELEMENT = 1
    IMAGE = 2
    BYTE_IMAGE = 3
    VIDEO_FRAME = 4
    VIDEO = 5
    POINT_CLOUD = 6
    MULTIFRAME_IMAGE = 7
    ROI_IMAGE = 8
    MOSAIC_IMAGE = 9
    TABLE_ROW = 10

    @property
    def media(self) -> Optional[Type[MediaElement]]:
        """Return the media class associated with this member.

        Returns:
            ``None`` for ``NONE``, otherwise the associated media class.

        Raises:
            NotImplementedError: If the member has no associated class
                (``BYTE_IMAGE`` has no entry below).
        """
        if self is MediaType.NONE:
            return None

        # Thunks keep the lookup lazy: a class name is only resolved when
        # the corresponding member is actually requested.
        resolvers = {
            MediaType.MEDIA_ELEMENT: lambda: MediaElement,
            MediaType.IMAGE: lambda: Image,
            MediaType.VIDEO_FRAME: lambda: VideoFrame,
            MediaType.VIDEO: lambda: Video,
            MediaType.POINT_CLOUD: lambda: PointCloud,
            MediaType.MULTIFRAME_IMAGE: lambda: MultiframeImage,
            MediaType.ROI_IMAGE: lambda: RoIImage,
            MediaType.MOSAIC_IMAGE: lambda: MosaicImage,
            MediaType.TABLE_ROW: lambda: TableRow,
        }
        resolve = resolvers.get(self)
        if resolve is None:
            raise NotImplementedError
        return resolve()




[docs]
class MediaElement(Generic[AnyData]):
    """Base class for media objects.

    Stores the crypter and provides the default accessors (``type``, ``data``,
    ``has_data``, ``bytes``); the defaults defined here all report "no data".
    """

    _type = MediaType.MEDIA_ELEMENT

    def __init__(self, crypter: Crypter = NULL_CRYPTER, *args, **kwargs) -> None:
        """
        Args:
            crypter: Crypter to associate with this media.
            *args: Accepted and ignored.
            **kwargs: Accepted and ignored.
        """
        self._crypter = crypter

    def as_dict(self) -> Dict[str, Any]:
        """Return the instance attributes as ``__init__``-style keyword arguments.

        Attributes starting with a single underscore are assumed to mirror
        ``__init__`` arguments and are returned with that underscore stripped.
        Name-mangled private attributes (written as ``self.__attr``) are assumed
        not to be related to ``__init__`` and are skipped.

        Returns:
            Mapping from argument name to the current attribute value.
        """
        # ``self.__attr`` is stored as ``_<DefiningClass>__attr`` and the defining
        # class can be any class in the MRO, not only ``type(self)``. Checking the
        # whole MRO keeps private attributes of base classes out of the result.
        mangled_prefixes = tuple(
            f"_{stripped}__"
            for stripped in (klass.__name__.lstrip("_") for klass in type(self).__mro__)
            if stripped
        )
        return {
            key[1:]: value
            for key, value in self.__dict__.items()
            if key.startswith("_") and not key.startswith(mangled_prefixes)
        }

    def from_self(self, **kwargs):
        """Create a new instance of the same class.

        The constructor arguments are a deep copy of ``as_dict()`` updated
        with ``kwargs``.
        """
        attrs = deepcopy(self.as_dict())
        attrs.update(kwargs)
        return self.__class__(**attrs)

    @property
    def is_encrypted(self) -> bool:
        """``True`` unless the current crypter is the null crypter."""
        return not self._crypter.is_null_crypter

    def set_crypter(self, crypter: Crypter):
        """Replace the crypter associated with this media."""
        self._crypter = crypter

    @property
    def type(self) -> MediaType:
        """Media type of this object (the class-level ``_type``)."""
        return self._type

    @property
    def data(self) -> Optional[AnyData]:
        """Media data; always ``None`` in the base class."""
        return None

    @property
    def has_data(self) -> bool:
        """Whether data is available; always ``False`` in the base class."""
        return False

    @property
    def bytes(self) -> Optional[bytes]:
        """Raw bytes of the media; always ``None`` in the base class."""
        return None

    def __eq__(self, other: object) -> bool:
        # Only the media type is compared here; ``other`` does not have to be
        # a MediaElement as long as it exposes a matching ``type`` attribute.
        other_type = getattr(other, "type", None)
        if self.type != other_type:
            return False
        return True

    def save(
        self,
        fp: Union[str, io.IOBase],
        crypter: Crypter = NULL_CRYPTER,
    ):
        """Save the media to ``fp``; not implemented in the base class.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError





[docs]
class FromFileMixin:
    """Mixin for media whose content is stored in a file."""

    def __init__(self, path: str, *args, **kwargs):
        # The remaining arguments are forwarded to the next class in the MRO.
        super().__init__(*args, **kwargs)
        assert path, "Path can't be empty"
        self._path = path

    @property
    def path(self) -> str:
        """Path to the media file"""
        # TODO: do we need this replace?
        normalized = self._path.replace("\\", "/")
        return normalized

    @property
    def bytes(self) -> Optional[bytes]:
        """Content of the file, or ``None`` when ``has_data`` is false."""
        if not self.has_data:
            return None
        with open(self._path, "rb") as stream:
            return stream.read()

    @property
    def has_data(self) -> bool:
        """Whether the (slash-normalized) path exists."""
        return os.path.exists(self.path)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(path={self._path!r})"




[docs]
class FromDataMixin(Generic[AnyData]):
    """Mixin for media backed by in-memory data or by a callable producing it."""

    def __init__(self, data: Union[Callable[[], AnyData], AnyData], *args, **kwargs):
        # The remaining arguments are forwarded to the next class in the MRO.
        super().__init__(*args, **kwargs)
        self._data = data

    @property
    def data(self) -> Optional[AnyData]:
        """The stored data; a stored callable is invoked on every access."""
        source = self._data
        return source() if callable(source) else source

    @property
    def bytes(self) -> Optional[bytes]:
        """The data if it is a ``bytes`` object, otherwise ``None``."""
        if not self.has_data:
            return None
        source = self._data
        payload = source() if callable(source) else source
        return payload if isinstance(payload, bytes) else None

    @property
    def has_data(self) -> bool:
        """Whether something other than ``None`` was stored."""
        return self._data is not None

    def __repr__(self) -> str:
        # Only the first 20 characters of the data's repr are shown.
        preview = repr(self._data)[:20].replace("\n", "")
        return f"{self.__class__.__name__}(data={preview}...)"




[docs]
class Image(MediaElement[np.ndarray]):
    """Base class of image media.

    Not instantiated directly: use ``from_file()``, ``from_numpy()`` or
    ``from_bytes()``.
    """

    _type = MediaType.IMAGE

    _DEFAULT_EXT = ".png"

    def __init__(
        self,
        size: Optional[Tuple[int, int]] = None,
        ext: Optional[str] = None,
        *args,
        **kwargs,
    ) -> None:
        """
        Args:
            size: Optional (H, W) pair; both values must be positive.
            ext: Optional file extension; stored lower-cased with a leading dot.
        """
        assert self.__class__ != Image, (
            f"Directly initalizing {self.__class__.__name__} is not supported. "
            f"Please use one of fractory functions ({self.__class__.__name__}.from_file(), "
            f"{self.__class__.__name__}.from_numpy(), {self.__class__.__name__}.from_bytes())."
        )
        super().__init__(*args, **kwargs)
        self._dtype = np.uint8

        if ext is not None:
            ext = (ext if ext.startswith(".") else "." + ext).lower()
        self._ext = ext

        if size is not None:
            assert (
                len(size) == 2 and 0 < size[0] and 0 < size[1]
            ), f"Invalid image size info '{size}'"
            size = tuple(int(dim) for dim in size)
        self._size = size  # (H, W)

    @classmethod
    def from_file(cls, path: str, *args, **kwargs):
        """Create an ``ImageFromFile`` for ``path``."""
        return ImageFromFile(path, *args, **kwargs)

    @classmethod
    def from_numpy(
        cls,
        data: Union[np.ndarray, Callable[[], np.ndarray]],
        *args,
        **kwargs,
    ):
        """Create an ``ImageFromNumpy`` from an array or a callable returning one."""
        return ImageFromNumpy(data, *args, **kwargs)

    @classmethod
    def from_bytes(
        cls,
        data: Union[bytes, Callable[[], bytes]],
        *args,
        **kwargs,
    ):
        """Create an ``ImageFromBytes`` from bytes or a callable returning them."""
        return ImageFromBytes(data, *args, **kwargs)

    @property
    def has_size(self) -> bool:
        """Indicates that size info is cached and won't require image loading"""
        return self._size is not None

    @property
    def size(self) -> Optional[Tuple[int, int]]:
        """Returns (H, W)"""
        if self._size is not None:
            return self._size

        try:
            pixels = self.data
        except _image_loading_errors:
            return None

        if pixels is not None:
            # Cache the size so later accesses do not need the data.
            self._size = tuple(int(dim) for dim in pixels.shape[:2])
        return self._size

    @property
    def ext(self) -> Optional[str]:
        """Media file extension (with the leading dot)"""
        return self._ext

    def _get_ext_to_save(self, fp: Union[str, io.IOBase], ext: Optional[str] = None):
        """Pick the extension to use when saving to ``fp``.

        For a string ``fp`` the extension is taken from the file name
        (lower-cased) and ``ext`` must not be given. Otherwise ``ext`` is used,
        falling back to ``_DEFAULT_EXT`` when it is empty.
        """
        if not isinstance(fp, str):
            return ext or self._DEFAULT_EXT

        assert ext is None, "'ext' must be empty if string is given."
        return osp.splitext(osp.basename(fp))[1].lower()

    def __eq__(self, other):
        # Do not compare `_type`
        # since Image is a base class of RoIImage and MosaicImage
        if not isinstance(other, Image):
            return False
        same_size = np.array_equal(self.size, other.size)
        return same_size and (np.array_equal(self.data, other.data))

    def set_crypter(self, crypter: Crypter):
        """Replace the crypter (delegates to the parent implementation)."""
        super().set_crypter(crypter)





[docs]
class ImageFromFile(FromFileMixin, Image):
    """Image stored in a file and decoded lazily through ``lazy_image``."""

    def __init__(
        self,
        path: str,
        *args,
        **kwargs,
    ) -> None:
        super().__init__(path, *args, **kwargs)
        self.__data = lazy_image(self.path, crypter=self._crypter)

        # The extension taken from the file name and the real format can differ,
        # so an explicitly given extension takes precedence.
        self._ext = self._ext if self._ext else osp.splitext(osp.basename(path))[1]

    @property
    def data(self) -> Optional[np.ndarray]:
        """Image data in BGRA HWC [0; 255] (uint8) format"""

        if not self.has_data:
            return None

        # Swap the loader when a dtype other than the lazy loader's one is requested.
        if self.__data._dtype != self._dtype:
            self.__data._loader = partial(load_image, dtype=self._dtype)
        data = self.__data()

        if self._size is None and data is not None:
            if not 2 <= data.ndim <= 3:
                raise MediaShapeError("An image should have 2 (gray) or 3 (bgra) dims.")
            self._size = tuple(map(int, data.shape[:2]))
        return data

    @property
    def size(self) -> Optional[Tuple[int, int]]:
        """Returns (H, W)"""

        if self._size is None:
            try:
                # Try to get the size from the file without decoding the image.
                width, height = imagesize.get(self.path)
                assert width != -1 and height != -1
                self._size = (height, width)
            except Exception:
                # Best effort: fall back to the parent, which uses the image data.
                _ = super().size
        return self._size

    def save(
        self,
        fp: Union[str, io.IOBase],
        ext: Optional[str] = None,
        crypter: Crypter = NULL_CRYPTER,
    ):
        """Save the image to a path or to a file-like object.

        The source file is copied when the current and target extensions match
        (ignoring case); otherwise the image data is re-encoded.

        Args:
            fp: Destination path or file-like object.
            ext: Target extension; only allowed when ``fp`` is not a string.
            crypter: Crypter to use for the destination.

        Raises:
            FileNotFoundError: If the source file does not exist.
        """
        cur_path = osp.abspath(self.path) if self.path else None
        cur_ext = self.ext
        new_ext = self._get_ext_to_save(fp, ext)
        if isinstance(fp, str):
            # A bare file name has an empty dirname, which os.makedirs rejects.
            dst_dir = osp.dirname(fp)
            if dst_dir:
                os.makedirs(dst_dir, exist_ok=True)

        if cur_path is not None and osp.isfile(cur_path):
            # Extensions taken from a file name keep their case ("a.JPG"), while
            # the target extension is lower-cased; compare case-insensitively so
            # such files are copied rather than re-encoded.
            if (cur_ext or "").lower() == (new_ext or "").lower():
                copyto_image(src=cur_path, dst=fp, src_crypter=self._crypter, dst_crypter=crypter)
            else:
                save_image(fp, self.data, ext=new_ext, crypter=crypter)
        else:
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), cur_path)

    def set_crypter(self, crypter: Crypter):
        """Replace the crypter, including the one used by the lazy loader."""
        super().set_crypter(crypter)
        if isinstance(self.__data, lazy_image):
            self.__data._crypter = crypter

    def get_data_as_dtype(self, dtype: Optional[np.dtype] = np.uint8) -> Optional[np.ndarray]:
        """Get image data with a specific data type

        Note that the requested dtype is remembered and also applies to
        later ``data`` accesses.
        """
        self._dtype = dtype
        return self.data





[docs]
class ImageFromData(FromDataMixin, Image):
    """Image backed by in-memory data."""

    def save(
        self,
        fp: Union[str, io.IOBase],
        ext: Optional[str] = None,
        crypter: Crypter = NULL_CRYPTER,
    ):
        """Encode the image data and save it to a path or a file-like object.

        Args:
            fp: Destination path or file-like object.
            ext: Target extension; only allowed when ``fp`` is not a string.
            crypter: Crypter to use for the destination.

        Raises:
            ValueError: If there is no image data.
        """
        data = self.data
        if data is None:
            raise ValueError(f"{self.__class__.__name__} is empty.")
        new_ext = self._get_ext_to_save(fp, ext)
        if isinstance(fp, str):
            # A bare file name has an empty dirname, which os.makedirs rejects.
            dst_dir = osp.dirname(fp)
            if dst_dir:
                os.makedirs(dst_dir, exist_ok=True)
        save_image(fp, data, ext=new_ext, crypter=crypter)





[docs]
class ImageFromNumpy(ImageFromData):
    """Image backed by a numpy array or by a callable returning one."""

    def __init__(
        self,
        data: Union[Callable[[], np.ndarray], np.ndarray],
        *args,
        **kwargs,
    ):
        # ``data`` is passed positionally: ``data=data`` combined with ``*args``
        # raises "got multiple values for argument 'data'" as soon as any extra
        # positional argument is given.
        super().__init__(data, *args, **kwargs)

    @property
    def data(self) -> Optional[np.ndarray]:
        """Image data in BGRA HWC [0; 255] (uint8) format"""

        data = super().data

        # Arrays of another dtype are clipped to [0, 255] before the cast.
        if isinstance(data, np.ndarray) and data.dtype != self._dtype:
            data = np.clip(data, 0.0, 255.0).astype(self._dtype)
        if self._size is None and data is not None:
            if not 2 <= data.ndim <= 3:
                raise MediaShapeError("An image should have 2 (gray) or 3 (bgra) dims.")
            self._size = tuple(map(int, data.shape[:2]))
        return data

    @property
    def has_size(self) -> bool:
        """Indicates that size info is cached and won't require image loading"""
        return self._size is not None or isinstance(self._data, np.ndarray)

    def get_data_as_dtype(self, dtype: Optional[np.dtype] = np.uint8) -> Optional[np.ndarray]:
        """Get image data with a specific data type

        Note that the requested dtype is remembered and also applies to
        later ``data`` accesses.
        """
        self._dtype = dtype
        return self.data





[docs]
class ImageFromBytes(ImageFromData):
    """Image backed by encoded bytes or by a callable returning them."""

    # (magic prefix, extension) pairs used to guess the format of raw bytes.
    _FORMAT_MAGICS = (
        (b"\x89PNG\r\n\x1a\n", ".png"),
        (b"\xff\xd8\xff", ".jpg"),
        (b"BM", ".bmp"),
    )

    def __init__(
        self,
        data: Union[Callable[[], bytes], bytes],
        *args,
        **kwargs,
    ):
        # ``data`` is passed positionally: ``data=data`` combined with ``*args``
        # raises "got multiple values for argument 'data'" as soon as any extra
        # positional argument is given.
        super().__init__(data, *args, **kwargs)

        # The format is only guessed for raw bytes; a callable is not invoked here.
        if self._ext is None and isinstance(data, bytes):
            self._ext = self._guess_ext(data)

    @classmethod
    def _guess_ext(cls, data: bytes) -> Optional[str]:
        """Return the extension of the first matching magic prefix, or ``None``."""
        return next(
            (ext for magic, ext in cls._FORMAT_MAGICS if data.startswith(magic)),
            None,
        )

    @property
    def data(self) -> Optional[np.ndarray]:
        """Image data in BGRA HWC [0; 255] (uint8) format"""

        data = super().data

        if isinstance(data, bytes):
            data = decode_image(data, dtype=self._dtype)
        if self._size is None and data is not None:
            if not 2 <= data.ndim <= 3:
                raise MediaShapeError("An image should have 2 (gray) or 3 (bgra) dims.")
            self._size = tuple(map(int, data.shape[:2]))
        return data

    def get_data_as_dtype(self, dtype: Optional[np.dtype] = np.uint8) -> Optional[np.ndarray]:
        """Get image data with a specific data type

        Raises:
            ValueError: If ``dtype`` is not ``np.uint8``.
        """

        if dtype != np.uint8:
            raise ValueError("ImageFromBytes only support `dtype=np.uint8`.")
        self._dtype = dtype
        return self.data





[docs]
class VideoFrame(ImageFromNumpy):
    """A frame of a ``Video``; its data is requested from the video on access."""

    _type = MediaType.VIDEO_FRAME

    _DEFAULT_EXT = None

    def __init__(self, video: Video, index: int):
        self._video = video
        self._index = index

        # The loader reads the attributes at call time, not at construction time.
        super().__init__(data=lambda: self._video.get_frame_data(self._index))

    def as_dict(self) -> Dict[str, Any]:
        """Return only the arguments ``__init__`` accepts: ``video`` and ``index``."""
        inherited = super().as_dict()
        return {key: inherited[key] for key in ("video", "index")}

    @property
    def size(self) -> Tuple[int, int]:
        """Frame size as reported by the owning video."""
        return self._video.frame_size

    @property
    def index(self) -> int:
        """Index of this frame in the video."""
        return self._index

    @property
    def video(self) -> Video:
        """The video this frame belongs to."""
        return self._video

    @property
    def path(self) -> str:
        """Path of the owning video."""
        return self._video.path

    def from_self(self, **kwargs):
        """Create a new frame from this one, overriding attributes with ``kwargs``.

        When ``path`` is given, the video is re-created through
        ``Video.from_self`` with all of ``kwargs``, and ``path`` itself is not
        passed to the frame constructor.
        """
        params = deepcopy(self.as_dict())
        if "path" in kwargs:
            params["video"] = self.video.from_self(**kwargs)
            del kwargs["path"]
        params.update(kwargs)
        return self.__class__(**params)

    def __getstate__(self):
        # Return only the picklable parts of the state.
        picklable = dict(self.__dict__)
        del picklable["_data"]
        return picklable

    def __setstate__(self, state):
        # Restore the object's state.
        self.__dict__.update(state)
        # Reinitialize unpicklable attributes
        self._data = lambda: self._video.get_frame_data(self._index)



class _VideoFrameIterator(Iterator[VideoFrame]):
    """
    Provides sequential access to the video frames.

    The iterator wraps a decoding generator and tracks the index of the last
    frame read (``_pos``) together with its pixel data (``_current_frame_data``).
    """

    _video: Video
    _iterator: Iterator[VideoFrame]
    # Index of the last frame read from the reader; -1 before the first read.
    _pos: int
    # Data of the last frame that passed the video's frame filter.
    _current_frame_data: Optional[np.ndarray]

    def __init__(self, video: Video):
        self._video = video
        self._reset()

    def _reset(self):
        """Restart decoding: reset the video reader and create a fresh generator."""
        self._video._reset_reader()
        self._iterator = self._decode(self._video._get_reader())
        self._pos = -1
        self._current_frame_data = None

    def _decode(self, cap) -> Iterator[VideoFrame]:
        """
        Decodes video frames using opencv

        Every frame returned by ``cap.read()`` advances ``_pos``; only the frames
        accepted by ``Video._includes_frame`` are yielded.
        """

        self._pos = -1

        success, frame = cap.read()
        while success:
            self._pos += 1
            if self._video._includes_frame(self._pos):
                # The frame data is kept as a float array.
                self._current_frame_data = frame.astype(float)
                yield self._make_frame(index=self._pos)

            success, frame = cap.read()

        # The reader is exhausted here, so the number of frames read is known.
        if self._video._frame_count is None:
            self._video._frame_count = self._pos + 1
            if self._video._end_frame and self._video._end_frame >= self._video._frame_count:
                raise ValueError(
                    f"The end_frame value({self._video._end_frame}) of the video "
                    f"must be less than the frame count({self._video._frame_count})."
                )

    def _make_frame(self, index) -> VideoFrame:
        """Create a ``VideoFrame`` of the wrapped video for ``index``."""
        return VideoFrame(self._video, index=index)

    def __next__(self):
        return next(self._iterator)

    def __getitem__(self, idx: int) -> VideoFrame:
        """Return the frame ``idx``.

        Raises:
            IndexError: If the video's frame filter excludes ``idx`` or the
                frame cannot be reached.
        """
        if not self._video._includes_frame(idx):
            raise IndexError(f"Video doesn't contain frame #{idx}.")

        return self._navigate_to(idx)

    def get_frame_data(self, idx: int) -> np.ndarray:
        """Move to frame ``idx`` and return the data cached for the current frame."""
        self._navigate_to(idx)
        return self._current_frame_data

    def _navigate_to(self, idx: int) -> VideoFrame:
        """
        Iterates over frames to the required position.

        Raises:
            IndexError: If ``idx`` is negative or decoding ends before ``idx``
                is reached.
        """

        if idx < 0:
            raise IndexError()

        # Going backwards restarts decoding from the beginning.
        if idx < self._pos:
            self._reset()

        if self._pos < idx:
            try:
                while self._pos < idx:
                    v = self.__next__()
            except StopIteration as e:
                raise IndexError() from e
        else:
            # Already positioned at ``idx``: nothing has to be decoded.
            v = self._make_frame(index=self._pos)

        return v



[docs]
class Video(MediaElement, Iterable[VideoFrame]):
    """
    Provides random access to the video frames.

    Frames can be filtered with ``start_frame``, ``end_frame`` (inclusive)
    and ``step``.
    """

    _type = MediaType.VIDEO

    def __init__(
        self,
        path: str,
        step: int = 1,
        start_frame: int = 0,
        end_frame: Optional[int] = None,
        *args,
        **kwargs,
    ) -> None:
        """
        Args:
            path: Path to the video file.
            step: Distance between selected frames; must be positive.
            start_frame: Index of the first selected frame; must be non-negative.
            end_frame: Index of the last selected frame (inclusive), or ``None``
                for no explicit limit.
        """
        super().__init__(*args, **kwargs)
        self._path = path

        assert 0 <= start_frame
        if end_frame is not None:
            assert start_frame <= end_frame
            # we can't know the video length here,
            # so we cannot validate if the end_frame is valid.
        assert 0 < step
        self._step = step
        self._start_frame = start_frame
        self._end_frame = end_frame

        self._reader = None
        self._iterator: Optional[_VideoFrameIterator] = None
        self._frame_size: Optional[Tuple[int, int]] = None

        # We don't provide frame count unless we have a reliable source of
        # this information.
        # - Not all videos provide length / duration metainfo
        # - We can get an estimation based on metadata, but it
        #   can be invalid or inaccurate due to variable frame rate
        #   or fractional values rounded up. Relying on the value will give
        #   errors during requesting frames.
        # https://stackoverflow.com/a/47796468
        self._frame_count = None
        self._length = None

    def close(self):
        """Drop the frame iterator and release the underlying reader, if any."""
        self._iterator = None

        if self._reader is not None:
            self._reader.release()
            self._reader = None

    def __getitem__(self, idx: int) -> VideoFrame:
        if not self._includes_frame(idx):
            raise IndexError(f"Video doesn't contain frame #{idx}.")

        return self._get_iterator()[idx]

    def get_frame_data(self, idx: int) -> np.ndarray:
        """Return the pixel data of frame ``idx``.

        Raises:
            IndexError: If the frame filter excludes ``idx``.
        """
        if not self._includes_frame(idx):
            raise IndexError(f"Video doesn't contain frame #{idx}.")

        return self._get_iterator().get_frame_data(idx)

    def __iter__(self) -> Iterator[VideoFrame]:
        """
        Iterates over frames lazily, if possible.
        """

        if self._frame_count is not None:
            # Decoding is not necessary to get frame pointers
            # However, it can be inaccurate
            end_frame = self._get_end_frame()
            for index in range(self._start_frame, end_frame + 1, self._step):
                yield VideoFrame(video=self, index=index)
        else:
            # Need to decode to iterate over frames
            yield from self._get_iterator()

    @property
    def length(self) -> Optional[int]:
        """
        Returns frame count of the closed interval [start_frame, end_frame],
        if video provides such information.

        Note that not all videos provide length / duration metainfo, so the
        result may be undefined.

        Also note, that information may be inaccurate because of variable
        FPS in video or incorrect metainfo. The count is only guaranteed to
        be valid after video is completely read once.

        The count is affected by the frame filtering options of the object,
        i.e. start frame, end frame and frame step.

        Raises:
            ValueError: If the interval contains no frame.
        """

        if self._length is None:
            end_frame = self._get_end_frame()

            if end_frame is not None:
                # Number of indices in range(start_frame, end_frame + 1, step).
                # Plain ``(end + 1 - start) // step`` undercounts whenever the
                # step does not divide the interval (e.g. frames 0, 2, 4).
                length = (end_frame - self._start_frame) // self._step + 1
                if 0 >= length:
                    raise ValueError(
                        "There is no valid frame for the closed interval"
                        f"[start_frame({self._start_frame}),"
                        f" end_frame({end_frame})] with step({self._step})."
                    )
                self._length = length

        return self._length

    @property
    def frame_size(self) -> Tuple[int, int]:
        """Returns (H, W)"""

        if self._frame_size is None:
            self._frame_size = self._get_frame_size()
        return self._frame_size

    def _get_frame_size(self) -> Tuple[int, int]:
        """Read the frame size from the reader, or from the next decoded frame."""
        cap = self._get_reader()
        w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

        if h and w:
            frame_size = (int(h), int(w))
        else:
            # The reader did not report a size: take it from a decoded frame.
            image = next(self._get_iterator()).data
            frame_size = image.shape[0:2]

        return frame_size

    def _get_end_frame(self):
        """Return the index of the last selectable frame, or ``None`` if unknown."""
        # Note that end_frame could be less than the last frame of the video
        if self._end_frame is not None and self._frame_count is not None:
            # ``_frame_count`` is a count, so the last valid index is one less.
            end_frame = min(self._end_frame, self._frame_count - 1)
        elif self._end_frame is not None:
            end_frame = self._end_frame
        elif self._frame_count is not None:
            end_frame = self._frame_count - 1
        else:
            end_frame = None

        return end_frame

    def _includes_frame(self, i):
        """Check whether frame ``i`` passes the start / step / end filter."""
        if self._start_frame <= i:
            if (i - self._start_frame) % self._step == 0:
                end_frame = self._get_end_frame()
                if end_frame is None or i <= end_frame:
                    return True

        return False

    def _get_iterator(self):
        if self._iterator is None:
            self._iterator = _VideoFrameIterator(self)
        return self._iterator

    def _get_reader(self):
        if self._reader is None:
            self._reset_reader()
        return self._reader

    def _reset_reader(self):
        """Release the current reader, if any, and open a new one for the path."""
        if self._reader is not None:
            self._reader.release()
        self._reader = cv2.VideoCapture(self._path)
        assert self._reader.isOpened()

    def __eq__(self, other: object) -> bool:
        def _get_frame(obj: Video, idx: int):
            try:
                return obj[idx]
            except IndexError:
                return None

        if not isinstance(other, __class__):
            return False
        if self._start_frame != other._start_frame or self._step != other._step:
            return False

        # The video path can vary if a dataset is copied.
        # So, we need to check if the video data is the same instead of checking paths.
        if self._end_frame is not None and self._end_frame == other._end_frame:
            for idx in range(self._start_frame, self._end_frame + 1, self._step):
                if self[idx] != other[idx]:
                    return False
            return True

        end_frame = self._end_frame or other._end_frame
        if end_frame is None:
            last_frame = None
            for frame in self:
                if frame != _get_frame(other, frame.index):
                    return False
                last_frame = frame
            # check if the actual last frames are same
            try:
                other[last_frame.index + self._step if last_frame else self._start_frame]
            except IndexError:
                return True
            return False

        # _end_frame values, only one of the two is valid
        for idx in range(self._start_frame, end_frame + 1, self._step):
            frame = _get_frame(self, idx)
            if frame is None:
                return False
            if frame != _get_frame(other, idx):
                return False
        # check if the actual last frames are same
        idx_next = end_frame + self._step
        return None is (_get_frame(self, idx_next) or _get_frame(other, idx_next))

    def __hash__(self):
        # Required for caching
        return hash((self._path, self._step, self._start_frame, self._end_frame))

    def save(
        self,
        fp: Union[str, io.IOBase],
        crypter: Crypter = NULL_CRYPTER,
    ):
        """Copy the video file to a path or into a file-like object.

        Args:
            fp: Destination path or file-like object. Nothing is copied when
                it is a string equal to the current path.
            crypter: Not used by this implementation.
        """
        if isinstance(fp, str):
            # A bare file name has an empty dirname, which os.makedirs rejects.
            dst_dir = osp.dirname(fp)
            if dst_dir:
                os.makedirs(dst_dir, exist_ok=True)
            if fp != self.path:
                shutil.copyfile(self.path, fp)
        elif isinstance(fp, io.IOBase):
            with open(self.path, "rb") as f_video:
                # Copy in chunks instead of reading the whole video into memory.
                shutil.copyfileobj(f_video, fp)

    @property
    def path(self) -> str:
        """Path to the media file"""
        return self._path

    @property
    def ext(self) -> str:
        """Media file extension (with the leading dot)"""
        return osp.splitext(osp.basename(self.path))[1]




[docs]
class PointCloud(MediaElement[bytes]):
    """Base class of point cloud media with optional related images.

    Not instantiated directly: use ``from_file()`` or ``from_bytes()``.
    """

    _type = MediaType.POINT_CLOUD

    def __init__(
        self,
        extra_images: Optional[Union[List[Image], Callable[[], List[Image]]]] = None,
        *args,
        **kwargs,
    ):
        """
        Args:
            extra_images: List of images, or a callable returning such a list.
                An empty list is stored when nothing (or an empty value) is given.
        """
        assert self.__class__ != PointCloud, (
            f"Directly initalizing {self.__class__.__name__} is not supported. "
            f"Please use one of fractory function ({self.__class__.__name__}.from_file(), "
            f"{self.__class__.__name__}.from_bytes())."
        )
        super().__init__(*args, **kwargs)
        self._extra_images = extra_images or []

    @classmethod
    def from_file(cls, path: str, *args, **kwargs):
        """Create a ``PointCloudFromFile`` for ``path``."""
        return PointCloudFromFile(path, *args, **kwargs)

    @classmethod
    def from_bytes(cls, data: Union[bytes, Callable[[], bytes]], *args, **kwargs):
        """Create a ``PointCloudFromBytes`` from bytes or a callable returning them."""
        return PointCloudFromBytes(data, *args, **kwargs)

    @property
    def extra_images(self) -> List[Image]:
        """The related images; a stored callable is invoked on every access."""
        provider = self._extra_images
        if not callable(provider):
            return provider

        produced = provider()
        assert isinstance(produced, list) and all(
            isinstance(image, Image) for image in produced
        )
        return produced

    def _save_extra_images(
        self,
        fn: Callable[[int, Image], Dict[str, Any]],
        crypter: Optional[Crypter] = None,
    ):
        """Save every related image that has data.

        Args:
            fn: Called with ``(index, image)``; returns the keyword arguments
                for ``image.save()``. They are applied on top of ``crypter``.
            crypter: Crypter to pass to ``save()``; the media's own crypter is
                used when it is not given.
        """
        effective_crypter = crypter or self._crypter
        for position, image in enumerate(self.extra_images):
            if not image.has_data:
                continue
            save_kwargs: Dict[str, Any] = {"crypter": effective_crypter}
            save_kwargs.update(fn(position, image))
            image.save(**save_kwargs)

    def __eq__(self, other: object) -> bool:
        # The type, the raw data and the related images all have to match.
        if not super().__eq__(other):
            return False
        if not (self.data == other.data):
            return False
        return self.extra_images == other.extra_images




[docs]
class PointCloudFromFile(FromFileMixin, PointCloud):
    """Point cloud stored in a file."""

    @property
    def data(self) -> Optional[bytes]:
        """Content of the file, or ``None`` when ``has_data`` is false."""
        if self.has_data:
            with open(self.path, "rb") as f:
                bytes_data = f.read()
            return bytes_data
        return None

    def save(
        self,
        fp: Union[str, io.IOBase],
        extra_images_fn: Optional[Callable[[int, Image], Dict[str, Any]]] = None,
        crypter: Crypter = NULL_CRYPTER,
    ):
        """Copy the point cloud to a path or into a file-like object.

        Args:
            fp: Destination path or file-like object.
            extra_images_fn: When given, the related images are saved too; it is
                called with ``(index, image)`` and returns the keyword arguments
                for ``image.save()``.
            crypter: Only the null crypter is supported.

        Raises:
            NotImplementedError: If ``crypter`` is not the null crypter.
            FileNotFoundError: If the source file does not exist.
        """
        if not crypter.is_null_crypter:
            raise NotImplementedError(
                f"{self.__class__.__name__} does not implement save() with non NullCrypter."
            )

        cur_path = osp.abspath(self.path) if self.path else None

        if cur_path is not None and osp.isfile(cur_path):
            with open(cur_path, "rb") as reader:
                _bytes = reader.read()
            if isinstance(fp, str):
                # A bare file name has an empty dirname, which os.makedirs rejects.
                dst_dir = osp.dirname(fp)
                if dst_dir:
                    os.makedirs(dst_dir, exist_ok=True)
                with open(fp, "wb") as f:
                    f.write(_bytes)
            else:
                fp.write(_bytes)
        else:
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), cur_path)

        if extra_images_fn is not None:
            self._save_extra_images(extra_images_fn, crypter)





[docs]
class PointCloudFromData(FromDataMixin, PointCloud):
    """Point cloud backed by in-memory data."""

    def save(
        self,
        fp: Union[str, io.IOBase],
        extra_images_fn: Optional[Callable[[int, Image], Dict[str, Any]]] = None,
        crypter: Crypter = NULL_CRYPTER,
    ):
        """Write the point cloud data to a path or into a file-like object.

        Args:
            fp: Destination path or file-like object.
            extra_images_fn: When given, the related images are saved too; it is
                called with ``(index, image)`` and returns the keyword arguments
                for ``image.save()``.
            crypter: Only the null crypter is supported.

        Raises:
            NotImplementedError: If ``crypter`` is not the null crypter.
            ValueError: If there is no data.
        """
        if not crypter.is_null_crypter:
            raise NotImplementedError(
                f"{self.__class__.__name__} does not implement save() with non NullCrypter."
            )

        _bytes = self.data
        if _bytes is None:
            raise ValueError(f"{self.__class__.__name__} is empty.")
        if isinstance(fp, str):
            # A bare file name has an empty dirname, which os.makedirs rejects.
            dst_dir = osp.dirname(fp)
            if dst_dir:
                os.makedirs(dst_dir, exist_ok=True)
            with open(fp, "wb") as f:
                f.write(_bytes)
        else:
            fp.write(_bytes)

        if extra_images_fn is not None:
            self._save_extra_images(extra_images_fn, crypter)





[docs]
class PointCloudFromBytes(PointCloudFromData):
    """Point cloud given as bytes or as a callable returning them."""

    @property
    def data(self) -> Optional[bytes]:
        """The point cloud data, exactly as provided by the parent class."""
        payload = super().data
        return payload




[docs]
class MultiframeImage(MediaElement):
    """A sequence of images treated as a single media."""

    _type = MediaType.MULTIFRAME_IMAGE

    def __init__(
        self,
        images: Optional[Iterable[Union[str, Image, np.ndarray, Callable[[str], np.ndarray]]]],
        *,
        path: Optional[str] = None,
    ):
        """
        Args:
            images: Frames given as file paths, ``Image`` objects, arrays or
                callables. Any iterable is accepted, including one-shot ones
                such as generators. ``None`` means no frames.
            path: Optional path to the media file.

        At least one of ``path`` and ``images`` has to be non-empty.
        """
        # NOTE: MediaElement.__init__ is not invoked here, so ``_crypter`` is
        # not set on new instances.
        self._path = path

        # Build the list while iterating instead of pre-sizing it with
        # ``len(images)``, which fails for iterables without a length.
        converted = []
        for image in [] if images is None else images:
            assert isinstance(image, (str, Image, np.ndarray)) or callable(image)

            if isinstance(image, str):
                image = Image.from_file(path=image)
            elif isinstance(image, np.ndarray) or callable(image):
                image = Image.from_numpy(data=image)

            converted.append(image)
        self._images = converted

        assert self._path or self._images

    @property
    def data(self) -> List[Image]:
        """The frames as ``Image`` objects."""
        return self._images

    @property
    def path(self) -> Optional[str]:
        """Path to the media file"""
        return self._path

    @property
    def ext(self) -> str:
        """Media file extension (with the leading dot)"""
        return osp.splitext(osp.basename(self.path))[1]




[docs]
class RoIImage(Image):
    """Image limited to a rectangular region of interest (RoI).

    The RoI is an ``(x, y, w, h)`` tuple of ints. This class is not meant to be
    instantiated directly: use :meth:`from_image`, which selects the concrete
    subclass matching the kind of the source image.
    """

    _type = MediaType.ROI_IMAGE

    def __init__(
        self,
        roi: BboxIntCoords,
        *args,
        **kwargs,
    ):
        """
        Args:
            roi: Region as ``(x, y, w, h)``; all four values must be ints.
            *args: Forwarded to the parent initializer.
            **kwargs: Forwarded to the parent initializer.

        Raises:
            AssertionError: On direct instantiation of ``RoIImage`` or when
                ``roi`` is not made of exactly four ints.
        """
        assert self.__class__ != RoIImage, (
            f"Directly initalizing {self.__class__.__name__} is not supported. "
            f"Please use a fractory function '{self.__class__.__name__}.from_image()'. "
        )

        assert len(roi) == 4 and all(isinstance(v, int) for v in roi)
        self._roi = roi
        _, _, w, h = self._roi
        # The reported size is the size of the region, given as (height, width).
        super().__init__(*args, size=(h, w), **kwargs)

    def as_dict(self) -> Dict[str, Any]:
        """Return the parent's attribute dict without the ``size`` entry.

        ``size`` is dropped because ``__init__`` derives it from ``roi``.
        """
        attrs = super().as_dict()
        attrs.pop("size", None)
        return attrs

    @classmethod
    def from_file(cls, *args, **kwargs):
        """Unsupported factory; always raises ``DatumaroError``."""
        raise DatumaroError(f"Please use a factory function '{cls.__name__}.from_image'.")

    @classmethod
    def from_image(cls, data: Image, roi: BboxIntCoords, *args, **kwargs):
        """Create an RoI image from an existing image.

        Args:
            data: Source image.
            roi: Region as ``(x, y, w, h)`` ints.
            *args: Forwarded to the concrete RoI image class.
            **kwargs: Forwarded to the concrete RoI image class.

        Returns:
            A ``RoIImageFromFile``, ``RoIImageFromNumpy`` or
            ``RoIImageFromBytes`` depending on the type of ``data``.

        Raises:
            TypeError: If ``data`` is not an ``Image``.
            NotImplementedError: If ``data`` is an ``Image`` of another kind.
        """
        if not isinstance(data, Image):
            raise TypeError(f"type(image)={type(data)} should be Image.")

        if isinstance(data, ImageFromFile):
            return RoIImageFromFile(*args, path=data.path, roi=roi, ext=data._ext, **kwargs)
        if isinstance(data, ImageFromNumpy):
            return RoIImageFromNumpy(*args, data=data._data, roi=roi, ext=data._ext, **kwargs)
        if isinstance(data, ImageFromBytes):
            return RoIImageFromBytes(*args, data=data._data, roi=roi, ext=data._ext, **kwargs)
        raise NotImplementedError

    @classmethod
    def from_numpy(cls, *args, **kwargs):
        """Unsupported factory; always raises ``DatumaroError``."""
        raise DatumaroError(f"Please use a factory function '{cls.__name__}.from_image'.")

    @classmethod
    def from_bytes(cls, *args, **kwargs):
        """Unsupported factory; always raises ``DatumaroError``."""
        raise DatumaroError(f"Please use a factory function '{cls.__name__}.from_image'.")

    @property
    def roi(self) -> BboxIntCoords:
        """Region of interest as ``(x, y, w, h)``."""
        return self._roi

    def _get_roi_data(self, data: np.ndarray) -> np.ndarray:
        """Slice the region of interest out of the full image array."""
        x, y, w, h = self._roi
        return data[y : y + h, x : x + w]

    def save(
        self,
        fp: Union[str, io.IOBase],
        ext: Optional[str] = None,
        crypter: Crypter = NULL_CRYPTER,
    ):
        """Save the cropped image data.

        Args:
            fp: Destination file path or writable file object.
            ext: Optional extension passed to ``_get_ext_to_save``.
            crypter: Only the null crypter is supported.

        Raises:
            NotImplementedError: If ``crypter`` is not a null crypter.
            ValueError: If the image has no data.
        """
        if not crypter.is_null_crypter:
            raise NotImplementedError(
                f"{self.__class__.__name__} does not implement save() with non NullCrypter."
            )
        data = self.data
        if data is None:
            raise ValueError(f"{self.__class__.__name__} is empty.")
        new_ext = self._get_ext_to_save(fp, ext)
        if isinstance(fp, str):
            # A bare file name has an empty dirname, and ``os.makedirs("")``
            # raises FileNotFoundError, so only create a real parent directory.
            dst_dir = osp.dirname(fp)
            if dst_dir:
                os.makedirs(dst_dir, exist_ok=True)
        save_image(fp, data, ext=new_ext, crypter=crypter)





[docs]
class RoIImageFromFile(FromFileMixin, RoIImage):
    """RoI image whose source pixels come from an image file."""

    def __init__(
        self,
        path: str,
        roi: BboxIntCoords,
        *args,
        **kwargs,
    ) -> None:
        super().__init__(path, roi, *args, **kwargs)
        # Loader callable built by ``lazy_image``; it is invoked in ``data``.
        self.__data = lazy_image(self.path, crypter=self._crypter)

    @property
    def data(self) -> Optional[np.ndarray]:
        """Cropped image data in BGRA HWC [0; 255] (uint8) format, or ``None``"""
        if self.has_data:
            return self._get_roi_data(self.__data())
        return None




[docs]
class RoIImageFromData(FromDataMixin, RoIImage):
    """Common base of the RoI images built from data rather than from a file path."""




[docs]
class RoIImageFromBytes(RoIImageFromData):
    """RoI image whose source is encoded image bytes (or a callable giving them)."""

    def __init__(
        self,
        data: Union[bytes, Callable[[], bytes]],
        roi: BboxIntCoords,
        *args,
        **kwargs,
    ) -> None:
        super().__init__(data, roi, *args, **kwargs)

    @property
    def data(self) -> Optional[np.ndarray]:
        """Cropped image data in BGRA HWC [0; 255] (uint8) format, or ``None``"""
        raw = super().data
        if raw is None:
            return None
        # Encoded bytes are decoded first; anything else is cropped as-is.
        pixels = decode_image(raw) if isinstance(raw, bytes) else raw
        return self._get_roi_data(pixels)




[docs]
class RoIImageFromNumpy(RoIImageFromData):
    """RoI image whose source is a numpy array (or a callable giving one)."""

    def __init__(
        self,
        data: Union[np.ndarray, Callable[[], np.ndarray]],
        roi: BboxIntCoords,
        *args,
        **kwargs,
    ) -> None:
        super().__init__(data, roi, *args, **kwargs)

    @property
    def data(self) -> Optional[np.ndarray]:
        """Cropped image data in BGRA HWC [0; 255] (uint8) format, or ``None``"""
        full_image = super().data
        return None if full_image is None else self._get_roi_data(full_image)



# An image paired with a region given as (x, y, w, h) integer coordinates.
ImageWithRoI = Tuple[Image, BboxIntCoords]



[docs]
class MosaicImage(Image):
    """Image composed from several images placed at given regions.

    Not meant to be instantiated directly; instances are obtained through
    :meth:`from_image_roi_pairs`.
    """

    _type = MediaType.MOSAIC_IMAGE

    def __init__(
        self,
        *args,
        **kwargs,
    ):
        own_name = self.__class__.__name__
        assert self.__class__ != MosaicImage, (
            f"Directly initalizing {own_name} is not supported. "
            f"Please use a fractory function '{own_name}.from_image_roi_pairs()'."
        )
        super().__init__(*args, **kwargs)

    @classmethod
    def from_file(cls, *args, **kwargs):
        """Unsupported factory; always raises ``DatumaroError``."""
        message = f"Please use a factory function '{cls.__name__}.from_image_roi_pairs'."
        raise DatumaroError(message)

    @classmethod
    def from_image_roi_pairs(cls, data: List[ImageWithRoI], size: Tuple[int, int], *args, **kwargs):
        """Build a mosaic from ``(image, roi)`` pairs and the mosaic ``size``.

        Extra positional and keyword arguments are accepted but not used.
        """
        return MosaicImageFromImageRoIPairs(data=data, size=size)

    @classmethod
    def from_numpy(cls, *args, **kwargs):
        """Unsupported factory; always raises ``DatumaroError``."""
        message = f"Please use a factory function '{cls.__name__}.from_image_roi_pairs'."
        raise DatumaroError(message)

    @classmethod
    def from_bytes(cls, *args, **kwargs):
        """Unsupported factory; always raises ``DatumaroError``."""
        message = f"Please use a factory function '{cls.__name__}.from_image_roi_pairs'."
        raise DatumaroError(message)





[docs]
class MosaicImageFromData(FromDataMixin, MosaicImage):
    """Mosaic image built from data rather than from a file path."""

    def save(
        self,
        fp: Union[str, io.IOBase],
        ext: Optional[str] = None,
        crypter: Crypter = NULL_CRYPTER,
    ):
        """Save the mosaic image data.

        Args:
            fp: Destination file path or writable file object.
            ext: Optional extension passed to ``_get_ext_to_save``.
            crypter: Only the null crypter is supported.

        Raises:
            NotImplementedError: If ``crypter`` is not a null crypter.
            ValueError: If the image has no data.
        """
        if not crypter.is_null_crypter:
            raise NotImplementedError(
                f"{self.__class__.__name__} does not implement save() with non NullCrypter."
            )
        data = self.data
        if data is None:
            raise ValueError(f"{self.__class__.__name__} is empty.")
        new_ext = self._get_ext_to_save(fp, ext)
        if isinstance(fp, str):
            # A bare file name has an empty dirname, and ``os.makedirs("")``
            # raises FileNotFoundError, so only create a real parent directory.
            dst_dir = osp.dirname(fp)
            if dst_dir:
                os.makedirs(dst_dir, exist_ok=True)
        save_image(fp, data, ext=new_ext, crypter=crypter)





[docs]
class MosaicImageFromImageRoIPairs(MosaicImageFromData):
    """Mosaic image composed on demand from a list of ``(image, roi)`` pairs."""

    def __init__(self, data: List[ImageWithRoI], size: Tuple[int, int]) -> None:
        def _get_mosaic_img() -> np.ndarray:
            # Start from a black 3-channel canvas of the requested size.
            canvas_h, canvas_w = self.size
            canvas = np.zeros(shape=(canvas_h, canvas_w, 3), dtype=np.uint8)
            for tile, region in data:
                assert isinstance(tile, Image), "MosaicImage can only take a list of Images."
                left, top, tile_w, tile_h = region
                # Write each image into its (x, y, w, h) region of the canvas.
                canvas[top : top + tile_h, left : left + tile_w] = tile.data
            return canvas

        super().__init__(data=_get_mosaic_img, size=size)
        self._data_in = data

    def as_dict(self) -> Dict[str, Any]:
        """Return only the ``data`` (input pairs) and ``size`` entries."""
        base = super().as_dict()
        pairs = base["data_in"]
        dims = base["size"]
        return {"data": pairs, "size": dims}




# Type variable for table values, constrained to str, int or float.
TableDtype = TypeVar("TableDtype", str, int, float)



[docs]
class Table:
    def __init__(
        self,
    ) -> None:
        """
        Table data with multiple rows and columns.
        This provides random access to the table row.

        Initialization must be done in the child class.
        """
        assert self.__class__ != Table, (
            f"Directly initalizing {self.__class__.__name__} is not supported. "
            f"Please use one of fractory functions ({self.__class__.__name__}.from_csv(), "
            f"{self.__class__.__name__}.from_dataframe(), or ({self.__class__.__name__}.from_list())."
        )
        self._shape: Tuple[int, int] = (0, 0)


[docs]
    @classmethod
    def from_csv(cls, path: str, *args, **kwargs) -> Type[Table]:
        """
        Returns Table instance creating from a csv file.

        Args:
            path (str) : Path to csv file.
        """
        return TableFromCSV(path, *args, **kwargs)



[docs]
    @classmethod
    def from_dataframe(
        cls,
        data: Union[pd.DataFrame, Callable[[], pd.DataFrame]],
        *args,
        **kwargs,
    ) -> Type[Table]:
        """
        Returns Table instance creating from a pandas DataFrame.

        Args:
            data (DataFrame) : Data in pandas DataFrame format.
        """
        return TableFromDataFrame(data, *args, **kwargs)



[docs]
    @classmethod
    def from_list(
        cls,
        data: List[Dict[str, TableDtype]],
        *args,
        **kwargs,
    ) -> Type[Table]:
        """
        Returns Table instance creating from a list of dicts.

        Args:
            data (list(dict(str,str|int|float))) : A list of table row data.
        """
        return TableFromListOfDict(data, *args, **kwargs)


    def __eq__(self, other: object) -> bool:
        if not isinstance(other, self.__class__):
            return False
        return self.data.equals(other)

    def __getitem__(self, idx: int) -> TableRow:
        """
        Random access to a specific row by index.
        """
        if idx >= self.shape[0]:
            raise IndexError(f"Table doesn't contain row #{idx}.")
        return TableRow(table=self, index=idx)

    def __iter__(self) -> Iterator[TableRow]:
        """
        Iterates over rows.
        """
        for index in range(self.shape[0]):
            yield TableRow(table=self, index=index)

    @property
    def shape(self) -> Tuple[int, int]:
        """Returns table size as (#rows, #cols)"""
        return self._shape

    @property
    def columns(self) -> List[str]:
        """Returns column names"""
        return self.data.columns.to_list()


[docs]
    def dtype(self, column: str) -> Optional[Type[TableDtype]]:
        """Returns native python type for a given column"""
        numpy_type = self.data.dtypes[column]
        if self.data[column].nunique() / self.shape[0] < 0.1:  # TODO
            # Convert to CategoricalDtype for efficient storage and categorical analysis
            return pd.api.types.CategoricalDtype()
        if isinstance(numpy_type, np.dtypes.ObjectDType):
            return str
        else:
            return type(np.zeros(1, numpy_type).tolist()[0])



[docs]
    def features(self, column: str, unique: Optional[bool] = False) -> List[TableDtype]:
        """Get features for a given column name."""
        if unique:
            return list(self.data[column].unique())
        else:
            return self.data[column].to_list()



[docs]
    def save(
        self,
        path: str,
    ):
        """
        Save table instance to a '.csv' file.

        Args:
            path (str) : Path to the output csv file.
        """
        data: pd.DataFrame = self.data
        os.makedirs(osp.dirname(path), exist_ok=True)
        data.to_csv(path, index=False)





[docs]
class TableFromCSV(FromFileMixin, Table):
    """Table whose content is read from a csv file at construction time."""

    def __init__(
        self,
        path: str,
        dtype: Optional[Dict] = None,
        sep: Optional[str] = None,
        encoding: Optional[str] = None,
        *args,
        **kwargs,
    ) -> None:
        """
        Read a '.csv' file and compose a Table instance.

        Args:
            path (str) : Path to csv file.
            dtype (optional, dict(str,str)) : Dictionary of column name -> type str ('str', 'int', or 'float').
            sep (optional, str) : Delimiter to use.
            encoding (optional, str) : Encoding to use for UTF when reading/writing (ex. 'utf-8').

        Raises:
            ValueError: If no data could be read from ``path``.
            MediaShapeError: If the file yields a table without columns.
        """
        super().__init__(path, *args, **kwargs)

        # The first row of the file is taken as the header.
        frame: pd.DataFrame = pd.read_csv(
            path, dtype=dtype, sep=sep, engine="python", encoding=encoding, index_col=False
        )
        if frame is None:
            raise ValueError(f"Can't read csv File from {path}")
        n_columns = frame.shape[1]
        if n_columns == 0:
            raise MediaShapeError("A table should have 1 or more columns.")

        self.__data = frame
        self._shape = frame.shape

    @property
    def data(self) -> Optional[pd.DataFrame]:
        """Table data in pandas DataFrame format"""
        return self.__data

    def select(self, columns: List[str]):
        """Restrict the table to ``columns`` and refresh the stored shape."""
        subset = self.__data[columns]
        self.__data = subset
        self._shape = subset.shape





[docs]
class TableFromDataFrame(FromDataMixin, Table):
    """Table whose content is a pandas DataFrame (or a callable returning one)."""

    def __init__(
        self,
        data: Union[Callable[[], pd.DataFrame], pd.DataFrame],
        *args,
        **kwargs,
    ):
        """
        Read a pandas DataFrame and compose a Table instance.

        Args:
            data (DataFrame) : Data in pandas DataFrame format, or a callable
                taking no arguments that returns it. A callable is invoked
                here once so that the frame can be validated.

        Raises:
            ValueError: If ``data`` is None.
            MediaShapeError: If the frame has no columns.
            TypeError: If any column name is not a str.
        """
        super().__init__(*args, data=data, **kwargs)

        if data is None:
            raise ValueError("'data' can't be None")

        # The signature allows a loader callable; validate the frame it yields
        # rather than the function object, which has no ``shape``/``columns``.
        frame = data() if callable(data) else data
        if frame.shape[1] == 0:
            raise MediaShapeError("A table should have 1 or more columns.")
        for col in frame.columns:
            if not isinstance(col, str):
                raise TypeError("A table should have column names as a list of str values")

        self._shape = frame.shape

    @property
    def data(self) -> Optional[pd.DataFrame]:
        """Table data in pandas DataFrame format"""
        return super().data




[docs]
class TableFromListOfDict(TableFromDataFrame):
    """Table built from a list of row dictionaries."""

    def __init__(
        self,
        data: List[Dict[str, TableDtype]],
        *args,
        **kwargs,
    ):
        """
        Compose a Table instance from row data given as dictionaries.

        Args:
            data (list(dict(str,str|int|float))) : A list of table row data.
        """
        # Convert the rows to a DataFrame and let the parent validate it.
        frame = pd.DataFrame(data)
        super().__init__(*args, data=frame, **kwargs)




[docs]
class TableRow(MediaElement):
    """Media element referring to a single row of a ``Table``."""

    _type = MediaType.TABLE_ROW

    def __init__(self, table: Table, index: int):
        """
        TableRow media refers to a Table instance and its row index.

        Args:
            table (Table) : Table instance.
            index (int) : Row index.

        Raises:
            ValueError: If ``table`` is None.
            IndexError: If ``index`` is negative or not below the row count.
        """
        if table is None:
            raise ValueError("'table' can't be None")
        if index < 0 or index >= table.shape[0]:
            raise IndexError(f"'index({index})' is out of range.")
        self._table = table
        self._index = index

    @property
    def table(self) -> Table:
        """Table instance"""
        return self._table

    @property
    def index(self) -> int:
        """Row index"""
        return self._index

    @property
    def path(self) -> str:
        """Path of the table this row belongs to"""
        # Read the path from the table object itself: ``table.data`` is a
        # pandas DataFrame, which has no ``path`` attribute.
        # NOTE(review): assumes file-backed tables expose ``path`` -- confirm.
        return self._table.path

    @property
    def has_data(self) -> bool:
        """Whether ``data()`` yields a value"""
        return self.data() is not None

    def data(self, targets: Optional[List[str]] = None) -> Dict:
        """
        Row data in dict format.

        Args:
            targets (optional, list(str)) : If this is specified,
                the values corresponding to target columns will be returned.
                Otherwise, whole row data will be returned.
        """
        row = self.table.data.iloc[self.index]
        if targets:
            row = row[targets]
        return row.to_dict()

    def __repr__(self):
        return f"TableRow(row_idx:{self.index}, data:{self.data()})"

    @classmethod
    def from_data(cls, data: Dict, *args, **kwargs):
        """Create a ``TableRowFromData`` from ``data`` and the given arguments."""
        return TableRowFromData(data, *args, **kwargs)





[docs]
class TableRowFromData(FromDataMixin, TableRow):
    """Table row whose content is given as a dictionary."""

    def __init__(self, data: Dict, *args, **kwargs):
        super().__init__(*args, data=data, **kwargs)

    @property
    def data(self) -> Dict:
        """Row data as returned by the parent's ``data`` property"""
        return super().data