# Source code for otx.core.config.data
# Copyright (C) 2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
"""Config data type objects for data."""
# NOTE: omegaconf would fail to parse dataclass with `from __future__ import annotations` in Python 3.8, 3.9
# ruff: noqa: FA100
from __future__ import annotations
from copy import deepcopy
from dataclasses import dataclass, field
from typing import Any
from otx.core.types.transformer_libs import TransformLibType
@dataclass
class SubsetConfig:
    """DTO for dataset subset configuration.

    Attributes:
        batch_size (int): Batch size produced.
        subset_name (str): Datumaro Dataset's subset name for this subset config.
            It can differ from the actual usage (e.g., 'val' for the validation subset config).
        transforms (list[dict[str, Any] | Transform] | Compose): List of actually used transforms.
            It accepts a list of `torchvision.transforms.v2.*` Python objects
            or `torchvision.transforms.v2.Compose` for `TransformLibType.TORCHVISION`.
            Otherwise, it takes a Python dictionary that fits the configuration style used in mmcv
            (`TransformLibType.MMCV`, `TransformLibType.MMPRETRAIN`, ...).
        transform_lib_type (TransformLibType): Transform library type used by this subset.
        num_workers (int): Number of workers for the dataloader of this subset.
        sampler (SamplerConfig): Sampler configuration for the dataloader of this subset.
            A new default `SamplerConfig()` is created for every instance when omitted.
        to_tv_image (bool): Whether to convert image to torch tensor.
        input_size (int | tuple[int, int] | None):
            Input size the model expects. If $(input_size) exists in transforms,
            it will be replaced with this value.

    Example:
        ```python
        train_subset_config = SubsetConfig(
            batch_size=64,
            subset_name="train",
            transforms=v2.Compose(
                [
                    v2.RandomResizedCrop(size=(224, 224), antialias=True),
                    v2.RandomHorizontalFlip(p=0.5),
                    v2.ToDtype(torch.float32, scale=True),
                    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                ],
            ),
            transform_lib_type=TransformLibType.TORCHVISION,
            num_workers=2,
        )
        ```
    """

    # Required fields (no defaults): must be supplied by the caller or a subclass.
    batch_size: int
    subset_name: str

    # TODO (vinnamki): Revisit data configuration objects to support a union type in structured config
    # Omegaconf does not allow to have a union type, https://github.com/omry/omegaconf/issues/144
    transforms: list[dict[str, Any]]

    transform_lib_type: TransformLibType = TransformLibType.TORCHVISION
    num_workers: int = 2
    # The lambda is intentional: `SamplerConfig` is defined further down in this module,
    # so the name must be looked up lazily at instantiation time rather than at class creation.
    sampler: SamplerConfig = field(default_factory=lambda: SamplerConfig())
    to_tv_image: bool = True

    # Real type is `int | tuple[int, int] | None`; declared as `Any` because of the union-type
    # limitation referenced above. TODO (eunwoosh): Revisit after error above is solved
    input_size: Any = None
@dataclass
class TileConfig:
    """DTO for tiler configuration.

    Every field has a default, so `TileConfig()` is a valid configuration
    (with `enable_tiler` set to `False`).
    """

    enable_tiler: bool = False
    enable_adaptive_tiling: bool = True
    tile_size: tuple[int, int] = (400, 400)
    overlap: float = 0.2
    iou_threshold: float = 0.45
    max_num_instances: int = 1500
    object_tile_ratio: float = 0.03
    sampling_ratio: float = 1.0
    with_full_img: bool = False

    def clone(self) -> TileConfig:
        """Return a deep copied one of this instance.

        Returns:
            TileConfig: An independent copy; mutating it does not affect this instance.
        """
        return deepcopy(self)
@dataclass
class VisualPromptingConfig:
    """DTO for visual prompting data module configuration.

    Both flags default to `False`.
    """

    use_bbox: bool = False
    use_point: bool = False
@dataclass
class UnlabeledDataConfig(SubsetConfig):
    """DTO for unlabeled data.

    Extends `SubsetConfig` with `data_root` and `data_format`, and gives defaults to the
    base fields that are otherwise required (`batch_size`, `subset_name`, `transforms`),
    so the config can be created without any arguments.
    """

    # Fields added by this subclass.
    data_root: str | None = None
    data_format: str = "image_dir"

    # Base-class fields re-declared to supply defaults. Re-declared dataclass fields keep
    # their original position in the generated `__init__`; only the default changes.
    batch_size: int = 0
    subset_name: str = "unlabeled"

    # TODO (harimkang): If not multi-transform, support for list type, as should support for other subsets.
    # Unlike the base class (a list), transforms here is a dict mapping a name to a transform list.
    transforms: dict[str, list[dict[str, Any]]] = field(default_factory=dict)  # type: ignore[assignment]

    transform_lib_type: TransformLibType = TransformLibType.TORCHVISION
    num_workers: int = 2
    to_tv_image: bool = True
@dataclass
class SamplerConfig:
    """Configuration class for defining the sampler used in the data loading process.

    This is passed in the form of a dataclass, which is instantiated when the dataloader is created.

    TODO: Need to replace this with a proper Sampler class.
    Currently, SamplerConfig, which belongs to the sampler of SubsetConfig,
    belongs to the nested dataclass of dataclass, which is not easy to instantiate from the CLI.
    So currently replace sampler with a corresponding dataclass that resembles the configuration
    of another object, providing limited functionality.

    Attributes:
        class_path (str): Dotted import path of the sampler class.
            Defaults to "torch.utils.data.RandomSampler".
        init_args (dict[str, Any]): Keyword arguments stored alongside `class_path`.
            Defaults to a new empty dict per instance.
    """

    class_path: str = "torch.utils.data.RandomSampler"
    # default_factory gives each instance its own dict instead of sharing one mutable default.
    init_args: dict[str, Any] = field(default_factory=dict)