# Source code for otx.core.utils.utils

# Copyright (C) 2024-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

"""Utility functions."""

from __future__ import annotations

import importlib
from collections import defaultdict
from multiprocessing import cpu_count
from typing import TYPE_CHECKING, Any

import torch
from datumaro.components.annotation import AnnotationType, LabelCategories

from otx.utils.device import is_xpu_available

if TYPE_CHECKING:
    from datumaro import Dataset as DmDataset



[docs]
def is_ckpt_from_otx_v1(ckpt: dict) -> bool:
    """Check whether the checkpoint comes from OTX v1.

    A checkpoint is treated as an OTX v1 checkpoint when it has a ``"model"``
    key and its ``"VERSION"`` entry equals ``1``.

    Args:
        ckpt (dict): The loaded checkpoint dictionary.

    Returns:
        bool: True means the checkpoint comes from otx1. A checkpoint that has
        no ``"VERSION"`` entry yields False.
    """
    # ``dict.get`` keeps this a pure predicate: a checkpoint that carries a
    # "model" key but no "VERSION" key returns False instead of raising KeyError.
    return "model" in ckpt and ckpt.get("VERSION") == 1




[docs]
def is_ckpt_for_finetuning(ckpt: dict) -> bool:
    """Tell whether the checkpoint is one that will be used for finetuning.

    The decision is based solely on the presence of a ``"state_dict"`` key.

    Args:
        ckpt (dict): The loaded checkpoint dictionary.

    Returns:
        bool: True when ``ckpt`` contains a ``"state_dict"`` key, False otherwise.
    """
    finetuning_marker = "state_dict"
    return finetuning_marker in ckpt




[docs]
def get_adaptive_num_workers(num_dataloader: int = 1) -> int | None:
    """Derive a ``num_workers`` value from the CPU count and the accelerator count.

    Args:
        num_dataloader (int): Number of dataloaders; used together with the
            device count as the divisor of the CPU count. Defaults to 1.

    Returns:
        int | None: ``None`` when the queried backend (XPU if available,
        otherwise CUDA) reports zero devices. Otherwise the CPU count
        integer-divided by ``num_dataloader * num_devices``, capped at 8.
    """
    # Query the XPU backend when it is available, the CUDA backend otherwise.
    if is_xpu_available():
        device_total = torch.xpu.device_count()
    else:
        device_total = torch.cuda.device_count()

    if device_total == 0:
        return None

    worker_cap = 8  # max available num_workers is 8
    workers_per_loader = cpu_count() // (num_dataloader * device_total)
    return min(workers_per_loader, worker_cap)




[docs]
def get_idx_list_per_classes(dm_dataset: DmDataset, use_string_label: bool = False) -> dict[int | str, list[int]]:
    """Collect, for every class, the indices of the dataset items annotated with it.

    Args:
        dm_dataset (DmDataset): Dataset to scan. Items are numbered by their
            position in iteration order.
        use_string_label (bool): When True, classes are keyed by the label name
            looked up in the dataset's label categories; when False, by the
            raw ``ann.label`` value. Defaults to False.

    Returns:
        dict[int | str, list[int]]: Mapping from class key to the item indices
        that carry at least one annotation of that class. Each index appears
        once per class, in first-seen order.
    """
    label_categories = dm_dataset.categories().get(AnnotationType.label, LabelCategories())
    indices_by_class: dict[int | str, list[int]] = defaultdict(list)

    for position, dm_item in enumerate(dm_dataset):
        for annotation in dm_item.annotations:
            class_key = label_categories.items[annotation.label].name if use_string_label else annotation.label
            indices_by_class[class_key].append(position)

    # An item with several annotations of one class was appended several times;
    # dict.fromkeys drops the repeats while keeping the original order.
    for class_key, positions in indices_by_class.items():
        indices_by_class[class_key] = list(dict.fromkeys(positions))

    return indices_by_class




[docs]
def import_object_from_module(obj_path: str) -> Any:  # noqa: ANN401
    """Resolve a dotted import string to the object it names.

    The text after the last dot is taken as the attribute name and everything
    before it as the module to import.

    Args:
        obj_path (str): Dotted path such as ``"package.module.ObjectName"``.

    Returns:
        Any: The attribute fetched from the imported module.
    """
    module_path, attribute_name = obj_path.rsplit(".", 1)
    return getattr(importlib.import_module(module_path), attribute_name)




[docs]
def remove_state_dict_prefix(state_dict: dict[str, Any], prefix: str) -> dict[str, Any]:
    """Build a copy of ``state_dict`` whose keys have ``prefix`` stripped out.

    Note:
        The removal uses ``str.replace``, so every occurrence of ``prefix``
        inside a key is removed, not only a leading one. If two keys become
        equal after the removal, the value of the later one is kept.

    Args:
        state_dict (dict[str, Any]): Mapping whose keys are to be rewritten.
            It is not modified.
        prefix (str): Substring to remove from each key.

    Returns:
        dict[str, Any]: New dictionary with rewritten keys and the same values.
    """
    return {name.replace(prefix, ""): tensor for name, tensor in state_dict.items()}