Source code for otx.api.entities.model_template

"""This file defines the ModelConfiguration, ModelEntity and Model classes."""

# Copyright (C) 2021-2023 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
import copy
import os
from dataclasses import dataclass, field
from enum import Enum, IntEnum, auto
from typing import Dict, List, NamedTuple, Optional, Sequence, Union, cast

from omegaconf import DictConfig, ListConfig, OmegaConf

from otx.api.configuration.elements import metadata_keys
from otx.api.configuration.enums import AutoHPOState
from otx.api.configuration.helper.utils import search_in_config_dict
from otx.api.entities.label import Domain


class TargetDevice(IntEnum):
    """Represents the target device for a given model.

    This device might for instance be used for training or inference.
    """

    UNSPECIFIED = auto()
    CPU = auto()
    GPU = auto()
    VPU = auto()

class ModelOptimizationMethod(Enum):
    """Optimized model format."""

    TENSORRT = auto()
    OPENVINO = auto()

    def __str__(self) -> str:
        """Returns ModelOptimizationMethod as string."""
        return str(self.name)

@dataclass
class DatasetRequirements:
    """Expected requirements for the dataset in order to use this algorithm.

    Attributes:
        classes (Optional[List[str]]): Classes which must be present in the dataset
    """

    classes: Optional[List[str]] = None

@dataclass
class ExportableCodePaths:
    """The paths to the different versions of the exportable code for a given model template."""

    default: Optional[str] = None
    openvino: Optional[str] = None

class TaskFamily(Enum):
    """Overall task family."""

    VISION = auto()
    FLOW_CONTROL = auto()
    DATASET = auto()

    def __str__(self) -> str:
        """Returns task family as a string."""
        return str(self.name)

class TaskInfo(NamedTuple):
    """Task information.

    NamedTuple to store information about the task type like label domain, if it is
    trainable, if it is an anomaly task and if it supports global or local labels.
    """

    domain: Domain
    is_trainable: bool
    is_anomaly: bool
    is_global: bool
    is_local: bool

class TaskType(Enum):
    """The type of algorithm within the task family.

    Also contains relevant information about the task type like label domain, if it is
    trainable, if it is an anomaly task or if it supports global or local labels.

    Args:
        value (int): (Unused) Unique integer for .value property of Enum (auto() does not work)
        task_info (TaskInfo): NamedTuple containing information about the task's capabilities
    """

    # pylint: disable=unused-argument
    def __init__(
        self,
        value: int,
        task_info: TaskInfo,
    ):
        self.domain = task_info.domain
        self.is_trainable = task_info.is_trainable
        self.is_anomaly = task_info.is_anomaly
        self.is_global = task_info.is_global
        self.is_local = task_info.is_local

    def __new__(cls, *args):
        """Returns new instance."""
        obj = object.__new__(cls)
        obj._value_ = args[0]
        return obj

    NULL = 1, TaskInfo(
        domain=Domain.NULL,
        is_trainable=False,
        is_anomaly=False,
        is_global=False,
        is_local=False,
    )
    DATASET = 2, TaskInfo(
        domain=Domain.NULL,
        is_trainable=False,
        is_anomaly=False,
        is_global=False,
        is_local=False,
    )
    CLASSIFICATION = 3, TaskInfo(
        domain=Domain.CLASSIFICATION,
        is_trainable=True,
        is_anomaly=False,
        is_global=True,
        is_local=False,
    )
    SEGMENTATION = 4, TaskInfo(
        domain=Domain.SEGMENTATION,
        is_trainable=True,
        is_anomaly=False,
        is_global=False,
        is_local=True,
    )
    DETECTION = 5, TaskInfo(
        domain=Domain.DETECTION,
        is_trainable=True,
        is_anomaly=False,
        is_global=False,
        is_local=True,
    )
    ANOMALY_DETECTION = 6, TaskInfo(
        domain=Domain.ANOMALY_DETECTION,
        is_trainable=True,
        is_anomaly=True,
        is_global=False,
        is_local=True,
    )
    CROP = 7, TaskInfo(
        domain=Domain.NULL,
        is_trainable=False,
        is_anomaly=False,
        is_global=False,
        is_local=False,
    )
    TILE = 8, TaskInfo(
        domain=Domain.NULL,
        is_trainable=False,
        is_anomaly=False,
        is_global=False,
        is_local=False,
    )
    INSTANCE_SEGMENTATION = 9, TaskInfo(
        domain=Domain.INSTANCE_SEGMENTATION,
        is_trainable=True,
        is_anomaly=False,
        is_global=False,
        is_local=True,
    )
    ACTIVELEARNING = 10, TaskInfo(
        domain=Domain.NULL,
        is_trainable=False,
        is_anomaly=False,
        is_global=False,
        is_local=False,
    )
    ANOMALY_SEGMENTATION = 11, TaskInfo(
        domain=Domain.ANOMALY_SEGMENTATION,
        is_trainable=True,
        is_anomaly=True,
        is_global=False,
        is_local=True,
    )
    ANOMALY_CLASSIFICATION = 12, TaskInfo(
        domain=Domain.ANOMALY_CLASSIFICATION,
        is_trainable=True,
        is_anomaly=True,
        is_global=True,
        is_local=False,
    )
    ROTATED_DETECTION = 13, TaskInfo(
        domain=Domain.ROTATED_DETECTION,
        is_trainable=True,
        is_anomaly=False,
        is_global=False,
        is_local=True,
    )

    if os.getenv("FEATURE_FLAGS_OTX_ACTION_TASKS", "0") == "1":
        ACTION_CLASSIFICATION = 14, TaskInfo(
            domain=Domain.ACTION_CLASSIFICATION,
            is_trainable=True,
            is_anomaly=False,
            is_global=False,
            is_local=True,
        )
        ACTION_DETECTION = 15, TaskInfo(
            domain=Domain.ACTION_DETECTION,
            is_trainable=True,
            is_anomaly=False,
            is_global=False,
            is_local=True,
        )
    if os.getenv("FEATURE_FLAGS_OTX_VISUAL_PROMPTING_TASKS", "0") == "1":
        VISUAL_PROMPTING = 16, TaskInfo(  # TODO: Is 16 okay when action flag is False?
            domain=Domain.VISUAL_PROMPTING,
            is_trainable=True,
            is_anomaly=False,
            is_global=False,
            is_local=True,  # TODO: check whether is it local or not
        )

    def __str__(self) -> str:
        """Returns name."""
        return self.name

    def __repr__(self) -> str:
        """Returns name."""
        return self.name

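# Illustrative sketch (editorial addition, not part of the upstream module): every
# TaskType member exposes its TaskInfo fields as plain attributes, so task capabilities
# can be queried directly on the enum member. When used outside this module, import
# TaskType from otx.api.entities.model_template first.
print(TaskType.CLASSIFICATION.is_trainable)  # True
print(TaskType.CLASSIFICATION.is_global)  # True: classification produces global labels
print(TaskType.ANOMALY_DETECTION.is_anomaly)  # True
print(TaskType.CROP.is_trainable)  # False: CROP is a non-trainable utility task
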
def task_type_to_label_domain(task_type: TaskType) -> Domain:
    """Links the task type to the label domain enum.

    Note that not all task types have an associated domain (e.g. crop task). In this case, a ``ValueError``
    is raised.

    Args:
        task_type (TaskType): The task type to get the label domain for.

    Returns:
        Domain: The label domain for the task type.
    """
    mapping = {
        TaskType.CLASSIFICATION: Domain.CLASSIFICATION,
        TaskType.DETECTION: Domain.DETECTION,
        TaskType.SEGMENTATION: Domain.SEGMENTATION,
        TaskType.INSTANCE_SEGMENTATION: Domain.INSTANCE_SEGMENTATION,
        TaskType.ANOMALY_CLASSIFICATION: Domain.ANOMALY_CLASSIFICATION,
        TaskType.ANOMALY_DETECTION: Domain.ANOMALY_DETECTION,
        TaskType.ANOMALY_SEGMENTATION: Domain.ANOMALY_SEGMENTATION,
        TaskType.ROTATED_DETECTION: Domain.ROTATED_DETECTION,
    }

    if os.getenv("FEATURE_FLAGS_OTX_ACTION_TASKS", "0") == "1":
        mapping = {
            **mapping,
            TaskType.ACTION_CLASSIFICATION: Domain.ACTION_CLASSIFICATION,
            TaskType.ACTION_DETECTION: Domain.ACTION_DETECTION,
        }
    if os.getenv("FEATURE_FLAGS_OTX_VISUAL_PROMPTING_TASKS", "0") == "1":
        mapping = {
            **mapping,
            TaskType.VISUAL_PROMPTING: Domain.VISUAL_PROMPTING,
        }

    try:
        return mapping[task_type]
    except KeyError as exc:
        raise ValueError(f"Task type {task_type} does not have any associated label domain.") from exc

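# Illustrative sketch (editorial addition, not part of the upstream module): mapping a
# trainable task type to its label domain, and the ValueError raised for task types
# without an associated domain (such as CROP).
print(task_type_to_label_domain(TaskType.DETECTION))  # the Domain member for detection
try:
    task_type_to_label_domain(TaskType.CROP)
except ValueError as err:
    print(err)  # explains that CROP has no associated label domain
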
@dataclass
class HyperParameterData:
    """HyperParameter Data.

    Class that contains the raw hyper parameter data, for those hyper parameters for the model that are
    user-configurable.

    Attributes:
        base_path (Optional[str]): The path to the yaml file specifying the base configurable parameters to use
            in the model. Defaults to None.
        parameter_overrides (Dict): Nested dictionary that describes overrides for the metadata for the
            user-configurable hyper parameters that are used in the model. This allows multiple models to share
            the same base hyper-parameters, while for each individual model the defaults, parameter ranges,
            descriptions, etc. can still be customized.
    """

    base_path: Optional[str] = None
    parameter_overrides: Dict = field(default_factory=dict)
    __data: Dict = field(default_factory=dict, repr=False)
    __has_valid_configurable_parameters: bool = field(default=False, repr=False)

    def load_parameters(self, model_template_path: str):
        """Load hyper parameters.

        Loads the actual hyper parameters defined in the file at `base_path`, and performs any overrides
        specified in the `parameter_overrides`.

        Args:
            model_template_path (str): file path to the model template file in which the HyperParameters live.
        """
        has_valid_configurable_parameters = False
        if self.base_path is not None and os.path.exists(model_template_path):
            model_template_dir = os.path.dirname(model_template_path)
            base_hyper_parameter_path = os.path.join(model_template_dir, self.base_path)

            config_dict = OmegaConf.load(base_hyper_parameter_path)
            data = OmegaConf.to_container(config_dict)
            if isinstance(data, dict):
                self.__remove_parameter_values_from_data(data)
                self.__data = data
                has_valid_configurable_parameters = True
            else:
                raise ValueError(
                    f"Unexpected configurable parameter file found at path {base_hyper_parameter_path}"
                    f", expected a dictionary-like format, got list-like instead."
                )
        if self.has_overrides and has_valid_configurable_parameters:
            self.substitute_parameter_overrides()
        self.__has_valid_configurable_parameters = has_valid_configurable_parameters

    @property
    def data(self) -> Dict:
        """Returns a dictionary containing the set of hyper parameters defined in the ModelTemplate.

        This does not contain the actual parameter values, but instead holds the parameter schemas in a
        structured manner. The actual values should be either loaded from the database, or will be initialized
        from the defaults upon creating a configurable parameter object out of this data.
        """
        return self.__data

    @property
    def has_overrides(self) -> bool:
        """Returns True if any parameter overrides are defined by the HyperParameters instance, False otherwise."""
        return self.parameter_overrides != {}

    @property
    def has_valid_configurable_parameters(self) -> bool:
        """Check if configurable parameters are valid.

        Returns True if the HyperParameterData instance contains valid configurable parameters, extracted from
        the model template. False otherwise.
        """
        return self.__has_valid_configurable_parameters

    def substitute_parameter_overrides(self):
        """Carries out the parameter overrides specified in the `parameter_overrides` attribute.

        Validates whether the overridden parameters exist in the base set of configurable parameters, and
        whether the metadata values that should be overridden are valid metadata attributes.
        """
        self.__substitute_parameter_overrides(self.parameter_overrides, self.__data)

    def __substitute_parameter_overrides(self, override_dict: Dict, parameter_dict: Dict):
        """Substitutes parameters from override_dict into parameter_dict.

        Recursively substitutes overridden parameter values specified in `override_dict` into the base set of
        hyper parameters passed in as `parameter_dict`.

        Args:
            override_dict (Dict): dictionary containing the parameter overrides
            parameter_dict (Dict): dictionary that contains the base set of hyper parameters, in which the
                overridden values are substituted
        """
        for key, value in override_dict.items():
            if isinstance(value, dict) and not metadata_keys.allows_dictionary_values(key):
                if key in parameter_dict.keys():
                    self.__substitute_parameter_overrides(value, parameter_dict[key])
                else:
                    raise ValueError(
                        f"Unable to perform parameter override. Parameter or parameter group named {key} "
                        f"is not valid for the base hyper parameters specified in {self.base_path}"
                    )
            else:
                if metadata_keys.allows_model_template_override(key):
                    parameter_dict[key] = value
                else:
                    raise KeyError(f"{key} is not a valid keyword for hyper parameter overrides")

    @classmethod
    def __remove_parameter_values_from_data(cls, data: dict):
        """This method removes the actual parameter values from the input parameter data.

        These values should be removed because the parameters should be instantiated from the default_values,
        instead of their values.

        NOTE: This method modifies its input dictionary, it does not return a new copy.

        Args:
            data: Parameter dictionary to remove values from
        """
        data_copy = copy.deepcopy(data)
        for key, value in data_copy.items():
            if isinstance(value, dict):
                if key != metadata_keys.UI_RULES:
                    cls.__remove_parameter_values_from_data(data[key])
            elif key == "value":
                data.pop(key)

    def manually_set_data_and_validate(self, hyper_parameters: dict):
        """This function is used to manually set the hyper parameter data from a dictionary.

        It is meant to be used in testing only, in cases where the model template is not backed up by an actual
        yaml file.

        Args:
            hyper_parameters (Dict): Dictionary containing the data to be set
        """
        self.__data = hyper_parameters
        self.__has_valid_configurable_parameters = True

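# Illustrative sketch (editorial addition, not part of the upstream module): building a
# HyperParameterData instance without a backing yaml file, the way tests are expected to
# do it, and inspecting the resulting schema. The parameter names below are hypothetical.
hyper_parameter_data = HyperParameterData()
hyper_parameter_data.manually_set_data_and_validate({"learning_parameters": {"batch_size": {"default_value": 32}}})
print(hyper_parameter_data.has_valid_configurable_parameters)  # True
print(hyper_parameter_data.has_overrides)  # False: no parameter_overrides were given
print(hyper_parameter_data.data["learning_parameters"]["batch_size"]["default_value"])  # 32
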
class InstantiationType(Enum):
    """The method to instantiate a given task."""

    NONE = auto()
    CLASS = auto()
    GRPC = auto()

    def __str__(self) -> str:
        """Returns the name of the instantiation type."""
        return str(self.name)

@dataclass
class Dependency:
    """Dependency required by the task.

    Attributes:
        source (str): Source of the dependency
        destination (str): Destination folder to install the dependency
        size (Optional[int]): Size of the dependency in bytes
        sha256 (Optional[str]): SHA-256 checksum of the dependency file
    """

    source: str
    destination: str
    size: Optional[int] = None
    sha256: Optional[str] = None

@dataclass
class EntryPoints:
    """Path of the Python classes implementing the task interface.

    Attributes:
        base (str): Base interface implementing the functionality in a framework such as PyTorch or TensorFlow
        openvino (Optional[str]): OpenVINO interface.
        nncf (Optional[str]): NNCF interface
    """

    base: str
    openvino: Optional[str] = None
    nncf: Optional[str] = None

class ModelCategory(Enum):
    """Represents model category regarding accuracy & speed trade-off."""

    SPEED = auto()
    BALANCE = auto()
    ACCURACY = auto()
    OTHER = auto()

    def __str__(self) -> str:
        """Returns the name of the model category."""
        return str(self.name)

class ModelStatus(Enum):
    """Represents model status regarding deprecation process."""

    ACTIVE = auto()
    DEPRECATED = auto()

    def __str__(self) -> str:
        """Returns the name of the model status."""
        return str(self.name)

# pylint: disable=too-many-instance-attributes
@dataclass
class ModelTemplate:
    """This class represents a Task in the Task database.

    It can be either a CLASS type, with the class path specified, or a GRPC type with its address.
    The task chain uses this information to set up a `ChainLink` (a task in the chain).

    model_template_id (str): ID of the model template
    model_template_path (str): path to the original model template file
    name (str): user-friendly name for the algorithm used in the task
    task_family (TaskFamily): overall task family of the task. One of VISION, FLOW_CONTROL or DATASET.
    task_type (TaskType): Type of algorithm within task family.
    instantiation (InstantiationType): InstantiationType (CLASS or GRPC)
    summary (str): Summary of what the algorithm does. Defaults to "".
    framework (Optional[str]): The framework used by the algorithm. Defaults to None.
    max_nodes (int): Max number of nodes for training. Defaults to 1.
    application (Optional[str]): Name of the application solved by this algorithm. Defaults to None.
    dependencies (List[Dependency]): List of dependencies required by the algorithm. Defaults to empty `field`.
    initial_weights (Optional[str]): Optional URL to the initial weights used by the algorithm. Defaults to None.
    training_targets (List[TargetDevice]): devices used for training. Defaults to empty `field`.
    inference_targets (List[TargetDevice]): devices used for inference. Defaults to empty `field`.
    dataset_requirements (DatasetRequirements): expected requirements for the dataset. Defaults to empty `field`.
    model_optimization_methods (List[ModelOptimizationMethod]): list of ModelOptimizationMethod. This lists all
        methods available to optimize the inference model for the task.
    hyper_parameters (HyperParameterData): HyperParameterData object containing the base path to the configurable
        parameter definition, as well as any overrides for the base parameters that are specific for the
        current template.
    is_trainable (bool): specify whether task is trainable
    capabilities (List[str]): list of task capabilities
    grpc_address (Optional[str]): the grpc host address (for instantiation type == GRPC)
    entrypoints (Optional[EntryPoints]): Entrypoints implementing the Python task interface
    base_model_path (str): Path to template file for the base model used for nncf compression.
    exportable_code_paths (ExportableCodePaths): if it exists, the path to the exportable code sources.
        Defaults to empty `field`.
    task_type_sort_priority (int): priority with which models are shown in the pipeline dropdown for a given
        task type. The default model (e.g. Inception for classification) has weight 0, the highest priority;
        less important models (e.g. MobileNet) have a higher value. An unassigned priority defaults to -1.
    gigaflops (float): how many billions of operations are required to do inference on a single data item.
    size (float): how much disk space the model will approximately take.
    model_category (ModelCategory): Represents model category regarding accuracy & speed trade-off.
        Defaults to OTHER.
    model_status (ModelStatus): Represents model status regarding deprecation process. Defaults to ACTIVE.
    is_default_for_task (bool): Whether this model is a default recommendation for the task
    """

    model_template_id: str
    model_template_path: str
    name: str
    task_family: TaskFamily
    task_type: TaskType
    instantiation: InstantiationType
    summary: str = ""
    framework: Optional[str] = None
    max_nodes: int = 1
    application: Optional[str] = None
    dependencies: List[Dependency] = field(default_factory=list)
    initial_weights: Optional[str] = None
    training_targets: List[TargetDevice] = field(default_factory=list)
    inference_targets: List[TargetDevice] = field(default_factory=list)
    dataset_requirements: DatasetRequirements = field(default_factory=DatasetRequirements)
    model_optimization_methods: List[ModelOptimizationMethod] = field(default_factory=list)
    hyper_parameters: HyperParameterData = field(default_factory=HyperParameterData)
    is_trainable: bool = True
    capabilities: List[str] = field(default_factory=list)
    grpc_address: Optional[str] = None
    entrypoints: Optional[EntryPoints] = None
    base_model_path: str = ""
    exportable_code_paths: ExportableCodePaths = field(default_factory=ExportableCodePaths)
    task_type_sort_priority: int = -1
    gigaflops: float = 0
    size: float = 0
    hpo: Optional[Dict] = None
    model_category: ModelCategory = ModelCategory.OTHER
    model_status: ModelStatus = ModelStatus.ACTIVE
    is_default_for_task: bool = False

    def __post_init__(self):
        """Do sanitation checks before loading the hyper-parameters."""
        if self.instantiation == InstantiationType.GRPC and self.grpc_address == "":
            raise ValueError("Task is registered as gRPC, but no gRPC address is specified")
        if self.instantiation == InstantiationType.CLASS and self.entrypoints is None:
            raise ValueError("Task is registered as CLASS, but entrypoints were not specified")
        if self.task_family == TaskFamily.VISION and self.hyper_parameters.base_path is None:
            raise ValueError("Task is registered as a VISION task but no hyper parameters were defined.")
        if self.task_family != TaskFamily.VISION and self.hyper_parameters.base_path is not None:
            raise ValueError("Hyper parameters are currently not supported for non-VISION tasks.")

        # Load the full hyper parameters
        self.hyper_parameters.load_parameters(self.model_template_path)

    def computes_uncertainty_score(self) -> bool:
        """Returns true if "compute_uncertainty_score" is in capabilities, false otherwise."""
        return "compute_uncertainty_score" in self.capabilities

    def computes_representations(self) -> bool:
        """Returns true if "compute_representations" is in capabilities."""
        return "compute_representations" in self.capabilities

    def is_task_global(self) -> bool:
        """Returns ``True`` if the task is a global task, i.e. if the task produces global labels."""
        return self.task_type.is_global

    def supports_auto_hpo(self) -> bool:
        """Returns `True` if the algorithm supports automatic hyper parameter optimization, `False` otherwise."""
        if not self.hyper_parameters.has_valid_configurable_parameters:
            return False
        auto_hpo_state_results = search_in_config_dict(
            self.hyper_parameters.data, key_to_search=metadata_keys.AUTO_HPO_STATE
        )
        for result in auto_hpo_state_results:
            if str(result[0]).lower() == str(AutoHPOState.POSSIBLE):
                return True
        return False

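# Illustrative sketch (editorial addition, not part of the upstream module): constructing
# a minimal trainable template directly in Python. All identifiers, names and paths below
# are hypothetical. With an empty model_template_path the hyper parameter file is not
# resolved, so load_parameters() simply leaves the parameter schema empty.
example_template = ModelTemplate(
    model_template_id="Example_Classification",
    model_template_path="",
    name="Example classification model",
    task_family=TaskFamily.VISION,
    task_type=TaskType.CLASSIFICATION,
    instantiation=InstantiationType.CLASS,
    entrypoints=EntryPoints(base="example.module.ExampleTask"),
    hyper_parameters=HyperParameterData(base_path="./configuration.yaml"),
    capabilities=["compute_representations"],
)
print(example_template.is_task_global())  # True: classification produces global labels
print(example_template.computes_representations())  # True
print(example_template.supports_auto_hpo())  # False: no configurable parameters were loaded
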
class NullModelTemplate(ModelTemplate):
    """Represent an empty model template. Note that a task based on this model template cannot be instantiated."""

    def __init__(self) -> None:
        super().__init__(
            model_template_id="",
            model_template_path="",
            task_family=TaskFamily.FLOW_CONTROL,
            task_type=TaskType.NULL,
            name="Null algorithm",
            instantiation=InstantiationType.NONE,
            capabilities=[],
        )

ANOMALY_TASK_TYPES: Sequence[TaskType] = (
    TaskType.ANOMALY_DETECTION,
    TaskType.ANOMALY_CLASSIFICATION,
    TaskType.ANOMALY_SEGMENTATION,
)

TRAINABLE_TASK_TYPES: Sequence[TaskType] = (
    TaskType.CLASSIFICATION,
    TaskType.DETECTION,
    TaskType.SEGMENTATION,
    TaskType.INSTANCE_SEGMENTATION,
    TaskType.ANOMALY_DETECTION,
    TaskType.ANOMALY_CLASSIFICATION,
    TaskType.ANOMALY_SEGMENTATION,
    TaskType.ROTATED_DETECTION,
)

if os.getenv("FEATURE_FLAGS_OTX_ACTION_TASKS", "0") == "1":
    TRAINABLE_TASK_TYPES = (
        *TRAINABLE_TASK_TYPES,
        TaskType.ACTION_CLASSIFICATION,
        TaskType.ACTION_DETECTION,
    )
if os.getenv("FEATURE_FLAGS_OTX_VISUAL_PROMPTING_TASKS", "0") == "1":
    TRAINABLE_TASK_TYPES = (
        *TRAINABLE_TASK_TYPES,
        TaskType.VISUAL_PROMPTING,
    )


def _parse_model_template_from_omegaconf(config: Union[DictConfig, ListConfig]) -> ModelTemplate:
    """Parse an OmegaConf configuration into a model template.

    Args:
        config (Union[DictConfig, ListConfig]): The configuration to parse.

    Returns:
        ModelTemplate: The parsed model template.
    """
    schema = OmegaConf.structured(ModelTemplate)
    config = OmegaConf.merge(schema, config)
    return cast(ModelTemplate, OmegaConf.to_object(config))

def parse_model_template(model_template_path: str) -> ModelTemplate:
    """Read a model template from a file.

    Args:
        model_template_path (str): Path to the model template file (template.yaml)

    Returns:
        ModelTemplate: The model template parsed from the file.
    """
    config = OmegaConf.load(model_template_path)
    if not isinstance(config, DictConfig):
        raise ValueError("Expected the configuration file to contain a dictionary, not a list")

    if "model_template_id" not in config:
        config["model_template_id"] = config["name"].replace(" ", "_")
    config["model_template_path"] = model_template_path
    return _parse_model_template_from_omegaconf(config)

def parse_model_template_from_dict(model_template_dict: dict) -> ModelTemplate:
    """Read a model template from a dictionary.

    Note that the model_template_id must be defined inside the dictionary.

    Args:
        model_template_dict (dict): Dictionary containing the model template.

    Returns:
        ModelTemplate: The model template.
    """
    config = OmegaConf.create(model_template_dict)
    return _parse_model_template_from_omegaconf(config)

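# Illustrative sketch (editorial addition, not part of the upstream module): parsing a
# template from a plain dictionary that mirrors the structure of a template.yaml file.
# All values below are hypothetical; enum fields are given by member name and resolved by
# OmegaConf against the ModelTemplate dataclass schema.
example_config = {
    "model_template_id": "Example_Classification",
    "model_template_path": "",
    "name": "Example classification model",
    "task_family": "VISION",
    "task_type": "CLASSIFICATION",
    "instantiation": "CLASS",
    "entrypoints": {"base": "example.module.ExampleTask"},
    "hyper_parameters": {"base_path": "./configuration.yaml"},
    "gigaflops": 1.2,
    "size": 4.0,
}
parsed_template = parse_model_template_from_dict(example_config)
print(parsed_template.task_type)  # CLASSIFICATION
print(parsed_template.model_category)  # OTHER: unset fields fall back to the schema defaults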