Source code for otx.algo.callbacks.ema_mean_teacher

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
"""Module for exponential moving average for SemiSL mean teacher algorithm."""

from __future__ import annotations

from typing import TYPE_CHECKING, Any

import torch
from lightning import Callback, LightningModule, Trainer

if TYPE_CHECKING:
    from lightning.pytorch.utilities.types import STEP_OUTPUT


class EMAMeanTeacher(Callback):
    """Callback for the SemiSL MeanTeacher algorithm.

    This callback keeps the teacher model's weights as an exponential moving
    average of the student model's weights.

    Args:
        momentum (float, optional): EMA momentum. Defaults to 0.999.
        start_epoch (int, optional): epoch at which EMA updates start. Defaults to 1.
    """

    def __init__(
        self,
        momentum: float = 0.999,
        start_epoch: int = 1,
    ) -> None:
        super().__init__()
        self.momentum = momentum
        self.start_epoch = start_epoch
        self.synced_models = False

    def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
        """Set up src & dst model parameters."""
        # trainer.model is the LightningModule; its `model` attribute is the wrapped nn.Module
        model = trainer.model.model
        self.src_model = getattr(model, "student_model", None)
        self.dst_model = getattr(model, "teacher_model", None)
        if self.src_model is None or self.dst_model is None:
            msg = "student_model and teacher_model should be set for MeanTeacher algorithm"
            raise RuntimeError(msg)
        self.src_params = self.src_model.state_dict(keep_vars=True)
        self.dst_params = self.dst_model.state_dict(keep_vars=True)

    def on_train_batch_end(
        self,
        trainer: Trainer,
        pl_module: LightningModule,
        outputs: STEP_OUTPUT,
        batch: Any,  # noqa: ANN401
        batch_idx: int,
    ) -> None:
        """Update the EMA parameters every iteration."""
        if trainer.current_epoch < self.start_epoch:
            return
        # EMA update of the teacher parameters
        self._ema_model(trainer.global_step)

    def _copy_model(self) -> None:
        # Initial sync: copy the student weights into the teacher before EMA updates begin.
        with torch.no_grad():
            for name, src_param in self.src_params.items():
                if src_param.requires_grad:
                    dst_param = self.dst_params[name]
                    dst_param.data.copy_(src_param.data)

    def _ema_model(self, global_step: int) -> None:
        if self.start_epoch != 0 and not self.synced_models:
            self._copy_model()
            self.synced_models = True

        # Warm-up: the effective momentum ramps toward self.momentum as global_step grows.
        momentum = min(1 - 1 / (global_step + 1), self.momentum)
        with torch.no_grad():
            for name, src_param in self.src_params.items():
                if src_param.requires_grad:
                    dst_param = self.dst_params[name]
                    dst_param.data.copy_(dst_param.data * momentum + src_param.data * (1 - momentum))
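
The warm-up term above keeps the effective momentum small early in training, so the teacher tracks the student closely at first and only settles into slow averaging once many optimizer steps have accumulated. A minimal sketch of that schedule (illustrative only, not part of the module):

momentum_cap = 0.999  # same default as EMAMeanTeacher.momentum
for global_step in (9, 99, 999, 9999):
    print(global_step, min(1 - 1 / (global_step + 1), momentum_cap))
# -> 0.9, 0.99, 0.999, 0.999 (capped at momentum_cap from then on)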
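
To attach the callback, the LightningModule's inner `model` must expose `student_model` and `teacher_model` attributes, as `on_train_start` requires. The sketch below is a hedged usage example: `_MeanTeacherNet` and `_LitMeanTeacher` are hypothetical stand-ins, not OTX classes.

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from lightning import LightningModule, Trainer

from otx.algo.callbacks.ema_mean_teacher import EMAMeanTeacher


class _MeanTeacherNet(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.student_model = nn.Linear(4, 2)
        self.teacher_model = nn.Linear(4, 2)


class _LitMeanTeacher(LightningModule):
    def __init__(self) -> None:
        super().__init__()
        self.model = _MeanTeacherNet()

    def training_step(self, batch, batch_idx):
        x, y = batch
        return nn.functional.mse_loss(self.model.student_model(x), y)

    def configure_optimizers(self):
        # Only the student is optimized; the teacher is moved solely by the callback.
        return torch.optim.SGD(self.model.student_model.parameters(), lr=0.01)


loader = DataLoader(TensorDataset(torch.randn(32, 4), torch.randn(32, 2)), batch_size=8)
trainer = Trainer(
    max_epochs=2,
    callbacks=[EMAMeanTeacher(momentum=0.999, start_epoch=1)],
    logger=False,
    enable_checkpointing=False,
)
trainer.fit(_LitMeanTeacher(), train_dataloaders=loader)

Because the teacher receives no gradients in this setup, its weights change only through the EMA update performed by the callback, which is the usual mean-teacher arrangement.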