# Source code for otx.algo.segmentation.segmentors.mean_teacher

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
"""Base mean teacher algorithm for semi-supervised semantic segmentation learning."""
from __future__ import annotations

import copy
from typing import TYPE_CHECKING

import numpy as np
import torch
from torch import Tensor, nn

from otx.algo.common.utils.utils import cut_mixer

if TYPE_CHECKING:
    # NOTE(review): the module path was lost in extraction; restored from the OTX package layout — confirm.
    from otx.core.data.entity.base import ImageInfo

class MeanTeacher(nn.Module):
    """MeanTeacher for Semi-supervised learning.

    The wrapper keeps two copies of ``model``: ``teacher_model`` (the instance that was
    passed in, with gradients disabled on all of its parameters) and ``student_model``
    (a deep copy taken before the gradients were disabled).

    Args:
        model (nn.Module): model
        unsup_weight (float, optional): unsupervised weight. Defaults to 1.0.
        drop_unrel_pixels_percent (int, optional): drop unrel pixels percent. Defaults to 20.
        semisl_start_epoch (int, optional): semisl start epoch. Defaults to 0.
        filter_pixels_epochs (int, optional): filter pixels epochs. Defaults to 100.
    """

    # Label value written into pseudo labels for pixels that must not contribute to the loss.
    _IGNORE_INDEX = 255

    def __init__(
        self,
        model: nn.Module,
        unsup_weight: float = 1.0,
        drop_unrel_pixels_percent: int = 20,
        semisl_start_epoch: int = 0,
        filter_pixels_epochs: int = 100,
    ) -> None:
        super().__init__()

        self.teacher_model = model
        # The copy is taken before the teacher is frozen, so the student keeps its gradients.
        self.student_model = copy.deepcopy(model)
        # no grads for teacher model
        for param in self.teacher_model.parameters():
            param.requires_grad = False
        self.unsup_weight = unsup_weight
        self.drop_unrel_pixels_percent = drop_unrel_pixels_percent
        # filter unreliable pixels during first X epochs
        self.filter_pixels_epochs = filter_pixels_epochs
        self.semisl_start_epoch = semisl_start_epoch

    def forward(
        self,
        inputs: Tensor,
        unlabeled_weak_images: Tensor | None = None,
        unlabeled_strong_images: Tensor | None = None,
        global_step: int | None = None,
        steps_per_epoch: int | None = None,
        img_metas: list[ImageInfo] | None = None,
        unlabeled_img_metas: list[ImageInfo] | None = None,
        masks: Tensor | None = None,
        mode: str = "tensor",
    ) -> Tensor | dict[str, Tensor]:
        """Step for model training.

        For every ``mode`` other than ``"loss"`` the call is forwarded to the teacher model
        with the labeled inputs only. In ``"loss"`` mode the student is trained: once
        ``global_step`` exceeds ``semisl_start_epoch * steps_per_epoch`` the unsupervised
        branch is added, before that only the supervised student loss is returned.

        Args:
            inputs (Tensor): input labeled images
            unlabeled_weak_images (Tensor, optional): unlabeled images with weak augmentations.
                Defaults to None.
            unlabeled_strong_images (Tensor, optional): unlabeled images with strong augmentations.
                Defaults to None.
            global_step (int, optional): global step. Defaults to None.
            steps_per_epoch (int, optional): steps per epoch. Defaults to None.
            img_metas (list[ImageInfo], optional): image meta information. Defaults to None.
            unlabeled_img_metas (list[ImageInfo], optional): unlabeled image meta information.
                Defaults to None.
            masks (Tensor, optional): ground truth masks for training. Defaults to None.
            mode (str, optional): mode of forward. Defaults to "tensor".

        Returns:
            Whatever the wrapped model returns for the requested mode. In the semi-supervised
            branch this is the student's loss mapping for the labeled batch, extended with the
            unlabeled losses under keys suffixed with ``_unlabeled``.

        Raises:
            ValueError: If ``mode`` is ``"loss"`` and ``global_step`` or ``steps_per_epoch``
                is not provided.
        """
        if mode != "loss":
            # only labeled images for validation and testing
            return self.teacher_model(inputs, img_metas, masks, mode=mode)

        if global_step is None or steps_per_epoch is None:
            msg = "global_step and steps_per_epoch should be provided"
            raise ValueError(msg)

        if global_step > self.semisl_start_epoch * steps_per_epoch:
            # generate pseudo labels, filter high entropy pixels, compute loss reweight
            percent_unreliable = self._unreliable_percent(global_step, steps_per_epoch)
            pl_from_teacher, reweight_unsup = self._generate_pseudo_labels(
                unlabeled_weak_images,
                percent_unreliable=percent_unreliable,
            )
            unlabeled_strong_images_aug, pl_from_teacher_aug = cut_mixer(unlabeled_strong_images, pl_from_teacher)

            # extract features from labeled and unlabeled augmented images
            student_labeled_logits = self.student_model(inputs, mode="tensor")
            student_unlabeled_logits = self.student_model(unlabeled_strong_images_aug, mode="tensor")

            # loss computation
            loss_decode = self.student_model.calculate_loss(
                student_labeled_logits,
                img_metas,
                masks=masks,
                interpolate=True,
            )
            loss_decode_u = self.student_model.calculate_loss(
                student_unlabeled_logits,
                unlabeled_img_metas,
                masks=pl_from_teacher_aug,
                interpolate=True,
            )
            loss_decode_u = {f"{k}_unlabeled": v * reweight_unsup * self.unsup_weight for k, v in loss_decode_u.items()}
            loss_decode.update(loss_decode_u)

            return loss_decode

        return self.student_model(inputs, img_metas, masks, mode="loss")

    def _unreliable_percent(self, global_step: int, steps_per_epoch: int) -> float:
        """Return the percent of high-entropy pixels to drop at ``global_step``.

        The value decays linearly from ``drop_unrel_pixels_percent`` at step 0 to 0 after
        ``filter_pixels_epochs`` epochs and stays at 0 afterwards.

        Args:
            global_step (int): current global training step
            steps_per_epoch (int): number of training steps in one epoch
        """
        # The whole product is the denominator: without the parentheses the step count was
        # multiplied by steps_per_epoch and filtering was switched off after a few steps.
        filter_steps = self.filter_pixels_epochs * steps_per_epoch
        if filter_steps <= 0:
            # A zero-length filtering window means "never filter" instead of dividing by zero.
            return 0.0
        return max(0.0, self.drop_unrel_pixels_percent * (1 - global_step / filter_steps))

    def _generate_pseudo_labels(self, ul_w_img: Tensor, percent_unreliable: float) -> tuple[Tensor, Tensor | float]:
        """Generate pseudo labels from teacher model, apply filter loss method.

        Args:
            ul_w_img (torch.Tensor): weakly augmented unlabeled images
            percent_unreliable (float): percent of unreliable pixels

        Returns:
            The pseudo labels (class index per pixel, with dropped pixels set to 255) and the
            factor used to reweight the unsupervised loss. The factor is the plain float
            ``1.0`` when no pixels are filtered and a tensor otherwise.
        """
        ignore_index = self._IGNORE_INDEX

        with torch.no_grad():
            teacher_out = self.teacher_model(ul_w_img, mode="tensor")
            # Bring the teacher output to the spatial size of the input images.
            teacher_out = torch.nn.functional.interpolate(
                teacher_out,
                size=ul_w_img.shape[2:],
                mode="bilinear",
                align_corners=True,
            )
            teacher_prob_unsup = torch.softmax(teacher_out, dim=1)
            _, pl_from_teacher = torch.max(teacher_prob_unsup, dim=1, keepdim=True)

        # drop pixels with high entropy
        reweight_unsup: Tensor | float = 1.0
        if percent_unreliable > 0:
            keep_percent = 100 - percent_unreliable
            batch_size, _, h, w = teacher_out.shape

            # The small epsilon keeps log() finite for zero probabilities.
            entropy = -torch.sum(teacher_prob_unsup * torch.log(teacher_prob_unsup + 1e-10), dim=1, keepdim=True)

            valid = pl_from_teacher != ignore_index
            thresh = float(np.percentile(entropy[valid].detach().cpu().numpy(), keep_percent))
            # Pixels at or above the entropy threshold are considered unreliable.
            thresh_mask = (entropy >= thresh) & valid

            # mark as ignore index
            pl_from_teacher[thresh_mask] = ignore_index
            # reweight unsupervised loss; the clamp avoids an infinite weight when every
            # pixel was dropped (e.g. all entropies are equal)
            num_kept = torch.sum(pl_from_teacher != ignore_index).clamp(min=1)
            reweight_unsup = batch_size * h * w / num_kept

        return pl_from_teacher, reweight_unsup