Source code for otx.core.model.rotated_detection

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
"""Class definition for rotated detection model entity used in OTX."""

from __future__ import annotations

import cv2
import torch
from datumaro import Polygon
from torchvision import tv_tensors

from otx.algo.instance_segmentation.maskrcnn import MaskRCNN, MaskRCNNEfficientNet, MaskRCNNResNet50
from otx.core.data.entity.instance_segmentation import InstanceSegBatchPredEntity
from otx.core.model.instance_segmentation import OVInstanceSegmentationModel


class RotatedMaskRCNNModel(MaskRCNN):
    """Base class for the rotated detection models used in OTX.

    Rotated boxes are derived from the instance masks predicted by the parent model: each mask is
    reduced to the minimum-area rectangle fitted around one of its outer contours.
    """

    @staticmethod
    def _largest_rbox_polygon(mask: torch.Tensor) -> Polygon | None:
        """Return the largest rotated-box polygon fitted to an outer contour of ``mask``.

        Args:
            mask: Single-instance mask tensor. It is detached, moved to CPU and cast to integers
                before contour extraction.

        Returns:
            The ``Polygon`` with the largest area among the minimum-area rectangles fitted to the
            outer contours, or ``None`` when no usable contour is found.
        """
        np_mask = mask.detach().cpu().numpy().astype(int)
        contours, hierarchies = cv2.findContours(np_mask, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
        if hierarchies is None:
            return None

        candidates = []
        for contour, hierarchy in zip(contours, hierarchies[0]):
            # hierarchy[3] is the parent index, so anything with a parent is an inner contour.
            # Contours with two or fewer points are skipped as well.
            if hierarchy[3] != -1 or len(contour) <= 2:
                continue
            candidates.append(Polygon(cv2.boxPoints(cv2.minAreaRect(contour)).reshape(-1)))

        if not candidates:
            return None
        # max() keeps the first of equally sized candidates, exactly like a stable descending sort.
        return max(candidates, key=lambda polygon: polygon.get_area())

    def predict_step(self, *args: torch.Any, **kwargs: torch.Any) -> InstanceSegBatchPredEntity:
        """Predict step for rotated detection task.

        Note:
            This method is overridden to convert masks to rotated bounding boxes. Instances whose
            mask is empty, or whose mask yields no usable outer contour, are dropped together with
            their score, box, label and mask.

        Args:
            *args: Positional arguments forwarded unchanged to the parent ``predict_step``.
            **kwargs: Keyword arguments forwarded unchanged to the parent ``predict_step``.

        Returns:
            InstanceSegBatchPredEntity: The predicted polygons (rboxes), scores, labels, masks.
            Images without any remaining instance get zero-length tensors rather than plain lists.
        """
        preds = super().predict_step(*args, **kwargs)

        batch_scores: list[torch.Tensor] = []
        batch_bboxes: list[tv_tensors.BoundingBoxes] = []
        batch_labels: list[torch.LongTensor] = []
        batch_polygons: list[list[Polygon]] = []
        batch_masks: list[tv_tensors.Mask] = []

        for img_info, pred_bboxes, pred_scores, pred_labels, pred_masks in zip(
            preds.imgs_info,
            preds.bboxes,
            preds.scores,
            preds.labels,
            preds.masks,
        ):
            kept_boxes = []
            kept_scores = []
            kept_labels = []
            kept_masks = []
            polygons: list[Polygon] = []

            for bbox, score, label, mask in zip(pred_bboxes, pred_scores, pred_labels, pred_masks):
                if mask.sum() == 0:
                    continue
                polygon = self._largest_rbox_polygon(mask)
                if polygon is None:
                    continue
                polygons.append(polygon)
                kept_scores.append(score)
                kept_boxes.append(bbox)
                kept_labels.append(label)
                kept_masks.append(mask)

            if kept_boxes:
                scores = torch.stack(kept_scores)
                boxes = tv_tensors.BoundingBoxes(
                    torch.stack(kept_boxes),
                    format="XYXY",
                    canvas_size=img_info.ori_shape,
                )
                labels = torch.stack(kept_labels)
                masks = torch.stack(kept_masks)
            else:
                # Nothing survived for this image: hand back zero-length slices of the inputs so
                # the per-image entries keep their dtype/device instead of degrading to ``[]``.
                scores = pred_scores[:0]
                boxes = tv_tensors.BoundingBoxes(
                    torch.as_tensor(pred_bboxes[:0]).reshape(-1, 4),
                    format="XYXY",
                    canvas_size=img_info.ori_shape,
                )
                labels = pred_labels[:0]
                masks = pred_masks[:0]

            batch_scores.append(scores)
            batch_bboxes.append(boxes)
            batch_labels.append(labels)
            batch_polygons.append(polygons)
            batch_masks.append(masks)

        return InstanceSegBatchPredEntity(
            batch_size=preds.batch_size,
            images=preds.images,
            imgs_info=preds.imgs_info,
            scores=batch_scores,
            bboxes=batch_bboxes,
            masks=batch_masks,
            polygons=batch_polygons,
            labels=batch_labels,
        )
class RotatedMaskRCNNResNet50(RotatedMaskRCNNModel, MaskRCNNResNet50):
    """Rotated MaskRCNN model with ResNet50 backbone.

    ``RotatedMaskRCNNModel`` is listed first among the bases, so its attributes are looked up
    before those of ``MaskRCNNResNet50``.
    """
class RotatedMaskRCNNEfficientNet(RotatedMaskRCNNModel, MaskRCNNEfficientNet):
    """Rotated MaskRCNN model with EfficientNet backbone.

    ``RotatedMaskRCNNModel`` is listed first among the bases, so its attributes are looked up
    before those of ``MaskRCNNEfficientNet``.
    """
class OVRotatedDetectionModel(OVInstanceSegmentationModel):
    """Rotated Detection model compatible with OpenVINO IR inference.

    It can consume an OpenVINO IR model path or a model name from the Intel OMZ repository
    and create the OTX detection model compatible with the OTX testing pipeline.
    """