# Source code for datumaro.plugins.sam_transforms.interpreters.sam_encoder
# Copyright (C) 2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
from typing import List, Tuple
import cv2
import numpy as np
from datumaro.components.abstracts import IModelInterpreter
from datumaro.components.abstracts.model_interpreter import LauncherInputType, ModelPred, PrepInfo
from datumaro.components.annotation import Annotation, FeatureVector
from datumaro.components.dataset_base import DatasetItem
from datumaro.components.media import Image
from datumaro.plugins.openvino_plugin.samples.utils import rescale_img_keeping_aspect_ratio
[docs]
class SAMEncoderInterpreter(IModelInterpreter):
    """Model interpreter for the SAM image encoder.

    ``preprocess`` converts a dataset item's image into the float32 CHW array
    handed to the launcher, and ``postprocess`` wraps the model's
    ``"image_embeddings"`` output in a single ``FeatureVector`` annotation.
    """

    # Target height and width passed to ``rescale_img_keeping_aspect_ratio``.
    h_model = 1024
    w_model = 1024

    def preprocess(self, inp: DatasetItem) -> Tuple[LauncherInputType, PrepInfo]:
        """Prepare the image of a dataset item for the encoder.

        The image is rescaled via ``rescale_img_keeping_aspect_ratio`` (with
        ``padding=False``), converted from BGR to RGB, transposed from HWC to
        CHW, and cast to ``np.float32``.

        Args:
            inp: Dataset item whose media is read as an ``Image``.
                NOTE(review): the BGR->RGB conversion assumes the image data
                is a 3-channel BGR array -- confirm against the media loader.

        Returns:
            A ``(image, scale)`` tuple: the preprocessed image array and the
            ``scale`` attribute reported by the rescaling helper.
        """
        img = inp.media_as(Image).data
        output = rescale_img_keeping_aspect_ratio(img, self.h_model, self.w_model, padding=False)

        # From BGR to RGB
        output.image = cv2.cvtColor(output.image, cv2.COLOR_BGR2RGB)
        # From HWC to CHW
        output.image = output.image.transpose(2, 0, 1)
        # To FP32
        output.image = output.image.astype(np.float32)

        return output.image, output.scale

    def postprocess(self, pred: ModelPred, info: PrepInfo) -> List[Annotation]:
        """Wrap the encoder output in a feature-vector annotation.

        Args:
            pred: Model prediction; must provide an ``"image_embeddings"`` entry.
            info: Preprocessing info returned by ``preprocess``; not used here.

        Returns:
            A one-element list holding a ``FeatureVector`` built from the
            ``"image_embeddings"`` value.

        Raises:
            AssertionError: If ``pred`` has no ``"image_embeddings"`` entry
                (or its value is ``None``).
        """
        embed = pred.get("image_embeddings")
        if embed is None:
            # Raised explicitly instead of via ``assert`` so the check is not
            # stripped under ``python -O``; the exception type is kept as
            # AssertionError so existing callers observe the same error.
            raise AssertionError('Model output should have "image_embeddings".')

        return [FeatureVector(vector=embed)]

    def get_categories(self):
        """Return ``None``; this interpreter provides no categories."""
        return None