Source code for otx.api.usecases.evaluation.accuracy

"""This module contains the implementation of Accuracy performance provider."""

# Copyright (C) 2021-2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#


import copy
import logging
from typing import List, Set, Tuple

import numpy as np
from sklearn.metrics import confusion_matrix as sklearn_confusion_matrix

from otx.api.entities.dataset_item import DatasetItemEntity
from otx.api.entities.datasets import DatasetEntity
from otx.api.entities.label import LabelEntity
from otx.api.entities.label_schema import LabelGroup
from otx.api.entities.metrics import (
    BarChartInfo,
    BarMetricsGroup,
    ColorPalette,
    MatrixChartInfo,
    MatrixMetric,
    MatrixMetricsGroup,
    MetricsGroup,
    Performance,
    ScoreMetric,
)
from otx.api.entities.resultset import ResultSetEntity
from otx.api.usecases.evaluation.averaging import MetricAverageMethod
from otx.api.usecases.evaluation.basic_operations import (
    precision_per_class,
    recall_per_class,
)
from otx.api.usecases.evaluation.performance_provider_interface import (
    IPerformanceProvider,
)

logger = logging.getLogger(__name__)


class Accuracy(IPerformanceProvider):
    """This class is responsible for providing Accuracy measures; mainly for Classification problems.

    The calculation supports both multi-label and binary label predictions.

    Accuracy is the proportion of correctly predicted labels to the total number of (predicted and actual)
    labels for that instance. Overall accuracy is the average across all instances.

    Args:
        resultset (ResultSetEntity): ResultSet that the score will be computed for
        average (MetricAverageMethod, optional): The averaging method, either MICRO or MACRO
            MICRO: compute average over all predictions in all label groups
            MACRO: compute accuracy per label group, return the average of the per-label-group accuracy scores
    """

    def __init__(
        self,
        resultset: ResultSetEntity,
        average: MetricAverageMethod = MetricAverageMethod.MICRO,
    ):
        self._unnormalized_matrices: List[MatrixMetric] = compute_unnormalized_confusion_matrices_from_resultset(
            resultset
        )

        # accuracy computation
        mean_accuracy = self._compute_accuracy(average=average, confusion_matrices=self._unnormalized_matrices)
        self._accuracy = ScoreMetric(value=mean_accuracy, name="Accuracy")

    @property
    def accuracy(self) -> ScoreMetric:
        """Returns the accuracy as ScoreMetric."""
        return self._accuracy
    def get_performance(self) -> Performance:
        """Returns the performance with accuracy and confusion metrics."""
        confusion_matrix_dashboard_metrics: List[MetricsGroup] = []

        # Use normalized matrices for the UI
        normalized_matrices: List[MatrixMetric] = copy.deepcopy(self._unnormalized_matrices)
        for unnormalized_matrix in normalized_matrices:
            unnormalized_matrix.normalize()

        confusion_matrix_info = MatrixChartInfo(
            name="Confusion matrix",
            header="confusion",
            row_header="Predicted label",
            column_header="True label",
        )
        confusion_matrix_dashboard_metrics.append(
            MatrixMetricsGroup(metrics=normalized_matrices, visualization_info=confusion_matrix_info)
        )

        # Compute precision and recall MetricsGroups and append them to the dashboard metrics
        for _confusion_matrix in self._unnormalized_matrices:
            confusion_matrix_dashboard_metrics.append(precision_metrics_group(_confusion_matrix))
            confusion_matrix_dashboard_metrics.append(recall_metrics_group(_confusion_matrix))

        return Performance(score=self.accuracy, dashboard_metrics=confusion_matrix_dashboard_metrics)
    @staticmethod
    def _compute_accuracy(average: MetricAverageMethod, confusion_matrices: List[MatrixMetric]) -> float:
        """Compute accuracy using the confusion matrices.

        Args:
            average (MetricAverageMethod): The averaging method, either MICRO or MACRO
                MICRO: compute average over all predictions in all label groups
                MACRO: compute accuracy per label group, return the average of the per-label-group accuracy scores
            confusion_matrices (List[MatrixMetric]): the confusion matrices to compute accuracy from.
                MUST be unnormalized.

        Raises:
            ValueError: when the ground truth dataset does not contain annotations
            RuntimeError: when the averaging method is not known

        Returns:
            float: the accuracy score for the provided confusion matrices
        """
        # count correct predictions and total annotations
        correct_per_label_group = [np.trace(mat.matrix_values) for mat in confusion_matrices]
        total_per_label_group = [np.sum(mat.matrix_values) for mat in confusion_matrices]

        # check that at least one label group has annotations
        if not np.any(total_per_label_group):
            raise ValueError("The ground truth dataset must contain annotations.")

        # return micro or macro average
        if average == MetricAverageMethod.MACRO:
            # compute accuracy for each label group, then average across groups, ignoring groups without annotations
            return np.nanmean(np.divide(correct_per_label_group, total_per_label_group))
        if average == MetricAverageMethod.MICRO:
            # average over all predictions in all label groups
            return np.sum(correct_per_label_group) / np.sum(total_per_label_group)
        raise RuntimeError(f"Unknown averaging method: {average}")
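# --- Illustrative sketch, not part of the original module ---
# The hypothetical helper below mirrors the MICRO/MACRO logic of
# ``Accuracy._compute_accuracy`` on two made-up, unnormalized confusion matrices,
# using plain numpy arrays instead of MatrixMetric objects.
def _averaging_example() -> None:
    group_a = np.array([[8, 2], [1, 9]])  # 17 correct out of 20 annotations
    group_b = np.array([[3, 1], [4, 2]])  # 5 correct out of 10 annotations

    correct_per_label_group = [np.trace(group_a), np.trace(group_b)]  # [17, 5]
    total_per_label_group = [np.sum(group_a), np.sum(group_b)]  # [20, 10]

    # MICRO: pool all predictions before dividing -> 22 / 30 ~= 0.733
    micro = np.sum(correct_per_label_group) / np.sum(total_per_label_group)
    # MACRO: average the per-group accuracies -> (0.85 + 0.5) / 2 = 0.675
    macro = np.nanmean(np.divide(correct_per_label_group, total_per_label_group))
    logger.info("micro=%.3f macro=%.3f", micro, macro)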
def precision_metrics_group(confusion_matrix: MatrixMetric) -> MetricsGroup:
    """Computes the precision per class based on a confusion matrix and returns them as ScoreMetrics in a MetricsGroup.

    Args:
        confusion_matrix: matrix to compute the precision per class for

    Returns:
        a BarMetricsGroup with the per class precision.
    """
    labels = confusion_matrix.row_labels
    if labels is None:
        # If no labels are given, just number the classes by index
        if confusion_matrix.matrix_values is not None:
            label_range = confusion_matrix.matrix_values.shape[0]
        else:
            label_range = 0
        labels = np.arange(label_range)

    per_class_precision = [
        ScoreMetric(class_, value=precision)
        for (class_, precision) in zip(labels, precision_per_class(confusion_matrix.matrix_values))
    ]
    return BarMetricsGroup(
        metrics=per_class_precision,
        visualization_info=BarChartInfo(
            name="Precision per class",
            palette=ColorPalette.LABEL,
        ),
    )


def recall_metrics_group(confusion_matrix: MatrixMetric) -> MetricsGroup:
    """Computes the recall per class based on a confusion matrix and returns them as ScoreMetrics in a MetricsGroup.

    Args:
        confusion_matrix: matrix to compute the recall per class for

    Returns:
        a BarMetricsGroup with the per class recall.
    """
    labels = confusion_matrix.row_labels
    if labels is None:
        # If no labels are given, just number the classes by index
        if confusion_matrix.matrix_values is not None:
            label_range = confusion_matrix.matrix_values.shape[0]
        else:
            label_range = 0
        labels = np.arange(label_range)

    per_class_recall = [
        ScoreMetric(class_, value=recall)
        for (class_, recall) in zip(labels, recall_per_class(confusion_matrix.matrix_values))
    ]
    return BarMetricsGroup(
        metrics=per_class_recall,
        visualization_info=BarChartInfo(
            name="Recall per class",
            palette=ColorPalette.LABEL,
        ),
    )


def __get_gt_and_predicted_label_indices_from_resultset(
    resultset: ResultSetEntity,
) -> Tuple[List[Set[int]], List[Set[int]]]:
    """Returns the label index lists for the ground truth and prediction datasets in a tuple.

    Args:
        resultset: the input resultset

    Returns:
        a tuple containing two lists. The first list contains the ground truth label indices,
        and the second contains the prediction label indices.
    """
    true_label_idx = []
    predicted_label_idx = []

    gt_dataset: DatasetEntity = resultset.ground_truth_dataset
    pred_dataset: DatasetEntity = resultset.prediction_dataset
    gt_dataset.sort_items()
    pred_dataset.sort_items()

    # Iterate over each dataset item, and collect the labels for this item (pred and gt)
    task_labels = resultset.model.configuration.get_label_schema().get_labels(include_empty=True)
    for gt_item, pred_item in zip(gt_dataset, pred_dataset):
        if isinstance(gt_item, DatasetItemEntity) and isinstance(pred_item, DatasetItemEntity):
            true_label_idx.append({task_labels.index(label) for label in gt_item.get_roi_labels(task_labels)})
            predicted_label_idx.append({task_labels.index(label) for label in pred_item.get_roi_labels(task_labels)})
    return true_label_idx, predicted_label_idx
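# --- Illustrative sketch, not part of the original module ---
# A minimal, hypothetical example of feeding an unnormalized confusion matrix with
# made-up values and label names into the per-class bar chart helpers defined above.
def _per_class_bars_example() -> Tuple[MetricsGroup, MetricsGroup]:
    matrix = MatrixMetric(
        name="example label group",
        matrix_values=np.array([[8, 2], [1, 9]]),
        row_labels=["cat", "dog"],
        column_labels=["cat", "dog"],
        normalize=False,
    )
    return precision_metrics_group(matrix), recall_metrics_group(matrix)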
def __compute_unnormalized_confusion_matrices_for_label_group(
    true_label_idx: List[Set[int]],
    predicted_label_idx: List[Set[int]],
    label_group: LabelGroup,
    task_labels: List[LabelEntity],
) -> MatrixMetric:
    """Returns the matrix metric for a certain label group.

    Args:
        true_label_idx (List[Set[int]]): list of sets of label indices for the ground truth dataset
        predicted_label_idx (List[Set[int]]): list of sets of label indices for the prediction dataset
        label_group (LabelGroup): label group to compute the confusion matrix for
        task_labels (List[LabelEntity]): list of labels for the task

    Returns:
        MatrixMetric: confusion matrix for the label group
    """
    map_task_labels_idx_to_group_idx = {
        task_labels.index(label): i_group for i_group, label in enumerate(label_group.labels)
    }
    set_group_labels_idx = set(map_task_labels_idx_to_group_idx.keys())
    group_label_names = [task_labels[label_idx].name for label_idx in set_group_labels_idx]

    if len(group_label_names) == 1:  # Single-class
        # we use "not" so that presence of the class maps to index 0, while its absence maps to index 1
        y_true = [int(not set_group_labels_idx.issubset(true_labels)) for true_labels in true_label_idx]
        y_pred = [int(not set_group_labels_idx.issubset(pred_labels)) for pred_labels in predicted_label_idx]
        group_label_names += [f"~ {group_label_names[0]}"]
        column_labels = group_label_names.copy()
        remove_last_row = False
    else:  # Multiclass
        undefined_idx = len(group_label_names)  # to define missing value
        # find the intersections between GT and task labels, and Prediction and task labels
        true_intersections = [true_labels.intersection(set_group_labels_idx) for true_labels in true_label_idx]
        pred_intersections = [pred_labels.intersection(set_group_labels_idx) for pred_labels in predicted_label_idx]
        # map the intersection to 0-index value
        y_true = [
            map_task_labels_idx_to_group_idx[list(true_intersection)[0]]
            if len(true_intersection) != 0
            else undefined_idx
            for true_intersection in true_intersections
        ]
        y_pred = [
            map_task_labels_idx_to_group_idx[list(pred_intersection)[0]]
            if len(pred_intersection) != 0
            else undefined_idx
            for pred_intersection in pred_intersections
        ]
        column_labels = group_label_names.copy()
        column_labels.append("Other")
        remove_last_row = True

    matrix_data = sklearn_confusion_matrix(y_true, y_pred, labels=list(range(len(column_labels))))

    if remove_last_row:  # matrix clean up
        matrix_data = np.delete(matrix_data, -1, 0)
        if sum(matrix_data[:, -1]) == 0:
            # if none of the GT is classified as classes from other groups, clean it up too
            matrix_data = np.delete(matrix_data, -1, 1)
            column_labels.remove(column_labels[-1])

    # Use unnormalized matrix for statistics computation (accuracy, precision, recall)
    return MatrixMetric(
        name=f"{label_group.name}",
        matrix_values=matrix_data,
        row_labels=group_label_names,
        column_labels=column_labels,
        normalize=False,
    )
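# --- Illustrative sketch, not part of the original module ---
# A hypothetical walk-through of the single-class branch above: the group holds one
# label with task index 2, presence of that label maps to row/column 0 and absence
# to row/column 1, and the 2x2 matrix comes from scikit-learn.
def _single_class_matrix_example() -> np.ndarray:
    set_group_labels_idx = {2}
    true_label_idx = [{2}, set(), {2}, {0}]  # per-item ground-truth label index sets
    predicted_label_idx = [{2}, {2}, set(), {0}]  # per-item predicted label index sets

    y_true = [int(not set_group_labels_idx.issubset(true_labels)) for true_labels in true_label_idx]
    y_pred = [int(not set_group_labels_idx.issubset(pred_labels)) for pred_labels in predicted_label_idx]
    # y_true == [0, 1, 0, 1], y_pred == [0, 0, 1, 1]
    return sklearn_confusion_matrix(y_true, y_pred, labels=[0, 1])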
def compute_unnormalized_confusion_matrices_from_resultset(
    resultset: ResultSetEntity,
) -> List[MatrixMetric]:
    """Computes an (unnormalized) confusion matrix for every label group in the resultset.

    Args:
        resultset: the input resultset

    Returns:
        the computed unnormalized confusion matrices
    """
    if len(resultset.ground_truth_dataset) == 0 or len(resultset.prediction_dataset) == 0:
        raise ValueError("Cannot compute the confusion matrix of an empty result set.")

    unnormalized_confusion_matrices: List[MatrixMetric] = []
    (
        true_label_idx,
        predicted_label_idx,
    ) = __get_gt_and_predicted_label_indices_from_resultset(resultset)
    task_labels = resultset.model.configuration.get_label_schema().get_labels(include_empty=False)

    # Confusion matrix computation
    for label_group in resultset.model.configuration.get_label_schema().get_groups():
        matrix = __compute_unnormalized_confusion_matrices_for_label_group(
            true_label_idx, predicted_label_idx, label_group, task_labels
        )
        unnormalized_confusion_matrices.append(matrix)

    return unnormalized_confusion_matrices
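# --- Illustrative usage sketch, not part of the original module ---
# Assuming an existing ``resultset`` (a ResultSetEntity with non-empty ground truth and
# prediction datasets; hypothetical here), the accuracy score and dashboard metrics
# could be obtained roughly as follows:
#
#   metric = Accuracy(resultset, average=MetricAverageMethod.MACRO)
#   performance = metric.get_performance()
#   logger.info("Accuracy: %f", performance.score.value)
#
# The per-label-group confusion matrices used internally are also available directly
# via ``compute_unnormalized_confusion_matrices_from_resultset(resultset)``.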