Source code for gojo.core.evaluation

# Module with tools used to calculate performance metrics.
#
# Author: Fernando García Gutiérrez
# Email: ga.gu.fernando.concat@gmail.com
#
# STATUS: completed, functional, and documented.
#
import numpy as np
import warnings
import sklearn.metrics as sk_metrics
from copy import deepcopy
from scipy.stats import spearmanr
from ..util.validation import (
    checkMultiInputTypes,
    checkInputType,
    checkCallable
)
from ..util.io import _createObjectRepresentation
from ..exception import (
    IncorrectNumberOfClasses,
    MissingArrayDimensions
)


class Metric(object):
    """ Base class used to create any type of performance evaluation metric compatible with the
    :py:mod:`gojo` framework.

    Parameters
    ----------
    name : str
        Name given to the performance metric.

    function : callable
        Function that will receive as input two `numpy.ndarray` (`y_true` and `y_pred`) and must
        return a scalar or a `numpy.ndarray`.

    bin_threshold : float or int, default=None
        Threshold used to binarize the input predictions. By default, no thresholding is applied.

    ignore_bin_threshold : bool, default=False
        If True, the parameter `bin_threshold` will be ignored.

    multiclass : bool, default=False
        Parameter indicating whether a multi-class classification metric is being computed.

    number_of_classes : int, default=None
        Parameter indicating the number of classes in a multi-class classification problem. This
        parameter will not have any effect when `multiclass=False`.

    use_multiclass_sparse : bool, default=True
        Parameter indicating whether the multi-class predictions are provided as one-hot vectors.
        This parameter will not have any effect when `multiclass=False`.

    **kwargs
        Optional parameters provided to the input callable specified by `function`.
    """
    def __init__(self, name: str, function: callable, bin_threshold: float or int = None,
                 ignore_bin_threshold: bool = False, multiclass: bool = False,
                 number_of_classes: int = None, use_multiclass_sparse: bool = True, **kwargs):
        self.name = name.replace(' ', '_')   # replace spaces
        self.function = function
        self.function_kw = kwargs
        self.bin_threshold = bin_threshold
        self.ignore_bin_threshold = ignore_bin_threshold
        self.multiclass = multiclass
        self.number_of_classes = number_of_classes
        self.use_multiclass_sparse = use_multiclass_sparse

        # parameter checking
        self._checkMetricParams()

    def _checkMetricParams(self):
        """ Subroutine that checks the metric parameters. """
        checkCallable('function', self.function)
        checkMultiInputTypes(
            ('name', self.name, [str]),
            ('bin_threshold', self.bin_threshold, [float, int, type(None)]),
            ('ignore_bin_threshold', self.ignore_bin_threshold, [bool]),
            ('multiclass', self.multiclass, [bool]),
            ('number_of_classes', self.number_of_classes, [int, type(None)]),
            ('use_multiclass_sparse', self.use_multiclass_sparse, [bool]))

        if self.multiclass and self.number_of_classes is None:
            raise TypeError(
                'gojo.core.evaluation.Metric: if "multiclass" is True the number of classes must be '
                'provided using the parameter "number_of_classes". Review metric initialization or parameters.')

    def __repr__(self):
        parameters = {
            'name': self.name,
            'function_kw': self.function_kw
        }
        if self.multiclass:
            parameters['number_of_classes'] = self.number_of_classes
            parameters['use_multiclass_sparse'] = self.use_multiclass_sparse
            parameters['bin_threshold'] = self.bin_threshold
            parameters['ignore_bin_threshold'] = self.ignore_bin_threshold
        else:
            parameters['multiclass'] = self.multiclass

        return _createObjectRepresentation('Metric', **parameters)

    def __str__(self):
        return self.__repr__()

    def __call__(self, y_true: np.ndarray, y_pred: np.ndarray,
                 bin_threshold: float = None) -> float or np.ndarray:
        """
        Parameters
        ----------
        y_true : np.ndarray
            True labels.

        y_pred : np.ndarray
            Predicted labels.

        bin_threshold : float, default=None
            Threshold used to binarize the input predictions. By default, no thresholding is
            applied. If the parameter `bin_threshold` was defined in the constructor, its value
            will be overwritten by this parameter.

        Note
        ----
        This function does not perform in-place modifications.
        """
        # parameter checking
        self._checkMetricParams()
        checkMultiInputTypes(
            ('y_true', y_true, [np.ndarray]),
            ('y_pred', y_pred, [np.ndarray]),
            ('bin_threshold', bin_threshold, [float, int, type(None)]))

        # if no bin_threshold was provided, use the value provided in the constructor
        if bin_threshold is None:
            bin_threshold = self.bin_threshold

        # ignore bin_threshold
        if self.ignore_bin_threshold:
            bin_threshold = None

        # binarize predictions
        if bin_threshold is not None:
            if self.multiclass:
                warnings.warn(
                    'gojo.core.evaluation.Metric: the bin_threshold parameter will not have any '
                    'effect when the multiclass parameter has been set to True.')
            else:
                y_pred = (y_pred > bin_threshold).astype(int)

        if self.multiclass:
            checkInputType('number_of_classes', self.number_of_classes, [int])

            if self.use_multiclass_sparse:
                # compare predictions and true labels coded as dummy variables
                # convert to dummy variables: (y_X) -> (y_X, n_classes)
                if len(y_pred.shape) == 1:   # categorical input
                    y_pred = _convertCategoricalToSparse(
                        arr=y_pred, n_classes=self.number_of_classes, var_name='y_pred')
                if len(y_true.shape) == 1:
                    y_true = _convertCategoricalToSparse(
                        arr=y_true, n_classes=self.number_of_classes, var_name='y_true')

                # check that the number of classes is correct
                _checkNumberOfClassesSparse(arr=y_pred, n_classes=self.number_of_classes, var_name='y_pred')
                _checkNumberOfClassesSparse(arr=y_true, n_classes=self.number_of_classes, var_name='y_true')
            else:
                # convert from dummy variables to categorical: (y_X, n_classes) -> (y_X)
                if len(y_pred.shape) != 1:   # one-hot input
                    y_pred = _convertSparseToCategorical(
                        arr=y_pred, n_classes=self.number_of_classes, var_name='y_pred')
                if len(y_true.shape) != 1:
                    y_true = _convertSparseToCategorical(
                        arr=y_true, n_classes=self.number_of_classes, var_name='y_true')

                # check that the number of classes is correct
                _checkNumberOfClassesCategorical(arr=y_pred, n_classes=self.number_of_classes, var_name='y_pred')
                _checkNumberOfClassesCategorical(arr=y_true, n_classes=self.number_of_classes, var_name='y_true')

        return self.function(y_true, y_pred, **self.function_kw)
def getScores(y_true: np.ndarray, y_pred: np.ndarray, metrics: list) -> dict:
    """ Function used to calculate the scores given by the metrics passed in the `metrics`
    parameter.

    Parameters
    ----------
    y_true : np.ndarray
        True labels.

    y_pred : np.ndarray
        Predicted labels.

    metrics : List[gojo.core.Metric]
        List of gojo.core.Metric instances.

    Returns
    -------
    metric_scores : dict
        Dictionary where the keys correspond to the metric names and the values to the metric
        scores.
    """
    checkMultiInputTypes(
        ('y_true', y_true, [np.ndarray]),
        ('y_pred', y_pred, [np.ndarray]),
        ('metrics', metrics, [list]))

    if len(metrics) == 0:
        raise TypeError('Empty metrics parameter.')

    for i, m in enumerate(metrics):
        checkInputType('metrics[%d]' % i, m, [Metric])

    # check for duplicated metric names
    metric_names = [m.name for m in metrics]
    if len(metric_names) != len(set(metric_names)):
        raise TypeError('Detected metrics with duplicated names (%r)' % metric_names)

    results = {}
    for metric in metrics:
        try:
            results[metric.name] = metric(y_true=y_true, y_pred=y_pred)
        except Exception as ex:
            warnings.warn('Exception in metric {}'.format(metric))
            raise ex

    return results
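# Illustrative usage of getScores (a minimal sketch, not part of the original module;
# the arrays and threshold below are hypothetical):
#
# >>> metrics = [
# >>>     Metric('accuracy', sk_metrics.accuracy_score, bin_threshold=0.5),
# >>>     Metric('f1_score', sk_metrics.f1_score, bin_threshold=0.5)]
# >>> getScores(
# >>>     y_true=np.array([0, 1, 1, 0]),
# >>>     y_pred=np.array([0.1, 0.8, 0.7, 0.3]),
# >>>     metrics=metrics)
# {'accuracy': 1.0, 'f1_score': 1.0}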
def flatFunctionInput(fn: callable):
    """ Function used to flatten the input predictions before the computation of the metric.
    Internally, the input `y_true` and `y_pred` will be flattened before calling the provided
    function.

    Example
    -------
    >>> from gojo import core
    >>> from sklearn import metrics
    >>>
    >>> metric = core.Metric(
    >>>     'accuracy',
    >>>     core.flatFunctionInput(metrics.accuracy_score),
    >>>     bin_threshold=0.5)
    >>>
    """
    checkCallable('fn', fn)

    def _wrappedFunction(y_true, y_pred, **kwargs):
        # note: Metric calls its function as function(y_true, y_pred), so the positional
        # order here must match that convention
        return fn(y_true.reshape(-1), y_pred.reshape(-1), **kwargs)

    return _wrappedFunction
def getDefaultMetrics(task: str, select: list = None, bin_threshold: float or int = None,
                      multiclass: bool = False, number_of_classes: int = None,
                      use_multiclass_sparse: bool = False) -> list:
    """ Function used to get a series of pre-defined metrics to evaluate model performance.

    Parameters
    ----------
    task : str
        Task-associated metrics. Currently available tasks are: `binary_classification` and
        `regression`.

    select : list, default=None
        Subset of the task metrics to be returned (in case you do not want to calculate all of
        them). By default, all metrics associated with the task will be returned. Note: metrics
        are identified by strings.

    bin_threshold : float or int, default=None
        Threshold used to binarize the input predictions. By default, no thresholding is applied.

    multiclass : bool, default=False
        Parameter indicating whether a multi-class classification metric is being computed.

    number_of_classes : int, default=None
        Parameter indicating the number of classes in a multi-class classification problem. This
        parameter will not have any effect when `multiclass=False`.

    use_multiclass_sparse : bool, default=False
        Parameter indicating whether the multi-class predictions are provided as one-hot vectors.
        This parameter will not have any effect when `multiclass=False`.

    Returns
    -------
    metrics : list
        List of instances of the gojo.core.Metric class.
    """
    checkMultiInputTypes(
        ('task', task, [str]),
        ('select', select, [list, type(None)]))

    if task not in DEFINED_METRICS.keys():
        raise TypeError('Unknown task "%s". Available tasks are: %r' % (task, list(DEFINED_METRICS.keys())))

    # select task-metrics
    task_metrics = deepcopy(DEFINED_METRICS[task])

    selected_task_metrics = []
    if select is not None:
        for _metric_name in select:
            if _metric_name in task_metrics.keys():
                selected_task_metrics.append(task_metrics[_metric_name])
            else:
                warnings.warn(
                    'Metric "%s" not found in task-metrics. To see available metrics use: '
                    '"gojo.core.getAvailableDefaultMetrics()"' % _metric_name)
    else:
        selected_task_metrics = list(task_metrics.values())

    # modify metrics according to the input parameters
    for metric in selected_task_metrics:
        setattr(metric, 'bin_threshold', bin_threshold)
        setattr(metric, 'multiclass', multiclass)
        setattr(metric, 'number_of_classes', number_of_classes)
        setattr(metric, 'use_multiclass_sparse', use_multiclass_sparse)

    return selected_task_metrics
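# Illustrative usage of getDefaultMetrics (a minimal sketch, not part of the original
# module). Note that the 'auc' metric is defined with ignore_bin_threshold=True, so the
# threshold set here will not affect it:
#
# >>> metrics = getDefaultMetrics(
# >>>     'binary_classification', select=['accuracy', 'auc'], bin_threshold=0.5)
# >>> [m.name for m in metrics]
# ['accuracy', 'auc']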
def getAvailableDefaultMetrics(task: str = None) -> dict:
    """ Return a dictionary with task names and the default metrics defined for those tasks. The
    tasks for which you want to see the metrics can be filtered using the `task` parameter.

    Parameters
    ----------
    task : str, default=None
        Specify a task to see only the default metrics associated with that task.

    Returns
    -------
    task_info : dict
        Dictionary where the keys correspond to the tasks and the values to the metrics defined
        by default for the associated task.
    """
    checkInputType('task', task, [str, type(None)])

    task_metric_info = {
        key: list(task_dict.keys())
        for key, task_dict in DEFINED_METRICS.items()
        if task is None or task == key}

    return task_metric_info
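# Illustrative usage of getAvailableDefaultMetrics (a minimal sketch, not part of the
# original module; the listing mirrors DEFINED_METRICS defined at the end of this file):
#
# >>> getAvailableDefaultMetrics('regression')
# {'regression': ['explained_variance', 'mse', 'mae', 'r2_score', 'pearson_correlation']}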
def _checkNumberOfClassesCategorical(arr: np.ndarray, n_classes: int, var_name: str = None):
    """ Function that checks that the number of classes is valid for a categorical input. """
    in_n_classes = np.max(arr) + 1   # labels start at 0, they represent array indices

    if np.min(arr) < 0:
        raise ValueError(
            'Class label less than 0. Indexing should start from 0. Error in variable: "{}"'.format(var_name))

    if n_classes < in_n_classes:
        raise IncorrectNumberOfClasses(
            detected_classes=in_n_classes, specified_classes=n_classes, in_var=var_name)


def _checkNumberOfClassesSparse(arr: np.ndarray, n_classes: int, var_name: str = None):
    """ Function that checks that the number of classes is valid for a sparse (one-hot) input. """
    if n_classes != arr.shape[1]:
        raise IncorrectNumberOfClasses(
            detected_classes=arr.shape[1], specified_classes=n_classes, in_var=var_name)


def _convertCategoricalToSparse(arr: np.ndarray, n_classes: int, var_name: str = None) -> np.ndarray:
    """ Convert from (n_samples) to (n_samples, n_classes). """
    _checkNumberOfClassesCategorical(arr=arr, n_classes=n_classes, var_name=var_name)

    return np.squeeze(np.eye(n_classes)[arr])


def _convertSparseToCategorical(arr: np.ndarray, n_classes: int, var_name: str = None) -> np.ndarray:
    """ Convert from (n_samples, n_classes) to (n_samples). """
    if len(arr.shape) != 2:
        raise MissingArrayDimensions(expected_n_dims=2, input_n_dims=len(arr.shape), in_var=var_name)

    if arr.shape[1] != n_classes:
        raise IncorrectNumberOfClasses(
            detected_classes=arr.shape[1], specified_classes=n_classes, in_var=var_name)

    return arr.argmax(axis=1)


def _specificity(y_true: np.ndarray, y_pred: np.ndarray):
    """ Calculate the specificity (not defined in sklearn.metrics). """
    tn, fp, fn, tp = sk_metrics.confusion_matrix(y_true, y_pred).ravel()

    return tn / (tn + fp)


def _negativePredictiveValue(y_true: np.ndarray, y_pred: np.ndarray):
    """ Calculate the negative predictive value (not defined in sklearn.metrics). """
    tn, fp, fn, tp = sk_metrics.confusion_matrix(y_true, y_pred).ravel()

    if (tn + fn) == 0:   # the model predicted all samples as positive
        return 0.0

    return tn / (tn + fn)


def _correlation(y_true: np.ndarray, y_pred: np.ndarray):
    """ Calculate the Pearson correlation coefficient between y_true and y_pred (not defined in
    sklearn.metrics). """
    return np.corrcoef(y_true, y_pred)[0, 1]


def _spearmanCorrelation(y_true: np.ndarray, y_pred: np.ndarray):
    """ Calculate the Spearman correlation between y_true and y_pred (not defined in
    sklearn.metrics). """
    return spearmanr(y_true, y_pred).correlation


# hash containing pre-defined metrics for different tasks
DEFINED_METRICS = {
    'binary_classification': dict(
        accuracy=Metric('accuracy', sk_metrics.accuracy_score),
        balanced_accuracy=Metric('balanced_accuracy', sk_metrics.balanced_accuracy_score),
        precision=Metric('precision', sk_metrics.precision_score, zero_division=0),
        recall=Metric('recall', sk_metrics.recall_score, zero_division=0),
        sensitivity=Metric('sensitivity', sk_metrics.recall_score, zero_division=0),
        specificity=Metric('specificity', _specificity),
        npv=Metric('negative_predictive_value', _negativePredictiveValue),
        f1_score=Metric('f1_score', sk_metrics.f1_score),
        auc=Metric('auc', sk_metrics.roc_auc_score, ignore_bin_threshold=True)
    ),
    'regression': dict(
        explained_variance=Metric('explained_variance', sk_metrics.explained_variance_score),
        mse=Metric('mse', sk_metrics.mean_squared_error),
        mae=Metric('mae', sk_metrics.mean_absolute_error),
        r2_score=Metric('r2', sk_metrics.r2_score),
        pearson_correlation=Metric('pearson_correlation', _correlation),
        # spearman_correlation=Metric('spearman_correlation', _spearmanCorrelation),
    )
}