Source code for gojo.core.report

# Module containing the code used to collect the results of the model evaluation.
#
# Author: Fernando García Gutiérrez
# Email: ga.gu.fernando.concat@gmail.com
#
# STATUS: completed and functional
#
import warnings

import numpy as np
import pandas as pd
from copy import deepcopy

from ..interfaces import Dataset
from ..core.evaluation import (
    getScores,
    Metric
)
from ..util.validation import (
    checkMultiInputTypes,
    checkInputType
)
from ..util.io import pprint


class CVReport(object):
    """ Object returned by the subroutines defined in :py:mod:`gojo.core.loops` with the results of
    the cross-validation. """

    # Flags used to identify the columns of the generated dataframes where the predictions and true
    # labels are located
    _PRED_LABELS_FLAG = 'pred_labels'
    _TRUE_LABELS_FLAG = 'true_labels'
    _N_FOLD_FLAG = 'n_fold'
    _INDICES_FLAG = 'indices'

    def __init__(self, raw_results: list, X_dataset: Dataset, y_dataset: Dataset, n_fold_key: str,
                 pred_test_key: str, true_test_key: str, pred_train_key: str, true_train_key: str,
                 test_idx_key: str, train_idx_key: str, trained_model_key: str,
                 fitted_transforms_key: str):
        checkMultiInputTypes(
            ('raw_results', raw_results, [list]),
            ('n_fold_key', n_fold_key, [str]),
            ('pred_test_key', pred_test_key, [str]),
            ('true_test_key', true_test_key, [str]),
            ('pred_train_key', pred_train_key, [str]),
            ('true_train_key', true_train_key, [str]),
            ('test_idx_key', test_idx_key, [str]),
            ('train_idx_key', train_idx_key, [str]),
            ('trained_model_key', trained_model_key, [str]),
            ('fitted_transforms_key', fitted_transforms_key, [str]),
        )

        if len(raw_results) == 0:
            raise TypeError('gojo.core.report.CVReport input results are empty.')

        for i in range(len(raw_results)):
            # fold results should be tuples of (key, value) pairs
            checkInputType('raw_results[%d]' % i, raw_results[i], [tuple])
            for ii in range(len(raw_results[i])):
                if len(raw_results[i][ii]) != 2:
                    raise TypeError('Input results for index "%d" are not a two-length tuple' % i)
                # the first tuple element should be a string
                checkInputType('raw_results[%d][%d][0]' % (i, ii), raw_results[i][ii][0], [str])

        # stack all test/train predictions
        test_preds = {}
        train_preds = {}
        trained_models = {}
        fitted_transforms = {}
        for fold_results in raw_results:
            fold_results_dict = dict(fold_results)  # transform the list of tuples into a dict
            n_fold = fold_results_dict[n_fold_key]

            # guard against duplicated folds
            assert n_fold not in test_preds.keys(), \
                'Duplicated key in gojo.core.report.CVReport.__init__ (1)'
            assert n_fold not in train_preds.keys(), \
                'Duplicated key in gojo.core.report.CVReport.__init__ (2)'

            # check prediction types
            checkMultiInputTypes(
                ('[fold -> %d] (test) pred_labels' % n_fold, fold_results_dict[pred_test_key],
                 [np.ndarray]),
                ('[fold -> %d] (test) true_labels' % n_fold, fold_results_dict[true_test_key],
                 [np.ndarray]),
                ('[fold -> %d] (test) indices' % n_fold, fold_results_dict[test_idx_key],
                 [np.ndarray]),
                ('[fold -> %d] (train) pred_labels' % n_fold, fold_results_dict[pred_train_key],
                 [np.ndarray, type(None)]),
                ('[fold -> %d] (train) true_labels' % n_fold, fold_results_dict[true_train_key],
                 [np.ndarray, type(None)]),
                ('[fold -> %d] (train) indices' % n_fold, fold_results_dict[train_idx_key],
                 [np.ndarray, type(None)]),
            )

            # process test-predictions
            test_preds[n_fold] = self._processRawPredictions(
                in_data=fold_results_dict, pred_key=pred_test_key, true_key=true_test_key,
                index_key=test_idx_key)

            # process train-predictions
            train_preds[n_fold] = self._processRawPredictions(
                in_data=fold_results_dict, pred_key=pred_train_key, true_key=true_train_key,
                index_key=train_idx_key)

            # save the trained models and fitted transforms
            trained_models[n_fold] = fold_results_dict[trained_model_key]
            fitted_transforms[n_fold] = fold_results_dict[fitted_transforms_key]

        self.test_preds = test_preds
        self.train_preds = train_preds
        self.X = X_dataset
        self.y = y_dataset
        self._trained_models = trained_models
        self._fitted_transforms = fitted_transforms
        self._metadata = {}

    @property
    def metadata(self) -> dict:
        """ Return the report metadata. """
        return deepcopy(self._metadata)

    def getTestPredictions(self) -> pd.DataFrame:
        """ Function that returns a dataframe with the model predictions, indices, and true labels
        for the test set.

        Returns
        -------
        test_predictions : pd.DataFrame
            Model predictions over the test set.
        """
        return self._convertPredDict2Df(self.test_preds)
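
    # Illustrative usage (not executed): assuming `cv_report` comes from a gojo.core.loops
    # subroutine, the returned dataframe is indexed by (n_fold, indices) and contains the
    # 'pred_labels*' and 'true_labels*' columns (one column per class when the raw predictions
    # are two-dimensional):
    #
    # >>> test_df = cv_report.getTestPredictions()
    # >>> test_df.loc[0]          # predictions of the first fold
    # >>> test_df['true_labels']  # stacked true labels (one-dimensional case)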

    def getTrainPredictions(self, supress_warnings: bool = False) -> pd.DataFrame or None:
        """ Function that returns a dataframe with the model predictions, indices, and true labels
        for the train set. Predictions will only be returned if they are available: some subroutines
        of :py:mod:`gojo.core.loops` do not save the predictions made on the training set, or leave
        that decision to the user.

        Parameters
        ----------
        supress_warnings : bool, default=False
            Silence the warning raised when no training predictions have been made.

        Returns
        -------
        train_predictions : pd.DataFrame or None
            Model predictions over the train set.
        """
        return self._convertPredDict2Df(self.train_preds, supress_warnings=supress_warnings)

    def getTrainedModels(self, copy: bool = True) -> dict:
        """ Function that returns the trained models if they have been saved by the
        :py:mod:`gojo.core.loops` subroutine.

        Parameters
        ----------
        copy : bool, default=True
            Indicates whether to return a deepcopy of the models (using `copy.deepcopy`) or the
            saved models directly. Defaults to True to avoid inplace modifications.

        Returns
        -------
        trained_models : dict or None
            Trained models, or None if the models were not saved.
        """
        if copy:
            trained_models_copy = {
                n_fold: deepcopy(model) if model is not None else model
                for n_fold, model in self._trained_models.items()
            }
            return trained_models_copy

        return self._trained_models

    def getFittedTransforms(self, copy: bool = True) -> dict:
        """ Function that returns the fitted transforms if they have been saved by the
        :py:mod:`gojo.core.loops` subroutine.

        Parameters
        ----------
        copy : bool, default=True
            Indicates whether to return a deepcopy of the transforms (using `copy.deepcopy`) or the
            saved transforms directly. Defaults to True to avoid inplace modifications.

        Returns
        -------
        fitted_transforms : dict or None
            Fitted transforms, or None if the transforms were not saved.
        """
        if copy:
            fitted_transforms_copy = {
                n_fold: deepcopy(trans) if trans is not None else trans
                for n_fold, trans in self._fitted_transforms.items()
            }
            return fitted_transforms_copy

        return self._fitted_transforms
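
    # Illustrative usage (not executed): with copy=False the returned dict exposes the objects
    # stored by the report, so inplace modifications propagate back to it. The fold numbering
    # matches the keys used for the predictions:
    #
    # >>> models = cv_report.getTrainedModels()            # deep copies, safe to mutate
    # >>> shared = cv_report.getTrainedModels(copy=False)  # shared references
    # >>> transforms_fold_0 = cv_report.getFittedTransforms()[0]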

    def getScores(self, metrics: list, loocv: bool = False, supress_warnings: bool = False) -> dict:
        """ Method used to calculate performance metrics for each fold from a list of metrics
        (:class:`gojo.core.evaluation.Metric` instances). If the subroutine from
        :py:mod:`gojo.core.loops` performed a leave-one-out cross-validation, you must set the
        `loocv` parameter to True.

        Parameters
        ----------
        metrics : list
            List of :class:`gojo.core.evaluation.Metric` instances.

        loocv : bool, default=False
            Parameter indicating if the predictions correspond to a LOOCV schema.

        supress_warnings : bool, default=False
            Indicates whether to suppress the possible warnings returned by the method.

        Returns
        -------
        performance_metrics : dict
            Dictionary with the performance associated with the test data (identified with the
            'test' key) and with the training data (identified with the 'train' key).

        Examples
        --------
        >>> from gojo import core
        >>>
        >>> # ... cv_report = core.loops.evalCrossVal(...)
        >>> scores = cv_report.getScores(core.getDefaultMetrics('binary_classification', bin_threshold=0.5))
        >>>
        """
        # check input parameters
        checkInputType('metrics', metrics, [list])
        for i in range(len(metrics)):
            checkInputType('metrics[%d]' % i, metrics[i], [Metric])

        # dictionary with the output metrics
        scores = {
            'test': None,
            'train': None
        }

        # compute test-performance
        test_predictions_df = self.getTestPredictions()
        scores['test'] = self._calculatePerformanceMetrics(
            predictions=test_predictions_df, metrics=metrics, loocv=loocv)

        # compute train-performance (only when training predictions are available)
        train_predictions_df = self.getTrainPredictions(supress_warnings=supress_warnings)
        if train_predictions_df is not None:
            scores['train'] = self._calculatePerformanceMetrics(
                predictions=train_predictions_df, metrics=metrics, loocv=loocv)

        return scores
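
    # Illustrative usage (not executed): each entry of the returned dict is a dataframe with one
    # row per fold plus an 'n_fold' column, so cross-fold summaries reduce to standard pandas
    # operations (the metric column names depend on the Metric instances passed in):
    #
    # >>> scores = cv_report.getScores(core.getDefaultMetrics('binary_classification'))
    # >>> scores['test'].drop(columns=['n_fold']).agg(['mean', 'std'])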

    def addMetadata(self, **kwargs):
        """ Function used to add metadata to the report. """
        for k, v in kwargs.items():
            if k in self._metadata.keys():
                warnings.warn('Overwriting metadata information for key "%s".' % k)
            self._metadata[k] = v
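
    # Illustrative usage (not executed): metadata is stored as a plain dict and read back through
    # the `metadata` property, which returns a deepcopy (mutating the returned dict does not
    # affect the report). Reusing a key emits an overwrite warning:
    #
    # >>> cv_report.addMetadata(model_name='svm', n_folds=5)
    # >>> cv_report.metadata['model_name']
    # 'svm'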

    def _calculatePerformanceMetrics(self, predictions: pd.DataFrame, metrics: list,
                                     loocv: bool) -> pd.DataFrame:
        """ Subroutine used to calculate the performance metrics over a dataframe of predictions. """
        def _getMetricsDict(_n_fold, _df: pd.DataFrame, _metrics: list) -> dict:
            # select prediction columns
            y_pred = _df[[c for c in _df.columns if c.startswith(self._PRED_LABELS_FLAG)]].values
            y_true = _df[[c for c in _df.columns if c.startswith(self._TRUE_LABELS_FLAG)]].values

            # if predictions or true labels are one-dimensional, reshape
            if y_pred.shape[1] == 1:
                y_pred = y_pred.reshape(-1)
            if y_true.shape[1] == 1:
                y_true = y_true.reshape(-1)

            # compute performance metrics
            metrics_out = getScores(y_true=y_true, y_pred=y_pred, metrics=_metrics)

            # warn if a metric is named like self._N_FOLD_FLAG: that name is reserved for
            # indicating the fold for which the metrics were calculated, so the metric value
            # will be overwritten below
            if self._N_FOLD_FLAG in metrics_out.keys():
                warnings.warn('Metric name "%s" cannot be used. Omitting metric' % self._N_FOLD_FLAG)

            # save the fold number
            metrics_out[self._N_FOLD_FLAG] = _n_fold

            return metrics_out

        # check input parameters
        checkInputType('metrics', metrics, [list])
        for i in range(len(metrics)):
            checkInputType('metrics[%d]' % i, metrics[i], [Metric])

        fold_metrics = []
        detected_loocv = True
        if loocv:
            fold_metrics.append(_getMetricsDict(0, predictions, metrics))
            detected_loocv = False  # not relevant when loocv=True
        else:
            for n_fold, fold_df in predictions.groupby(self._N_FOLD_FLAG):
                if fold_df.shape[0] > 1:
                    # at least one fold contains more than one instance, so this is not LOOCV
                    detected_loocv = False
                fold_metrics.append(_getMetricsDict(n_fold, fold_df, metrics))

        if detected_loocv:
            warnings.warn(
                'The predictions seem to be arranged as if a leave-one-out cross-validation (LOOCV) '
                'had been performed. If this is the case, you must set the loocv parameter to True '
                'in order to calculate the performance metrics. Review the '
                'gojo.core.report.CVReport.getScores method.')

        return pd.DataFrame(fold_metrics)

    def _convertPredDict2Df(self, d: dict, supress_warnings: bool = False) -> pd.DataFrame or None:
        """ Subroutine used to convert the predictions dict to a pandas DataFrame. """
        checkInputType('d', d, [dict])

        # check if predictions were performed
        all_none = True
        for v in d.values():
            for v2 in v.values():
                if v2 is not None:
                    all_none = False

        if all_none:
            if not supress_warnings:
                warnings.warn('Empty predictions. Returning None')
            return None

        # gather model predictions and true labels
        df = []
        for n_fold, preds in d.items():
            # try to convert the input dictionary to a dataframe
            try:
                key_df = pd.DataFrame(preds)
            except Exception as ex:
                pprint('Internal error in gojo.core.report.CVReport._convertPredDict2Df '
                       'function during pd.DataFrame creation.')
                raise ex

            # internal checks
            assert self._N_FOLD_FLAG not in key_df.columns, \
                '"%s" already exists in the dictionary keys (3).' % self._N_FOLD_FLAG
            assert self._INDICES_FLAG in key_df.columns, \
                '"%s" must exist in the dictionary keys (4).' % self._INDICES_FLAG

            key_df[self._N_FOLD_FLAG] = n_fold
            key_df = key_df.set_index([self._N_FOLD_FLAG, self._INDICES_FLAG])
            df.append(key_df)

        return pd.concat(df, axis=0).sort_index()

    def _processRawPredictions(self, in_data: dict, pred_key: str, true_key: str,
                               index_key: str) -> dict:
        """ Subroutine used to arrange the input raw predictions. """
        checkMultiInputTypes(
            ('in_data', in_data, [dict]),
            ('pred_key', pred_key, [str]),
            ('true_key', true_key, [str]),
            ('index_key', index_key, [str]))

        # check that the provided keys are in the input dictionary and correspond to the expected
        # input types
        for name, key in [('pred_key', pred_key), ('true_key', true_key), ('index_key', index_key)]:
            if key not in in_data.keys():
                raise KeyError(
                    'Key "%s" for parameter "%s" not in the input data keys %r' % (
                        key, name, list(in_data.keys())))
            checkInputType('in_data["%s"]' % name, in_data[key], [np.ndarray, type(None)])

        # create the output dictionary
        out_dict = {
            self._INDICES_FLAG: in_data[index_key]
        }

        # process model predictions
        predictions = in_data[pred_key]
        if predictions is None:
            out_dict[self._PRED_LABELS_FLAG] = None
        elif len(predictions.shape) == 2:
            # predictions can be one-hot encoded (e.g., probabilistic outputs)
            for i in range(predictions.shape[1]):
                out_dict['%s_%d' % (self._PRED_LABELS_FLAG, i)] = predictions[:, i]
        elif len(predictions.shape) == 1:
            # one-dimensional predictions
            out_dict[self._PRED_LABELS_FLAG] = predictions
        else:
            assert False, \
                'Predictions contain a number of dimensions different from 1 or 2 (%d)' % len(predictions.shape)

        # process true labels
        true_labels = in_data[true_key]
        if true_labels is None:
            out_dict[self._TRUE_LABELS_FLAG] = None
        elif len(true_labels.shape) == 2:
            # true labels can also be one-hot encoded
            for i in range(true_labels.shape[1]):
                out_dict['%s_%d' % (self._TRUE_LABELS_FLAG, i)] = true_labels[:, i]
        elif len(true_labels.shape) == 1:
            # one-dimensional true labels
            out_dict[self._TRUE_LABELS_FLAG] = true_labels
        else:
            assert False, \
                'True labels contain a number of dimensions different from 1 or 2 (%d)' % len(true_labels.shape)

        return out_dict