Source code for gojo.plotting.basic

# Module with basic plotting functions.
#
# Author: Fernando García Gutiérrez
# Email: ga.gu.fernando.concat@gmail.com
#
# STATUS: completed, functional, and documented.
#
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

from ..util.validation import (
    checkMultiInputTypes,
    checkInputType
)


[docs]def linePlot( *dfs, x: str, y: str, err: str = None, err_alpha: float = 0.3, labels: list = None, ax: mpl.axes.Axes = None, figsize: tuple = (6, 3.5), style: str = 'ggplot', dpi: int = 100, colors: list or str = None, title: str = '', title_size: int or float = 15, title_pad: int = 15, hide_legend: bool = False, legend_pos: str = 'upper right', legend_size: int or float = 12, xlabel_size: float or int = 13, ylabel_size: float or int = 13, grid_alpha: float = 0.5, yvmin: float = None, yvmax: float = None, xvmin: float = None, xvmax: float = None, lw: float or int or list = None, ls: str or list = None, save: str = None, save_kw: dict = None, show: bool = True): """ Line plot function. Parameters ---------- *dfs : pd.DataFrame Input dataframes with the data to be represented. x : str X-axis variable. Must be present in the input dataframes. y : str Y-axis variable. Must be present in the input dataframes. err : str Variable indicating the errors associated with the lines. Must be present in the input dataframes. err_alpha : float, default=0.3 Opacity used to plot the errors. labels : list, default=None Labels used for identifying the input dataframes. ax : matplotlib.axes.Axes, default=None Axes used to represent the figure. figsize : tuple, default=(6, 3.5) Figure size. style : str, default='ggplot' Plot styling. (see 'matplotlib.pyplot.styles') dpi : int, default=100 Figure dpi. colors : list or str, default=None Colors used for identifying the dataframe information. A string colormap can be provided. title : str, default='' Plot title. title_size : int or float, default=0.5 Title font size. title_pad : int, default=15 Title pad. hide_legend : bool, default=False Parameter indicating whether to hide the legend. legend_pos : str, default='upper right' Legend position. legend_size : int, default=12 Legend size. yvmin : float, default=None Minimum value in the y-axis. yvmax : float, default=None Maximum value in the y-axis. xvmin : float, default=None Minimum value in the x-axis. xvmax : float, default=None Maximum value in the x-axis. xlabel_size : float or int, default=13 X-axis label size. ylabel_size : float ot int, default=13 Y-axis label size. grid_alpha : float, default=0.5 Grid opacity. lw : float or int or list, default=None Line(s) width(s). ls : str or list, default=None Line(s) styles(s). save : str, default=None Parameter indicating whether to save the generated plot. If None (default) the plot will not be saved. save_kw : dict, default=None Optional parameters for saving the plot. This parameter will not have effect if the save parameter was set as None. show : bool, default=True Parameter indicating whether to save the generated plot. Examples -------- >>> from gojo import plotting >>> >>> # train_info, test_info are pandas dataframes returned by gojo.deepl.fitNeuralNetwork >>> plotting.linePlot( >>> train_info, valid_info, >>> x='epoch', y='loss (mean)', err='loss (std)', >>> labels=['Train', 'Validation'], >>> title='Model convergence', >>> ls=['solid', 'dashed'], >>> style='default', legend_pos='center right') >>> """ checkMultiInputTypes( ('x', x, [str]), ('y', y, [str]), ('err', err, [str, type(None)]), ('err_alpha', err_alpha, [float]), ('labels', labels, [list, type(None)]), ('hide_legend', hide_legend, [bool]), ('legend_pos', legend_pos, [str]), ('legend_size', legend_size, [int, float]), ('yvmin', yvmin, [float, type(None)]), ('yvmax', yvmax, [float, type(None)]), ('xvmin', xvmin, [float, type(None)]), ('xvmax', xvmax, [float, type(None)]), ('xlabel_size', xlabel_size, [int, float]), ('ylabel_size', ylabel_size, [int, float]), ('title', title, [str]), ('title_size', title_size, [int, float]), ('title_pad', title_pad, [int, float]), ('figsize', figsize, [tuple]), ('ax', ax, [mpl.axes.Axes, type(None)]), ('colors', colors, [list, str, type(None)]), ('grid_alpha', grid_alpha, [float]), ('lw', lw, [list, float, int, type(None)]), ('ls', ls, [list, str, type(None)]), ('dpi', dpi, [int]), ('style', style, [str]), ('save', save, [str, type(None)]), ('save_kw', save_kw, [dict, type(None)]), ('show', show, [bool])) # check input data types for i, df in enumerate(dfs): checkInputType('df (%d)' % i, df, [pd.DataFrame]) if x not in df.columns: raise TypeError('Missing "x" variable "%s". Available variables are: %r' % (x, list(df.columns))) if y not in df.columns: raise TypeError('Missing "y" variable "%s". Available variables are: %r' % (y, list(df.columns))) if not (err is None or err in df.columns): raise TypeError('Missing "err" variable "%s". Available variables are: %r' % (err, list(df.columns))) if labels is None: labels = ['(%d)' % (i + 1) for i in range(len(dfs))] if lw is None: lw = [None] * len(dfs) elif isinstance(lw, (float, int)): lw = [lw] * len(dfs) if len(dfs) != len(lw): raise TypeError( 'Missmatch shape between input dataframes (%d) and "lw" (%d)' % (len(dfs), len(lw))) if ls is None: ls = ['solid'] * len(dfs) elif isinstance(ls, str): ls = [ls] * len(dfs) if len(dfs) != len(ls): raise TypeError( 'Missmatch shape between input dataframes (%d) and "ls" (%d)' % (len(dfs), len(ls))) if len(dfs) != len(labels): raise TypeError( 'Missmatch shape between input dataframes (%d) and "legend_labels" (%d)' % (len(dfs), len(labels))) if isinstance(colors, str): cmap = plt.get_cmap(colors, len(labels) + 1) colors = [mpl.colors.to_hex(cmap(i)) for i in range(len(labels))] if not (colors is None or len(dfs) == len(colors)): raise TypeError( 'Missmatch shape between input dataframes (%d) and "colors" (%d)' % (len(dfs), len(colors))) # plot information with plt.style.context(style): if ax is None: fig, ax = plt.subplots(figsize=figsize) fig.set_dpi(dpi) for i, (label, df) in enumerate(zip(labels, dfs)): # select color (if specified) color = None if colors is None else colors[i] # plot line ax.plot( df[x].values, df[y].values, label=label, lw=lw[i], ls=ls[i], color=color) # plot error if err is not None: ax.fill_between( df[x].values, df[y].values + df[err].values, df[y].values - df[err].values, color=color, alpha=err_alpha) # set axis limits ax.set_ylim(bottom=yvmin, top=yvmax) ax.set_xlim(left=xvmin, right=xvmax) # set legend if not hide_legend: ax.legend(loc=legend_pos, prop=dict(size=legend_size)) # figure layout ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) ax.grid(alpha=grid_alpha) ax.set_xlabel(x, size=xlabel_size) ax.set_ylabel(y, size=ylabel_size) ax.set_title(title, size=title_size, pad=title_pad) # save figure if specified if save: save_kw = {} if save_kw is None else save_kw plt.savefig(save, **save_kw) if show: plt.show()
[docs]def barPlot( *dfs, x: str, y: str, labels: list = None, colors: list or str = None, ax: mpl.axes.Axes = None, figsize: tuple = (6, 3.5), style: str = 'ggplot', dpi: int = 100, err_capsize: float or int = 0.15, err_lw: float or int = 1.5, grid_alpha: float = 0.15, xlabel_size: int or float = 13, ylabel_size: int or float = 13, title: str = '', title_size: int = 15, title_pad: int = 15, hide_legend: bool = False, legend_pos: str = 'upper right', legend_bbox_to_anchor: tuple = None, legend_size: int = 12, yvmin: float = None, yvmax: float = None, xvmin: float = None, xvmax: float = None, hide_xlabel: bool = False, hide_ylabel: bool = False, xaxis_tick_size: int or float = 12, yaxis_tick_size: int or float = 12, xaxis_rotation: float or int = 0.0, yaxis_rotation: float or int = 0.0, save: str = None, save_kw: dict = None, show: bool = True): """ Bar plot function Parameters ---------- *dfs Input dataframes with the data to be represented. x : str X-axis variable. Must be present in the input dataframes. y : str Y-axis variable. Must be present in the input dataframes. labels : list, default=None Labels used for identifying the input dataframes. colors : list or str, default=None Colors used for identifying the dataframe information. A string colormap can be provided. ax : mpl.axes.Axes, default=None Axes used to represent the figure. figsize : tuple, default=(6, 3.5) Figure size. style : str, default='ggplot' Plot styling. (see 'matplotlib.pyplot.styles') dpi : int, default=100 Figure dpi. err_capsize : float, default=0.15 Error capsize. err_lw : float, default=1.5 Error linewidth. grid_alpha : float, default=0.15 Gird lines opacity. xlabel_size : int, default=13 Size of the x-label. ylabel_size : int, default=13 Size of the y-label. title : str, default='' Plot title. title_size : int, default=15 Title font size. title_pad : int, default=15 Title pad. hide_legend : bool, default=False Parameter indicating whether to hide the legend. legend_pos : str, default='upper right' Legend position. legend_bbox_to_anchor : tuple, default=None Used for modifying the legend position relative to the position defined in `legend_pos`. legend_size : int, default=12 Legend size. yvmin : float, default=None Minimum value in the y-axis. yvmax : float, default=None Maximum value in the y-axis. xvmin : float, default=None Minimum value in the x-axis. xvmax : float, default=None Maximum value in the x-axis. hide_xlabel : bool, default=False Parameter indicating whether to hide the x-axis label. hide_ylabel : bool, default=False Parameter indicating whether to hide the y-axis label. xaxis_tick_size : int, default=12 Controls the x-axis tick size. yaxis_tick_size : int, default=12 Controls the y-axis tick size. xaxis_rotation : float or int, default=0.0 Y-axis tick rotation. yaxis_rotation : float or int, default=0.0 Y-axis tick rotation. save : str, default=None Parameter indicating whether to save the generated plot. If None (default) the plot will not be saved. save_kw : dict, default=None Optional parameters for saving the plot. This parameter will not have effect if the save parameter was set as None. show : bool, default=True Parameter indicating whether to save the generated plot. Examples -------- >>> from gojo import core >>> from gojo import plotting >>> >>> # i.e., compute model performance metrics >>> scores_1 = report1.getScores( >>> core.getDefaultMetrics( >>> binary_classification, bin_threshold=0.5))['test'] >>> >>> scores_2 = report1.getScores( >>> core.getDefaultMetrics( >>> binary_classification, bin_threshold=0.5))['test'] >>> >>> # adapt for barplot representation >>> scores_1 = scores_1.melt() >>> scores_2 = scores_2.melt() >>> >>> >>> plotting.barPlot( >>> scores_1, scores_2, >>> x='variable', y='value', >>> labels=['Model 1', 'Model 2'], >>> title='Cross-validation results' >>> ) """ checkMultiInputTypes( ('x', x, [str]), ('y', y, [str]), ('labels', labels, [list, type(None)]), ('colors', colors, [list, str, type(None)]), ('ax', ax, [mpl.axes.Axes, type(None)]), ('figsize', figsize, [tuple]), ('style', style, [str]), ('dpi', dpi, [int]), ('err_capsize', err_capsize, [float, int]), ('err_lw', err_lw, [float, int]), ('grid_alpha', grid_alpha, [float]), ('xlabel_size', xlabel_size, [int, float]), ('ylabel_size', ylabel_size, [int, float]), ('title', title, [str]), ('title_size', title_size, [int, float]), ('title_pad', title_pad, [int, float]), ('hide_legend', hide_legend, [bool]), ('legend_pos', legend_pos, [str]), ('legend_size', legend_size, [int, float]), ('legend_bbox_to_anchor', legend_bbox_to_anchor, [tuple, type(None)]), ('yvmin', yvmin, [float, type(None)]), ('yvmax', yvmax, [float, type(None)]), ('xvmin', xvmin, [float, type(None)]), ('xvmax', xvmax, [float, type(None)]), ('hide_xlabel', hide_xlabel, [bool]), ('hide_ylabel', hide_ylabel, [bool]), ('xaxis_tick_size', ylabel_size, [int, float]), ('yaxis_tick_size', ylabel_size, [int, float]), ('xaxis_rotation', xaxis_rotation, [int, float]), ('yaxis_rotation', yaxis_rotation, [int, float]), ('save', save, [str, type(None)]), ('save_kw', save_kw, [dict, type(None)]), ('show', show, [bool])) # check input data types for i, df in enumerate(dfs): checkInputType('df (%d)' % i, df, [pd.DataFrame]) if x not in df.columns: raise TypeError('Missing "x" variable "%s". Available variables are: %r' % (x, list(df.columns))) if y not in df.columns: raise TypeError('Missing "y" variable "%s". Available variables are: %r' % (y, list(df.columns))) if labels is None: labels = ['(%d)' % (i + 1) for i in range(len(dfs))] # check legend_labels consistency if len(labels) != len(dfs): raise TypeError( 'Missing labels in "labels". Number of labels provided ' \ '(%d) not match the number of input DataFrames (%d)' % (len(labels), len(dfs))) if not (colors is None or isinstance(colors, str) or len(dfs) == len(colors)): raise TypeError( 'Mismatch shape between input dataframes (%d) and "colors" (%d)' % (len(dfs), len(colors))) # merge dataframes information dfs_ = [] for label, df in zip(labels, dfs): df = df.copy() df['_label'] = label dfs_.append(df) merge_df = pd.concat(dfs_, axis=0) # plot information with plt.style.context(style): if ax is None: fig, ax = plt.subplots(figsize=figsize) fig.set_dpi(dpi) sns.barplot( data=merge_df, x=x, y=y, hue='_label', err_kws={'linewidth': err_lw}, capsize=err_capsize, ax=ax, palette=colors) # set axis limits ax.set_ylim(bottom=yvmin, top=yvmax) ax.set_xlim(left=xvmin, right=xvmax) # set legend if not hide_legend: ax.legend( loc=legend_pos, bbox_to_anchor=legend_bbox_to_anchor, prop=dict(size=legend_size)) # figure layout ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) ax.grid(alpha=grid_alpha) ax.set_xlabel('' if hide_xlabel else x, size=xlabel_size) ax.set_ylabel('' if hide_ylabel else y, size=ylabel_size) ax.set_title(title, size=title_size) plt.xticks(fontsize=xaxis_tick_size, rotation=xaxis_rotation) plt.yticks(fontsize=yaxis_tick_size, rotation=yaxis_rotation) ax.set_title(title, size=title_size, pad=title_pad) # save figure if specified if save: save_kw = {} if save_kw is None else save_kw plt.savefig(save, **save_kw) if show: plt.show()
[docs]def scatterPlot( df: pd.DataFrame, x: str, y: str, hue: str = None, hue_mapping: dict = None, ax: mpl.axes.Axes = None, figsize: tuple = (6, 4.5), style: str = 'ggplot', dpi: int = 100, maker_size: float or int = None, colors: list or str = None, title: str = '', title_size: int or float = 15, title_pad: int = 15, hide_legend: bool = False, legend_pos: str = None, legend_size: int or float = 12, xlabel_size: float or int = 13, ylabel_size: float or int = 13, grid_alpha: float = 0.5, yvmin: float = None, yvmax: float = None, xvmin: float = None, xvmax: float = None, save: str = None, save_kw: dict = None, show: bool = True): """ Scatter plot function. Parameters ---------- df : pd.DataFrame Input dataframes with the data to be represented. x : str X-axis variable. Must be present in the input dataframes. y : str Y-axis variable. Must be present in the input dataframes. hue : str ,default=None Hue variable for plotting groups. hue_mapping : dict ,default=None Hash to map group names from the hue variable in the df to user-defined names. ax : mpl.axes.Axes ,default=None Axes used to represent the figure. figsize : tuple ,default=(6, 4.5) Figure size. style : str ,default='ggplot' Plot styling. (see 'matplotlib.pyplot.styles') dpi : int ,default=100 Figure dpi. maker_size : float or int ,default=None Marker size. colors : list or str ,default=None Colors used for identifying the dataframe information. A string colormap can be provided. title : str ,default='' Plot title. title_size : int or float ,default=15 Title font size. title_pad : int ,default=15 Title pad. hide_legend : bool ,default=False Parameter indicating whether to hide the legend. legend_pos : str ,default=None Legend position. legend_size : int or float ,default=12 Legend size. xlabel_size : float or int ,default=13 X-label size. ylabel_size : float or int ,default=13 Y-label size. grid_alpha : float ,default=0.5 Opcaity of the grid lines. yvmin : float ,default=None Minimum value in the y-axis. yvmax : float ,default=None Maximum value in the y-axis. xvmin : float ,default=None Minimum value in the x-axis. xvmax : float ,default=None Maximum value in the x-axis. save : str ,default=None Parameter indicating whether to save the generated plot. If None (default) the plot will not be saved. save_kw : dict ,default=None Optional parameters for saving the plot. This parameter will not have effect if the save parameter was set as None. show : bool ,default=True Parameter indicating whether to save the generated plot. Examples -------- >>> import pandas as pd >>> from sklearn import datasets >>> from sklearn.preprocessing import StandardScaler >>> from sklearn.decomposition import PCA >>> from gojo import plotting >>> >>> # load test dataset (Wine) >>> wine_dt = datasets.load_wine() >>> data = StandardScaler().fit_transform(wine_dt['data']) >>> PCs = PCA(n_components=2).fit_transform(data) >>> PCs = pd.DataFrame(PCs, columns=['PC1', 'PC2']) >>> PCs['target'] = wine_dt['target'] >>> >>> plotting.scatterPlot( >>> df=PCs, >>> x='PC1', >>> y='PC2', >>> hue='target', >>> hue_mapping={0: 'C0', 1: 'C1', 2: 'C2'}) >>> """ checkMultiInputTypes( ('df', df, [pd.DataFrame]), ('x', x, [str]), ('y', y, [str]), ('hue', hue, [str, type(None)]), ('hue_mapping', hue_mapping, [dict, type(None)]), ('ax', ax, [mpl.axes.Axes, type(None)]), ('figsize', figsize, [tuple]), ('style', style, [str]), ('dpi', dpi, [int]), ('maker_size', maker_size, [int, float, type(None)]), ('colors', colors, [list, str, type(None)]), ('title', title, [str]), ('title_size', title_size, [int, float]), ('title_pad', title_pad, [int, float]), ('hide_legend', hide_legend, [bool]), ('legend_pos', legend_pos, [str, type(None)]), ('legend_size', legend_size, [int, float]), ('xlabel_size', xlabel_size, [int, float]), ('ylabel_size', ylabel_size, [int, float]), ('grid_alpha', grid_alpha, [float]), ('yvmin', yvmin, [float, type(None)]), ('yvmax', yvmax, [float, type(None)]), ('xvmin', xvmin, [float, type(None)]), ('xvmax', xvmax, [float, type(None)]), ('save', save, [str, type(None)]), ('save_kw', save_kw, [dict, type(None)]), ('show', show, [bool])) # check x, y and (optionally) hue variables if x not in df.columns: raise TypeError('Missing "x" variable "%s". Available variables are: %r' % (x, list(df.columns))) if y not in df.columns: raise TypeError('Missing "y" variable "%s". Available variables are: %r' % (y, list(df.columns))) if hue is not None: if hue not in df.columns: raise TypeError('Missing "hue" variable "%s". Available variables are: %r' % (hue, list(df.columns))) # avoid inplace modifications df = df.copy() # rename hue if hue_mapping is provided if not (hue is None or hue_mapping is None): df[hue] = df[hue].apply(lambda v: hue_mapping.get(v, v)) # get the number of levels n_labels = 1 hue_levels = [None] if hue is not None: n_labels = len(df[hue].unique()) hue_levels = df[hue].unique() if isinstance(colors, str): cmap = plt.get_cmap(colors, n_labels + 1) colors = [mpl.colors.to_hex(cmap(i)) for i in range(n_labels)] with plt.style.context(style): if ax is None: fig, ax = plt.subplots(figsize=figsize) fig.set_dpi(dpi) for i in range(n_labels): # select color (if specified) color = None if colors is None else colors[i] # separate the data to represent if hue is not None: df_i = df.loc[df[hue] == hue_levels[i]] else: df_i = df ax.scatter( df_i[x].values, df_i[y].values, label=hue_levels[i], s=maker_size, color=color) # set legend if not hide_legend and hue is not None: ax.legend(loc=legend_pos, prop=dict(size=legend_size)) # set axis limits ax.set_ylim(bottom=yvmin, top=yvmax) ax.set_xlim(left=xvmin, right=xvmax) # figure layout ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) ax.grid(alpha=grid_alpha) ax.set_xlabel(x, size=xlabel_size) ax.set_ylabel(y, size=ylabel_size) ax.set_title(title, size=title_size, pad=title_pad) # save figure if specified if save: save_kw = {} if save_kw is None else save_kw plt.savefig(save, **save_kw) if show: plt.show()