Source code for adaptivesplit.sklearn_interface.utils

from ..base.utils import *
from sklearn.metrics import get_scorer as sklearn_get_scorer
from sklearn.model_selection import check_cv
from sklearn.metrics import check_scoring
import numpy as np


def get_sklearn_scorer(scoring):
    """Provides a scikit-learn scoring function given an input string.

    Args:
        scoring (str, callable, list, tuple or dict): Scikit-learn-like score
            to evaluate the performance of the cross-validated model on the
            test set.

            If scoring represents a single score, one can use:

            - a single string (see "The scoring parameter: defining model
              evaluation rules");
            - a callable (see "Defining your scoring strategy from metric
              functions") that returns a single value.

            If scoring represents multiple scores, one can use:

            - a list or tuple of unique strings;
            - a callable returning a dictionary where the keys are the metric
              names and the values are the metric scores;
            - a dictionary with metric names as keys and callables as values.

            If None, the estimator's score method is used. Defaults to
            "scoring" as specified in the configuration file.

    Returns:
        score_func (callable): Scikit-learn scoring function.
    """
    scoring = sklearn_get_scorer(scoring)

    def score_fun(x, y, **kwargs):
        # _score_func used to be passed a **kwargs argument that amounted to
        # {random_seed: None}; it crashed since the function does not accept
        # a random_seed argument (see also power.py), so kwargs are ignored.
        return scoring._sign * scoring._score_func(x, y)

    score_fun.__name__ = 'sklearn_' + scoring._score_func.__name__
    return score_fun
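
# A minimal usage sketch (illustrative, not part of the original module):
# resolve scikit-learn's built-in 'accuracy' scorer by name and apply the
# returned wrapper directly to label arrays.
#
#     >>> acc = get_sklearn_scorer('accuracy')
#     >>> acc.__name__
#     'sklearn_accuracy_score'
#     >>> acc(np.array([0, 1, 1, 0]), np.array([0, 1, 0, 0]))
#     0.75
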
def statfun_as_callable(stat_fun):
    """Returns a statistical function.

    Args:
        stat_fun (str, callable): If this is a str, it is resolved to a
            callable with get_sklearn_scorer.

    Returns:
        stat_fun (callable): Statistical function.
    """
    if isinstance(stat_fun, str):
        return get_sklearn_scorer(stat_fun)
    else:
        return stat_fun
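
# Usage sketch (illustrative): strings are resolved via get_sklearn_scorer
# above; callables pass through unchanged.
#
#     >>> statfun_as_callable(np.mean) is np.mean
#     True
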
def calculate_ci(X, ci='95%'):
    """Calculate confidence intervals.

    Args:
        X (list, np.ndarray, pd.Series): 1D array of shape (n_samples,).
        ci (str, optional): Confidence level to return. Possible inputs are
            '90%', '95%', '98%' and '99%'. Defaults to '95%'.

    Returns:
        ci_lower: Confidence interval lower bound.
        ci_upper: Confidence interval upper bound.
    """
    if ci == '90%':
        Z = 1.64
    elif ci == '95%':
        Z = 1.96
    elif ci == '98%':
        Z = 2.33
    elif ci == '99%':
        Z = 2.58
    else:
        # Guard against falling through with Z undefined.
        raise ValueError("ci must be one of '90%', '95%', '98%' or '99%'")

    moe = Z * (np.std(X) / np.sqrt(len(X)))
    ci_lower = np.mean(X) - moe
    ci_upper = np.mean(X) + moe
    return ci_lower, ci_upper
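
# Usage sketch (illustrative): the bounds implement the normal-approximation
# interval mean(X) +/- Z * std(X) / sqrt(n), so the sample mean always lies
# strictly between them whenever X has nonzero variance.
#
#     >>> rng = np.random.default_rng(0)
#     >>> x = rng.normal(size=1000)
#     >>> lo, hi = calculate_ci(x, ci='95%')
#     >>> lo < np.mean(x) < hi
#     True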