# Source code for ForeTiS.model._base_model

import abc
import pathlib
import typing

import joblib
import numpy as np
import optuna
import pandas as pd


class BaseModel(abc.ABC):
    """
    BaseModel parent class for all models that can be used within the framework.

    Every model must be based on :obj:`~ForeTiS.model._base_model.BaseModel` directly or BaseModel's child classes,
    e.g. :obj:`~ForeTiS.model._sklearn_model.SklearnModel` or :obj:`~ForeTiS.model._torch_model.TorchModel`

    ** Attributes **

        * Instance attributes *

        - optuna_trial (*optuna.trial.Trial*): trial of optuna for optimization
        - datasets: all datasets that are available; its ``featuresets`` attribute is iterated to look up
          feature sets by their ``name``
        - n_outputs (*int*): number of outputs of the prediction model
        - all_hyperparams (*dict*): dictionary with all hyperparameters with related info that can be tuned
          (structure see :obj:`~ForeTiS.model._base_model.BaseModel.define_hyperparams_to_tune`)
        - model: model object
        - target_column: the target column for the prediction
        - pca_transform: whether pca transformation is applied in this trial (value suggested by optuna if
          pca is optimized, otherwise False). NOTE: after construction this instance attribute shadows the
          method of the same name.
        - featureset: the recent featureset (only set if a feature set with a matching name exists)

    :param optuna_trial: Trial of optuna for optimization
    :param datasets: all datasets that are available
    :param featureset_name: the name of the recent feature set
    :param target_column: the target column for the prediction
    :param pca_transform: whether conducting pca transformation should be a hyperparameter to optimize or not
    :param optimize_featureset: whether the feature set should be optimized or not
    """

    # Constructor super class #
    def __init__(self, optuna_trial: 'optuna.trial.Trial', datasets: list, featureset_name: str,
                 pca_transform: bool, target_column: str, optimize_featureset: bool):
        self.optuna_trial = optuna_trial
        self.datasets = datasets
        self.target_column = target_column
        self.n_outputs = 1

        if not hasattr(self, 'all_hyperparams'):
            self.all_hyperparams = self.define_hyperparams_to_tune()
        else:
            # update in case common hyperparams are already defined
            self.all_hyperparams.update(self.define_hyperparams_to_tune())

        if pca_transform:
            # self.pca_transform still resolves to the method here; the assignment below
            # replaces it on the instance with the suggested boolean.
            self.all_hyperparams.update(self.pca_transform())
            self.pca_transform = self.suggest_hyperparam_to_optuna('pca')
            # already suggested -> remove so it is not suggested again with the model hyperparameters
            del self.all_hyperparams['pca']
        else:
            self.pca_transform = False

        if optimize_featureset:
            # let optuna pick the feature set; this overrides the name passed by the caller
            self.all_hyperparams.update(self.featureset_hyperparam())
            featureset_name = self.suggest_hyperparam_to_optuna('featureset')
            del self.all_hyperparams['featureset']

        # Select the first feature set whose name matches; self.featureset stays unset if none matches.
        for featureset in datasets.featuresets:
            if featureset.name == featureset_name:
                self.featureset = featureset
                break

        self.model = self.define_model()

    # Methods required by each child class #

    @abc.abstractmethod
    def define_model(self):
        """
        Method that defines the model that needs to be optimized.
        Hyperparams to tune have to be specified in all_hyperparams and suggested via suggest_hyperparam_to_optuna().
        The hyperparameters have to be included directly in the model definition to be optimized.
        e.g. if you want to optimize the number of layers, do something like

            .. code-block:: python

                n_layers = self.suggest_hyperparam_to_optuna('n_layers') # same name in define_hyperparams_to_tune()
                for layer in range(n_layers):
                    do something

        Then the number of layers will be optimized by optuna.
        """

    @abc.abstractmethod
    def define_hyperparams_to_tune(self) -> dict:
        """
        Method that defines the hyperparameters that should be tuned during optimization and their ranges.
        Required format is a dictionary with:

        .. code-block:: python

            {
                'name_hyperparam_1':
                    {
                    # MANDATORY ITEMS
                    'datatype': 'float' | 'int' | 'categorical',
                    FOR DATATYPE 'categorical':
                        'list_of_values': []  # List of all possible values
                    FOR DATATYPE ['float', 'int']:
                        'lower_bound': value_lower_bound,
                        'upper_bound': value_upper_bound,
                        # OPTIONAL ITEMS (only for ['float', 'int']):
                        'log': True | False  # sample value from log domain or not
                        'step': step_size # step of discretization.
                                            # Caution: cannot be combined with log=True
                                                            # - in case of 'float' in general and
                                                            # - for step!=1 in case of 'int'
                    },
                'name_hyperparam_2':
                    {
                    ...
                    },
                ...
                'name_hyperparam_k':
                    {
                    ...
                    }
            }

        If you want to use a similar hyperparameter multiple times (e.g. Dropout after several layers),
        you only need to specify the hyperparameter once. Individual parameters for every suggestion will be created.
        """

    @abc.abstractmethod
    def retrain(self, retrain: pd.DataFrame):
        """
        Method that runs the retraining of the model

        :param retrain: data for retraining
        """

    @abc.abstractmethod
    def update(self, update: pd.DataFrame, period: int):
        """
        Method that runs the updating of the model

        :param update: data for updating
        :param period: integer passed along with the update data; its exact meaning is defined by the
            implementing child class (NOTE(review): presumably the refit/seasonal period - confirm)
        """

    @abc.abstractmethod
    def predict(self, X_in: pd.DataFrame) -> np.ndarray:
        """
        Method that predicts target values based on the input X_in

        :param X_in: feature matrix as input

        :return: numpy array with the predicted values
        """

    @abc.abstractmethod
    def train_val_loop(self, train: pd.DataFrame, val: pd.DataFrame) -> np.ndarray:
        """
        Method that runs the whole training and validation loop

        :param train: data for the training
        :param val: data for validation

        :return: predictions on validation set
        """

    ### General methods ###

    def suggest_hyperparam_to_optuna(self, hyperparam_name: str):
        """
        Suggest a hyperparameter of hyperparam_dict to the optuna trial to optimize it.

        If you want to add a parameter to your model / in your pipeline to be optimized, you need to call this method

        :param hyperparam_name: name of the hyperparameter to be tuned
            (see :obj:`~ForeTiS.model._base_model.BaseModel.define_hyperparams_to_tune`)

        :return: suggested value

        :raises Exception: if the hyperparameter is unknown, its datatype is invalid or mandatory items
            ('list_of_values' resp. 'lower_bound' / 'upper_bound') are missing in its specification
        """
        # Get specification of the hyperparameter
        if hyperparam_name not in self.all_hyperparams:
            raise Exception(hyperparam_name + ' not found in all_hyperparams dictionary.')
        spec = self.all_hyperparams[hyperparam_name]

        # Check if the hyperparameter already exists in the trial and needs a suffix
        # (e.g. same dropout specification for multiple layers that should be optimized individually)
        optuna_param_name = hyperparam_name
        counter = 0
        while optuna_param_name in self.optuna_trial.params:
            counter += 1
            optuna_param_name = hyperparam_name + '_' + str(counter)

        # Read dict with specification for the hyperparameter and suggest it to the trial
        datatype = spec['datatype']
        if datatype == 'categorical':
            if 'list_of_values' not in spec:
                raise Exception(
                    '"list of values" for ' + hyperparam_name + ' not in hyperparams_dict. '
                    'Check define_hyperparams_to_tune() of the model.'
                )
            return self.optuna_trial.suggest_categorical(name=optuna_param_name, choices=spec['list_of_values'])

        if datatype in ['float', 'int']:
            # default discretization: continuous for floats, unit steps for ints
            step = spec.get('step', None if datatype == 'float' else 1)
            log = spec.get('log', False)
            if 'lower_bound' not in spec or 'upper_bound' not in spec:
                raise Exception(
                    '"lower_bound" or "upper_bound" for ' + hyperparam_name + ' not in all_hyperparams. '
                    'Check define_hyperparams_to_tune() of the model.'
                )
            suggest = self.optuna_trial.suggest_int if datatype == 'int' else self.optuna_trial.suggest_float
            return suggest(
                name=optuna_param_name, low=spec['lower_bound'], high=spec['upper_bound'], step=step, log=log
            )

        # str() so that a non-string datatype still yields the intended error instead of a TypeError
        raise Exception(
            str(datatype) + ' is not a valid parameter. Check define_hyperparams_to_tune() of the model.'
        )

    def suggest_all_hyperparams_to_optuna(self) -> dict:
        """
        Some models accept a dictionary with the model parameters.
        This method suggests all hyperparameters in all_hyperparams and gives back the parameter dictionary
        of the optuna trial.

        :return: dictionary with suggested hyperparameters (the ``params`` of the optuna trial)
        """
        for param_name in self.all_hyperparams:
            self.suggest_hyperparam_to_optuna(param_name)
        return self.optuna_trial.params

    def featureset_hyperparam(self):
        """
        Method that defines the feature set hyperparameter that should be tuned during optimization and its ranges.

        :return: hyperparameter specification with the names of all available feature sets as categorical values
        """
        featuresets_names = [featureset.name for featureset in self.datasets.featuresets]
        return {
            'featureset': {
                'datatype': 'categorical',
                'list_of_values': featuresets_names
            }
        }

    def pca_transform(self):
        """
        Method that defines the pca transform hyperparameter that should be tuned during optimization and its ranges.

        :return: hyperparameter specification with True and False as categorical values
        """
        return {
            'pca': {
                'datatype': 'categorical',
                'list_of_values': [True, False]
            }
        }

    def save_model(self, path: typing.Union[str, pathlib.Path], filename: str):
        """
        Persist the whole model object on a hard drive
        (can be loaded with :obj:`~ForeTiS.model._model_functions.load_model`)

        :param path: path where the model will be saved (string or pathlib.Path)
        :param filename: filename of the model
        """
        # Wrap in Path so that plain strings work as well; a str has no joinpath().
        joblib.dump(self, pathlib.Path(path).joinpath(filename), compress=3)