Module automl_alex.models.model_lightgbm

Expand source code
from automl_alex._base import ModelBase
import lightgbm as lgb
import numpy as np
import pandas as pd


class LightGBM(ModelBase):
    """
    Args:
        model_param (dict or None): parameters for the model.
            If None, default parameters are fetched.
    """
    __name__ = 'LightGBM'

    def _init_default_model_param(self,):
        """
        Default model_param
        """
        model_param = {'random_seed': self._random_state,
                       'num_iterations': 300,
                       'verbose': -1,
                       'device_type': 'gpu' if self._gpu else 'cpu',
                       }
        
        if self._type_of_estimator == 'classifier':
            model_param['objective'] = 'binary'
        
        if self._type_of_estimator == 'regression':
            model_param['objective'] = 'regression'
        return(model_param)


    def fit(self, X_train=None, y_train=None, cat_features=None):
        """
        Args:
            X_train (pd.DataFrame, shape (n_samples, n_features)): the input data
            y_train (pd.DataFrame, shape (n_samples, ) or (n_samples, n_outputs)): the target data
            cat_features (list or None): categorical feature names; if None, 'auto' is used
        Return:
            self
        """
        y_train = self.y_format(y_train)
        
        dtrain = lgb.Dataset(X_train, y_train,)
        
        model_param = self.model_param.copy()
        num_iterations = model_param.pop('num_iterations')

        if cat_features is None:
            cat_features = 'auto'

        self.model = lgb.train(
            model_param, 
            dtrain, 
            num_boost_round=num_iterations,
            categorical_feature=cat_features,
            )
        dtrain=None
        return(self)


    def predict(self, X=None):
        """
        Args:
            X (np.array, shape (n_samples, n_features)): the input data
        Return:
            np.array, shape (n_samples, ): predicted class labels (classifier) or predicted values (regression)
        """
        if self.model is None:
            raise Exception("Model is not fitted")

        if self._type_of_estimator == 'classifier':
            predicts = np.round(self.model.predict(X),0)
        elif self._type_of_estimator == 'regression':
            predicts = self.model.predict(X)
        return predicts


    def is_possible_predict_proba(self):
        """
        Return:
            bool, whether model can predict proba
        """
        return True


    def predict_proba(self, X=None):
        """
        Args:
            X (np.array, shape (n_samples, n_features)): the input data
        Return:
            np.array, shape (n_samples, ): probability of the positive class (binary objective)
        """
        if self.model is None:
            raise Exception("Model is not fitted")

        if not self.is_possible_predict_proba(): 
            raise Exception("Model cannot predict probability distribution")
        return self.model.predict(X)


    def _is_possible_feature_importance(self):
        """
        Return:
            bool, whether model can return feature importance
        """
        return True


    def get_feature_importance(self, X, importance_type='gain',):
        """
        Return:
            pd.DataFrame with feature importance values, indexed by feature name
        """
        if not self._is_possible_feature_importance(): 
            raise Exception("Model cannot get feature_importance")

        fe_lst = self.model.feature_importance(importance_type=importance_type)
        return (pd.DataFrame(fe_lst, index=X.columns, columns=['value']))


    def get_model_opt_params(self, trial, opt_lvl,):
        """
        Return:
            dict of suggested model parameters for the given optimization level
        """
        model_param = self._init_default_model_param()
        ################################# LVL 1 ########################################
        if opt_lvl >= 1:
            model_param['num_leaves'] = trial.suggest_int('lgbm_num_leaves', 2, 100, log=True)
            model_param['learning_rate'] = trial.suggest_float('lgbm_learning_rate', 1e-2, 0.3, log=True)

        ################################# LVL 2 ########################################
        if opt_lvl == 2:
            model_param['num_iterations'] = trial.suggest_int('lgbm_num_iterations', 300, 500, step=100)

        if opt_lvl >= 2:
            model_param['min_child_samples'] = trial.suggest_int('lgbm_min_child_samples', 2, 100, log=True)
            model_param['bagging_fraction'] = trial.suggest_float('lgbm_bagging_fraction', 0.4, 1., step=0.1)
            if model_param['bagging_fraction'] < 1.:
                model_param['feature_fraction'] = trial.suggest_float('lgbm_feature_fraction', 0.4, 1., step=0.1)
                model_param['bagging_freq'] = trial.suggest_int('lgbm_bagging_freq', 2, 11,)
        
        ################################# LVL 3 ########################################
        if opt_lvl == 3:
            model_param['num_iterations'] = trial.suggest_int('lgbm_num_iterations', 300, 1000, step=100)
        
        ################################# LVL 4 ########################################
        if opt_lvl == 4:
            model_param['learning_rate'] = trial.suggest_float('lgbm_learning_rate', 1e-3, .1, log=True)

        if opt_lvl >= 4:
            model_param['boosting'] = trial.suggest_categorical('lgbm_boosting', ['gbdt', 'dart',])
            if model_param['boosting'] == 'dart':
                model_param['early_stopping_rounds'] = 0
                model_param['uniform_drop'] = trial.suggest_categorical('lgbm_uniform_drop', [True, False])
                model_param['xgboost_dart_mode'] = trial.suggest_categorical('lgbm_xgboost_dart_mode', [True, False])
                model_param['drop_rate'] = trial.suggest_float('lgbm_drop_rate', 1e-8, 1.0, log=True)
                model_param['max_drop'] = trial.suggest_int('lgbm_max_drop', 0, 100)
                model_param['skip_drop'] = trial.suggest_float('lgbm_skip_drop', 1e-3, 1.0, log=True)

            model_param['num_iterations'] = trial.suggest_int('lgbm_num_iterations', 200, 1500, step=100,)

            if self._type_of_estimator == 'classifier':
                model_param['objective'] = trial.suggest_categorical('lgbm_objective', 
                    [
                    'binary', 
                    'cross_entropy',
                    ])

            elif self._type_of_estimator == 'regression':
                model_param['objective'] = trial.suggest_categorical('lgbm_objective', 
                    [
                    'regression',
                    'regression_l1',
                    'mape',
                    'huber',
                    'quantile',
                    ])

        ################################# LVL 5 ########################################
        if opt_lvl >= 5:
            model_param['max_cat_threshold'] = trial.suggest_int('lgbm_max_cat_threshold', 1, 100)
            model_param['min_child_weight'] = trial.suggest_float('lgbm_min_child_weight', 1e-6, 1.0, log=True)
            model_param['learning_rate'] = trial.suggest_float('lgbm_learning_rate', 1e-5, .1, log=True)
            model_param['reg_lambda'] = trial.suggest_float('lgbm_reg_lambda', 1e-8, 1.0, log=True)
            model_param['reg_alpha'] = trial.suggest_float('lgbm_reg_alpha', 1e-8, 1.0, log=True)
            model_param['max_bin'] = trial.suggest_int('lgbm_max_bin', 1, 5,)*50
            #self.model_param['extra_trees'] = trial.suggest_categorical('lgbm_extra_trees', [True, False])
            model_param['enable_bundle'] = trial.suggest_categorical('lgbm_enable_bundle', [True, False])

        ################################# Other ########################################

        return(model_param)

    
    def _is_model_start_opt_params(self,):
        return(True)


    def get_model_start_opt_params(self,):
        default_params = {
            "lgbm_num_leaves": 31,
            "lgbm_min_child_samples": 20,
            "lgbm_learning_rate": 0.1,
            "lgbm_num_iterations": 300,
            }
        return(default_params)


class LightGBMClassifier(LightGBM):
    _type_of_estimator='classifier'


class LightGBMRegressor(LightGBM):
    _type_of_estimator='regression'

Classes

class LightGBM (model_param=None, type_of_estimator=None, gpu=False, verbose=None, random_state=42)

Args

model_param : dict or None
parameters for the model. If None, default parameters are fetched.
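
A minimal usage sketch (not from the package itself): it assumes a pandas DataFrame of features with a binary target, and the constructor signature shown above.

from automl_alex.models.model_lightgbm import LightGBMClassifier
from sklearn.datasets import make_classification
import pandas as pd

# toy binary-classification data (illustrative only)
X, y = make_classification(n_samples=500, n_features=10, random_state=42)
X = pd.DataFrame(X, columns=[f'f{i}' for i in range(X.shape[1])])
y = pd.Series(y)

model = LightGBMClassifier(random_state=42)   # default params are fetched
model.fit(X_train=X, y_train=y)
labels = model.predict(X)           # rounded 0/1 class labels
probas = model.predict_proba(X)     # positive-class probabilities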
Expand source code
class LightGBM(ModelBase):
    """
    Args:
        model_param (dict or None): parameters for the model.
            If None, default parameters are fetched.
    """
    __name__ = 'LightGBM'

    def _init_default_model_param(self,):
        """
        Default model_param
        """
        model_param = {'random_seed': self._random_state,
                       'num_iterations': 300,
                       'verbose': -1,
                       'device_type': 'gpu' if self._gpu else 'cpu',
                       }
        
        if self._type_of_estimator == 'classifier':
            model_param['objective'] = 'binary'
        
        if self._type_of_estimator == 'regression':
            model_param['objective'] = 'regression'
        return(model_param)


    def fit(self, X_train=None, y_train=None, cat_features=None):
        """
        Args:
            X_train (pd.DataFrame, shape (n_samples, n_features)): the input data
            y_train (pd.DataFrame, shape (n_samples, ) or (n_samples, n_outputs)): the target data
            cat_features (list or None): categorical feature names; if None, 'auto' is used
        Return:
            self
        """
        y_train = self.y_format(y_train)
        
        dtrain = lgb.Dataset(X_train, y_train,)
        
        model_param = self.model_param.copy()
        num_iterations = model_param.pop('num_iterations')

        if cat_features is None:
            cat_features = 'auto'

        self.model = lgb.train(
            model_param, 
            dtrain, 
            num_boost_round=num_iterations,
            categorical_feature=cat_features,
            )
        dtrain=None
        return(self)


    def predict(self, X=None):
        """
        Args:
            X (np.array, shape (n_samples, n_features)): the input data
        Return:
            np.array, shape (n_samples, ): predicted class labels (classifier) or predicted values (regression)
        """
        if self.model is None:
            raise Exception("Model is not fitted")

        if self._type_of_estimator == 'classifier':
            predicts = np.round(self.model.predict(X),0)
        elif self._type_of_estimator == 'regression':
            predicts = self.model.predict(X)
        return predicts


    def is_possible_predict_proba(self):
        """
        Return:
            bool, whether model can predict proba
        """
        return True


    def predict_proba(self, X=None):
        """
        Args:
            X (np.array, shape (n_samples, n_features)): the input data
        Return:
            np.array, shape (n_samples, ): probability of the positive class (binary objective)
        """
        if self.model is None:
            raise Exception("Model is not fitted")

        if not self.is_possible_predict_proba(): 
            raise Exception("Model cannot predict probability distribution")
        return self.model.predict(X)


    def _is_possible_feature_importance(self):
        """
        Return:
            bool, whether model can return feature importance
        """
        return True


    def get_feature_importance(self, X, importance_type='gain',):
        """
        Return:
            pd.DataFrame with feature importance values, indexed by feature name
        """
        if not self._is_possible_feature_importance(): 
            raise Exception("Model cannot get feature_importance")

        fe_lst = self.model.feature_importance(importance_type=importance_type)
        return (pd.DataFrame(fe_lst, index=X.columns, columns=['value']))


    def get_model_opt_params(self, trial, opt_lvl,):
        """
        Return:
            dict of suggested model parameters for the given optimization level
        """
        model_param = self._init_default_model_param()
        ################################# LVL 1 ########################################
        if opt_lvl >= 1:
            model_param['num_leaves'] = trial.suggest_int('lgbm_num_leaves', 2, 100, log=True)
            model_param['learning_rate'] = trial.suggest_float('lgbm_learning_rate', 1e-2, 0.3, log=True)

        ################################# LVL 2 ########################################
        if opt_lvl == 2:
            model_param['num_iterations'] = trial.suggest_int('lgbm_num_iterations', 300, 500, step=100)

        if opt_lvl >= 2:
            model_param['min_child_samples'] = trial.suggest_int('lgbm_min_child_samples', 2, 100, log=True)
            model_param['bagging_fraction'] = trial.suggest_float('lgbm_bagging_fraction', 0.4, 1., step=0.1)
            if model_param['bagging_fraction'] < 1.:
                model_param['feature_fraction'] = trial.suggest_float('lgbm_feature_fraction', 0.4, 1., step=0.1)
                model_param['bagging_freq'] = trial.suggest_int('lgbm_bagging_freq', 2, 11,)
        
        ################################# LVL 3 ########################################
        if opt_lvl == 3:
            model_param['num_iterations'] = trial.suggest_int('lgbm_num_iterations', 300, 1000, step=100)
        
        ################################# LVL 4 ########################################
        if opt_lvl == 4:
            model_param['learning_rate'] = trial.suggest_float('lgbm_learning_rate', 1e-3, .1, log=True)

        if opt_lvl >= 4:
            model_param['boosting'] = trial.suggest_categorical('lgbm_boosting', ['gbdt', 'dart',])
            if model_param['boosting'] == 'dart':
                model_param['early_stopping_rounds'] = 0
                model_param['uniform_drop'] = trial.suggest_categorical('lgbm_uniform_drop', [True, False])
                model_param['xgboost_dart_mode'] = trial.suggest_categorical('lgbm_xgboost_dart_mode', [True, False])
                model_param['drop_rate'] = trial.suggest_float('lgbm_drop_rate', 1e-8, 1.0, log=True)
                model_param['max_drop'] = trial.suggest_int('lgbm_max_drop', 0, 100)
                model_param['skip_drop'] = trial.suggest_float('lgbm_skip_drop', 1e-3, 1.0, log=True)

            model_param['num_iterations'] = trial.suggest_int('lgbm_num_iterations', 200, 1500, step=100,)

            if self._type_of_estimator == 'classifier':
                model_param['objective'] = trial.suggest_categorical('lgbm_objective', 
                    [
                    'binary', 
                    'cross_entropy',
                    ])

            elif self._type_of_estimator == 'regression':
                model_param['objective'] = trial.suggest_categorical('lgbm_objective', 
                    [
                    'regression',
                    'regression_l1',
                    'mape',
                    'huber',
                    'quantile',
                    ])

        ################################# LVL 5 ########################################
        if opt_lvl >= 5:
            model_param['max_cat_threshold'] = trial.suggest_int('lgbm_max_cat_threshold', 1, 100)
            model_param['min_child_weight'] = trial.suggest_float('lgbm_min_child_weight', 1e-6, 1.0, log=True)
            model_param['learning_rate'] = trial.suggest_float('lgbm_learning_rate', 1e-5, .1, log=True)
            model_param['reg_lambda'] = trial.suggest_float('lgbm_reg_lambda', 1e-8, 1.0, log=True)
            model_param['reg_alpha'] = trial.suggest_float('lgbm_reg_alpha', 1e-8, 1.0, log=True)
            model_param['max_bin'] = trial.suggest_int('lgbm_max_bin', 1, 5,)*50
            #self.model_param['extra_trees'] = trial.suggest_categorical('lgbm_extra_trees', [True, False])
            model_param['enable_bundle'] = trial.suggest_categorical('lgbm_enable_bundle', [True, False])

        ################################# Other ########################################

        return(model_param)

    
    def _is_model_start_opt_params(self,):
        return(True)


    def get_model_start_opt_params(self,):
        default_params = {
            "lgbm_num_leaves": 31,
            "lgbm_min_child_samples": 20,
            "lgbm_learning_rate": 0.1,
            "lgbm_num_iterations": 300,
            }
        return(default_params)

Ancestors

  • automl_alex._base.ModelBase

Subclasses

  • LightGBMClassifier
  • LightGBMRegressor

Methods

def fit(self, X_train=None, y_train=None, cat_features=None)

Args

X_train : pd.DataFrame, shape (n_samples, n_features)
the input data
y_train : pd.DataFrame, shape (n_samples,) or (n_samples, n_outputs)
the target data
cat_features : list or None
categorical feature names; if None, 'auto' is used

Return

self

Expand source code
def fit(self, X_train=None, y_train=None, cat_features=None):
    """
    Args:
        X_train (pd.DataFrame, shape (n_samples, n_features)): the input data
        y_train (pd.DataFrame, shape (n_samples, ) or (n_samples, n_outputs)): the target data
        cat_features (list or None): categorical feature names; if None, 'auto' is used
    Return:
        self
    """
    y_train = self.y_format(y_train)
    
    dtrain = lgb.Dataset(X_train, y_train,)
    
    model_param = self.model_param.copy()
    num_iterations = model_param.pop('num_iterations')

    if cat_features is None:
        cat_features = 'auto'

    self.model = lgb.train(
        model_param, 
        dtrain, 
        num_boost_round=num_iterations,
        categorical_feature=cat_features,
        )
    dtrain=None
    return(self)
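
For orientation, the method above is roughly equivalent to the following plain lightgbm call; the parameter values are the wrapper's defaults and X, y are assumed to be prepared beforehand (a sketch, not the wrapper's exact code path).

import lightgbm as lgb

params = {'objective': 'binary', 'random_seed': 42, 'verbose': -1}
dtrain = lgb.Dataset(X, y)

# 'num_iterations' is popped from the param dict and passed separately as
# num_boost_round, so LightGBM does not receive the setting twice
booster = lgb.train(params, dtrain,
                    num_boost_round=300,
                    categorical_feature='auto')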
def get_feature_importance(self, X, importance_type='gain')

Return

pd.DataFrame with feature importance values, indexed by feature name

Expand source code
def get_feature_importance(self, X, importance_type='gain',):
    """
    Return:
        pd.DataFrame with feature importance values, indexed by feature name
    """
    if not self._is_possible_feature_importance(): 
        raise Exception("Model cannot get feature_importance")

    fe_lst = self.model.feature_importance(importance_type=importance_type)
    return (pd.DataFrame(fe_lst, index=X.columns, columns=['value']))
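
A short sketch of reading the importances after fitting; feature names come from the columns of the DataFrame passed as X, and the sort is only for display.

fi = model.get_feature_importance(X, importance_type='gain')
print(fi.sort_values('value', ascending=False).head())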
def get_model_opt_params(self, trial, opt_lvl)

Return

dict of suggested model parameters for the given optimization level

Expand source code
def get_model_opt_params(self, trial, opt_lvl,):
    """
    Return:
        dict of suggested model parameters for the given optimization level
    """
    model_param = self._init_default_model_param()
    ################################# LVL 1 ########################################
    if opt_lvl >= 1:
        model_param['num_leaves'] = trial.suggest_int('lgbm_num_leaves', 2, 100, log=True)
        model_param['learning_rate'] = trial.suggest_float('lgbm_learning_rate', 1e-2, 0.3, log=True)

    ################################# LVL 2 ########################################
    if opt_lvl == 2:
        model_param['num_iterations'] = trial.suggest_int('lgbm_num_iterations', 300, 500, step=100)

    if opt_lvl >= 2:
        model_param['min_child_samples'] = trial.suggest_int('lgbm_min_child_samples', 2, 100, log=True)
        model_param['bagging_fraction'] = trial.suggest_float('lgbm_bagging_fraction', 0.4, 1., step=0.1)
        if model_param['bagging_fraction'] < 1.:
            model_param['feature_fraction'] = trial.suggest_float('lgbm_feature_fraction', 0.4, 1., step=0.1)
            model_param['bagging_freq'] = trial.suggest_int('lgbm_bagging_freq', 2, 11,)
    
    ################################# LVL 3 ########################################
    if opt_lvl == 3:
        model_param['num_iterations'] = trial.suggest_int('lgbm_num_iterations', 300, 1000, step=100)
    
    ################################# LVL 4 ########################################
    if opt_lvl == 4:
        model_param['learning_rate'] = trial.suggest_float('lgbm_learning_rate', 1e-3, .1, log=True)

    if opt_lvl >= 4:
        model_param['boosting'] = trial.suggest_categorical('lgbm_boosting', ['gbdt', 'dart',])
        if model_param['boosting'] == 'dart':
            model_param['early_stopping_rounds'] = 0
            model_param['uniform_drop'] = trial.suggest_categorical('lgbm_uniform_drop', [True, False])
            model_param['xgboost_dart_mode'] = trial.suggest_categorical('lgbm_xgboost_dart_mode', [True, False])
            model_param['drop_rate'] = trial.suggest_float('lgbm_drop_rate', 1e-8, 1.0, log=True)
            model_param['max_drop'] = trial.suggest_int('lgbm_max_drop', 0, 100)
            model_param['skip_drop'] = trial.suggest_float('lgbm_skip_drop', 1e-3, 1.0, log=True)

        model_param['num_iterations'] = trial.suggest_int('lgbm_num_iterations', 200, 1500, step=100,)

        if self._type_of_estimator == 'classifier':
            model_param['objective'] = trial.suggest_categorical('lgbm_objective', 
                [
                'binary', 
                'cross_entropy',
                ])

        elif self._type_of_estimator == 'regression':
            model_param['objective'] = trial.suggest_categorical('lgbm_objective', 
                [
                'regression',
                'regression_l1',
                'mape',
                'huber',
                'quantile',
                ])

    ################################# LVL 5 ########################################
    if opt_lvl >= 5:
        model_param['max_cat_threshold'] = trial.suggest_int('lgbm_max_cat_threshold', 1, 100)
        model_param['min_child_weight'] = trial.suggest_float('lgbm_min_child_weight', 1e-6, 1.0, log=True)
        model_param['learning_rate'] = trial.suggest_float('lgbm_learning_rate', 1e-5, .1, log=True)
        model_param['reg_lambda'] = trial.suggest_float('lgbm_reg_lambda', 1e-8, 1.0, log=True)
        model_param['reg_alpha'] = trial.suggest_float('lgbm_reg_alpha', 1e-8, 1.0, log=True)
        model_param['max_bin'] = trial.suggest_int('lgbm_max_bin', 1, 5,)*50
        #self.model_param['extra_trees'] = trial.suggest_categorical('lgbm_extra_trees', [True, False])
        model_param['enable_bundle'] = trial.suggest_categorical('lgbm_enable_bundle', [True, False])

    ################################# Other ########################################

    return(model_param)
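
This method is intended to be called inside an Optuna objective, where the trial object drives the suggest_* calls above. A minimal sketch, assuming a hold-out split and ROC AUC scoring that are not part of this module:

import optuna
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

X_tr, X_val, y_tr, y_val = train_test_split(X, y, random_state=42)

def objective(trial):
    # opt_lvl controls how much of the search space above is opened up
    model.model_param = model.get_model_opt_params(trial, opt_lvl=2)
    model.fit(X_train=X_tr, y_train=y_tr)
    return roc_auc_score(y_val, model.predict_proba(X_val))

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)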
def get_model_start_opt_params(self)
Expand source code
def get_model_start_opt_params(self,):
    default_params = {
        "lgbm_num_leaves": 31,
        "lgbm_min_child_samples": 20,
        "lgbm_learning_rate": 0.1,
        "lgbm_num_iterations": 300,
        }
    return(default_params)
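
The keys match the Optuna parameter names used in get_model_opt_params, so these values can seed a search as a first, known-reasonable trial, for example via Optuna's enqueue_trial (a sketch reusing the objective from the previous example):

study = optuna.create_study(direction='maximize')
study.enqueue_trial(model.get_model_start_opt_params())  # evaluate the defaults first
study.optimize(objective, n_trials=20)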
def is_possible_predict_proba(self)

Return

bool, whether model can predict proba

Expand source code
def is_possible_predict_proba(self):
    """
    Return:
        bool, whether model can predict proba
    """
    return True
def predict(self, X=None)

Args

X (np.array, shape (n_samples, n_features)): the input data

Return

np.array, shape (n_samples, ): predicted class labels (classifier) or predicted values (regression)

Expand source code
def predict(self, X=None):
    """
    Args:
        X (np.array, shape (n_samples, n_features)): the input data
    Return:
        np.array, shape (n_samples, ): predicted class labels (classifier) or predicted values (regression)
    """
    if self.model is None:
        raise Exception("Model is not fitted")

    if self._type_of_estimator == 'classifier':
        predicts = np.round(self.model.predict(X),0)
    elif self._type_of_estimator == 'regression':
        predicts = self.model.predict(X)
    return predicts
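
For a classifier this simply rounds the Booster's probability output at 0.5; a quick check, assuming the fitted model from the usage example above:

probas = model.model.predict(X)   # raw probabilities from the underlying Booster
labels = model.predict(X)         # equivalent to np.round(probas, 0)
assert set(labels) <= {0.0, 1.0}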
def predict_proba(self, X=None)

Args

X (np.array, shape (n_samples, n_features)): the input data

Return

np.array, shape (n_samples, ): probability of the positive class (binary objective)

Expand source code
def predict_proba(self, X=None):
    """
    Args:
        X (np.array, shape (n_samples, n_features)): the input data
    Return:
        np.array, shape (n_samples, ): probability of the positive class (binary objective)
    """
    if self.model is None:
        raise Exception("Model is not fitted")

    if not self.is_possible_predict_proba(): 
        raise Exception("Model cannot predict probability distribution")
    return self.model.predict(X)
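
With the default binary objective the underlying Booster returns a one-dimensional array of positive-class probabilities rather than an (n_samples, n_classes) matrix:

probas = model.predict_proba(X)
print(probas.shape)   # (n_samples,) for the binary objective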
class LightGBMClassifier (model_param=None, type_of_estimator=None, gpu=False, verbose=None, random_state=42)

Args

model_param : dict or None
parameters for the model. If None, default parameters are fetched.
Expand source code
class LightGBMClassifier(LightGBM):
    _type_of_estimator='classifier'

Ancestors

  • LightGBM
  • automl_alex._base.ModelBase

Inherited members

  • LightGBM: fit, get_feature_importance, get_model_opt_params, get_model_start_opt_params, is_possible_predict_proba, predict, predict_proba

class LightGBMRegressor (model_param=None, type_of_estimator=None, gpu=False, verbose=None, random_state=42)

Args

model_param : dict or None
parameters for the model. If None, default parameters are fetched.
Expand source code
class LightGBMRegressor(LightGBM):
    _type_of_estimator='regression'

Ancestors

  • LightGBM
  • automl_alex._base.ModelBase

Inherited members

  • LightGBM: fit, get_feature_importance, get_model_opt_params, get_model_start_opt_params, is_possible_predict_proba, predict, predict_proba