Module automl_alex.models.model_catboost
Expand source code
from automl_alex._base import ModelBase
import catboost
from catboost import Pool
import numpy as np
import pandas as pd
class CatBoost(ModelBase):
    """
    Wrapper around the catboost library that plugs into the automl_alex
    ModelBase interface (fit / predict / predict_proba / hyperparameter search).

    Args:
        params (dict or None): parameters for model.
            If None default params are fetched.
    """
    __name__ = 'CatBoost'

    def _init_default_model_param(self, model_param=None):
        """
        Return default model parameters when none are supplied.

        Args:
            model_param (dict or None): explicit parameters; returned
                unchanged when not None.

        Return:
            dict of catboost constructor parameters.
        """
        if model_param is None:
            model_param = {
                'verbose': 0,
                # use the GPU only when this wrapper was configured for it
                'task_type': 'GPU' if self._gpu else 'CPU',
                'random_seed': self._random_state,
            }
        return model_param

    def _init_model(self, model_param=None):
        """
        Build a fresh catboost estimator of the configured kind.

        Args:
            model_param (dict): parameters forwarded to the catboost
                constructor.

        Return:
            catboost.CatBoostClassifier or catboost.CatBoostRegressor.

        Raises:
            ValueError: if ``self._type_of_estimator`` is neither
                'classifier' nor 'regression' (the original code fell
                through and raised an opaque UnboundLocalError here).
        """
        if self._type_of_estimator == 'classifier':
            return catboost.CatBoostClassifier(**model_param)
        if self._type_of_estimator == 'regression':
            return catboost.CatBoostRegressor(**model_param)
        raise ValueError(
            f"Unknown type_of_estimator: {self._type_of_estimator!r}"
        )

    def fit(self, X_train=None, y_train=None, cat_features=None):
        """
        Fit the model on the given training data.

        Args:
            X_train (pd.DataFrame, shape (n_samples, n_features)): the input data
            y_train (pd.DataFrame, shape (n_samples, ) or (n_samples, n_outputs)): the target data
            cat_features (list or None): names of categorical columns in X_train

        Return:
            self
        """
        y_train = self.y_format(y_train)
        if cat_features is not None:
            # Pool expects positional column indices, not column names
            cat_dims = [X_train.columns.get_loc(col) for col in cat_features]
            train_pool = Pool(X_train, label=y_train, cat_features=cat_dims)
        else:
            train_pool = Pool(X_train, label=y_train)
        params = self.model_param.copy()
        self.model = self._init_model(model_param=params)
        self.model.fit(train_pool, verbose=False, plot=False)
        return self

    def predict(self, X=None):
        """
        Args:
            X (np.array, shape (n_samples, n_features)): the input data

        Return:
            np.array, shape (n_samples, n_classes)

        Raises:
            Exception: if the model has not been fitted yet.
        """
        if self.model is None:
            raise Exception("No fit models")
        if self._type_of_estimator == 'classifier':
            # round to hard class labels (catboost may return floats)
            predicts = np.round(self.model.predict(X), 0)
        elif self._type_of_estimator == 'regression':
            predicts = self.model.predict(X)
        return predicts

    def is_possible_predict_proba(self):
        """
        Return:
            bool, whether model can predict proba
        """
        return True

    def predict_proba(self, X=None):
        """
        Args:
            X (np.array, shape (n_samples, n_features)): the input data

        Return:
            np.array, shape (n_samples, ) — probability of the positive
            class (column 1), i.e. binary classification only.

        Raises:
            Exception: if the model has not been fitted, or cannot
                predict probabilities.
        """
        if self.model is None:
            raise Exception("No fit models")
        if not self.is_possible_predict_proba():
            raise Exception("Model cannot predict probability distribution")
        return self.model.predict_proba(X)[:, 1]

    def _is_possible_feature_importance(self):
        """
        Return:
            bool, whether model can report feature importance
        """
        return True

    def get_feature_importance(self, X,):
        """
        Args:
            X (pd.DataFrame): data whose column names index the result.

        Return:
            pd.DataFrame with one 'value' column of importances,
            indexed by the columns of X.
        """
        if not self._is_possible_feature_importance():
            raise Exception("Model cannot get feature_importance")
        fe_lst = self.model.get_feature_importance()
        return pd.DataFrame(fe_lst, index=X.columns, columns=['value'])

    def get_model_opt_params(self, trial, opt_lvl,):
        """
        Sample catboost hyperparameters from an Optuna trial; deeper
        opt_lvl values widen the search space.

        Args:
            trial: optuna Trial used to suggest values.
            opt_lvl (int): optimization depth level (>= 1).

        Return:
            dict of sampled model parameters.
        """
        model_param = self._init_default_model_param()
        ################################# LVL 1 ########################################
        if opt_lvl >= 1:
            model_param['min_child_samples'] = trial.suggest_int('cb_min_child_samples', 1, 100)
            model_param['depth'] = trial.suggest_int('cb_depth', 4, 10)
        ################################# LVL 2 ########################################
        if opt_lvl >= 2:
            model_param['bagging_temperature'] = trial.suggest_int('cb_bagging_temperature', 0, 10,)
            # suggest_float(step=...) replaces the deprecated suggest_discrete_uniform
            model_param['subsample'] = trial.suggest_float('cb_subsample', 0.1, 1.0, step=0.1)
        ################################# LVL 3 ########################################
        if opt_lvl >= 3:
            if self._type_of_estimator == 'classifier':
                model_param['objective'] = trial.suggest_categorical(
                    'cb_objective',
                    [
                        'Logloss',
                        'CrossEntropy',
                    ])
            elif self._type_of_estimator == 'regression':
                model_param['objective'] = trial.suggest_categorical(
                    'cb_objective',
                    [
                        'MAE',
                        'MAPE',
                        'Quantile',
                        'RMSE',
                    ])
        ################################# LVL 4 ########################################
        if opt_lvl >= 4:
            model_param['depth'] = trial.suggest_int('cb_depth', 2, 16)
            # suggest_float(log=True) replaces the deprecated suggest_loguniform
            model_param['l2_leaf_reg'] = trial.suggest_float('cb_l2_leaf_reg', 1e-8, .1, log=True)
            # integer grids scaled down/up to get coarse float search spaces
            model_param['learning_rate'] = trial.suggest_int('cb_learning_rate', 1, 100) / 1000
            model_param['iterations'] = trial.suggest_int('cb_iterations', 1, 10) * 100
        ################################# Other ########################################
        return model_param

    def _is_model_start_opt_params(self,):
        """Return True: this model provides warm-start search params."""
        return True

    def get_model_start_opt_params(self,):
        """
        Return:
            dict of trial-parameter names/values used to seed the search.
        """
        default_params = {
            "cb_depth": 6,
            "cb_min_child_samples": 1,
            "cb_learning_rate": 0.03,
        }
        return default_params
class CatBoostClassifier(CatBoost):
    """Classification variant of the CatBoost wrapper."""
    __name__ = 'CatBoostClassifier'
    _type_of_estimator = 'classifier'
class CatBoostRegressor(CatBoost):
    """Regression variant of the CatBoost wrapper."""
    __name__ = 'CatBoostRegressor'
    _type_of_estimator = 'regression'
Classes
class CatBoost (model_param=None, type_of_estimator=None, gpu=False, verbose=None, random_state=42)
-
Args
params : dict or None
- parameters for model. If None default params are fetched.
Expand source code
class CatBoost(ModelBase): """ Args: params (dict or None): parameters for model. If None default params are fetched. """ __name__ = 'CatBoost' def _init_default_model_param(self, model_param=None): """ Default model_param """ if model_param is None: model_param = {'verbose': 0, 'task_type': 'GPU' if self._gpu else 'CPU', 'random_seed': self._random_state, } return(model_param) def _init_model(self, model_param=None): """ sets new model, Args: params: : parameters for model. """ if self._type_of_estimator == 'classifier': model = catboost.CatBoostClassifier(**model_param) elif self._type_of_estimator == 'regression': model = catboost.CatBoostRegressor(**model_param) return(model) def fit(self, X_train=None, y_train=None, cat_features=None): """ Args: X (pd.DataFrame, shape (n_samples, n_features)): the input data y (pd.DataFrame, shape (n_samples, ) or (n_samples, n_outputs)): the target data cat_features (list) Return: self """ y_train = self.y_format(y_train) if cat_features is not None: cat_dims = [X_train.columns.get_loc(i) for i in cat_features[:]] train_pool = Pool(X_train, label=y_train, cat_features=cat_dims) else: train_pool = Pool(X_train, label=y_train,) params = self.model_param.copy() self.model = self._init_model(model_param=params) self.model.fit(train_pool, verbose=False, plot=False,) train_pool=None return self def predict(self, X=None): """ Args: X (np.array, shape (n_samples, n_features)): the input data Return: np.array, shape (n_samples, n_classes) """ if self.model is None: raise Exception("No fit models") if self._type_of_estimator == 'classifier': predicts = np.round(self.model.predict(X),0) elif self._type_of_estimator == 'regression': predicts = self.model.predict(X) return predicts def is_possible_predict_proba(self): """ Return: bool, whether model can predict proba """ return True def predict_proba(self, X=None): """ Args: X (np.array, shape (n_samples, n_features)): the input data Return: np.array, shape (n_samples, n_classes) """ if 
self.model is None: raise Exception("No fit models") if not self.is_possible_predict_proba(): raise Exception("Model cannot predict probability distribution") return self.model.predict_proba(X)[:, 1] def _is_possible_feature_importance(self): """ Return: bool, whether model can predict proba """ return True def get_feature_importance(self, X,): """ Return: list feature_importance """ if not self._is_possible_feature_importance(): raise Exception("Model cannot get feature_importance") fe_lst = self.model.get_feature_importance() return (pd.DataFrame(fe_lst, index=X.columns, columns=['value'])) #@staticmethod def get_model_opt_params(self, trial, opt_lvl,): """ Return: dict of DistributionWrappers """ model_param = self._init_default_model_param() ################################# LVL 1 ######################################## if opt_lvl >= 1: model_param['min_child_samples'] = trial.suggest_int('cb_min_child_samples', 1, 100) model_param['depth'] = trial.suggest_int('cb_depth', 4, 10) ################################# LVL 2 ######################################## if opt_lvl >= 2: model_param['bagging_temperature'] = trial.suggest_int('cb_bagging_temperature', 0, 10,) model_param['subsample'] = trial.suggest_discrete_uniform('cb_subsample', 0.1, 1.0, 0.1) ################################# LVL 3 ######################################## if opt_lvl >= 3: if self._type_of_estimator == 'classifier': model_param['objective'] = trial.suggest_categorical('cb_objective', [ 'Logloss', 'CrossEntropy', ]) elif self._type_of_estimator == 'regression': model_param['objective'] = trial.suggest_categorical('cb_objective', [ 'MAE', 'MAPE', 'Quantile', 'RMSE', ]) ################################# LVL 4 ######################################## if opt_lvl >= 4: model_param['depth'] = trial.suggest_int('cb_depth', 2, 16) model_param['l2_leaf_reg'] = trial.suggest_loguniform('cb_l2_leaf_reg', 1e-8, .1) model_param['learning_rate'] = trial.suggest_int('cb_learning_rate', 1, 100)/1000 
model_param['iterations'] = trial.suggest_int('cb_iterations', 1, 10)*100 ################################# Other ######################################## return(model_param) def _is_model_start_opt_params(self,): return(True) def get_model_start_opt_params(self,): dafault_params = { "cb_depth": 6, "cb_min_child_samples": 1, "cb_learning_rate": 0.03, } return(dafault_params)
Ancestors
- automl_alex._base.ModelBase
Subclasses
Methods
def fit(self, X_train=None, y_train=None, cat_features=None)
-
Args
X (pd.DataFrame, shape (n_samples, n_features)): the input data y (pd.DataFrame, shape (n_samples, ) or (n_samples, n_outputs)): the target data cat_features (list)
Return
self
Expand source code
def fit(self, X_train=None, y_train=None, cat_features=None): """ Args: X (pd.DataFrame, shape (n_samples, n_features)): the input data y (pd.DataFrame, shape (n_samples, ) or (n_samples, n_outputs)): the target data cat_features (list) Return: self """ y_train = self.y_format(y_train) if cat_features is not None: cat_dims = [X_train.columns.get_loc(i) for i in cat_features[:]] train_pool = Pool(X_train, label=y_train, cat_features=cat_dims) else: train_pool = Pool(X_train, label=y_train,) params = self.model_param.copy() self.model = self._init_model(model_param=params) self.model.fit(train_pool, verbose=False, plot=False,) train_pool=None return self
def get_feature_importance(self, X)
-
Return
list feature_importance
Expand source code
def get_feature_importance(self, X,): """ Return: list feature_importance """ if not self._is_possible_feature_importance(): raise Exception("Model cannot get feature_importance") fe_lst = self.model.get_feature_importance() return (pd.DataFrame(fe_lst, index=X.columns, columns=['value']))
def get_model_opt_params(self, trial, opt_lvl)
-
Return
dict of DistributionWrappers
Expand source code
def get_model_opt_params(self, trial, opt_lvl,): """ Return: dict of DistributionWrappers """ model_param = self._init_default_model_param() ################################# LVL 1 ######################################## if opt_lvl >= 1: model_param['min_child_samples'] = trial.suggest_int('cb_min_child_samples', 1, 100) model_param['depth'] = trial.suggest_int('cb_depth', 4, 10) ################################# LVL 2 ######################################## if opt_lvl >= 2: model_param['bagging_temperature'] = trial.suggest_int('cb_bagging_temperature', 0, 10,) model_param['subsample'] = trial.suggest_discrete_uniform('cb_subsample', 0.1, 1.0, 0.1) ################################# LVL 3 ######################################## if opt_lvl >= 3: if self._type_of_estimator == 'classifier': model_param['objective'] = trial.suggest_categorical('cb_objective', [ 'Logloss', 'CrossEntropy', ]) elif self._type_of_estimator == 'regression': model_param['objective'] = trial.suggest_categorical('cb_objective', [ 'MAE', 'MAPE', 'Quantile', 'RMSE', ]) ################################# LVL 4 ######################################## if opt_lvl >= 4: model_param['depth'] = trial.suggest_int('cb_depth', 2, 16) model_param['l2_leaf_reg'] = trial.suggest_loguniform('cb_l2_leaf_reg', 1e-8, .1) model_param['learning_rate'] = trial.suggest_int('cb_learning_rate', 1, 100)/1000 model_param['iterations'] = trial.suggest_int('cb_iterations', 1, 10)*100 ################################# Other ######################################## return(model_param)
def get_model_start_opt_params(self)
-
Expand source code
def get_model_start_opt_params(self,): dafault_params = { "cb_depth": 6, "cb_min_child_samples": 1, "cb_learning_rate": 0.03, } return(dafault_params)
def is_possible_predict_proba(self)
-
Return
bool, whether model can predict proba
Expand source code
def is_possible_predict_proba(self): """ Return: bool, whether model can predict proba """ return True
def predict(self, X=None)
-
Args
X (np.array, shape (n_samples, n_features)): the input data
Return
np.array, shape (n_samples, n_classes)
Expand source code
def predict(self, X=None): """ Args: X (np.array, shape (n_samples, n_features)): the input data Return: np.array, shape (n_samples, n_classes) """ if self.model is None: raise Exception("No fit models") if self._type_of_estimator == 'classifier': predicts = np.round(self.model.predict(X),0) elif self._type_of_estimator == 'regression': predicts = self.model.predict(X) return predicts
def predict_proba(self, X=None)
-
Args
X (np.array, shape (n_samples, n_features)): the input data
Return
np.array, shape (n_samples, n_classes)
Expand source code
def predict_proba(self, X=None): """ Args: X (np.array, shape (n_samples, n_features)): the input data Return: np.array, shape (n_samples, n_classes) """ if self.model is None: raise Exception("No fit models") if not self.is_possible_predict_proba(): raise Exception("Model cannot predict probability distribution") return self.model.predict_proba(X)[:, 1]
class CatBoostClassifier (model_param=None, type_of_estimator=None, gpu=False, verbose=None, random_state=42)
-
Args
params : dict or None
- parameters for model. If None default params are fetched.
Expand source code
class CatBoostClassifier(CatBoost): _type_of_estimator='classifier' __name__ = 'CatBoostClassifier'
Ancestors
- CatBoost
- automl_alex._base.ModelBase
Inherited members
class CatBoostRegressor (model_param=None, type_of_estimator=None, gpu=False, verbose=None, random_state=42)
-
Args
params : dict or None
- parameters for model. If None default params are fetched.
Expand source code
class CatBoostRegressor(CatBoost): _type_of_estimator='regression' __name__ = 'CatBoostRegressor'
Ancestors
- CatBoost
- automl_alex._base.ModelBase
Inherited members