"""XGBoost scikit implementation based on https://xgboost.readthedocs.io/en/latest/"""
from xgboost import XGBClassifier
from tools.helper_sklearn import sklearn_train_model
def train_model(n_estimators, max_depth, learning_rate, training_data, training_labels, test_data, test_labels, full_filename):
    """Create and train an XGBoost sklearn instance with the given params.

    The classifier is built by ``_create_model`` and then handed, together
    with the data sets and the target filename, to ``sklearn_train_model``.

    Args:
        n_estimators (int): Number of estimators to use
        max_depth (int): Maximum tree depth for base learners
        learning_rate (float): Boosting learning rate (XGB's "eta")
        training_data (numpy.ndarray): Data to train on
        training_labels (list): List of labels corresponding to the training data
        test_data (numpy.ndarray): Data to test on
        test_labels (list): List of labels corresponding to the test data
        full_filename (string): This filename will be used for persisting the trained model

    Returns:
        PredictionEntry: Named tuple with training results (the value
        returned by ``sklearn_train_model``, passed through unchanged)
    """
    model = _create_model(n_estimators, max_depth, learning_rate)
    return sklearn_train_model(
        model,
        training_data, training_labels,
        test_data, test_labels,
        full_filename,
    )
def _create_model(n_estimators=100, max_depth=3, learning_rate=0.1):
    """(Internal helper) Build an XGBoost classifier with the scikit-learn API.

    Args:
        n_estimators (int): Number of estimators; coerced with ``int()``
        max_depth (int): Maximum tree depth; coerced with ``int()``
        learning_rate (float): Boosting learning rate, passed through as given

    Returns:
        XGBClassifier: The newly constructed (not yet fitted) classifier
    """
    # Coerce the tree-shape parameters to integers before they reach XGBoost.
    settings = {
        'n_estimators': int(n_estimators),
        'max_depth': int(max_depth),
        'learning_rate': learning_rate,
        # Fixed at 4 workers because n_jobs=-1 doesn't always work.
        'n_jobs': 4,
        # Fixed init state.
        'random_state': 0,
    }
    model = XGBClassifier(**settings)
    print('Created Model: {}'.format(model))
    return model