# Source code for MLT.implementations.XGBoost

"""XGBoost scikit implementation based on https://xgboost.readthedocs.io/en/latest/"""
from xgboost import XGBClassifier

from tools.helper_sklearn import sklearn_train_model

def train_model(n_estimators, max_depth, learning_rate, training_data,
                training_labels, test_data, test_labels, full_filename):
    """Creates and trains a XGBoost sklearn instance with given params

    Args:
        n_estimators (int): Number of estimators to use
        max_depth (int): Maximum tree depth for base learners
        learning_rate (float): Boosting learning rate (XGB's "eta")
        training_data (numpy.ndarray): Data to train on
        training_labels (list): List of labels corresponding to the training data
        test_data (numpy.ndarray): Data to evaluate the trained model on
        test_labels (list): List of labels corresponding to the test data
        full_filename (string): This filename will be used for persisting
            the trained model

    Returns:
        PredictionEntry: Named tuple with training results
    """
    # Build the classifier first, then delegate fit/evaluate/persist to the
    # shared sklearn training helper so all implementations behave alike.
    return sklearn_train_model(
        _create_model(n_estimators, max_depth, learning_rate),
        training_data, training_labels,
        test_data, test_labels,
        full_filename
    )
def _create_model(n_estimators=100, max_depth=3, learning_rate=0.1):
    """(Internal helper) Creates a scikit-learn-compatible XGBoost instance

    Args:
        n_estimators (int): Number of boosting rounds / estimators
        max_depth (int): Maximum tree depth for base learners
        learning_rate (float): Boosting learning rate (XGB's "eta")

    Returns:
        XGBClassifier: Unfitted classifier configured with the given params
    """
    # Hyperparameter-search tools may pass these in as floats;
    # XGBoost requires genuine ints for count/depth parameters.
    n_estimators = int(n_estimators)
    max_depth = int(max_depth)
    xgb = XGBClassifier(
        max_depth=max_depth,
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        n_jobs=4,  # Set fix n_jobs of 4 as -1 doesn't always work
        random_state=0  # Fixed init state
    )
    print('Created Model: {}'.format(xgb))
    return xgb