Source code for MLT.tools.helper_sklearn

"""Utility functions for scikit-learn-realted implementations"""
import os
from datetime import datetime
from sklearn.externals import joblib

from MLT.tools import prediction_entry as pe

def sklearn_train_model(model, training_data, training_labels, test_data, test_labels, model_savename):
    """Fit a model, predict on the test set, persist the model and wrap the results.

    Only the ``model.fit`` call is timed; prediction and persisting are not
    part of the reported runtime.

    Args:
        model: Estimator exposing ``fit``, ``predict`` and ``predict_proba``.
        training_data: Samples passed to ``model.fit``.
        training_labels: Labels passed to ``model.fit``.
        test_data: Samples passed to ``model.predict`` / ``model.predict_proba``.
        test_labels: Labels forwarded unchanged into the result entry.
        model_savename: Base name handed to ``sklearn_persist_model``.

    Returns:
        A ``pe.PredictionEntry`` built from the test labels, the predicted
        labels, column 1 of the predicted probabilities and the fit duration.
    """
    fit_started = datetime.now()
    model.fit(training_data, training_labels)
    fit_duration = datetime.now() - fit_started

    # Predict the run.
    predicted_labels = model.predict(test_data)
    # Only column 1 of predict_proba is kept. With two columns that always
    # add up, the second one alone carries the full information.
    # NOTE(review): this presumes a two-class model - confirm with callers.
    kept_probabilities = model.predict_proba(test_data)[:, 1]

    sklearn_persist_model(model, model_savename)

    return pe.PredictionEntry(test_labels, predicted_labels, kept_probabilities, fit_duration)
def sklearn_persist_model(model, model_savename):
    """Dump a scikit model to disk via joblib.

    Args:
        model: The object to serialize.
        model_savename: Target path without extension; ``'.pkl'`` is appended.
    """
    target_file = model_savename + '.pkl'
    joblib.dump(model, target_file)
def sklearn_load_model(dirpath, modelname):
    """Read a scikit model back from disk via joblib.

    Args:
        dirpath: Directory that contains the model file.
        modelname: File name inside ``dirpath``. It is used verbatim - no
            extension is appended here, so it must include one if the file
            has one (``sklearn_persist_model`` writes ``<name>.pkl``).

    Returns:
        Whatever ``joblib.load`` yields for the joined path.
    """
    # NOTE: joblib.load unpickles the file; only load model files you trust.
    return joblib.load(os.path.join(dirpath, modelname))
def sklearn_load_modellist(model_filenames, model_path):
    """Load several scikit models from one directory.

    Args:
        model_filenames: Iterable of model file names (including extension).
        model_path: Directory the files are located in.

    Returns:
        A list of ``(name_without_extension, loaded_model)`` tuples, in the
        order of ``model_filenames``.
    """
    return [
        (os.path.splitext(fname)[0], sklearn_load_model(model_path, fname))
        for fname in model_filenames
    ]
def predict_scikit(single_model, test_data, test_labels):
    """Run predictions with an already fitted model; no fitting happens here.

    The reported runtime spans both the ``predict`` and the ``predict_proba``
    call.

    Args:
        single_model: Object exposing ``predict`` and ``predict_proba``.
        test_data: Samples to predict on.
        test_labels: Labels forwarded unchanged into the result entry.

    Returns:
        A ``pe.PredictionEntry`` built from the test labels, the predicted
        labels, column 1 of the predicted probabilities and the elapsed time.
    """
    began = datetime.now()
    predicted_labels = single_model.predict(test_data)
    # Keep only column 1 of the probability output.
    kept_probabilities = single_model.predict_proba(test_data)[:, 1]
    elapsed = datetime.now() - began

    return pe.PredictionEntry(test_labels, predicted_labels, kept_probabilities, elapsed)