# Source code for MLT.tools.toolbelt

"""Collection of misc tools that don't fit in a standalone module"""
import os
import json
import pickle
import natsort
import numpy as np
from datetime import datetime

def list_files(dirpath, fname_start):
    """Return the entries of a folder whose names begin with a given string.

    Args:
        dirpath (string): Folder whose entries are inspected.
        fname_start (string): Prefix an entry name has to start with.

    Returns:
        list: The matching entry names (not full paths), naturally sorted.
    """
    matching = [
        entry for entry in os.listdir(dirpath)
        if entry.startswith(fname_start)
    ]
    return natsort.natsorted(matching)
def list_folders(dirpath):
    """Return the names of all entries in a path that are not regular files.

    Args:
        dirpath (string): Folder whose entries are inspected.

    Returns:
        list: The names (not full paths) of the non-file entries, naturally
        sorted.
    """
    subfolders = []
    for entry in os.listdir(dirpath):
        if os.path.isfile(os.path.join(dirpath, entry)):
            # Regular files are the only entries that get filtered out.
            continue
        subfolders.append(entry)
    return natsort.natsorted(subfolders)
def create_dir(dirpath):
    """Create the specified path, including missing parents, if needed.

    Calling this for a directory that already exists is a no-op.

    Args:
        dirpath (string): Directory path to create.

    Raises:
        FileExistsError: If ``dirpath`` exists but is not a directory.
    """
    # exist_ok=True replaces the former "check os.path.exists, then create"
    # sequence, which could fail if the directory appeared between the check
    # and the creation.
    os.makedirs(dirpath, exist_ok=True)
def write_to_json(full_path_with_name, data):
    """Serialize the given data as JSON into the file at the given path.

    The file is opened in write mode, so existing content is replaced.

    Args:
        full_path_with_name (string): Path of the file to write, including
            the file name.
        data: JSON-serializable structure to store.
    """
    with open(full_path_with_name, 'w') as json_file:
        json.dump(data, json_file)
def read_from_json(full_path_with_name):
    """Parse the JSON file at the given path and return its content.

    Args:
        full_path_with_name (string): Path of the JSON file to read,
            including the file name.

    Returns:
        The structure decoded from the file.
    """
    with open(full_path_with_name, 'r') as json_file:
        content = json.load(json_file)
    return content
def write_to_pickle(full_path_with_name, data):
    """Pickle the given object into the file at the given path.

    The file is opened in binary write mode, so existing content is replaced.

    Args:
        full_path_with_name (string): Path of the file to write, including
            the file name.
        data: Picklable object to store.
    """
    with open(full_path_with_name, 'wb') as target_file:
        pickle.dump(data, target_file)
def read_from_pickle(full_path_with_name):
    """Read a pickled object from the given location.

    Note that unpickling can execute arbitrary code, so this should only be
    pointed at files from a trusted source.

    Args:
        full_path_with_name (string): Path of the pickle file to read,
            including the file name.

    Returns:
        The object restored from the file.
    """
    with open(full_path_with_name, 'rb') as source_file:
        restored = pickle.load(source_file)
    return restored
def write_call_params(args, result_path):
    """Write the parameters with which MLT has been called to a text file.

    The attributes of ``args`` are written, as the string form of their
    dict, to ``call_parameters.txt`` inside ``result_path``. An existing file
    of that name is overwritten.

    Args:
        args: Object whose attributes hold the call parameters; anything
            accepted by ``vars()`` works (presumably an argparse namespace -
            confirm against the callers).
        result_path (string): Existing folder the file is written to.
    """
    parampath = os.path.join(result_path, 'call_parameters.txt')
    with open(parampath, 'w') as paramwriter:
        # The with-block closes the file on exit; no explicit close() needed.
        paramwriter.write(str(vars(args)))
def save_np_to_disk(stats_dataframe, filename, result_path):
    """Store the given data as a binary NumPy ``.npy`` file on disk.

    Args:
        stats_dataframe: Data handed to ``np.save``.
        filename (string): Name of the target file without the ``.npy``
            extension.
        result_path (string): Folder the file is written to.
    """
    target_name = filename + '.npy'
    np.save(os.path.join(result_path, target_name), stats_dataframe)
def save_metrics_to_disk(metrics_array, modelname, result_path):
    """Store the given metrics as ``<modelname>_metrics.json`` on disk.

    Args:
        metrics_array: JSON-serializable metrics structure.
        modelname (string): Model name used as prefix of the file name.
        result_path (string): Folder the file is written to.
    """
    metrics_file = modelname + '_metrics.json'
    with open(os.path.join(result_path, metrics_file), 'w') as json_file:
        json.dump(metrics_array, json_file)
def save_results_to_disk(stats_data, filename, result_path):
    """Store the full results of a model as ``<filename>_results.json``.

    Every element of ``stats_data`` becomes one dict with the keys
    ``test_labels``, ``predicted_labels``, ``training_time`` and
    ``predicted_probabilities``.

    Args:
        stats_data: Iterable of objects exposing the attributes
            ``test_labels``, ``predicted_labels``, ``training_time`` and
            ``predicted_probabilities``. The label attributes must support
            ``.ravel().tolist()``; the probabilities may be a plain list
            (stored as is) or something supporting ``.ravel().tolist()``.
        filename (string): Prefix of the file name.
        result_path (string): Folder the file is written to.
    """
    target = os.path.join(result_path, filename + '_results.json')

    # Flatten everything into JSON-compatible builtins before dumping.
    records = [
        {
            'test_labels': stat.test_labels.ravel().tolist(),
            'predicted_labels': stat.predicted_labels.ravel().tolist(),
            'training_time': str(stat.training_time),
            'predicted_probabilities': (
                stat.predicted_probabilities
                if isinstance(stat.predicted_probabilities, list)
                else stat.predicted_probabilities.ravel().tolist()
            ),
        }
        for stat in stats_data
    ]

    with open(target, 'w') as json_file:
        json.dump(records, json_file)
def load_fold_indices(path):
    """Load the ``short`` fold indices stored in the given folder.

    Reads ``dataset_fold_indices.json`` from ``path`` and returns its
    ``short`` entry (described by the original documentation as the start
    and end indices of the test set for every fold).

    Args:
        path (string): Folder containing ``dataset_fold_indices.json``.

    Returns:
        string: The ``short`` entry re-encoded as JSON text with an indent
        of 4 (a string, not the decoded structure).
    """
    indices_file = os.path.join(path, 'dataset_fold_indices.json')
    with open(indices_file, 'r') as json_file:
        content = json.load(json_file)
    short_indices = content['short']
    return json.dumps(short_indices, indent=4)
def load_result(path, modelname):
    """Load the metrics of a given model from the given folder.

    Args:
        path (string): Folder containing ``<modelname>_metrics.json``.
        modelname (string): Model name used as prefix of the file name.

    Returns:
        string: The file content re-encoded as JSON text with an indent of 4
        (a string, not the decoded structure).
    """
    metrics_file = os.path.join(path, modelname + '_metrics.json')
    with open(metrics_file, 'r') as json_file:
        metrics = json.load(json_file)
    return json.dumps(metrics, indent=4)
def load_results_from_disk(path, modelname):
    """Load the full result JSON of the given model from the given folder.

    Args:
        path (string): Folder containing ``<modelname>_results.json``.
        modelname (string): Model name used as prefix of the file name.

    Returns:
        The structure decoded from the file.
    """
    results_file = os.path.join(path, modelname + '_results.json')
    with open(results_file, 'r') as json_file:
        return json.load(json_file)
def prepare_folders(runner_name):
    """Creates all the folders needed for a test run

    The result folder is located at
    ``<folder of this module>/../results/<runner_name>/<timestamp>``, where
    the timestamp is the current local time formatted as
    ``%Y-%m-%d_%H-%M-%S``. A ``models`` subfolder is created inside it.

    Args:
        runner_name (string): Name of the calling runner. Will be the base
            name for results

    Returns:
        result_path (string): The full path where results can be stored
    """
    runtime_date = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    result_path = os.path.join(
        os.path.dirname(__file__),
        '..',
        'results',
        runner_name,
        runtime_date,
    )
    model_savepath = os.path.join(result_path, 'models')

    # Housekeeping! The models folder lives inside the result path, so one
    # recursive call creates both. exist_ok=True avoids the race of checking
    # for existence first and tolerates two runs within the same second.
    os.makedirs(model_savepath, exist_ok=True)

    return result_path