-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
4d07b87
commit 928789e
Showing
51 changed files
with
17,293 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
from setuptools import setup, find_packages | ||
import os | ||
|
||
def read(fname):
    """Return the full text of a file that lives next to this setup script.

    Args:
        fname: File name (or path) resolved relative to the directory that
            contains this script.

    Returns:
        The file contents as a string.
    """
    # A context manager guarantees the handle is closed even if reading fails;
    # the original left the file object open until garbage collection.
    with open(os.path.join(os.path.dirname(__file__), fname)) as f:
        return f.read()
|
||
# Register the `spectra_ml` distribution with setuptools.
setup(
    name="spectra_ml",
    version="1",
    author="Matthew Dirks",
    author_email="[email protected]",
    description="Python library to apply machine learning neural networks and sensor fusion models to spectroscopic data (i.e., spectra)",
    keywords="",
    url="",
    # Ship whatever packages find_packages() discovers.
    packages=find_packages(),
    # The README text is reused as the long description.
    long_description=read('README.md'),
    # No additional data files are listed for the package.
    package_data={'spectra_ml': []},
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import os | ||
import sys | ||
from hyperopt import mongoexp | ||
import datetime | ||
import logging | ||
|
||
from HPO_start_master import DEFAULT_DB_PORT | ||
|
||
__author__ = "Matthew Dirks" | ||
|
||
def main(experiment_name, DB_host='127.0.0.1', DB_port=None, n_jobs=sys.maxsize, timeout_hours=None):
    """Start a hyperopt MongoDB worker for one experiment.

    Builds the option object that `mongoexp.main_worker_helper` expects and
    hands control to it, bypassing hyperopt's own command-line entry point.

    Args:
        experiment_name: Name of the database the worker connects to.
        DB_host: Host of the MongoDB server.
        DB_port: Port of the MongoDB server; `DEFAULT_DB_PORT` when None.
        n_jobs: Passed to the worker as `max_jobs`.
        timeout_hours: If given, converted to whole seconds and passed to the
            worker as `last_job_timeout`.

    Raises:
        AssertionError: If the `OVERRIDE_n_gpu` environment variable is unset.
    """
    print(f'HPO_hyperopt_mongo_worker.py main({experiment_name}, {DB_port}, {DB_host})...')
    gpu_override = os.environ.get('OVERRIDE_n_gpu')
    assert gpu_override is not None, 'Env var needed, e.g. `set OVERRIDE_n_gpu=1`'

    # Fall back to the default port only when the caller did not choose one.
    DB_port = DEFAULT_DB_PORT if DB_port is None else DB_port
    print(f'HPO_hyperopt_mongo_worker.py: DB_port is {DB_port}')

    timeout_seconds = None if timeout_hours is None else int(timeout_hours*60*60)

    started_at = datetime.datetime.now().strftime("%Y-%m-%d, %H:%M:%S")
    print('HPO_hyperopt_mongo_worker.py running mongoexp.main()...{}'.format(started_at))
    worker_options = My_Main_Worker_Helper(
        mongo=f'{DB_host}:{DB_port}/{experiment_name}',
        use_subprocesses=False,
        last_job_timeout=timeout_seconds,
        max_jobs=n_jobs,
    )

    # hyperopt reports through `logging`; because its main entry point is
    # skipped here, the logging module has to be configured by hand.
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)

    mongoexp.main_worker_helper(worker_options, [])
|
||
|
||
class My_Main_Worker_Helper:
    """Plain option holder handed to `mongoexp.main_worker_helper`.

    Each constructor argument is stored unchanged as an attribute of the same
    name. The defaults are meant to mirror those of `mongoexp.main_worker`;
    see that function for the meaning of each option.
    """

    def __init__(
        self,
        exp_key=None,
        last_job_timeout=None,  # no more taking new jobs after this many seconds
        max_consecutive_failures=4,
        max_jobs=sys.maxsize,
        mongo="localhost/hyperopt",
        poll_interval=5,  # seconds. 5 is fine when jobs take many minutes to complete
        reserve_timeout=120.0,
        workdir=None,
        use_subprocesses=True,
        max_jobs_in_db=sys.maxsize,
    ):
        """Store every option on the instance without validation."""
        # Connection / experiment selection
        self.mongo = mongo
        self.exp_key = exp_key
        self.workdir = workdir
        self.use_subprocesses = use_subprocesses

        # Job limits
        self.max_jobs = max_jobs
        self.max_jobs_in_db = max_jobs_in_db
        self.max_consecutive_failures = max_consecutive_failures

        # Timing
        self.poll_interval = poll_interval
        self.reserve_timeout = reserve_timeout
        self.last_job_timeout = last_job_timeout
|
||
if __name__ == '__main__':
    # Announce start-up first, then expose `main` as a command-line interface.
    print('HPO_hyperopt_mongo_worker.py STARTING...')
    from fire import Fire
    Fire(main)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
#!/usr/bin/env python3 | ||
""" Program to run hyperparam optimization via hyperopt. """ | ||
|
||
from hyperopt import fmin, tpe, rand, Trials | ||
from hyperopt.mongoexp import MongoTrials | ||
import hyperopt | ||
import time | ||
import pickle | ||
import pandas as pd | ||
import numpy as np | ||
import subprocess | ||
from os.path import join, exists | ||
import os | ||
import datetime | ||
import logging | ||
from pprint import pprint | ||
from hyperopt.pyll.stochastic import sample | ||
import matplotlib.pyplot as plt | ||
import atexit | ||
from functools import partial | ||
|
||
from spectra_ml.components.hyperopt_config.objective import calc_loss | ||
from spectra_ml.components.hyperopt_config.get_cmd_space import get_cmd_space | ||
from spectra_ml.components.console_logging import setup_logging, end_logging | ||
|
||
__author__ = "Matthew Dirks" | ||
|
||
DEFAULT_DB_PORT = 27017 | ||
MONGO_EXPERIMENT_NAME = 'experiment' | ||
|
||
def main_master(experiment_name, max_evals, DB_host='127.0.0.1', DB_port=None, out_dir='.', which_cmd_space=None, target=None):
    """Run the hyperopt master: define the search and wait for workers.

    Connects to a MongoDB-backed trial store and calls `fmin`, which blocks
    until workers have evaluated the requested number of trials.

    Args:
        experiment_name: Used as the MongoDB database name.
        max_evals: Passed to `fmin` as `max_evals`.
        DB_host: Host of the MongoDB server.
        DB_port: Port of the MongoDB server; `DEFAULT_DB_PORT` when None.
        out_dir: Directory handed to `setup_logging`.
        which_cmd_space: Selector passed to `get_cmd_space` (required).
        target: Second argument passed to `get_cmd_space` (required).

    Raises:
        AssertionError: If `which_cmd_space` or `target` is None.
    """
    assert which_cmd_space is not None
    assert target is not None

    print(f'HPO_run_hyperopt.py: running main_master({experiment_name}, {max_evals}, {DB_host}, {DB_port})')
    logger = setup_logging(out_dir, suffix='_hyperopt_master', logger_to_use=logging.getLogger('hyperopt_master'))
    logger.info('==================== RUNNING HYPEROPT MASTER ====================')

    DB_NAME = experiment_name

    if DB_port is None:
        DB_port = DEFAULT_DB_PORT

    logger.info(f'which_cmd_space = {which_cmd_space}')
    cmd_space, hyperhyperparams = get_cmd_space(which_cmd_space, target)

    # Record the search configuration alongside the other settings that are
    # forwarded to the objective function.
    hyperhyperparams['which_cmd_space'] = which_cmd_space
    hyperhyperparams['target'] = target

    connect_url = f'mongo://{DB_host}:{DB_port}/{DB_NAME}/jobs'
    logger.info(f'Connecting to DB at {connect_url}, using experiment name {MONGO_EXPERIMENT_NAME}')

    trials = MongoTrials(connect_url, exp_key=MONGO_EXPERIMENT_NAME)

    logger.info('NOTE: next you must run a worker, because fmin(...) will block until a worker does all the jobs.')
    logger.info('MASTER starting now...{}'.format(datetime.datetime.now().strftime("%Y-%m-%d, %H:%M:%S")))

    f = lambda sample_of_cmd_space: calc_loss(sample_of_cmd_space, hyperhyperparams)

    algo = partial(tpe.suggest, n_EI_candidates=50) # default n_EI_candidates is 24
    # ^ see https://github.com/hyperopt/hyperopt/issues/632
    # Pass the configured `algo`; previously the plain `tpe.suggest` was passed
    # and the n_EI_candidates setting above was silently ignored.
    best = fmin(f, space=cmd_space, algo=algo, max_evals=max_evals, trials=trials, verbose=True)

    # Reporting is best-effort: a failure while logging the outcome must not
    # turn a finished optimisation run into a crash.
    try:
        logger.info(f'best: {best}')
    except Exception as e:
        logger.error(f'[1] {e}')

    try:
        logger.info(f'len(results): {len(trials.results)}')
    except Exception as e:
        logger.error(f'[3] {e}')
|
||
|
||
if __name__ == '__main__':
    def goodbye():
        """Exit handler: log a farewell, shut logging down, and print DONE."""
        global server  # `server` is declared global but never read or written here

        master_log = logging.getLogger('hyperopt_master')
        master_log.info('Exit-handler running - goodbye!')

        end_logging(master_log)

        print('DONE')

    # Register the handler so it runs at interpreter exit.
    goodbye = atexit.register(goodbye)

    from fire import Fire
    Fire(main_master)
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
from os.path import join, split, realpath, dirname, splitext, isfile, exists, isdir | ||
import json | ||
from typing import Callable | ||
|
||
import spectra_ml.components.colorama_helpers as _c | ||
|
||
__author__ = "Matthew Dirks" | ||
|
||
def strip_quotes(s):
    """Remove one pair of matching surrounding quotes from `s`, if present.

    Args:
        s: The string to inspect.

    Returns:
        `s` without its first and last character when both are the same quote
        character (`"` or `'`); otherwise `s` unchanged.
    """
    # The length check keeps a lone quote character from being treated as an
    # opening and closing quote at once (which used to yield an empty string).
    if len(s) >= 2 and s[0] == s[-1] and s[0] in ('"', '\''):
        return s[1:-1]
    return s
|
||
def load_cmd_args_file(cmd_args_fpath):
    """Load command arguments from a `.json` or `.pyon` file.

    Args:
        cmd_args_fpath: Path to the file; one pair of surrounding quotes is
            stripped before use.

    Returns:
        The object loaded from the file.

    Raises:
        AssertionError: If the path does not exist.
        Exception: If nothing could be loaded, including when the file
            extension is neither `.json` nor `.pyon`.
    """
    cmd_args_fpath = strip_quotes(cmd_args_fpath)
    assert exists(cmd_args_fpath), 'Provided cmd_args_fpath ({}) doesn\'t exist'.format(cmd_args_fpath)

    # Initialised up front so an unsupported extension reaches the explicit
    # error below instead of failing with UnboundLocalError.
    cmd_args = None
    err = ''
    ext = splitext(cmd_args_fpath)[1]
    if ext == '.json':
        with open(cmd_args_fpath, 'r') as f:
            cmd_args = json.load(f)
    elif ext == '.pyon':
        from spectra_ml.components.config_io import config_pyon
        cmd_args, err = config_pyon.read(cmd_args_fpath)
    else:
        err = 'Unsupported file extension "{}" (expected ".json" or ".pyon")'.format(ext)

    if cmd_args is None:
        raise Exception('Failed to load cmd args from file. cmd_args_fpath="{}", err="{}"'.format(cmd_args_fpath, err))

    return cmd_args
|
||
def get_main_function(run_function:Callable[..., str]):
    """Build a command-line entry point around `run_function`.

    The returned `main` accepts any positional and keyword arguments, resolves
    them with `merge_args_with_args_in_file`, and calls `run_function` with
    the resulting keyword arguments.

    Args:
        run_function: Callable taking keyword arguments (annotated as
            returning `str`).

    Returns:
        The wrapping function `main`.
    """
    def main(*args, **override_args):
        merged_kwargs = merge_args_with_args_in_file(*args, **override_args)
        outcome = run_function(**merged_kwargs)
        return outcome

    return main
|
||
def merge_args_with_args_in_file(*args, **override_args):
    """Combine settings from an optional config file with keyword overrides.

    The config file path may be given either as the single positional
    argument or as the `cmd_args_fpath` keyword. Settings loaded from that
    file are overridden by any given in `override_args`. With no config file,
    `override_args` alone is returned.

    Returns:
        The merged arguments.

    Raises:
        SystemExit: With exit status 1 when the arguments are invalid (more
            than one positional argument, or a positional argument together
            with the `cmd_args_fpath` keyword).
    """

    #### check if cmd_args_fpath provided
    cmd_args_fpath = None

    n_pos = len(args)
    if n_pos == 1 and 'cmd_args_fpath' not in override_args:
        # assume first positional arg is cmd_args_fpath
        cmd_args_fpath = args[0]
    elif n_pos == 0:
        if 'cmd_args_fpath' in override_args:
            # get `cmd_args_fpath` and remove from override_args
            cmd_args_fpath = override_args.pop('cmd_args_fpath')
    else:
        print(_c.red('Invalid command line arguments: please specify either (a) cmd_args_fpath (positional or kwarg) and with or without additional kwargs to override those specified in cmd_args_fpath, or (b) only kwarg args (make sure all args start with "--")'))
        print(_c.red('len(args) = {}\nlen(override_args) = {} (kwargs)'.format(n_pos, len(override_args))))
        # Exit with a failure status; the bare `exit()` used before reported
        # success (status 0) and relied on the `site` module providing it.
        raise SystemExit(1)

    if cmd_args_fpath is not None:
        # if cmd_args_fpath provided, then load the config file with command arguments
        cmd_args = load_cmd_args_file(cmd_args_fpath)

        # modify the args given in the file with arguments manually specified (overrides settings from config file)
        cmd_args.update(override_args)
    else:
        # no config file, so read command line args only
        cmd_args = override_args

    return cmd_args
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
from colorama import Fore, Back, Style | ||
import colorama | ||
|
||
__author__ = "Matthew Dirks" | ||
|
||
def m_green(msg):
    """Return `msg` prefixed with `Fore.GREEN` and followed by `Fore.RESET`."""
    opened = Fore.GREEN + msg
    return opened + Fore.RESET
def m_green2(msg):
    """Return `msg` prefixed with `Back.GREEN` and `Fore.BLACK`, followed by `Style.RESET_ALL`."""
    opened = Back.GREEN + Fore.BLACK + msg
    return opened + Style.RESET_ALL
def m_cyan(msg):
    """Return `msg` prefixed with `Fore.CYAN` and followed by `Fore.RESET`."""
    opened = Fore.CYAN + msg
    return opened + Fore.RESET
def m_warn1(msg):
    """Return `msg` prefixed with `Fore.YELLOW` and followed by `Fore.RESET`."""
    opened = Fore.YELLOW + msg
    return opened + Fore.RESET
def red(msg):
    """Return `msg` prefixed with `Fore.RED` and followed by `Fore.RESET`."""
    opened = Fore.RED + msg
    return opened + Fore.RESET
def m_warn2(msg):
    """Return `msg` prefixed with `Back.YELLOW` and `Fore.BLACK`, followed by `Style.RESET_ALL`."""
    opened = Back.YELLOW + Fore.BLACK + msg
    return opened + Style.RESET_ALL
def m_user_input(msg):
    """Return `msg` prefixed with `Back.BLUE` and followed by `Back.RESET`."""
    opened = Back.BLUE + msg
    return opened + Back.RESET
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
""" Alternative config format. | ||
Uses Python-parser to load config file (accepts valid python code). | ||
""" | ||
import ast | ||
from os.path import exists | ||
import shutil | ||
# import json | ||
import pprint | ||
|
||
def read(fpath, copyIfDoesntExist=None):
    """Load a config file written as a Python literal.

    Args:
        fpath: Path of the config file to read.
        copyIfDoesntExist: Optional path of a default config file. When
            `fpath` is missing and this file exists, it is copied to `fpath`
            and the copy is read.

    Returns:
        A `(value, err)` tuple. `value` is the parsed object, or None when the
        file is missing or cannot be parsed; `err` is an error description,
        or an empty string on success.
    """
    if exists(fpath):
        parsed = None
        problems = ''
        with open(fpath, 'r') as handle:
            try:
                parsed = ast.literal_eval(handle.read())
            except Exception as exc:
                # Read/parse failures are reported through the return value.
                problems += str(exc)
        return parsed, problems

    if copyIfDoesntExist is None:
        # config file doesn't exist, and we are not creating a new one by copying from somewhere else
        return None, 'Config file (%s) does not exist.' % str(fpath)

    if exists(copyIfDoesntExist):
        # config file doesn't exist, so create it from the default file and read the copy
        shutil.copyfile(copyIfDoesntExist, fpath)
        return read(fpath, copyIfDoesntExist=None)

    return None, 'Config file (%s) does not exist, and base (default) config file also does not exist (%s).' % (str(fpath), str(copyIfDoesntExist))
|
||
def write(fpath, configData):
    """Write config data to `fpath` as Python-literal text.

    Args:
        fpath: Destination path; an existing file is overwritten.
        configData: Either a string, written verbatim after checking that it
            parses as a Python literal, or any other object (a dictionary is
            recommended), written as its pretty-printed representation.

    Raises:
        Exception: Whatever `ast.literal_eval` raises for an invalid string;
            in that case nothing is written.
    """
    if not isinstance(configData, str):
        # An object was supplied: convert it to text and save it.
        with open(fpath, 'w') as out:
            rendered = pprint.pformat(configData, width=1000)
            out.write(rendered)
        return

    # A string was supplied: validate it first (raises on any problem),
    # then write it to disk exactly as given.
    ast.literal_eval(configData)
    with open(fpath, 'w') as out:
        out.write(configData)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!python3 | ||
|
||
import os | ||
from os.path import exists | ||
import shutil | ||
import toml | ||
|
||
def getConfigOrCopyDefaultFile(configFpath, defaultFpath):
    """Read a config file, creating it from a default file when it is missing.

    Args:
        configFpath: Path of the config file to read.
        defaultFpath: Path of a default config file, or None. When
            `configFpath` is missing and this file exists, it is copied to
            `configFpath` and the copy is read.

    Returns:
        A `(cfgDict, err)` tuple: the result of `getConfig` when the config
        file exists, otherwise `(None, <message>)`.
    """
    if exists(configFpath):
        return getConfig(configFpath)

    if defaultFpath is None:
        # config file doesn't exist, and we are not creating a new one by copying from somewhere else
        return None, 'Config file (%s) does not exist.' % str(configFpath)

    if not exists(defaultFpath):
        return None, 'Config file (%s) does not exist, and base (default) config file also does not exist (%s).' % (str(configFpath), str(defaultFpath))

    # config file doesn't exist, so create it from the default file and read the copy
    shutil.copyfile(defaultFpath, configFpath)
    return getConfigOrCopyDefaultFile(configFpath, defaultFpath=None)
|
||
|
||
def getConfig(configFpath):
    """Load a TOML config file.

    Args:
        configFpath: Path of the config file.

    Returns:
        `(data, None)` with the object returned by `toml.load` when the file
        exists, otherwise `(None, <error message>)`.
    """
    if exists(configFpath):
        # read existing config file
        with open(configFpath, 'r') as f:
            parsed = toml.load(f)
        return parsed, None

    return None, 'config file doesn\'t exist'
Oops, something went wrong.