Skip to content

Commit

Permalink
code by Matthew Dirks
Browse files Browse the repository at this point in the history
  • Loading branch information
skylogic004 committed Jul 23, 2023
1 parent 4d07b87 commit 928789e
Show file tree
Hide file tree
Showing 51 changed files with 17,293 additions and 2 deletions.
11 changes: 10 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# user-specific config file
config.toml

# cached data files (can be generated by the software)
spectra_ml/data/PROCESSED*.pkl

################# github added the lines below:

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand All @@ -20,6 +28,7 @@ parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
Expand Down Expand Up @@ -85,7 +94,7 @@ ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
Expand Down
283 changes: 282 additions & 1 deletion README.md

Large diffs are not rendered by default.

18 changes: 18 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from setuptools import setup, find_packages
import os

def read(fname):
    """Return the text content of `fname`, located next to this setup script.

    The path is resolved relative to the directory containing this file, so
    the result does not depend on the current working directory.
    """
    path = os.path.join(os.path.dirname(__file__), fname)
    # Use a context manager so the handle is always closed, and an explicit
    # encoding so the result does not depend on the platform's default codec.
    with open(path, encoding='utf-8') as f:
        return f.read()

# Package metadata for setuptools.
setup(
    name="spectra_ml",
    version="1",
    author="Matthew Dirks",
    author_email="[email protected]",
    description=("Python library to apply machine learning neural networks and sensor fusion models to spectroscopic data (i.e., spectra)"),
    keywords="",
    url="",
    packages=find_packages(),
    long_description=read('README.md'),
    # The long description comes from a Markdown file; when no content type is
    # given, packaging tools assume reStructuredText and render it incorrectly.
    long_description_content_type='text/markdown',
    package_data={'spectra_ml': []},
)
67 changes: 67 additions & 0 deletions spectra_ml/HPO_hyperopt_mongo_worker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import os
import sys
from hyperopt import mongoexp
import datetime
import logging

from HPO_start_master import DEFAULT_DB_PORT

__author__ = "Matthew Dirks"

def main(experiment_name, DB_host='127.0.0.1', DB_port=None, n_jobs=sys.maxsize, timeout_hours=None):
    """Start a hyperopt MongoDB worker for the given experiment.

    Args:
        experiment_name: used as the database name in the mongo address.
        DB_host: host of the MongoDB server.
        DB_port: port of the MongoDB server; DEFAULT_DB_PORT when None.
        n_jobs: passed to the worker options as `max_jobs`.
        timeout_hours: when given, converted to whole seconds and passed to
            the worker options as `last_job_timeout`.

    Raises:
        AssertionError: if the OVERRIDE_n_gpu environment variable is not set.
    """
    print(f'HPO_hyperopt_mongo_worker.py main({experiment_name}, {DB_port}, {DB_host})...')
    assert 'OVERRIDE_n_gpu' in os.environ, 'Env var needed, e.g. `set OVERRIDE_n_gpu=1`'

    if DB_port is None:
        DB_port = DEFAULT_DB_PORT
        # NOTE(review): assumed to be reported only when the default port is substituted - confirm
        print(f'HPO_hyperopt_mongo_worker.py: DB_port is {DB_port}')

    timeout_seconds = None if timeout_hours is None else int(timeout_hours*60*60)

    started_at = datetime.datetime.now().strftime("%Y-%m-%d, %H:%M:%S")
    print('HPO_hyperopt_mongo_worker.py running mongoexp.main()...{}'.format(started_at))

    mongo_address = f'{DB_host}:{DB_port}/{experiment_name}'
    worker_options = My_Main_Worker_Helper(
        mongo=mongo_address,
        use_subprocesses=False,
        last_job_timeout=timeout_seconds,
        max_jobs=n_jobs,
    )

    # hyperopt logs through the logging module; because its normal entry point
    # is bypassed here, logging has to be configured by hand.
    logging.basicConfig(stream=sys.stderr, level=logging.INFO)

    mongoexp.main_worker_helper(worker_options, [])


class My_Main_Worker_Helper:
    """Plain options holder handed to `mongoexp.main_worker_helper`.

    Each constructor argument is stored unchanged as an attribute of the same
    name. The defaults are meant to mirror those defined in
    `mongoexp.main_worker`; see that function for the meaning of each option.
    """

    def __init__(
        self,
        exp_key=None,
        last_job_timeout=None,  # no more taking new jobs after this many seconds
        max_consecutive_failures=4,
        max_jobs=sys.maxsize,
        mongo="localhost/hyperopt",
        poll_interval=5,  # seconds. 5 is fine when jobs take many minutes to complete
        reserve_timeout=120.0,
        workdir=None,
        use_subprocesses=True,
        max_jobs_in_db=sys.maxsize,
    ):
        """Store every option as an attribute with the same name."""
        # where to find work
        self.mongo = mongo
        self.exp_key = exp_key
        self.workdir = workdir

        # limits on how much work to take
        self.max_jobs = max_jobs
        self.max_jobs_in_db = max_jobs_in_db
        self.max_consecutive_failures = max_consecutive_failures

        # timing
        self.last_job_timeout = last_job_timeout
        self.poll_interval = poll_interval
        self.reserve_timeout = reserve_timeout

        # execution mode
        self.use_subprocesses = use_subprocesses

if __name__ == '__main__':
    # Script entry point: announce start-up, then hand main() to `fire`,
    # which is imported only when the file is run as a script.
    print('HPO_hyperopt_mongo_worker.py STARTING...')
    from fire import Fire
    Fire(main)
90 changes: 90 additions & 0 deletions spectra_ml/HPO_start_master.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/usr/bin/env python3
""" Program to run hyperparam optimization via hyperopt. """

from hyperopt import fmin, tpe, rand, Trials
from hyperopt.mongoexp import MongoTrials
import hyperopt
import time
import pickle
import pandas as pd
import numpy as np
import subprocess
from os.path import join, exists
import os
import datetime
import logging
from pprint import pprint
from hyperopt.pyll.stochastic import sample
import matplotlib.pyplot as plt
import atexit
from functools import partial

from spectra_ml.components.hyperopt_config.objective import calc_loss
from spectra_ml.components.hyperopt_config.get_cmd_space import get_cmd_space
from spectra_ml.components.console_logging import setup_logging, end_logging

__author__ = "Matthew Dirks"

DEFAULT_DB_PORT = 27017
MONGO_EXPERIMENT_NAME = 'experiment'

def main_master(experiment_name, max_evals, DB_host='127.0.0.1', DB_port=None, out_dir='.', which_cmd_space=None, target=None):
    """Run the hyperopt master: define the search and wait for workers to evaluate it.

    Args:
        experiment_name: used as the MongoDB database name.
        max_evals: passed to `fmin` as the maximum number of evaluations.
        DB_host: host of the MongoDB server.
        DB_port: port of the MongoDB server; DEFAULT_DB_PORT when None.
        out_dir: directory handed to `setup_logging`.
        which_cmd_space: selects the search space via `get_cmd_space` (required).
        target: passed to `get_cmd_space` alongside `which_cmd_space` (required).

    Raises:
        AssertionError: if `which_cmd_space` or `target` is None.
    """
    assert which_cmd_space is not None
    assert target is not None

    print(f'HPO_run_hyperopt.py: running main_master({experiment_name}, {max_evals}, {DB_host}, {DB_port})')
    logger = setup_logging(out_dir, suffix='_hyperopt_master', logger_to_use=logging.getLogger('hyperopt_master'))
    logger.info('==================== RUNNING HYPEROPT MASTER ====================')

    DB_NAME = experiment_name

    if DB_port is None:
        DB_port = DEFAULT_DB_PORT

    logger.info(f'which_cmd_space = {which_cmd_space}')
    cmd_space, hyperhyperparams = get_cmd_space(which_cmd_space, target)

    # record how the space was selected so the objective can see it
    hyperhyperparams['which_cmd_space'] = which_cmd_space
    hyperhyperparams['target'] = target

    connect_url = f'mongo://{DB_host}:{DB_port}/{DB_NAME}/jobs'
    logger.info(f'Connecting to DB at {connect_url}, using experiment name {MONGO_EXPERIMENT_NAME}')

    trials = MongoTrials(connect_url, exp_key=MONGO_EXPERIMENT_NAME)

    logger.info('NOTE: next you must run a worker, because fmin(...) will block until a worker does all the jobs.')
    logger.info('MASTER starting now...{}'.format(datetime.datetime.now().strftime("%Y-%m-%d, %H:%M:%S")))

    f = lambda sample_of_cmd_space: calc_loss(sample_of_cmd_space, hyperhyperparams)

    algo = partial(tpe.suggest, n_EI_candidates=50) # default n_EI_candidates is 24
    # ^ see https://github.com/hyperopt/hyperopt/issues/632
    # Pass the configured `algo`; previously plain `tpe.suggest` was passed,
    # which silently discarded the n_EI_candidates setting above.
    best = fmin(f, space=cmd_space, algo=algo, max_evals=max_evals, trials=trials, verbose=True)

    # Reporting is best-effort: a failure to format/log a result must not
    # make a finished optimization look like a crash.
    try:
        logger.info(f'best: {best}')
    except Exception as e:
        logger.error(f'[1] {e}')

    try:
        logger.info(f'len(results): {len(trials.results)}')
    except Exception as e:
        logger.error(f'[3] {e}')


if __name__ == '__main__':
    def goodbye():
        """Exit handler: log a farewell, shut down the master's logging, print DONE."""
        master_logger = logging.getLogger('hyperopt_master')
        master_logger.info('Exit-handler running - goodbye!')

        end_logging(master_logger)

        print('DONE')

    # run goodbye() when the interpreter exits
    atexit.register(goodbye)

    import fire
    fire.Fire(main_master)
Empty file.
73 changes: 73 additions & 0 deletions spectra_ml/components/cmd_line_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from os.path import join, split, realpath, dirname, splitext, isfile, exists, isdir
import json
from typing import Callable

import spectra_ml.components.colorama_helpers as _c

__author__ = "Matthew Dirks"

def strip_quotes(s):
    """Remove one pair of matching surrounding quotes (single or double) from `s`.

    Strings that are not wrapped in a matching pair are returned unchanged.
    A string consisting of a single quote character is also returned
    unchanged: one character cannot be both the opening and the closing quote.
    """
    if len(s) >= 2 and s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s

def load_cmd_args_file(cmd_args_fpath):
    """Load command arguments from a `.json` or `.pyon` file.

    Args:
        cmd_args_fpath: path to the file; one pair of surrounding quotes is
            stripped before use.

    Returns:
        The object loaded from the file.

    Raises:
        AssertionError: if the path does not exist.
        Exception: if nothing could be loaded (unsupported extension, a
            `.pyon` read error, or a file whose content is null/None).
    """
    cmd_args_fpath = strip_quotes(cmd_args_fpath)
    assert exists(cmd_args_fpath), 'Provided cmd_args_fpath ({}) doesn\'t exist'.format(cmd_args_fpath)

    # Start from "nothing loaded" so an unsupported extension reaches the
    # explicit error below instead of failing on an unbound local variable.
    cmd_args = None
    err = ''
    ext = splitext(cmd_args_fpath)[1]
    if ext == '.json':
        with open(cmd_args_fpath, 'r') as f:
            cmd_args = json.load(f)
    elif ext == '.pyon':
        from spectra_ml.components.config_io import config_pyon
        cmd_args, err = config_pyon.read(cmd_args_fpath)
    else:
        err = 'Unsupported file extension "{}" (expected ".json" or ".pyon")'.format(ext)

    if cmd_args is None:
        raise Exception('Failed to load cmd args from file. cmd_args_fpath="{}", err="{}"'.format(cmd_args_fpath, err))

    return cmd_args

def get_main_function(run_function:Callable[..., str]):
    """Wrap `run_function` in a `main(*args, **override_args)` entry point.

    The returned `main` merges its arguments with those from an optional
    arguments file (via `merge_args_with_args_in_file`) and calls
    `run_function` with the merged result as keyword arguments, returning
    whatever `run_function` returns.
    """
    def main(*args, **override_args):
        merged_kwargs = merge_args_with_args_in_file(*args, **override_args)
        result = run_function(**merged_kwargs)
        return result

    return main

def merge_args_with_args_in_file(*args, **override_args):
    """
    Build the final argument dict from an optional arguments file plus overrides.

    The file path may be given as the single positional argument or as the
    `cmd_args_fpath` keyword argument. Settings loaded from that file are
    overridden by any specified in `override_args`. When no file is given,
    `override_args` is returned as-is.

    Any other combination (more than one positional argument, or a positional
    argument together with a `cmd_args_fpath` keyword) is reported as invalid
    and the process exits with a non-zero status.
    """

    #### check if cmd_args_fpath provided
    cmd_args_fpath = None

    n_pos = len(args)
    if n_pos == 1 and 'cmd_args_fpath' not in override_args:
        # assume first positional arg is cmd_args_fpath
        cmd_args_fpath = args[0]
    elif n_pos == 0:
        # get `cmd_args_fpath` (if present) and remove it from override_args
        cmd_args_fpath = override_args.pop('cmd_args_fpath', None)
    else:
        print(_c.red('Invalid command line arguments: please specify either (a) cmd_args_fpath (positional or kwarg) and with or without additional kwargs to override those specified in cmd_args_fpath, or (b) only kwarg args (make sure all args start with "--")'))
        print(_c.red('len(args) = {}\nlen(override_args) = {} (kwargs)'.format(n_pos, len(override_args))))
        # Raise SystemExit directly: the bare `exit()` helper is only provided
        # by the `site` module and reported success (status 0) for this error.
        raise SystemExit(1)

    if cmd_args_fpath is None:
        # no config file, so use the command line args only
        return override_args

    # load the config file with command arguments, then apply the arguments
    # that were specified manually (they override settings from the file)
    cmd_args = load_cmd_args_file(cmd_args_fpath)
    cmd_args.update(override_args)
    return cmd_args
19 changes: 19 additions & 0 deletions spectra_ml/components/colorama_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from colorama import Fore, Back, Style
import colorama

__author__ = "Matthew Dirks"

def m_green(msg):
    """Return `msg` wrapped between `Fore.GREEN` and `Fore.RESET`."""
    start, end = Fore.GREEN, Fore.RESET
    return start + msg + end
def m_green2(msg):
    """Return `msg` prefixed with `Back.GREEN` + `Fore.BLACK` and followed by `Style.RESET_ALL`."""
    start = Back.GREEN + Fore.BLACK
    return start + msg + Style.RESET_ALL
def m_cyan(msg):
    """Return `msg` wrapped between `Fore.CYAN` and `Fore.RESET`."""
    start, end = Fore.CYAN, Fore.RESET
    return start + msg + end
def m_warn1(msg):
    """Return `msg` wrapped between `Fore.YELLOW` and `Fore.RESET`."""
    start, end = Fore.YELLOW, Fore.RESET
    return start + msg + end
def red(msg):
    """Return `msg` wrapped between `Fore.RED` and `Fore.RESET`."""
    start, end = Fore.RED, Fore.RESET
    return start + msg + end
def m_warn2(msg):
    """Return `msg` prefixed with `Back.YELLOW` + `Fore.BLACK` and followed by `Style.RESET_ALL`."""
    start = Back.YELLOW + Fore.BLACK
    return start + msg + Style.RESET_ALL
def m_user_input(msg):
    """Return `msg` wrapped between `Back.BLUE` and `Back.RESET`."""
    start, end = Back.BLUE, Back.RESET
    return start + msg + end
Empty file.
50 changes: 50 additions & 0 deletions spectra_ml/components/config_io/config_pyon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
""" Alternative config format.
Uses Python-parser to load config file (accepts valid python code).
"""
import ast
from os.path import exists
import shutil
# import json
import pprint

def read(fpath, copyIfDoesntExist=None):
    """Load a PYTHON config file.

    Args:
        fpath: path of the config file to read.
        copyIfDoesntExist: optional path of a default config file. If `fpath`
            does not exist and this file does, it is copied to `fpath` and
            the copy is read.

    Returns:
        A tuple `(ob, err)`. `ob` is the object parsed with
        `ast.literal_eval`, or None if nothing could be loaded. `err` is an
        empty string on success, otherwise a description of the problem.
    """
    if exists(fpath):
        ob = None
        err = ''
        # The context manager guarantees the handle is closed however the
        # parsing below ends.
        with open(fpath, 'r') as config_file:
            try:
                ob = ast.literal_eval(config_file.read())
            except Exception as e:
                # read/parse problems are reported through `err`, not raised
                err += str(e)
        return ob, err

    if copyIfDoesntExist is None:
        # config file doesn't exist, and we are not creating a new one by copying from somewhere else
        return None, 'Config file (%s) does not exist.' % str(fpath)

    if not exists(copyIfDoesntExist):
        return None, 'Config file (%s) does not exist, and base (default) config file also does not exist (%s).' % (str(fpath), str(copyIfDoesntExist))

    # config file doesn't exist, so copy the default configuration file in order to create it
    shutil.copyfile(copyIfDoesntExist, fpath)
    return read(fpath, copyIfDoesntExist=None) # read the copied file

def write(fpath, configData):
    """Write config data to `fpath`.

    Args:
        fpath: destination path; an existing file is overwritten.
        configData: either a string holding exactly the text to store (it
            must parse with `ast.literal_eval`), or a dictionary
            (recommended) or other object, which is formatted with
            `pprint.pformat`.

    Raises:
        Whatever `ast.literal_eval` raises if a string `configData` is not a
        valid Python literal; in that case the file is not touched.
    """
    if isinstance(configData, str): # user provided exactly what they want in the config file as string
        # validate only (will raise exceptions if any problems); the parsed value is not needed
        ast.literal_eval(configData)
        txt = configData
    else:
        # user provided a dictionary (recommended) or some other object, now convert it
        txt = pprint.pformat(configData, width=1000)

    # The text is fully prepared before the file is opened, so a failure
    # above cannot leave behind a truncated config file.
    with open(fpath, 'w') as f:
        f.write(txt)
36 changes: 36 additions & 0 deletions spectra_ml/components/config_io/config_toml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!python3

import os
from os.path import exists
import shutil
import toml

def getConfigOrCopyDefaultFile(configFpath, defaultFpath):
    """Load the config at `configFpath`, creating it from `defaultFpath` if needed.

    If `configFpath` exists it is loaded with `getConfig`. Otherwise, when
    `defaultFpath` is given and exists, it is copied to `configFpath` and the
    copy is loaded.

    Returns:
        A tuple `(cfgDict, err)`. When the file cannot be found or created,
        `cfgDict` is None and `err` describes the problem; otherwise the pair
        comes from `getConfig`.
    """
    if exists(configFpath):
        loaded, problem = getConfig(configFpath)
        return loaded, problem

    if defaultFpath is None:
        # config file doesn't exist, and we are not creating a new one by copying from somewhere else
        return None, 'Config file (%s) does not exist.' % str(configFpath)

    if not exists(defaultFpath):
        return None, 'Config file (%s) does not exist, and base (default) config file also does not exist (%s).' % (str(configFpath), str(defaultFpath))

    # config file doesn't exist, so copy the default configuration file in order to create it
    shutil.copyfile(defaultFpath, configFpath)
    return getConfigOrCopyDefaultFile(configFpath, defaultFpath=None) # read the copied file


def getConfig(configFpath):
    """Read the TOML file at `configFpath`.

    Returns:
        `(data, None)` with the object returned by `toml.load` when the file
        exists, or `(None, message)` when it does not.
    """
    if not exists(configFpath):
        return None, "config file doesn't exist"

    # read existing config file
    with open(configFpath, 'r') as config_file:
        parsed = toml.load(config_file)
    return parsed, None
Loading

0 comments on commit 928789e

Please sign in to comment.