Skip to content

Commit

Permalink
misc/cnn2d-fixes (#93)
Browse files Browse the repository at this point in the history
* minor fixes for light curves

* update changelog
  • Loading branch information
alphasentaurii authored Sep 17, 2024
1 parent 0149d56 commit 1ab2cc2
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 35 deletions.
4 changes: 4 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ preprocessor
------------
- explicitly pass `encoding=bytes` in transform.hypersonic_pliers for numpy 2 compatibility where this will no longer be the default for np.loadtxt [#92]

builder
-------
- Various minor fixes relating to CNN 2d model usage [#93]


1.1.1 (2024-07-11)
==================
Expand Down
14 changes: 7 additions & 7 deletions spacekit/builder/architect.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,7 +898,7 @@ def ensemble_cnn(self):
self.cnn.output_name = "svm_image_output"
self.cnn.name = "svm_cnn"
self.cnn.ensemble = True
self.cnn.input_shape = self.X_train[1].shape[1:] if self.X_train else None
self.cnn.input_shape = self.X_train[1].shape[1:] if self.X_train is not None else None
self.cnn.output_shape = 1
self.cnn.layers = [18, 32, 64, 32, 18]
self.cnn.activation = "leaky_relu"
Expand Down Expand Up @@ -1016,7 +1016,7 @@ def __init__(
**builder_kwargs,
)
self.blueprint = blueprint
self.input_shape = self.X_train.shape[1:] if self.X_train else None
self.input_shape = self.X_train.shape[1:] if self.X_train is not None else None
self.output_shape = 1
self.input_name = "cnn2d_inputs"
self.output_name = "cnn2d_output"
Expand All @@ -1035,7 +1035,7 @@ def __init__(
self.early_stopping = None
self.batch_size = 32
self.cost_function = "sigmoid"
self.step_size = X_train.shape[1] if X_train else None
self.step_size = X_train.shape[1] if X_train is not None else None
self.steps_per_epoch = self.step_size // self.batch_size
self.batch_maker = self.batch

Expand All @@ -1054,17 +1054,17 @@ def build(self):
)(inputs)
x = MaxPool1D(strides=self.strides)(x)
x = BatchNormalization()(x)
count = 1
for f in self.filters[1:]:
for f in list(range(len(self.filters))):
if f == 0:
continue
x = Conv1D(
filters=self.filters[f],
kernel_size=self.kernel,
activation=self.activation,
)(x)
x = MaxPool1D(strides=self.strides)(x)
if count < len(self.filters):
if f < len(self.filters) - 1:
x = BatchNormalization()(x)
count += 1
else:
x = Flatten()(x)
self.log.info("DROPOUT")
Expand Down
8 changes: 4 additions & 4 deletions spacekit/extractor/scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,10 +325,10 @@ def __init__(
self.fpaths = []

def scrape(self):
"""Using the key-pair values in `dataset` dictionary attribute, download the files from a github
repo and check the hash keys match before extracting. Extraction and hash-key checking is handled
externally by the `keras.utils.data_utils.get_file` method. If extraction is successful, the
archive file will be deleted.
"""Using the key-pair values in `dataset` dictionary attribute, download the files from a website
(such as Zenodo) and check that the hash keys match before extracting. Extraction and hash-key checking
are handled externally by the `keras.utils.data_utils.get_file` method. If extraction is successful,
the archive file will be deleted. See spacekit.datasets.meta for dictionary formatting examples.
Returns
-------
Expand Down
9 changes: 4 additions & 5 deletions spacekit/preprocessor/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -869,7 +869,7 @@ def tensors_to_arrays(X_train, y_train, X_test, y_test):


def hypersonic_pliers(
path_to_train, path_to_test, y_col=[0], skip=1, dlm=",", encoding=bytes, subtract_y=0.0
path_to_train, path_to_test, y_col=[0], skip=1, dlm=",", encoding='bytes', subtract_y=0.0, reshape=False
):
"""Extracts data into 1-dimensional arrays, using separate target classes (y) for training and test data. Assumes y (target)
is first column in dataframe. If the target (y) classes in the raw data are 0 and 2, but you'd like them to be binaries (0
Expand Down Expand Up @@ -900,16 +900,15 @@ def hypersonic_pliers(
Train = np.loadtxt(path_to_train, skiprows=skip, delimiter=dlm, encoding=encoding)
cols = list(range(Train.shape[1]))
xcols = [c for c in cols if c not in y_col]
# X_train = Train[:, 1:]
X_train = Train[:, xcols]
# y_train = Train[:, 0, np.newaxis] - subtract_y
y_train = Train[:, y_col, np.newaxis] - subtract_y

Test = np.loadtxt(path_to_test, skiprows=skip, delimiter=dlm, encoding=encoding)
X_test = Test[:, xcols]
y_test = Test[:, y_col, np.newaxis] - subtract_y
# X_test = Test[:, 1:]
# y_test = Test[:, 0, np.newaxis] - subtract_y
if reshape is True:
y_train = y_train.reshape(y_train.shape[0], 1)
y_test = y_test.reshape(y_test.shape[0], 1)

del Train, Test
print("X_train: ", X_train.shape)
Expand Down
46 changes: 27 additions & 19 deletions spacekit/skopes/kepler/light_curves.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,21 @@
babel_fish_dispenser,
)
from spacekit.builder.architect import BuilderCNN2D
from spacekit.datasets.k2_exo import k2_uri, k2_data
from spacekit.datasets.meta import k2 as k2meta
from spacekit.extractor.scrape import WebScraper

def downloads_exist(scraper, k2_meta):
    """Return the cached dataset file paths if every expected file is present.

    Parameters
    ----------
    scraper : object
        Any object exposing ``cache_dir`` and ``cache_subdir`` attributes;
        the two are joined to form the directory that is searched.
    k2_meta : dict
        Mapping whose values are dictionaries containing a ``'key'`` entry
        naming the file expected inside the cache directory.

    Returns
    -------
    list
        Full paths of all expected files (in ``k2_meta`` order) when every
        one of them exists on disk, otherwise an empty list.
    """
    base_path = os.path.join(scraper.cache_dir, scraper.cache_subdir)
    filepaths = [os.path.join(base_path, v["key"]) for v in k2_meta.values()]
    # An empty list tells the caller nothing usable is cached. Bail out before
    # the message below so empty metadata is not reported as "found".
    if not filepaths or not all(os.path.exists(fp) for fp in filepaths):
        return []
    print("Found existing datasets, skipping download.")
    return filepaths


class LaunchK2:
def __init__(self, fpaths):
Expand All @@ -20,54 +32,50 @@ def __init__(self, fpaths):
self.history = None

def launch_prep(self):
self.X_train, self.X_test, self.y_train, self.y_test = self.split_data()
self.X_train, self.X_test = self.scale_data()
self.X_train, self.X_test = self.add_filter()
return self.X_train, self.X_test, self.y_train, self.y_test
self.split_data()
self.scale_data()
self.add_filter()

def split_data(self):
print("Splitting train-test feature and target data...")
for fpath in self.fpaths:
if fpath.endswith("Train"):
if "Train" in fpath:
train = fpath
else:
test = fpath
self.X_train, self.X_test, self.y_train, self.y_test = hypersonic_pliers(
train, test
train, test, subtract_y=1.0, reshape=True
)
print("Data split successful")
return self.X_train, self.X_test, self.y_train, self.y_test

def scale_data(self):
print("Scaling data to Zero Mean and Unit Variance...")
self.X_train, self.X_test = thermo_fusion_chisel(self.X_train, self.X_test)
print("Data scaling successful.")
return self.X_train, self.X_test

def add_filter(self):
print("Adding noise filter...")
self.X_train, self.X_test = babel_fish_dispenser(self.X_train, self.X_test)
print("Noise filter added successfully.")
return self.X_train, self.X_test

def deploy(self):
self.builder = BuilderCNN2D(
self.X_train, self.y_train, self.X_test, self.y_test
X_train=self.X_train, y_train=self.y_train, X_test=self.X_test, y_test=self.y_test
)
self.builder.build()
return self.builder

def takeoff(self):
self.history = self.builder.batch_fit()


if __name__ == "__main__":
home = os.getcwd()
data = os.path.join(home, "data")
print("Extracting data...")
fpaths = WebScraper(k2_uri, k2_data).scrape_repo()
print("Data extraction successful.")
k2 = LaunchK2(fpaths)
scraper = WebScraper(k2meta['uri'], k2meta['data'])
scraper.fpaths = downloads_exist(scraper, k2meta['data'])
if not scraper.fpaths:
scraper.scrape()
print("Data extraction successful.")
k2 = LaunchK2(scraper.fpaths)
k2.launch_prep()
k2.builder = k2.deploy()
k2.history = k2.takeoff()
k2.deploy()
k2.takeoff()

0 comments on commit 1ab2cc2

Please sign in to comment.