# durations_counts.py -- 226 lines (197 loc), 11.9 KB
# (web-page navigation text and gutter line numbers from the source listing removed)
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 13 14:29:54 2022
@author: Denis
"""
# TO RUN under tf2_10 env and Denis\ML\IRIS_predspectra_intermediate_tf2 dir
import os
import tensorflow as tf
import numpy as np
from absl import app
from absl import flags
from absl import logging
from models import SP_PCUNet
import os
import gc
import datetime
import numpy as np
import pandas as pd
import random
import itertools
import zipfile
import io
import time
from copy import deepcopy
from tqdm import tqdm
from glob import glob
from natsort import natsorted
import seaborn as sns
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import TensorBoard, ModelCheckpoint, LambdaCallback
#from keras_tqdm import TQDMCallback
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
plt.ioff()
import matplotlib.gridspec as gridspec
#from mpl_toolkits.axes_grid1 import make_axes_locatable
# from matplotlib.ticker import NullFormatter
from skimage.metrics import structural_similarity as ssim
from libs.pconv_model import PConvUnet
from libs.lstm_model import LSTM
from libs.lstmsimple_model import LSTM as LSTMS
from libs.gru_model import GRU
from libs.grusimple_model import GRU as GRUS
from libs.nbeats_model import NBeats
from dataset.data_process import kinter, forplot_assignement_accuracy, kcentroids_equal, to_kcentroid_seq, chunkdata_for_longpredict, retrieve_traintimeseq, create_labelines_timeseq_dataset, convertdata_for_training, no_cosmic, rescale_data_by_seqs
from featuring.brandon_features import feature_transform, Mg_settings
from sklearn.metrics import confusion_matrix#, ConfusionMatrixDisplay
class_parms = None
from featuring.mts_metrics import NPMtsMetrics
from featuring.class_n2_metrics import tss_hss_all
# Optional dependencies: the center-statistics and classification helpers are
# imported on a best-effort basis, so the script still starts when they are
# missing. `Exception` (rather than a bare `except:`) keeps that best-effort
# behaviour without swallowing KeyboardInterrupt / SystemExit, and the reason
# for the failure is printed so a broken install can be diagnosed.
try:
    from main_classify import Settings, update_settings_fromclass
    from models_classify import SP_Conv_Dense, create_class_mask
    from featuring.center_stat import NPJointCenterStat#, NPCenterStat, CenterStat
    from libs.class_pconv_model import NP_CategoricalCrossentropy, NP_BinaryCrossentropy, NP_CategoricalAccuracy, NP_BinaryAccuracy
    from libs.countdict3k_acc import NPAccuracyOverTime3D
except Exception as import_error:
    print("Could not import libraries on centers and classification")
    print("  reason: %r" % (import_error,))
else:
    print("successfuly imported libraries on centers and classification")
# Physical features studied, as (legend name, tick-label format) pairs.
# The second item is either a printf-style float format or the literal
# string 'int'.
feat_legends = [
    ("intensity", "%.1f"),
    ("triplet intensity", "%.2f"),
    ("line center", "%.1f"),
    ("line width", "int"),
    ("line asymmetry", "%.2f"),
    ("total_continium", "int"),
    ("triplet emission", "int"),
    ("k/h ratio integrated", "%.2f"),
    ("kh ratio max", "%.2f"),
    ("k hight", "%.2f"),
    ("peak ratios", "int"),
    ("peak separation", "int"),
]
# SETTINGS

# Set to True to also write an npz file holding the physical features
# computed on the data.
output_npz_features = False

# Classifier definitions, one (classes, inclusions, noclass) tuple per
# classifier. Classes and inclusions are stored as '_'-joined strings.
# A single classifier could alternatively be described through the 'classes'
# and 'class_inclusions' flags (classes_and_inclusions = None).
# The entry below corresponds to a model trained with all labels.
classes_and_inclusions_addnoclass = [
    ('_'.join(class_names), '_'.join(inclusion_rules), noclass_name)
    for class_names, inclusion_rules, noclass_name in zip(
        [['QS', 'AR', 'PF', 'FL']],
        [['']],
        [None],
    )
]
FLAGS = flags.FLAGS


def _flag_default(name):
    """Return the current value of an already-defined flag.

    Several defaults below are derived from other flags while the module is
    still being imported, i.e. before absl has parsed the command line.
    Attribute access (``FLAGS.<name>``) raises ``UnparsedFlagAccessError`` at
    that point, whereas the flag object reached through ``FLAGS[<name>]``
    always exposes its value (the declared default until parsing happens).
    """
    return FLAGS[name].value


# NOTE: every default computed with _flag_default() is frozen at import time;
# overriding e.g. --root_address or --model_type on the command line does NOT
# recompute the dependent defaults (dataset_address, name, *_dir, n_blocks).
flags.DEFINE_string("fname", 'compare_centers', "File name: path with name of the output file")
flags.DEFINE_boolean("manual_mode", True, "Use manual mode is you don't want to load dataand disable part of the code in models.py")
flags.DEFINE_boolean("change_traindata", False, "whether to enable to save/overwrite data_longformat.npz")
flags.DEFINE_string("model_type", "IBMTS", "name of the model to user ['IBMTS'], ['LSTM'], ['LSTMS'], ['GRU'], ['GRUS'], ['NBeats']")
flags.DEFINE_boolean("with_centerloss", False, "whether to add a term in the total loss optimizing the proximity to the centers")
flags.DEFINE_boolean("debug", True, "True to use debug mode (1 epoch and 1st item of generator for test)")
flags.DEFINE_integer("epoch", 100, "Epoch to train [25]")
flags.DEFINE_integer("batch_size", 4, "The size of batch images [4]")
flags.DEFINE_boolean("batch_norm", True, "True for the model with batch_normalzation")
flags.DEFINE_float("learning_rate_BN", 0.0002, "Learning rate of for adam with BN (phase 1) [0.0002]")
flags.DEFINE_float("learning_rate_FINE", 0.00005, "Learning rate of for adam without BN (phase 2 - Fine tuning) [0.00005]")
flags.DEFINE_string("dataset", "pb_2C", "The name of dataset [iris_level_2C, al_2C, pb_2C]")
flags.DEFINE_string("root_address", os.path.dirname(os.path.realpath(__file__)), "The path for the root folder of the project")
flags.DEFINE_string("dataset_address", os.path.join(_flag_default('root_address'),'iris_data'), "The path of dataset")
flags.DEFINE_boolean("given_tvt", True, "Whether the data is already separated in 'train' 'valid' 'test' (these should appear in the file names)")
flags.DEFINE_float("train_ratio", 0.7, "ratio of dataset to use for training [0.7]")
flags.DEFINE_float("test_ratio", 0.25, "ratio of dataset to use for testing [0.25]")
flags.DEFINE_integer("label_length", 325, "The length of spectra. [240 (for Mghk), 137 (for al), 370 (for ld)))]")
# Only defined when the *declared default* of model_type is "NBeats".
if _flag_default('model_type') == "NBeats":
    flags.DEFINE_integer("n_blocks", 2,"number of blocks for NBeats")
flags.DEFINE_float("mask_ratio", 0.25, "ending ratio of the timesequences to be masked in time / max ratio is random_ratio in True")
flags.DEFINE_boolean("random_ratio", False, "True for random ending ratio of the timesequences to be masked in time (with max value = mask_ratio)")
flags.DEFINE_string("labels", '_'.join(['PB']), "label for training ['QS','AR','PF','FL'] (these should appear in the filename[:2])")
flags.DEFINE_string("nolabel", None, "allow to sample from unlabeled data and label it eg. 'nolabel'")
flags.DEFINE_string("test_labels", '_'.join(['PB']), "label for testing ['QS','AR','PF','FL']")
# Default model name: 'model' + <model tag> + <last two chars of dataset>
# + batch size, mask ratio in percent, random-ratio switch and labels.
# The model tag is "Mghk" for the IBMTS model, the model type otherwise,
# followed by 'star' when the center loss is enabled.
_center_tag = ['', 'star'][int(_flag_default('with_centerloss'))]
_model_tag = ("Mghk" if _flag_default('model_type') == "IBMTS" else _flag_default('model_type')) + _center_tag
flags.DEFINE_string("name", 'model%s%s_B%i_M%i_R%i_%s'%(_model_tag, _flag_default('dataset')[-2:], _flag_default('batch_size'), int(100*_flag_default('mask_ratio')), int(_flag_default('random_ratio')), _flag_default('labels')), "The name of the model")
flags.DEFINE_string("checkpoint_dir", os.path.join(_flag_default('root_address'),_flag_default('dataset'),_flag_default('name'),"checkpoint"), "Directory name to save the checkpoints [checkpoint]")
flags.DEFINE_string("logs_dir", os.path.join(_flag_default('root_address'),_flag_default('dataset'),_flag_default('name'),"log"), "Directory name to save the log [log]")
flags.DEFINE_string("results_dir", os.path.join(_flag_default('root_address'),_flag_default('dataset'),_flag_default('name'),"results"), "Directory name to save the image samples [samples]")
flags.DEFINE_boolean("train1", False, "True for training phase 1 (with BN) [False]")
flags.DEFINE_boolean("train2", False, "True for training phase 2 (without BN) : Fine-tuning [False]")
flags.DEFINE_boolean("preload_train", False, "True for loading a pre-trained model before training, False for testing [False]")
flags.DEFINE_boolean("testload_FINE", False, "True for loading a trained model with FINE procedure, False for loading a non FINE model [True]")
flags.DEFINE_boolean("test", False, "True for testing directly at the end of training")
flags.DEFINE_string("test_ds", '_'.join(['TE','TEL']), "chosen datasets for tests ['TR', 'VA', 'TE', 'TEL']")
flags.DEFINE_boolean("with_features", False, "whether features should be investigated")
flags.DEFINE_boolean("add_classifier", False, "True to add classification stats (it will use the params from main_classify.py).")
flags.DEFINE_string("classes", '_'.join(['PB']), "May be overriden by 'classes_and_inclusions', labels of classification ['QS','AR','PF','FL'] OR ['QS','AR-PF-FL']..")
flags.DEFINE_string("class_inclusions", '_'.join(['']), "inclusions for classification '_'.join(['QS<AR']) OR [QS<AR, QS<PF, QS<FL] OR ['']")
flags.DEFINE_string("noclass", None, "None or name for eventual events not sampling from 'classes' labels (will be assumed to output 0 values for the classifier)")
flags.DEFINE_boolean("add_centercount", False, "True to add centers stats (it will use the params from main_classify.py).")
flags.DEFINE_boolean("predict", False, "True for predicting number_predict from each chosen dataset predict_ds")
flags.DEFINE_string("predict_ds", '_'.join(['TR', 'VAL', 'TE', 'TEL']), "chosen datasets for predictions ['TR', 'VAL', 'TE', 'TEL']")
flags.DEFINE_integer("number_predict", 4, "The maximum number of predictions to do")
flags.DEFINE_boolean("show_res", True, "True for showing results at the end")
flags.DEFINE_boolean("cosmic_to_mean", False, "True for putting cosmic rays to the mean value") # V2
flags.DEFINE_integer("cosmic_t", 2000, "Threshold in DN/s for cosmic rays [2000]")
flags.DEFINE_boolean("show_dist_polar", False, "Whether to show distribution in a polar way or not")
flags.DEFINE_string("fig_form", 'pdf', "Format for saved figures in ['png', 'ps', 'pdf', 'svg']")
flags.DEFINE_boolean("backg_color", False, "Whether to colorize backgrounds or not")
flags.DEFINE_boolean("frame_res", False, "To frame marginal results in figures")
# Shorthand for the "current time" callable.
now = datetime.datetime.now

# Global matplotlib typography used by every figure of this script.
plt.rcParams.update({'font.size': 10, 'font.family': 'Cambria'})

# Module-level switches.
manual_mode = True
# Whether to save/overwrite data_longformat.npz.
change_traindata = False

# Alias of the absl flag container.
config = FLAGS
class AugmentingDataGenerator(ImageDataGenerator):
    """Data generator that yields end-masked batches for inpainting-style training."""

    def flow_from_data(self, seq, position, mask_ratio, random_ratio, *args, **kwargs):
        """Endlessly yield ``([masked, mask, position], original)`` batches.

        Args:
            seq: sequences handed to ``ImageDataGenerator.flow`` as the data.
            position: companion array handed to ``flow`` together with ``seq``
                and returned batch-wise alongside it.
            mask_ratio: fraction of axis 1 to mask at the end of each sample;
                when ``random_ratio`` is true it is the upper bound of the
                per-sample random fraction.
            random_ratio: if true, draw one fraction per sample uniformly in
                ``[0.03, mask_ratio]``; otherwise use ``mask_ratio`` for all.
            *args, **kwargs: forwarded unchanged to ``flow``.

        Yields:
            ``[masked, mask, position_batch], original`` where ``mask`` is 1
            on kept entries and 0 on masked ones, and ``masked`` is a copy of
            the batch with masked entries set to 1.

        Note:
            Batches are indexed with four subscripts, so they must have at
            least four dimensions. Axis 1 is presumably the time axis -- TODO
            confirm against the data layout.
        """
        generator = super().flow((seq, position), *args, **kwargs)
        while True:
            # Next (possibly augmented) batch and its positions.
            ori, batch_position = next(generator)

            # Build the mask: ones everywhere, zeros on the masked tail.
            mask = np.ones(ori.shape)
            n_steps = mask.shape[1]
            if random_ratio:
                for i in range(mask.shape[0]):
                    # The tail length is a fraction of axis 1 (the axis being
                    # sliced), not of the batch size.
                    n_masked = int(n_steps * random.uniform(0.03, mask_ratio))
                    # A zero length must mask nothing: the slice `-0:` would
                    # otherwise select the whole axis.
                    if n_masked > 0:
                        mask[i, -n_masked:, :, :] = 0
            else:
                n_masked = int(n_steps * mask_ratio)
                if n_masked > 0:
                    mask[:, -n_masked:, :, :] = 0

            # Masked entries are filled with the constant 1.
            masked = deepcopy(ori)
            masked[mask == 0] = 1

            gc.collect()
            yield [masked, mask, batch_position], ori
def _predicted_step_counts(data_pack, label_length, mask_ratio):
    """Collect, per label, the number of predicted time-steps of each 'TE_' sequence.

    Args:
        data_pack: mapping from dataset key to a ``(data, position)`` pair of
            equally long sequences.
        label_length: value of the ``label_length`` setting.
        mask_ratio: value of the ``mask_ratio`` setting.

    Returns:
        dict mapping ``'TE_<label>'`` to a list with, for every sequence,
        its length along axis 0 minus ``int(label_length * (1 - mask_ratio))``.
        ``<label>`` is the first two characters of the second item of the
        sequence's position entry.
    """
    # Loop-invariant: number of time-steps that are not predicted.
    n_unmasked = int(label_length * (1 - mask_ratio))
    counts = {key: [] for key in data_pack if 'TE_' in key}
    # Iterate over a snapshot because new label keys may be added below.
    for key in list(counts):
        for sequence, position in zip(*data_pack[key]):
            label_key = 'TE_%s' % position[1][:2]
            # setdefault tolerates a label that has no data_pack key of its own.
            counts.setdefault(label_key, []).append(sequence.shape[0] - n_unmasked)
    return counts


def main(argv=None):
    """Plot the distribution of predicted time-steps of the 'TE_' datasets.

    Loads ``data_longformat.npz`` from the model's dataset directory, counts
    the predicted time-steps of every 'TE_' sequence and saves one histogram
    per label (log-scaled counts) to ``durations_counts.pdf``.

    Args:
        argv: positional argument supplied by ``absl.app.run``; unused.
    """
    del argv  # app.run() always passes it; nothing here needs it.

    model = SP_PCUNet(FLAGS,
                      classes_and_inclusions_addnoclass=classes_and_inclusions_addnoclass,
                      feat_legends=feat_legends, manual_mode=True, change_traindata=False)

    data_path = os.path.join(model.dataset_address, 'data_longformat.npz')
    print("Load data from {}".format(data_path))
    # SECURITY: allow_pickle=True can execute arbitrary code while loading;
    # only open archives from a trusted source.
    # The context manager closes the archive even if reading fails.
    with np.load(data_path, allow_pickle=True) as data_info:
        keys = [k.replace('data_', '') for k in data_info.keys() if 'data_' in k]
        model.data_pack = {k: (data_info['data_' + k], data_info['position_' + k])
                           for k in keys}

    counts = _predicted_step_counts(model.data_pack, model.label_length, model.mask_ratio)

    plt.figure(figsize=(5, 1.6))
    for key, values in counts.items():
        plt.hist(values, bins='stone', label=key.replace('TE_', ''))
    plt.yscale('log')
    plt.legend(loc='upper center')
    plt.xlabel('predicted time-steps')
    plt.ylabel('counts')
    plt.gca().set_aspect(50)
    plt.tight_layout()
    plt.savefig('durations_counts.pdf')


if __name__ == '__main__':
    app.run(main)