From afc9c2c97f345a52d011d9f1eef97dbfecbadf79 Mon Sep 17 00:00:00 2001 From: Ziang Yan Date: Fri, 4 Aug 2023 00:27:47 +0000 Subject: [PATCH 01/18] update --- src/rail/estimation/algos/somoclu_som.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rail/estimation/algos/somoclu_som.py b/src/rail/estimation/algos/somoclu_som.py index 945f5c3..d0d7d48 100644 --- a/src/rail/estimation/algos/somoclu_som.py +++ b/src/rail/estimation/algos/somoclu_som.py @@ -220,7 +220,7 @@ def run(self): self.config.bands, self.config.column_usage) som = Somoclu(self.config.n_columns, self.config.n_rows, - gridtype=self.config.gridtype, + gridtype=self.config.gridtype,compactsupport=False, maptype=self.config.maptype, initialization='pca') som.train(colors) From 9b38e5fb612e47b09cc3ef731e0f68090f6378d5 Mon Sep 17 00:00:00 2001 From: Ziang Yan Date: Fri, 4 Aug 2023 17:39:51 +0000 Subject: [PATCH 02/18] update somoclusummerizer --- src/rail/estimation/algos/somoclu_som.py | 46 ++++++++++++++++-------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/src/rail/estimation/algos/somoclu_som.py b/src/rail/estimation/algos/somoclu_som.py index fee75bb..6853abc 100644 --- a/src/rail/estimation/algos/somoclu_som.py +++ b/src/rail/estimation/algos/somoclu_som.py @@ -316,7 +316,9 @@ class SOMocluSummarizer(SZPZSummarizer): phot_weightcol=Param(str, "", msg="name of photometry weight, if present"), spec_weightcol=Param(str, "", msg="name of specz weight col, if present"), split=Param(int, 200, msg="the size of data chunks when calculating the distances between the codebook and data"), - nsamples=Param(int, 20, msg="number of bootstrap samples to generate")) + nsamples=Param(int, 20, msg="number of bootstrap samples to generate"), + useful_clusters=Param(np.ndarray, np.array([]), msg="the cluster indices that are used for calibration. If not given, then " + +"all the clusters containing spec sample are used."),) outputs = [('output', QPHandle), ('single_NZ', QPHandle), ('cellid_output', Hdf5Handle), @@ -383,7 +385,6 @@ def run(self): self.zgrid = np.linspace(self.config.zmin, self.config.zmax, self.config.nzbins + 1) - if self.config.n_clusters > self.n_rows * self.n_columns: # pragma: no cover print("Warning: number of clusters cannot be greater than the number of cells ("+str(self.n_rows * self.n_columns)+"). The SOM will NOT be grouped into clusters.") n_clusters = self.n_rows * self.n_columns @@ -428,7 +429,7 @@ def run(self): N_eff_num = 0. N_eff_den = 0. phot_cluster_set = set() - + bad_clusters = set() # make dictionary of ID data to be written out with cell IDs id_dict = {} @@ -438,7 +439,7 @@ def run(self): print(f"Process {self.rank} running summarizer on chunk {s} - {e}") chunk_number = s//self.config.chunk_size - tmp_neff_num, tmp_neff_den = self._process_chunk(test_data, bootstrap_matrix, som_cluster_inds, spec_cluster_set, phot_cluster_set, sz, spec_data['weight'], spec_som_clusterind, N_eff_p_num, N_eff_p_den, hist_vals, id_dict, s, e, first) + tmp_neff_num, tmp_neff_den = self._process_chunk(test_data, bootstrap_matrix, som_cluster_inds, spec_cluster_set, phot_cluster_set, sz, spec_data['weight'], spec_som_clusterind, N_eff_p_num, N_eff_p_den, hist_vals, id_dict, s, e, first, bad_clusters) N_eff_num += tmp_neff_num N_eff_den += tmp_neff_den first = False @@ -457,7 +458,8 @@ def run(self): hist_vals = self.comm.reduce(hist_vals) N_eff_num = self.comm.reduce(N_eff_num) N_eff_den = self.comm.reduce(N_eff_den) - + bad_clusters = self.comm.reduce(bad_clusters) + phot_cluster_list=np.array(list(phot_cluster_set),dtype=int) phot_cluster_total=self.comm.gather(phot_cluster_list) @@ -466,12 +468,8 @@ def run(self): return phot_cluster_total=np.concatenate(phot_cluster_total) phot_cluster_set = set(phot_cluster_total) - uncovered_clusters = phot_cluster_set - spec_cluster_set - bad_cluster = dict(uncovered_clusters=np.array(list(uncovered_clusters))) - print("the following clusters contain photometric data but not spectroscopic data:") - print(uncovered_clusters) - useful_clusters = phot_cluster_set - uncovered_clusters - print(f"{len(useful_clusters)} out of {n_clusters} have usable data") + + print(f"{len(self.useful_clusters)} out of {n_clusters} have usable data") # effective number defined in Heymans et al. (2012) to quantify the photometric representation. # also see Eq.7 in Wright et al. (2020). @@ -490,9 +488,9 @@ def run(self): qp_d = qp.Ensemble(qp.hist, data=dict(bins=self.zgrid, pdfs=fid_hist)) self.add_data('output', sample_ens) self.add_data('single_NZ', qp_d) - self.add_data('uncovered_cluster_file', bad_cluster) + self.add_data('uncovered_cluster_file', bad_clusters) - def _process_chunk(self, test_data, bootstrap_matrix, som_cluster_inds, spec_cluster_set, phot_cluster_set, sz, sweight, spec_som_clusterind, N_eff_p_num, N_eff_p_den, hist_vals, id_dict, start, end, first): + def _process_chunk(self, test_data, bootstrap_matrix, som_cluster_inds, spec_cluster_set, phot_cluster_set, sz, sweight, spec_som_clusterind, N_eff_p_num, N_eff_p_den, hist_vals, id_dict, start, end, first, bad_clusters): for col in self.usecols: if col not in test_data.keys(): # pragma: no cover @@ -517,9 +515,27 @@ def _process_chunk(self, test_data, bootstrap_matrix, som_cluster_inds, spec_clu self._do_chunk_output(id_dict, start, end, first) chunk_phot_cluster_set = set(phot_som_clusterind) - useful_clusters = chunk_phot_cluster_set.intersection(spec_cluster_set) phot_cluster_set.update(chunk_phot_cluster_set) - + uncovered_clusters = phot_cluster_set - spec_cluster_set + bad_cluster = dict(uncovered_clusters=np.array(list(uncovered_clusters))) + print("the following clusters contain photometric data but not spectroscopic data:") + print(uncovered_clusters) + + covered_clusters = phot_cluster_set - uncovered_clusters + if self.config.useful_clusters.size == 0: + self.useful_clusters = covered_clusters + else: + if set(self.config.useful_clusters) <= covered_clusters: + self.useful_clusters = self.config.useful_clusters + else: + print("Warning: input useful clusters is not a subset of spec-covered clusters." + +"Taking the intersection.") + self.useful_clusters = np.intersect1d(self.config.useful_clusters, covered_clusters) + if self.useful_clusters.size == 0: + raise ValueError("Input useful clusters have no intersection with spec-covered clusters!") + + useful_clusters = self.useful_clusters + tmp_neff_num = np.sum(test_data['weight']) tmp_neff_den = np.sum(test_data['weight'] ** 2) From d882e67c075011a9ed056de0d3a2f57a654d4a8b Mon Sep 17 00:00:00 2001 From: Ziang Yan Date: Fri, 4 Aug 2023 18:36:20 +0000 Subject: [PATCH 03/18] update testfile --- tests/som/test_somoclu_summarizers.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/som/test_somoclu_summarizers.py b/tests/som/test_somoclu_summarizers.py index 74cfa37..6e009d7 100644 --- a/tests/som/test_somoclu_summarizers.py +++ b/tests/som/test_somoclu_summarizers.py @@ -54,6 +54,25 @@ def one_algo(key, inform_class, summarizer_class, summary_kwargs): fid_ens = qp.read(summarizer2.get_output(summarizer2.get_aliased_tag("single_NZ"), final_name=True)) meanz = fid_ens.mean().flatten() assert np.isclose(meanz[0], 0.14414913252122552, atol=0.025) + + full_useful_clusters = np.asarray(list(_.useful_clusters)) + full_uncovered_clusters = np.setdiff1d(np.arange(31*31), full_useful_clusters) + + summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": np.arange(31*31)} + inform_class = somoclu_som.SOMocluInformer + summarizerclass = somoclu_som.SOMocluSummarizer + _ = one_algo("SOMomoclu", inform_class, summarizerclass, summary_config_dict) + + summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": full_useful_clusters} + inform_class = somoclu_som.SOMocluInformer + summarizerclass = somoclu_som.SOMocluSummarizer + _ = one_algo("SOMomoclu", inform_class, summarizerclass, summary_config_dict) + + summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": full_uncovered_clusters} + inform_class = somoclu_som.SOMocluInformer + summarizerclass = somoclu_som.SOMocluSummarizer + _ = one_algo("SOMomoclu", inform_class, summarizerclass, summary_config_dict) + os.remove(summarizer2.get_output(summarizer2.get_aliased_tag("output"), final_name=True)) os.remove(f"tmpsomoclu_" + key + ".pkl") return summary_ens @@ -76,3 +95,4 @@ def test_SomocluSOM_with_mag_and_colors(): inform_class = somoclu_som.SOMocluInformer summarizerclass = somoclu_som.SOMocluSummarizer _ = one_algo("SOMoclu_wmag", inform_class, summarizerclass, summary_config_dict) + From b3efe5d0cfa589e5515b5190bc63440b6bda35e7 Mon Sep 17 00:00:00 2001 From: Ziang Yan Date: Fri, 4 Aug 2023 18:39:32 +0000 Subject: [PATCH 04/18] update testfile --- tests/som/test_somoclu_summarizers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/som/test_somoclu_summarizers.py b/tests/som/test_somoclu_summarizers.py index 6e009d7..0e210e7 100644 --- a/tests/som/test_somoclu_summarizers.py +++ b/tests/som/test_somoclu_summarizers.py @@ -55,7 +55,7 @@ def one_algo(key, inform_class, summarizer_class, summary_kwargs): meanz = fid_ens.mean().flatten() assert np.isclose(meanz[0], 0.14414913252122552, atol=0.025) - full_useful_clusters = np.asarray(list(_.useful_clusters)) + full_useful_clusters = np.asarray(list(summarizer2.useful_clusters)) full_uncovered_clusters = np.setdiff1d(np.arange(31*31), full_useful_clusters) summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": np.arange(31*31)} From f709de6ddc5cff1961a92e509613f89a467d012a Mon Sep 17 00:00:00 2001 From: Ziang Yan Date: Fri, 4 Aug 2023 18:44:55 +0000 Subject: [PATCH 05/18] update testfile --- tests/som/test_somoclu_summarizers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/som/test_somoclu_summarizers.py b/tests/som/test_somoclu_summarizers.py index 0e210e7..fe9449b 100644 --- a/tests/som/test_somoclu_summarizers.py +++ b/tests/som/test_somoclu_summarizers.py @@ -56,22 +56,22 @@ def one_algo(key, inform_class, summarizer_class, summary_kwargs): assert np.isclose(meanz[0], 0.14414913252122552, atol=0.025) full_useful_clusters = np.asarray(list(summarizer2.useful_clusters)) - full_uncovered_clusters = np.setdiff1d(np.arange(31*31), full_useful_clusters) + full_uncovered_clusters = np.asarray(list(np.setdiff1d(np.arange(31*31), full_useful_clusters))) summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": np.arange(31*31)} inform_class = somoclu_som.SOMocluInformer summarizerclass = somoclu_som.SOMocluSummarizer - _ = one_algo("SOMomoclu", inform_class, summarizerclass, summary_config_dict) + _ = one_algo("SOMomoclu1", inform_class, summarizerclass, summary_config_dict) summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": full_useful_clusters} inform_class = somoclu_som.SOMocluInformer summarizerclass = somoclu_som.SOMocluSummarizer - _ = one_algo("SOMomoclu", inform_class, summarizerclass, summary_config_dict) + _ = one_algo("SOMomoclu2", inform_class, summarizerclass, summary_config_dict) summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": full_uncovered_clusters} inform_class = somoclu_som.SOMocluInformer summarizerclass = somoclu_som.SOMocluSummarizer - _ = one_algo("SOMomoclu", inform_class, summarizerclass, summary_config_dict) + _ = one_algo("SOMomoclu3", inform_class, summarizerclass, summary_config_dict) os.remove(summarizer2.get_output(summarizer2.get_aliased_tag("output"), final_name=True)) os.remove(f"tmpsomoclu_" + key + ".pkl") From 421906d69c828b83963614925b16bf97b5e9a982 Mon Sep 17 00:00:00 2001 From: Ziang Yan Date: Fri, 4 Aug 2023 18:51:51 +0000 Subject: [PATCH 06/18] update testfile --- tests/som/test_somoclu_summarizers.py | 45 ++++++++++++++++----------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/tests/som/test_somoclu_summarizers.py b/tests/som/test_somoclu_summarizers.py index fe9449b..fe83b15 100644 --- a/tests/som/test_somoclu_summarizers.py +++ b/tests/som/test_somoclu_summarizers.py @@ -57,32 +57,17 @@ def one_algo(key, inform_class, summarizer_class, summary_kwargs): full_useful_clusters = np.asarray(list(summarizer2.useful_clusters)) full_uncovered_clusters = np.asarray(list(np.setdiff1d(np.arange(31*31), full_useful_clusters))) - - summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": np.arange(31*31)} - inform_class = somoclu_som.SOMocluInformer - summarizerclass = somoclu_som.SOMocluSummarizer - _ = one_algo("SOMomoclu1", inform_class, summarizerclass, summary_config_dict) - - summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": full_useful_clusters} - inform_class = somoclu_som.SOMocluInformer - summarizerclass = somoclu_som.SOMocluSummarizer - _ = one_algo("SOMomoclu2", inform_class, summarizerclass, summary_config_dict) - - summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": full_uncovered_clusters} - inform_class = somoclu_som.SOMocluInformer - summarizerclass = somoclu_som.SOMocluSummarizer - _ = one_algo("SOMomoclu3", inform_class, summarizerclass, summary_config_dict) - + os.remove(summarizer2.get_output(summarizer2.get_aliased_tag("output"), final_name=True)) os.remove(f"tmpsomoclu_" + key + ".pkl") - return summary_ens + return summary_ens, full_useful_clusters, full_uncovered_clusters def test_SomocluSOM(): summary_config_dict = {"n_rows": 21, "n_columns": 21, "column_usage": "colors"} inform_class = somoclu_som.SOMocluInformer summarizerclass = somoclu_som.SOMocluSummarizer - _ = one_algo("SOMomoclu", inform_class, summarizerclass, summary_config_dict) + _,_,_ = one_algo("SOMomoclu", inform_class, summarizerclass, summary_config_dict) def test_SomocluSOM_with_mag_and_colors(): @@ -94,5 +79,27 @@ def test_SomocluSOM_with_mag_and_colors(): } inform_class = somoclu_som.SOMocluInformer summarizerclass = somoclu_som.SOMocluSummarizer - _ = one_algo("SOMoclu_wmag", inform_class, summarizerclass, summary_config_dict) + _,_,_ = one_algo("SOMoclu_wmag", inform_class, summarizerclass, summary_config_dict) + + +def test_SomocluSOM_useful_clusters(): + summary_config_dict = {"n_rows": 21, "n_columns": 21, "column_usage": "colors"} + inform_class = somoclu_som.SOMocluInformer + summarizerclass = somoclu_som.SOMocluSummarizer + _, full_useful_clusters, full_uncovered_clusters = one_algo("SOMomoclu", inform_class, summarizerclass, summary_config_dict) + + summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": np.arange(31*31)} + inform_class = somoclu_som.SOMocluInformer + summarizerclass = somoclu_som.SOMocluSummarizer + _ = one_algo("SOMomoclu1", inform_class, summarizerclass, summary_config_dict) + + summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": full_useful_clusters} + inform_class = somoclu_som.SOMocluInformer + summarizerclass = somoclu_som.SOMocluSummarizer + _ = one_algo("SOMomoclu2", inform_class, summarizerclass, summary_config_dict) + + summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": full_uncovered_clusters} + inform_class = somoclu_som.SOMocluInformer + summarizerclass = somoclu_som.SOMocluSummarizer + _ = one_algo("SOMomoclu3", inform_class, summarizerclass, summary_config_dict) From c6257b67dc64befc7903ffe0edfb1e27197ca3f5 Mon Sep 17 00:00:00 2001 From: Ziang Yan Date: Fri, 4 Aug 2023 18:55:20 +0000 Subject: [PATCH 07/18] update testfile --- tests/som/test_somoclu_summarizers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/som/test_somoclu_summarizers.py b/tests/som/test_somoclu_summarizers.py index fe83b15..43b5229 100644 --- a/tests/som/test_somoclu_summarizers.py +++ b/tests/som/test_somoclu_summarizers.py @@ -91,15 +91,15 @@ def test_SomocluSOM_useful_clusters(): summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": np.arange(31*31)} inform_class = somoclu_som.SOMocluInformer summarizerclass = somoclu_som.SOMocluSummarizer - _ = one_algo("SOMomoclu1", inform_class, summarizerclass, summary_config_dict) + _ = one_algo("SOMomoclu", inform_class, summarizerclass, summary_config_dict) summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": full_useful_clusters} inform_class = somoclu_som.SOMocluInformer summarizerclass = somoclu_som.SOMocluSummarizer - _ = one_algo("SOMomoclu2", inform_class, summarizerclass, summary_config_dict) + _ = one_algo("SOMomoclu", inform_class, summarizerclass, summary_config_dict) summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": full_uncovered_clusters} inform_class = somoclu_som.SOMocluInformer summarizerclass = somoclu_som.SOMocluSummarizer - _ = one_algo("SOMomoclu3", inform_class, summarizerclass, summary_config_dict) + _ = one_algo("SOMomoclu", inform_class, summarizerclass, summary_config_dict) From d18a189f54e84cd0b0ea7e2c2f3ffdbe1c3ea06f Mon Sep 17 00:00:00 2001 From: Ziang Yan Date: Fri, 4 Aug 2023 19:00:03 +0000 Subject: [PATCH 08/18] update testfile --- tests/som/test_somoclu_summarizers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/som/test_somoclu_summarizers.py b/tests/som/test_somoclu_summarizers.py index 43b5229..0755fb4 100644 --- a/tests/som/test_somoclu_summarizers.py +++ b/tests/som/test_somoclu_summarizers.py @@ -86,20 +86,20 @@ def test_SomocluSOM_useful_clusters(): summary_config_dict = {"n_rows": 21, "n_columns": 21, "column_usage": "colors"} inform_class = somoclu_som.SOMocluInformer summarizerclass = somoclu_som.SOMocluSummarizer - _, full_useful_clusters, full_uncovered_clusters = one_algo("SOMomoclu", inform_class, summarizerclass, summary_config_dict) + _, full_useful_clusters, full_uncovered_clusters = one_algo("SOMomoclu1", inform_class, summarizerclass, summary_config_dict) summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": np.arange(31*31)} inform_class = somoclu_som.SOMocluInformer summarizerclass = somoclu_som.SOMocluSummarizer - _ = one_algo("SOMomoclu", inform_class, summarizerclass, summary_config_dict) + _ = one_algo("SOMomoclu2", inform_class, summarizerclass, summary_config_dict) summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": full_useful_clusters} inform_class = somoclu_som.SOMocluInformer summarizerclass = somoclu_som.SOMocluSummarizer - _ = one_algo("SOMomoclu", inform_class, summarizerclass, summary_config_dict) + _ = one_algo("SOMomoclu3", inform_class, summarizerclass, summary_config_dict) summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": full_uncovered_clusters} inform_class = somoclu_som.SOMocluInformer summarizerclass = somoclu_som.SOMocluSummarizer - _ = one_algo("SOMomoclu", inform_class, summarizerclass, summary_config_dict) + _ = one_algo("SOMomoclu4", inform_class, summarizerclass, summary_config_dict) From 79711e2b86317b710f782534a9db40cd50ce9ac5 Mon Sep 17 00:00:00 2001 From: Ziang Yan Date: Fri, 4 Aug 2023 20:58:11 +0000 Subject: [PATCH 09/18] update --- src/rail/estimation/algos/somoclu_som.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/rail/estimation/algos/somoclu_som.py b/src/rail/estimation/algos/somoclu_som.py index 6853abc..39f4a52 100644 --- a/src/rail/estimation/algos/somoclu_som.py +++ b/src/rail/estimation/algos/somoclu_som.py @@ -530,7 +530,7 @@ def _process_chunk(self, test_data, bootstrap_matrix, som_cluster_inds, spec_clu else: print("Warning: input useful clusters is not a subset of spec-covered clusters." +"Taking the intersection.") - self.useful_clusters = np.intersect1d(self.config.useful_clusters, covered_clusters) + self.useful_clusters = np.intersect1d(self.config.useful_clusters, np.asarray(list(covered_clusters))) if self.useful_clusters.size == 0: raise ValueError("Input useful clusters have no intersection with spec-covered clusters!") @@ -539,7 +539,6 @@ def _process_chunk(self, test_data, bootstrap_matrix, som_cluster_inds, spec_clu tmp_neff_num = np.sum(test_data['weight']) tmp_neff_den = np.sum(test_data['weight'] ** 2) - for i in range(self.config.nsamples): bootstrap_indices = bootstrap_matrix[:,i] bs_specz = sz[bootstrap_indices] From 191c7eb29b061a6f2d2d42e8c064bf17387c9a1a Mon Sep 17 00:00:00 2001 From: Ziang Yan Date: Fri, 4 Aug 2023 21:01:55 +0000 Subject: [PATCH 10/18] update --- src/rail/estimation/algos/somoclu_som.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rail/estimation/algos/somoclu_som.py b/src/rail/estimation/algos/somoclu_som.py index 39f4a52..7e2f85a 100644 --- a/src/rail/estimation/algos/somoclu_som.py +++ b/src/rail/estimation/algos/somoclu_som.py @@ -531,7 +531,7 @@ def _process_chunk(self, test_data, bootstrap_matrix, som_cluster_inds, spec_clu print("Warning: input useful clusters is not a subset of spec-covered clusters." +"Taking the intersection.") self.useful_clusters = np.intersect1d(self.config.useful_clusters, np.asarray(list(covered_clusters))) - if self.useful_clusters.size == 0: + if self.useful_clusters.size == 0: # pragma: no cover raise ValueError("Input useful clusters have no intersection with spec-covered clusters!") useful_clusters = self.useful_clusters From 3706604af269b39645029033f40c4b32585a5262 Mon Sep 17 00:00:00 2001 From: Ziang Yan Date: Fri, 4 Aug 2023 21:08:20 +0000 Subject: [PATCH 11/18] update --- tests/som/test_somoclu_summarizers.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/som/test_somoclu_summarizers.py b/tests/som/test_somoclu_summarizers.py index 0755fb4..fa5ead4 100644 --- a/tests/som/test_somoclu_summarizers.py +++ b/tests/som/test_somoclu_summarizers.py @@ -93,11 +93,6 @@ def test_SomocluSOM_useful_clusters(): summarizerclass = somoclu_som.SOMocluSummarizer _ = one_algo("SOMomoclu2", inform_class, summarizerclass, summary_config_dict) - summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": full_useful_clusters} - inform_class = somoclu_som.SOMocluInformer - summarizerclass = somoclu_som.SOMocluSummarizer - _ = one_algo("SOMomoclu3", inform_class, summarizerclass, summary_config_dict) - summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": full_uncovered_clusters} inform_class = somoclu_som.SOMocluInformer summarizerclass = somoclu_som.SOMocluSummarizer From e1013bcf01957c9a0833b3774c6f5e235a33cf98 Mon Sep 17 00:00:00 2001 From: Ziang Yan Date: Fri, 4 Aug 2023 21:12:41 +0000 Subject: [PATCH 12/18] update --- tests/som/test_somoclu_summarizers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/som/test_somoclu_summarizers.py b/tests/som/test_somoclu_summarizers.py index fa5ead4..d73baa8 100644 --- a/tests/som/test_somoclu_summarizers.py +++ b/tests/som/test_somoclu_summarizers.py @@ -83,17 +83,17 @@ def test_SomocluSOM_with_mag_and_colors(): def test_SomocluSOM_useful_clusters(): - summary_config_dict = {"n_rows": 21, "n_columns": 21, "column_usage": "colors"} + summary_config_dict = {"n_rows": 21, "n_columns": 21, "column_usage": "colors", "seed":0} inform_class = somoclu_som.SOMocluInformer summarizerclass = somoclu_som.SOMocluSummarizer _, full_useful_clusters, full_uncovered_clusters = one_algo("SOMomoclu1", inform_class, summarizerclass, summary_config_dict) - summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": np.arange(31*31)} + summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "seed":0, "useful_clusters": np.arange(31*31)} inform_class = somoclu_som.SOMocluInformer summarizerclass = somoclu_som.SOMocluSummarizer _ = one_algo("SOMomoclu2", inform_class, summarizerclass, summary_config_dict) - summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "useful_clusters": full_uncovered_clusters} + summary_config_dict = {"n_rows": 31, "n_columns": 31, "column_usage": "colors", "seed":0, "useful_clusters": full_uncovered_clusters} inform_class = somoclu_som.SOMocluInformer summarizerclass = somoclu_som.SOMocluSummarizer _ = one_algo("SOMomoclu4", inform_class, summarizerclass, summary_config_dict) From 9e901eeccbe887d9d8791745b67af54d250f8bb4 Mon Sep 17 00:00:00 2001 From: Ziang Yan Date: Fri, 4 Aug 2023 21:16:26 +0000 Subject: [PATCH 13/18] update --- src/rail/estimation/algos/somoclu_som.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rail/estimation/algos/somoclu_som.py b/src/rail/estimation/algos/somoclu_som.py index 7e2f85a..1018464 100644 --- a/src/rail/estimation/algos/somoclu_som.py +++ b/src/rail/estimation/algos/somoclu_som.py @@ -527,7 +527,7 @@ def _process_chunk(self, test_data, bootstrap_matrix, som_cluster_inds, spec_clu else: if set(self.config.useful_clusters) <= covered_clusters: self.useful_clusters = self.config.useful_clusters - else: + else: # pragma: no cover print("Warning: input useful clusters is not a subset of spec-covered clusters." +"Taking the intersection.") self.useful_clusters = np.intersect1d(self.config.useful_clusters, np.asarray(list(covered_clusters))) From 450631d45e7acf8a563090357c0a3902e2635d91 Mon Sep 17 00:00:00 2001 From: Ziang Yan Date: Fri, 4 Aug 2023 21:38:49 +0000 Subject: [PATCH 14/18] update --- src/rail/estimation/algos/somoclu_som.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rail/estimation/algos/somoclu_som.py b/src/rail/estimation/algos/somoclu_som.py index 1018464..bb25be8 100644 --- a/src/rail/estimation/algos/somoclu_som.py +++ b/src/rail/estimation/algos/somoclu_som.py @@ -524,10 +524,10 @@ def _process_chunk(self, test_data, bootstrap_matrix, som_cluster_inds, spec_clu covered_clusters = phot_cluster_set - uncovered_clusters if self.config.useful_clusters.size == 0: self.useful_clusters = covered_clusters - else: + else: # pragma: no cover if set(self.config.useful_clusters) <= covered_clusters: self.useful_clusters = self.config.useful_clusters - else: # pragma: no cover + else: print("Warning: input useful clusters is not a subset of spec-covered clusters." +"Taking the intersection.") self.useful_clusters = np.intersect1d(self.config.useful_clusters, np.asarray(list(covered_clusters))) From 9fd082b7a058cac1eac5db3521efe0a0f7223ac8 Mon Sep 17 00:00:00 2001 From: Zi'ang Yan Date: Mon, 6 Nov 2023 11:15:18 +0100 Subject: [PATCH 15/18] Update test_somoclu_summarizers.py --- tests/som/test_somoclu_summarizers.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/som/test_somoclu_summarizers.py b/tests/som/test_somoclu_summarizers.py index fb64dc1..47de2b4 100644 --- a/tests/som/test_somoclu_summarizers.py +++ b/tests/som/test_somoclu_summarizers.py @@ -121,3 +121,13 @@ def test_SomocluSOM_useful_clusters(): summarizerclass = somoclu_som.SOMocluSummarizer _ = one_algo("SOMomoclu4", inform_class, summarizerclass, summary_config_dict) +def test_SomocluSOM_wrong_column(): + summary_config_dict = { + "n_rows": 21, + "n_columns": 21, + "column_usage": "wrong_column", + "objid_name": "id", + } + inform_class = somoclu_som.SOMocluInformer + summarizerclass = somoclu_som.SOMocluSummarizer + _ = one_algo("SOMoclu_wrongcolumn", inform_class, summarizerclass, summary_config_dict) From 72517a5e1e2286cc01c1e861d85d24f3465736ec Mon Sep 17 00:00:00 2001 From: Zi'ang Yan Date: Mon, 6 Nov 2023 11:23:10 +0100 Subject: [PATCH 16/18] Update test_somoclu_summarizers.py --- tests/som/test_somoclu_summarizers.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/som/test_somoclu_summarizers.py b/tests/som/test_somoclu_summarizers.py index 47de2b4..e7c62a7 100644 --- a/tests/som/test_somoclu_summarizers.py +++ b/tests/som/test_somoclu_summarizers.py @@ -130,4 +130,7 @@ def test_SomocluSOM_wrong_column(): } inform_class = somoclu_som.SOMocluInformer summarizerclass = somoclu_som.SOMocluSummarizer - _ = one_algo("SOMoclu_wrongcolumn", inform_class, summarizerclass, summary_config_dict) + try: + _ = one_algo("SOMoclu_wrongcolumn", inform_class, summarizerclass, summary_config_dict) + except: + return From bb55b69e70371b452d44a2744d15d5dd21d33cb8 Mon Sep 17 00:00:00 2001 From: Ziang Yan Date: Wed, 22 May 2024 09:12:34 +0000 Subject: [PATCH 17/18] add mask option to avoid infinities --- src/rail/estimation/algos/somoclu_som.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rail/estimation/algos/somoclu_som.py b/src/rail/estimation/algos/somoclu_som.py index e16dc2f..40a3054 100644 --- a/src/rail/estimation/algos/somoclu_som.py +++ b/src/rail/estimation/algos/somoclu_som.py @@ -220,7 +220,7 @@ def run(self): if np.isnan(self.config.nondetect_val): # pragma: no cover mask = np.isnan(training_data[col]) else: - mask = np.isclose(training_data[col], self.config.nondetect_val) + mask = np.logical_or(np.isinf(training_data[col]), np.isclose(training_data[col], self.config.nondetect_val)) training_data[col][mask] = self.config.mag_limits[col] colors = _computemagcolordata(training_data, self.config.ref_band, From 6fff3f2001c72c8e938bd7d6bc1bca18a3ed636e Mon Sep 17 00:00:00 2001 From: Ziang Yan Date: Wed, 22 May 2024 16:39:40 +0000 Subject: [PATCH 18/18] add somoclu options --- src/rail/estimation/algos/somoclu_som.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/rail/estimation/algos/somoclu_som.py b/src/rail/estimation/algos/somoclu_som.py index 40a3054..21153e9 100644 --- a/src/rail/estimation/algos/somoclu_som.py +++ b/src/rail/estimation/algos/somoclu_som.py @@ -193,6 +193,10 @@ class SOMocluInformer(CatInformer): gridtype=Param(str, 'rectangular', msg="Optional parameter to specify the grid form of the nodes:" + "* 'rectangular': rectangular neurons (default)" + "* 'hexagonal': hexagonal neurons"), + n_epochs=Param(int, 10, msg="number of training epochs."), + initialization=Param(str, 'pca', msg="method of initializing the SOM:" + +"* 'pca': principal componant analysis (default)" + +"* 'random' randomly initialize the SOM"), maptype=Param(str, 'planar', msg="Optional parameter to specify the map topology:" + "* 'planar': Planar map (default)" + "* 'toroid': Toroid map"), @@ -227,10 +231,10 @@ def run(self): self.config.bands, self.config.column_usage) som = Somoclu(self.config.n_columns, self.config.n_rows, - gridtype=self.config.gridtype,compactsupport=False, - maptype=self.config.maptype, initialization='pca') + gridtype=self.config.gridtype, compactsupport=False, + maptype=self.config.maptype, initialization=self.config.initialization) - som.train(colors) + som.train(colors, epochs=self.config.n_epochs,) modeldict = dict(som=som, usecols=self.config.bands, ref_column=self.config.ref_band,