diff --git a/python/target_selection/append_to_sdss_id.py b/python/target_selection/append_to_sdss_id.py index 43af27b4..c06a76f8 100644 --- a/python/target_selection/append_to_sdss_id.py +++ b/python/target_selection/append_to_sdss_id.py @@ -1,33 +1,25 @@ import os -import time -from itertools import combinations -from datetime import date - -import numpy as np import peewee -import yaml from peewee import fn, JOIN -from playhouse.postgres_ext import ArrayField from sdssdb.peewee.sdss5db.catalogdb import database, Catalog -from sdssdb.peewee.sdss5db.targetdb import Target - -import target_selection - -from create_catalogidx_to_catalogidy import MetaXMatch, create_unique_from_region, TempMatch, UniqueMatch +from create_catalogidx_to_catalogidy import MetaXMatch, create_unique_from_region +from create_catalogidx_to_catalogidy import TempMatch, UniqueMatch database.connect(dbname="sdss5db", user="sdss_user") + class temp_catalogid_v21(peewee.Model): """Model for a temporary table of all catalogids within a version.""" pk = peewee.PrimaryKeyField() catalogid21 = peewee.BigIntegerField(index=True, null=False) - + class Meta: database = database schema = "sandbox" table_name = "temp_catalogid_v21" + class temp_catalogid_v25(peewee.Model): """Model for a temporary table of all catalogids within a version.""" pk = peewee.PrimaryKeyField() @@ -38,6 +30,7 @@ class Meta: schema = "sandbox" table_name = "temp_catalogid_v25" + class temp_catalogid_v31(peewee.Model): """Model for a temporary table of all catalogids within a version.""" pk = peewee.PrimaryKeyField() @@ -48,24 +41,26 @@ class Meta: schema = "sandbox" table_name = "temp_catalogid_v31" + class sdss_id_stacked(peewee.Model): """ Model for catalogdb sdss_id_stacked""" - + sdss_id = peewee.BigAutoField(primary_key=True) catalogid21 = peewee.BigIntegerField() catalogid25 = peewee.BigIntegerField() catalogid31 = peewee.BigIntegerField() ra_sdss_id = peewee.DoubleField() dec_sdss_id = peewee.DoubleField() - + class Meta: database = database - schema = "catalogdb" #"sandbox" + schema = "catalogdb" # "sandbox" table_name = "sdss_id_stacked" + class sdss_id_flat(peewee.Model): """ Model for catalogdb sdss_id_flat""" - + sdss_id = peewee.BigIntegerField() catalogid = peewee.BigIntegerField() version_id = peewee.SmallIntegerField() @@ -75,12 +70,13 @@ class sdss_id_flat(peewee.Model): ra_catalogid = peewee.DoubleField() dec_catalogid = peewee.DoubleField() pk = peewee.BigAutoField(primary_key=True) - + class Meta: database = database - schema = "catalogdb" #"sandbox" + schema = "catalogdb" # "sandbox" table_name = "sdss_id_flat" + class sdss_id_stacked_to_add(peewee.Model): """ Model for addendum to sdss_id_stacked""" @@ -94,6 +90,7 @@ class Meta: database = database schema = "sandbox" + class sdss_id_flat_to_add(peewee.Model): """ Model for rows to be added to sdss_id_flat """ @@ -123,104 +120,118 @@ def __init__(self, database, individual_table=None, catalogid_list=None): self.dir_path = os.path.dirname(__file__) if self.catalogid_list is not None: - config = {"version_ids_to_match" : [21,25,31], - "individual_xmatch_config" : self.dir_path+"/config/individual_crossmatches.yml", - "log_file" : f"catalogidx_to_catalogidy_from_list.log", - "show_first" : 20, - "split_insert_nunmber" : 100000, - "database_options" : {"enable_hashjoin" : "false"}, - #"split_query" : [['panstarrs1',522000000000000,5000000000000]], - "catalogid_list" : catalogid_list} + config = {"version_ids_to_match": [21, 25, 31], + "individual_xmatch_config": + self.dir_path + 
"/config/individual_crossmatches.yml", + "log_file": "catalogidx_to_catalogidy_from_list.log", + "show_first": 20, + "split_insert_nunmber": 100000, + "database_options": {"enable_hashjoin": "false"}, + # "split_query": [['panstarrs1',522000000000000,5000000000000]], + "catalogid_list": catalogid_list} else: if "." in self.individual_table: self.ind_table_clean = self.individual_table.split(".")[-1] - config = {"version_ids_to_match" : [21,25,31], - "individual_xmatch_config" : self.dir_path+"/config/individual_crossmatches.yml", - "log_file" : f"catalogidx_to_catalogidy_{self.ind_table_clean}.log", - "show_first" : 20, - "split_insert_nunmber" : 100000, - "database_options" : {"enable_hashjoin" : "false"}, - "split_query" : [['panstarrs1',522000000000000,5000000000000]], - "individual_table" : individual_table} + config = {"version_ids_to_match": [21, 25, 31], + "individual_xmatch_config": + self.dir_path + "/config/individual_crossmatches.yml", + "log_file": f"catalogidx_to_catalogidy_{self.ind_table_clean}.log", + "show_first": 20, + "split_insert_nunmber": 100000, + "database_options": {"enable_hashjoin": "false"}, + "split_query": [['panstarrs1', 522000000000000, 5000000000000]], + "individual_table": individual_table} self.config = config - + def run_MetaXMatch(self, database): """ This takes in an individual catalog_to_? table (labeled 'individual_table') and creates the catalogidx_to_catalogidy_? and catalogidx_to_catalogidy_?_unique tables in the sandox. """ - metax = MetaXMatch(config_filename=None, database=database, from_yaml=False, from_dict=True, config_dict=self.config) - + metax = MetaXMatch(config_filename=None, database=database, from_yaml=False, + from_dict=True, config_dict=self.config) + metax.run() create_unique_from_region(metax.output_name) return metax.output_name - def create_temp_catalogid_lists(self, database, output_name): """ foobar """ - + TempMatch._meta.table_name = output_name - UniqueMatch._meta.table_name = output_name+"_unique" - + UniqueMatch._meta.table_name = output_name + "_unique" + v21_cid_query_x = (TempMatch - .select(TempMatch.catalogidx.alias('catalogid21')) - .join(Catalog, on=((Catalog.catalogid==TempMatch.catalogidx)&(Catalog.lead==TempMatch.lead))) - .switch(TempMatch) - .join(sdss_id_stacked, join_type = JOIN.LEFT_OUTER, - on=(TempMatch.catalogidx==sdss_id_stacked.catalogid21)) - .where((TempMatch.version_idx==21)&(sdss_id_stacked.catalogid21.is_null()))) - + .select(TempMatch.catalogidx.alias('catalogid21')) + .join(Catalog, on=((Catalog.catalogid == TempMatch.catalogidx) & + (Catalog.lead == TempMatch.lead))) + .switch(TempMatch) + .join(sdss_id_stacked, join_type=JOIN.LEFT_OUTER, + on=(TempMatch.catalogidx == sdss_id_stacked.catalogid21)) + .where((TempMatch.version_idx == 21) & + (sdss_id_stacked.catalogid21.is_null()))) + v25_cid_query_x = (TempMatch - .select(TempMatch.catalogidx.alias('catalogid25')) - .join(Catalog, on=((Catalog.catalogid==TempMatch.catalogidx)&(Catalog.lead==TempMatch.lead))) - .switch(TempMatch) - .join(sdss_id_stacked, join_type = JOIN.LEFT_OUTER, - on=(TempMatch.catalogidx==sdss_id_stacked.catalogid25)) - .where((TempMatch.version_idx==25)&(sdss_id_stacked.catalogid25.is_null()))) - + .select(TempMatch.catalogidx.alias('catalogid25')) + .join(Catalog, on=((Catalog.catalogid == TempMatch.catalogidx) & + (Catalog.lead == TempMatch.lead))) + .switch(TempMatch) + .join(sdss_id_stacked, join_type=JOIN.LEFT_OUTER, + on=(TempMatch.catalogidx == sdss_id_stacked.catalogid25)) + .where((TempMatch.version_idx == 25) & + 
(sdss_id_stacked.catalogid25.is_null()))) + v31_cid_query_x = (TempMatch - .select(TempMatch.catalogidx.alias('catalogid31')) - .join(Catalog, on=((Catalog.catalogid==TempMatch.catalogidx)&(Catalog.lead==TempMatch.lead))) - .switch(TempMatch) - .join(sdss_id_stacked, join_type = JOIN.LEFT_OUTER, - on=(TempMatch.catalogidx==sdss_id_stacked.catalogid31)) - .where((TempMatch.version_idx==31)&(sdss_id_stacked.catalogid31.is_null()))) + .select(TempMatch.catalogidx.alias('catalogid31')) + .join(Catalog, on=((Catalog.catalogid == TempMatch.catalogidx) & + (Catalog.lead == TempMatch.lead))) + .switch(TempMatch) + .join(sdss_id_stacked, join_type=JOIN.LEFT_OUTER, + on=(TempMatch.catalogidx == sdss_id_stacked.catalogid31)) + .where((TempMatch.version_idx == 31) & + (sdss_id_stacked.catalogid31.is_null()))) v21_cid_query_y = (TempMatch - .select(TempMatch.catalogidy.alias('catalogid21')) - .join(Catalog, on=((Catalog.catalogid==TempMatch.catalogidy)&(Catalog.lead==TempMatch.lead))) - .switch(TempMatch) - .join(sdss_id_stacked, join_type = JOIN.LEFT_OUTER, - on=(TempMatch.catalogidy==sdss_id_stacked.catalogid21)) - .where((TempMatch.version_idy==21)&(sdss_id_stacked.catalogid21.is_null()))) + .select(TempMatch.catalogidy.alias('catalogid21')) + .join(Catalog, on=((Catalog.catalogid == TempMatch.catalogidy) & + (Catalog.lead == TempMatch.lead))) + .switch(TempMatch) + .join(sdss_id_stacked, join_type=JOIN.LEFT_OUTER, + on=(TempMatch.catalogidy == sdss_id_stacked.catalogid21)) + .where((TempMatch.version_idy == 21) & + (sdss_id_stacked.catalogid21.is_null()))) v25_cid_query_y = (TempMatch - .select(TempMatch.catalogidy.alias('catalogid25')) - .join(Catalog, on=((Catalog.catalogid==TempMatch.catalogidy)&(Catalog.lead==TempMatch.lead))) - .switch(TempMatch) - .join(sdss_id_stacked, join_type = JOIN.LEFT_OUTER, - on=(TempMatch.catalogidy==sdss_id_stacked.catalogid25)) - .where((TempMatch.version_idy==25)&(sdss_id_stacked.catalogid25.is_null()))) + .select(TempMatch.catalogidy.alias('catalogid25')) + .join(Catalog, on=((Catalog.catalogid == TempMatch.catalogidy) & + (Catalog.lead == TempMatch.lead))) + .switch(TempMatch) + .join(sdss_id_stacked, join_type=JOIN.LEFT_OUTER, + on=(TempMatch.catalogidy == sdss_id_stacked.catalogid25)) + .where((TempMatch.version_idy == 25) & + (sdss_id_stacked.catalogid25.is_null()))) v31_cid_query_y = (TempMatch - .select(TempMatch.catalogidy.alias('catalogid31')) - .join(Catalog, on=((Catalog.catalogid==TempMatch.catalogidy)&(Catalog.lead==TempMatch.lead))) - .switch(TempMatch) - .join(sdss_id_stacked, join_type = JOIN.LEFT_OUTER, - on=(TempMatch.catalogidy==sdss_id_stacked.catalogid31)) - .where((TempMatch.version_idy==31)&(sdss_id_stacked.catalogid31.is_null()))) + .select(TempMatch.catalogidy.alias('catalogid31')) + .join(Catalog, on=((Catalog.catalogid == TempMatch.catalogidy) & + (Catalog.lead == TempMatch.lead))) + .switch(TempMatch) + .join(sdss_id_stacked, join_type=JOIN.LEFT_OUTER, + on=(TempMatch.catalogidy == sdss_id_stacked.catalogid31)) + .where((TempMatch.version_idy == 31) & + (sdss_id_stacked.catalogid31.is_null()))) for table in [temp_catalogid_v21, temp_catalogid_v25, temp_catalogid_v31]: if table.table_exists(): database.drop_tables([table]) self.database.create_tables([temp_catalogid_v21, temp_catalogid_v25, temp_catalogid_v31]) - + temp_catalogid_v21.insert_from(v21_cid_query_x, [temp_catalogid_v21.catalogid21]).execute() - temp_catalogid_v21.insert_from(v21_cid_query_y, [temp_catalogid_v21.catalogid21]).execute() + 
temp_catalogid_v21.insert_from(v21_cid_query_y, [temp_catalogid_v21.catalogid21]).execute() temp_catalogid_v25.insert_from(v25_cid_query_x, [temp_catalogid_v25.catalogid25]).execute() temp_catalogid_v25.insert_from(v25_cid_query_y, [temp_catalogid_v25.catalogid25]).execute() temp_catalogid_v31.insert_from(v31_cid_query_x, [temp_catalogid_v31.catalogid31]).execute() @@ -228,91 +239,108 @@ def create_temp_catalogid_lists(self, database, output_name): if self.catalogid_list is not None: catid_input_query = (Catalog.select(Catalog.catalogid, Catalog.version_id) - .join(sdss_id_flat, join_type = JOIN.LEFT_OUTER, - on=(sdss_id_flat.catalogid==Catalog.catalogid)) - .where(Catalog.catalogid << self.catalogid_list) - .where(sdss_id_flat.catalogid.is_null())) - + .join(sdss_id_flat, join_type=JOIN.LEFT_OUTER, + on=(sdss_id_flat.catalogid == Catalog.catalogid)) + .where(Catalog.catalogid << self.catalogid_list) + .where(sdss_id_flat.catalogid.is_null())) + for row in catid_input_query: - if row.version_id==21: + if row.version_id == 21: temp_catalogid_v21.insert(catalogid21=row.catalogid).execute() - elif row.version_id==25: + elif row.version_id == 25: temp_catalogid_v25.insert(catalogid25=row.catalogid).execute() - elif row.version_id==31: + elif row.version_id == 31: temp_catalogid_v31.insert(catalogid31=row.catalogid).execute() else: try: ind_table = self.database.models[self.individual_table] except: - raise ValueError("Could not find the table in database model: "+self.individual_table) - + raise ValueError("Could not find the table in database model: " + + self.individual_table) + ind_table_input_query = (Catalog.select(Catalog.catalogid, Catalog.version_id) - .join(ind_table, on=(Catalog.catalogid==ind_table.catalogid)) - .join(sdss_id_flat, join_type = JOIN.LEFT_OUTER, - on=(sdss_id_flat.catalogid==Catalog.catalogid)) - .where(sdss_id_flat.catalogid.is_null())) + .join(ind_table, + on=(Catalog.catalogid == ind_table.catalogid)) + .join(sdss_id_flat, join_type=JOIN.LEFT_OUTER, + on=(sdss_id_flat.catalogid == Catalog.catalogid)) + .where(sdss_id_flat.catalogid.is_null())) + for row in ind_table_input_query: - if row.version_id==21: + if row.version_id == 21: temp_catalogid_v21.insert(catalogid21=row.catalogid).execute() - elif row.version_id==25: + elif row.version_id == 25: temp_catalogid_v25.insert(catalogid25=row.catalogid).execute() - elif row.version_id==31: + elif row.version_id == 31: temp_catalogid_v31.insert(catalogid31=row.catalogid).execute() - + def create_sdss_id_stacked_to_add(self, database, output_name): """ foobar """ large_cte_query = f"""DROP TABLE sandbox.sdss_id_stacked_to_add; CREATE TABLE sandbox.sdss_id_stacked_to_add as ( - with cat_ids21 as (select distinct tc.catalogid21, cat.lead from sandbox.temp_catalogid_v21 tc join catalog cat on cat.catalogid=tc.catalogid21), - cat_ids25 as (select distinct tc.catalogid25, cat.lead from sandbox.temp_catalogid_v25 tc join catalog cat on cat.catalogid=tc.catalogid25), - cat_ids31 as (select distinct tc.catalogid31, cat.lead from sandbox.temp_catalogid_v31 tc join catalog cat on cat.catalogid=tc.catalogid31), - sq21_25 as (select cc.catalogidx, cc.catalogidy from sandbox.{output_name} cc + with cat_ids21 as (select distinct tc.catalogid21, cat.lead from + sandbox.temp_catalogid_v21 tc join catalog cat on cat.catalogid=tc.catalogid21), + cat_ids25 as (select distinct tc.catalogid25, cat.lead from + sandbox.temp_catalogid_v25 tc join catalog cat on cat.catalogid=tc.catalogid25), + cat_ids31 as (select distinct tc.catalogid31, 
cat.lead from + sandbox.temp_catalogid_v31 tc join catalog cat on cat.catalogid=tc.catalogid31), + sq21_25 as (select cc.catalogidx, cc.catalogidy from sandbox.{output_name} cc join catalog cat on cat.catalogid=cc.catalogidx and cat.lead=cc.lead where cc.version_idx=21 and cc.version_idy=25), - sq25_31 as (select cc.catalogidx, cc.catalogidy from sandbox.{output_name} cc + sq25_31 as (select cc.catalogidx, cc.catalogidy from sandbox.{output_name} cc join catalog cat on cat.catalogid=cc.catalogidx and cat.lead=cc.lead where cc.version_idx=25 and cc.version_idy=31), - left21_to_25 as (select cat_ids21.catalogid21, sq21_25.catalogidy as catalogid25 from cat_ids21 left join sq21_25 on cat_ids21.catalogid21=sq21_25.catalogidx), - add_outer25 as (select left21_to_25.catalogid21, cat_ids25.catalogid25 as catalogid25 from left21_to_25 full outer join cat_ids25 on left21_to_25.catalogid25=cat_ids25.catalogid25), - left_25_to_31 as (select add_outer25.catalogid21, add_outer25.catalogid25, sq25_31.catalogidy as catalogid31 from add_outer25 left join sq25_31 on add_outer25.catalogid25=sq25_31.catalogidx), - add_outer31 as (select left_25_to_31.catalogid21, left_25_to_31.catalogid25, cat_ids31.catalogid31 from left_25_to_31 full outer join cat_ids31 on left_25_to_31.catalogid31=cat_ids31.catalogid31) - select * from add_outer31);""" + left21_to_25 as (select cat_ids21.catalogid21, sq21_25.catalogidy as catalogid25 from + cat_ids21 left join sq21_25 on cat_ids21.catalogid21=sq21_25.catalogidx), + add_outer25 as (select left21_to_25.catalogid21, + cat_ids25.catalogid25 as catalogid25 from + left21_to_25 full outer join cat_ids25 on + left21_to_25.catalogid25=cat_ids25.catalogid25), + left_25_to_31 as (select add_outer25.catalogid21, + add_outer25.catalogid25, sq25_31.catalogidy as catalogid31 from + add_outer25 left join sq25_31 on add_outer25.catalogid25=sq25_31.catalogidx), + add_outer31 as (select left_25_to_31.catalogid21, left_25_to_31.catalogid25, + cat_ids31.catalogid31 from + left_25_to_31 full outer join cat_ids31 on + left_25_to_31.catalogid31=cat_ids31.catalogid31) + select * from add_outer31);""" self.database.execute_sql(large_cte_query) - - add_ra_dec_columns = """ ALTER TABLE sandbox.sdss_id_stacked_to_add ADD COLUMN ra_sdss_id double precision; - ALTER TABLE sandbox.sdss_id_stacked_to_add ADD COLUMN dec_sdss_id double precision; """ + + add_ra_dec_columns = """ ALTER TABLE sandbox.sdss_id_stacked_to_add + ADD COLUMN ra_sdss_id double precision; + ALTER TABLE sandbox.sdss_id_stacked_to_add + ADD COLUMN dec_sdss_id double precision; """ self.database.execute_sql(add_ra_dec_columns) sdss_id_stacked_to_add._meta.table_name = "sdss_id_stacked_to_add" - + ra_dec_update31 = (sdss_id_stacked_to_add - .update(ra_sdss_id=Catalog.ra, dec_sdss_id=Catalog.dec) - .from_(Catalog) - .where(sdss_id_stacked_to_add.catalogid31 == Catalog.catalogid) - .where(sdss_id_stacked_to_add.catalogid31.is_null(False))) + .update(ra_sdss_id=Catalog.ra, dec_sdss_id=Catalog.dec) + .from_(Catalog) + .where(sdss_id_stacked_to_add.catalogid31 == Catalog.catalogid) + .where(sdss_id_stacked_to_add.catalogid31.is_null(False))) ra_dec_update25 = (sdss_id_stacked_to_add - .update(ra_sdss_id=Catalog.ra, dec_sdss_id=Catalog.dec) - .from_(Catalog) - .where(sdss_id_stacked_to_add.catalogid25 == Catalog.catalogid) - .where(sdss_id_stacked_to_add.catalogid31.is_null()) - .where(sdss_id_stacked_to_add.catalogid25.is_null(False))) + .update(ra_sdss_id=Catalog.ra, dec_sdss_id=Catalog.dec) + .from_(Catalog) + 
.where(sdss_id_stacked_to_add.catalogid25 == Catalog.catalogid) + .where(sdss_id_stacked_to_add.catalogid31.is_null()) + .where(sdss_id_stacked_to_add.catalogid25.is_null(False))) ra_dec_update21 = (sdss_id_stacked_to_add - .update(ra_sdss_id=Catalog.ra, dec_sdss_id=Catalog.dec) - .from_(Catalog) - .where(sdss_id_stacked_to_add.catalogid21 == Catalog.catalogid) - .where(sdss_id_stacked_to_add.catalogid31.is_null()) - .where(sdss_id_stacked_to_add.catalogid25.is_null()) - .where(sdss_id_stacked_to_add.catalogid21.is_null(False))) + .update(ra_sdss_id=Catalog.ra, dec_sdss_id=Catalog.dec) + .from_(Catalog) + .where(sdss_id_stacked_to_add.catalogid21 == Catalog.catalogid) + .where(sdss_id_stacked_to_add.catalogid31.is_null()) + .where(sdss_id_stacked_to_add.catalogid25.is_null()) + .where(sdss_id_stacked_to_add.catalogid21.is_null(False))) ra_dec_update31.execute() ra_dec_update25.execute() ra_dec_update21.execute() - + def add_to_sdss_id_stacked(self, database): """ foobar """ - + sid_stacked_f = [sdss_id_stacked.catalogid21, sdss_id_stacked.catalogid25, sdss_id_stacked.catalogid31, @@ -326,16 +354,16 @@ def add_to_sdss_id_stacked(self, database): sdss_id_stacked_to_add.dec_sdss_id).tuples() with self.database.atomic(): sdss_id_stacked.insert_many(to_add, fields=sid_stacked_f).execute() - + def create_sdss_id_flat_to_add(self, database): """ foobar """ - + if sdss_id_flat_to_add.table_exists(): self.database.drop_tables([sdss_id_flat_to_add]) self.database.create_tables([sdss_id_flat_to_add]) else: - self.database.create_tables([sdss_id_flat_to_add]) - + self.database.create_tables([sdss_id_flat_to_add]) + sid_flat_fields = [sdss_id_flat_to_add.sdss_id, sdss_id_flat_to_add.catalogid, sdss_id_flat_to_add.version_id, @@ -343,55 +371,57 @@ def create_sdss_id_flat_to_add(self, database): sdss_id_flat_to_add.dec_sdss_id] max_sdss_id = sdss_id_flat.select(fn.MAX(sdss_id_flat.sdss_id)).scalar() - + sid_flat_add_v21 = (sdss_id_stacked.select(sdss_id_stacked.sdss_id, - sdss_id_stacked.catalogid21, - peewee.Value(21), - sdss_id_stacked.ra_sdss_id, - sdss_id_stacked.dec_sdss_id) - .where(sdss_id_stacked.catalogid21.is_null(False)) - .where(sdss_id_stacked.sdss_id > max_sdss_id).tuples()) + sdss_id_stacked.catalogid21, + peewee.Value(21), + sdss_id_stacked.ra_sdss_id, + sdss_id_stacked.dec_sdss_id) + .where(sdss_id_stacked.catalogid21.is_null(False)) + .where(sdss_id_stacked.sdss_id > max_sdss_id).tuples()) + with self.database.atomic(): sdss_id_flat_to_add.insert_many(sid_flat_add_v21, fields=sid_flat_fields).execute() sid_flat_add_v25 = (sdss_id_stacked.select(sdss_id_stacked.sdss_id, - sdss_id_stacked.catalogid25, - peewee.Value(25), - sdss_id_stacked.ra_sdss_id, - sdss_id_stacked.dec_sdss_id) - .where(sdss_id_stacked.catalogid25.is_null(False)) - .where(sdss_id_stacked.sdss_id > max_sdss_id).tuples()) + sdss_id_stacked.catalogid25, + peewee.Value(25), + sdss_id_stacked.ra_sdss_id, + sdss_id_stacked.dec_sdss_id) + .where(sdss_id_stacked.catalogid25.is_null(False)) + .where(sdss_id_stacked.sdss_id > max_sdss_id).tuples()) + with self.database.atomic(): sdss_id_flat_to_add.insert_many(sid_flat_add_v25, fields=sid_flat_fields).execute() sid_flat_add_v31 = (sdss_id_stacked.select(sdss_id_stacked.sdss_id, - sdss_id_stacked.catalogid31, - peewee.Value(31), - sdss_id_stacked.ra_sdss_id, - sdss_id_stacked.dec_sdss_id) - .where(sdss_id_stacked.catalogid31.is_null(False)) - .where(sdss_id_stacked.sdss_id > max_sdss_id).tuples()) + sdss_id_stacked.catalogid31, + peewee.Value(31), + sdss_id_stacked.ra_sdss_id, + 
sdss_id_stacked.dec_sdss_id) + .where(sdss_id_stacked.catalogid31.is_null(False)) + .where(sdss_id_stacked.sdss_id > max_sdss_id).tuples()) with self.database.atomic(): sdss_id_flat_to_add.insert_many(sid_flat_add_v31, fields=sid_flat_fields).execute() cte = (sdss_id_flat_to_add - .select(sdss_id_flat_to_add.catalogid, fn.COUNT("*").alias('ct')) - .group_by(sdss_id_flat_to_add.catalogid) - .cte('catid_n_associated', columns=("catalogid", "ct"))) + .select(sdss_id_flat_to_add.catalogid, fn.COUNT("*").alias('ct')) + .group_by(sdss_id_flat_to_add.catalogid) + .cte('catid_n_associated', columns=("catalogid", "ct"))) query = (sdss_id_flat_to_add - .update(n_associated=cte.c.ct) - .from_(cte) - .where(sdss_id_flat_to_add.catalogid == cte.c.catalogid) - .with_cte(cte) - .execute()) - + .update(n_associated=cte.c.ct) + .from_(cte) + .where(sdss_id_flat_to_add.catalogid == cte.c.catalogid) + .with_cte(cte) + .execute()) + query = (sdss_id_flat_to_add - .update(ra_catalogid=Catalog.ra, - dec_catalogid=Catalog.dec) - .from_(Catalog) - .where(sdss_id_flat_to_add.catalogid == Catalog.catalogid) - .execute()) + .update(ra_catalogid=Catalog.ra, + dec_catalogid=Catalog.dec) + .from_(Catalog) + .where(sdss_id_flat_to_add.catalogid == Catalog.catalogid) + .execute()) def add_to_sdss_id_flat(self, database): """ foobar """ diff --git a/python/target_selection/create_catalogidx_to_catalogidy.py b/python/target_selection/create_catalogidx_to_catalogidy.py index 2354ca8e..71ca3cdf 100644 --- a/python/target_selection/create_catalogidx_to_catalogidy.py +++ b/python/target_selection/create_catalogidx_to_catalogidy.py @@ -82,12 +82,13 @@ class MetaXMatch: log_file, and split_inrest_number. """ - def __init__(self, database, config_filename=None, from_yaml=True, from_dict=False, config_dict=None, save_log_output=False): + def __init__(self, database, config_filename=None, from_yaml=True, from_dict=False, + config_dict=None, save_log_output=False): self.database = database - if from_yaml==True: + if from_yaml: config = yaml.load(open(config_filename, 'r'), Loader=yaml.SafeLoader) - elif from_dict==True: - if type(config_dict) != dict: + elif from_dict: + if type(config_dict) is not dict: raise ValueError("config must be a dict") config = config_dict @@ -103,7 +104,8 @@ def __init__(self, database, config_filename=None, from_yaml=True, from_dict=Fal Loader=yaml.SafeLoader) self.version_ids_to_match = config['version_ids_to_match'] - optional_parameters = ['sample_region', 'database_options', 'show_first', 'split_query', 'ra_region', 'individual_table', 'catalogid_list'] + optional_parameters = ['sample_region', 'database_options', 'show_first', 'split_query', + 'ra_region', 'individual_table', 'catalogid_list'] # All the optional parameters present in the configuration file are stored directly # As attributes of the MetaXMatch object and for the ones that are not the attribute @@ -126,27 +128,27 @@ def __init__(self, database, config_filename=None, from_yaml=True, from_dict=Fal else: st_dec = st_dec.replace('-', 'neg') output_name = (f'catalogidx_to_catalogidy' - f'_ra{racen}_dec{st_dec}_rad{int(radius)}') + f'_ra{racen}_dec{st_dec}_rad{int(radius)}') log_message = f'### Using ra={racen:5.1f} dec={deccen:5.1f} rad={radius:04.1f} ###' elif self.ra_region: ra_start, ra_stop = self.ra_region if ra_start >= ra_stop: raise ValueError("Starting RA must be smaller than stopping RA!") output_name = (f'catalogidx_to_catalogidy' - f'_ra{int(ra_start)}_ra{int(ra_stop)}') + f'_ra{int(ra_start)}_ra{int(ra_stop)}') log_message = 
f'### Using ra={ra_start:5.1f} to ra={ra_stop:5.1f} ###' elif self.individual_table: ind_table = self.individual_table if "." in ind_table: ind_table = ind_table.split(".")[-1] output_name = (f'catalogidx_to_catalogidy' - f'_{ind_table}') + f'_{ind_table}') log_message = f'### Using catalogids from file {ind_table} ###' elif self.catalogid_list: - output_name = (f'catalogidx_to_catalogidy_from_catalogid_list') - log_message = f'### Using catalogids from a given list ###' + output_name = ('catalogidx_to_catalogidy_from_catalogid_list') + log_message = '### Using catalogids from a given list ###' else: - output_name = f'catalogidx_to_catalogidy_all' + output_name = 'catalogidx_to_catalogidy_all' log_message = '### Using Full SKY ###' if self.ra_region and self.individual_table: @@ -155,9 +157,9 @@ def __init__(self, database, config_filename=None, from_yaml=True, from_dict=Fal if "." in ind_table: ind_table = ind_table.split(".")[-1] output_name = (f'catalogidx_to_catalogidy' - f'_{ind_table}' - f'_ra{int(ra_start)}_ra{int(ra_stop)}') - log_message = f'### Using catalogids from file {ind_table} in ra={ra_start:5.1f} to ra={ra_stop:5.1f} ###' + f'_{ind_table}' + f'_ra{int(ra_start)}_ra{int(ra_stop)}') + log_message = f'### Using catalogids from file {ind_table} in ra={ra_start:5.1f} to ra={ra_stop:5.1f} ###' self.output_name = output_name self.log.info(' ') @@ -165,11 +167,12 @@ def __init__(self, database, config_filename=None, from_yaml=True, from_dict=Fal self.log.info(log_message) self.log.info('#' * 45) - all_tables = [set(ind_xmatch_config[k]['order']) for k in ind_xmatch_config.keys()] intersecting_tables = set() for i_comb in list(combinations([i for i in range(len(all_tables))], 2)): - intersecting_tables = intersecting_tables | (all_tables[i_comb[0]] & all_tables[i_comb[1]]) + intersecting_tables = intersecting_tables | \ + (all_tables[i_comb[0]] & all_tables[i_comb[1]]) + intersecting_tables = list(intersecting_tables) intersecting_tables.sort() self.intersecting_tables = intersecting_tables @@ -219,8 +222,8 @@ def match_in_table(self, name, split=False, min_targetid=0, max_targetid=0): self.database.execute_sql(setting_message) self.log.info(setting_message) - temp_fields = [TempMatch.lead, TempMatch.catalogidx, TempMatch.catalogidy, - TempMatch.version_idx, TempMatch.version_idy] + temp_fields = [TempMatch.lead, TempMatch.catalogidx, TempMatch.catalogidy, + TempMatch.version_idx, TempMatch.version_idy] db_models = self.database.models @@ -235,43 +238,44 @@ def match_in_table(self, name, split=False, min_targetid=0, max_targetid=0): if self.individual_table: try: - ind_table = db_models[self.individual_table] + ind_table = db_models[self.individual_table] except: - raise ValueError("Could not find the table in database model: "+self.individual_table) - - if self.individual_table=='catalogdb.catalog_to_' + name: + raise ValueError("Could not find the table in database model: " + + self.individual_table) + + if self.individual_table == 'catalogdb.catalog_to_' + name: cte_targetids = (Catalog.select(rel_table.target_id, - Catalog.ra, Catalog.dec) - .join(rel_table, on=(Catalog.catalogid == rel_table.catalogid)) - .where((rel_table.version << self.version_ids_to_match) & - (rel_table.best == True)) - .distinct(rel_table.target_id)) + Catalog.ra, Catalog.dec) + .join(rel_table, on=(Catalog.catalogid == rel_table.catalogid)) + .where((rel_table.version << self.version_ids_to_match) & + (rel_table.best == True)) + .distinct(rel_table.target_id)) else: cte_targetids = 
(Catalog.select(rel_table.target_id, - Catalog.ra, Catalog.dec) - .join(ind_table, on=(Catalog.catalogid == ind_table.catalogid)) - .join(rel_table, on=(Catalog.catalogid == rel_table.catalogid)) - .where((rel_table.version << self.version_ids_to_match) & - (rel_table.best == True)) - .distinct(rel_table.target_id)) + Catalog.ra, Catalog.dec) + .join(ind_table, on=(Catalog.catalogid == ind_table.catalogid)) + .join(rel_table, on=(Catalog.catalogid == rel_table.catalogid)) + .where((rel_table.version << self.version_ids_to_match) & + (rel_table.best == True)) + .distinct(rel_table.target_id)) elif self.catalogid_list: cte_targetids = (Catalog.select(rel_table.target_id, - Catalog.ra, Catalog.dec) - .join(rel_table, on=(Catalog.catalogid == rel_table.catalogid)) - .where(((rel_table.version << self.version_ids_to_match) & - (rel_table.best == True))) - .where(Catalog.catalogid << self.catalogid_list) - .distinct(rel_table.target_id)) + Catalog.ra, Catalog.dec) + .join(rel_table, on=(Catalog.catalogid == rel_table.catalogid)) + .where(((rel_table.version << self.version_ids_to_match) & + (rel_table.best == True))) + .where(Catalog.catalogid << self.catalogid_list) + .distinct(rel_table.target_id)) else: - cte_targetids = (Target.select(rel_table.target_id, - Target.ra, Target.dec) - .join(rel_table, on=(Target.catalogid == rel_table.catalogid)) - .where((rel_table.version << self.version_ids_to_match) & - (rel_table.best == True)) - .distinct(rel_table.target_id)) + cte_targetids = (Target.select(rel_table.target_id, + Target.ra, Target.dec) + .join(rel_table, on=(Target.catalogid == rel_table.catalogid)) + .where((rel_table.version << self.version_ids_to_match) & + (rel_table.best == True)) + .distinct(rel_table.target_id)) # If sample_region is included in the configuration file, then the entire process # restricted to a (single) sample region where the tuple in the configuration file @@ -288,7 +292,6 @@ def match_in_table(self, name, split=False, min_targetid=0, max_targetid=0): elif self.ra_region: ra_start, ra_stop = self.ra_region cte_targetids = cte_targetids.where(Target.ra.between(ra_start, ra_stop)) - cte_targetids = cte_targetids.cte('cte_targets') @@ -301,11 +304,10 @@ def match_in_table(self, name, split=False, min_targetid=0, max_targetid=0): .over(partition_by=rel_table.target_id).alias('version_ids')) .join(cte_targetids, on=(rel_table.target_id == cte_targetids.c.target_id)) .where((rel_table.version << self.version_ids_to_match) & - (rel_table.best == True)) + (rel_table.best == True)) .distinct(rel_table.target_id) .with_cte(cte_targetids)) - if split: query = query.where(rel_table.target_id >= min_targetid, rel_table.target_id < max_targetid) @@ -333,8 +335,8 @@ def match_in_table(self, name, split=False, min_targetid=0, max_targetid=0): npair += 1 sorted_catids = np.sort(catid_combs[ind_comb]) sorted_verids = np.sort(verid_combs[ind_comb]) - curr_tuple = (entry['table'], sorted_catids[0], sorted_catids[1], - sorted_verids[0], sorted_verids[1]) + curr_tuple = (entry['table'], sorted_catids[0], sorted_catids[1], + sorted_verids[0], sorted_verids[1]) results_list.append(curr_tuple) if self.show_first and npair <= self.show_first and not split: tuple_log = str(npair) + ' ' + ' '.join([str(el) for el in curr_tuple]) @@ -445,7 +447,7 @@ def create_unique_from_region(input_tablename, save_log_output=False): database.bind([UniqueMatch]) log = target_selection.log if save_log_output: - log_path_and_name = os.path.realpath("./"+input_tablename+"_unique_table_creation.log") + 
log_path_and_name = os.path.realpath("./" + input_tablename + "_unique_table_creation.log")
         log.start_file_logger(log_path_and_name, rotating=False, mode='a')
 
     if UniqueMatch.table_exists():
@@ -477,7 +479,6 @@ def create_unique_from_region(input_tablename, save_log_output=False):
                  f'entries in {(tf-ti):.2f} seconds')
 
 
-
 """ EXAMPLE OF USAGE
 
 metax = MetaXMatch('cat_to_cat.yml', database)
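
Reviewer note: below is a minimal end-to-end driver sketch for the append_to_sdss_id.py workflow touched in this diff. It is an illustration only: the enclosing class name is not visible in these hunks and is assumed here to be AppendToTables with an assumed import path, and the individual_table value is a placeholder. Only method names and signatures that actually appear in the diff (run_MetaXMatch, create_temp_catalogid_lists, create_sdss_id_stacked_to_add, add_to_sdss_id_stacked, create_sdss_id_flat_to_add, add_to_sdss_id_flat) are used.

# Sketch only -- class name and import path are assumptions, not confirmed by this diff.
from sdssdb.peewee.sdss5db.catalogdb import database
from target_selection.append_to_sdss_id import AppendToTables  # assumed class name

# append_to_sdss_id.py already connects to sdss5db at import time, so no extra
# database.connect() call is made here.

# Option A: seed from an existing catalog_to_? table (placeholder table name).
appender = AppendToTables(database,
                          individual_table="catalogdb.catalog_to_gaia_dr3_source")
# Option B: seed from an explicit list of catalogids instead.
# appender = AppendToTables(database, catalogid_list=my_catalogid_list)

# 1. Cross-match catalogids across version_ids 21/25/31 and build the
#    catalogidx_to_catalogidy_* sandbox tables; returns the output table name.
output_name = appender.run_MetaXMatch(database)

# 2. Fill sandbox.temp_catalogid_v21/v25/v31 with catalogids that are not yet
#    present in the sdss_id tables.
appender.create_temp_catalogid_lists(database, output_name)

# 3. Build sandbox.sdss_id_stacked_to_add and append it to catalogdb.sdss_id_stacked.
appender.create_sdss_id_stacked_to_add(database, output_name)
appender.add_to_sdss_id_stacked(database)

# 4. Build sandbox.sdss_id_flat_to_add from the newly assigned sdss_ids and append
#    it to catalogdb.sdss_id_flat.
appender.create_sdss_id_flat_to_add(database)
appender.add_to_sdss_id_flat(database)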