Skip to content

Commit

Permalink
Colr speedup (#491)
Browse files Browse the repository at this point in the history
* tiny tweak to hopefully speed up processing of bhm_colr_galaxies_lsdr10_d3

* moved some selection criteria to post_process() to speed up query

* Fix post-processing steps, remove debug criteria.

* ruff: bhm_galaxies.py

* edit target_selection.yml

---------

Co-authored-by: Tom Dwelly <[email protected]>
  • Loading branch information
astronomygupta and tdwelly authored Aug 12, 2024
1 parent a8d180f commit a650a1a
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 7 deletions.
69 changes: 62 additions & 7 deletions python/target_selection/cartons/bhm_galaxies.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
# @Filename: bhm_galaxies.py
# @License: BSD 3-clause (http://www.opensource.org/licenses/BSD-3-Clause)

import numpy
import pandas
import peewee

from sdssdb.peewee.sdss5db.catalogdb import (
Expand Down Expand Up @@ -65,8 +67,8 @@ def build_query(self, version_id, query_region=None):
instrument = peewee.Value(self.instrument)
# cadence = peewee.Value(self.parameters.get('cadence', self.cadence))

dered_flux_z_min = AB2nMgy(self.parameters["dered_mag_z_max"])
dered_fiberflux_z_min = AB2nMgy(self.parameters["dered_fibermag_z_max"])
# dered_flux_z_min = AB2nMgy(self.parameters["dered_mag_z_max"]) # moved to post_process
# dered_fiberflux_z_min = AB2nMgy(self.parameters["dered_fibermag_z_max"]) # ditto
fiberflux_z_min = AB2nMgy(self.parameters["fibermag_z_max"])
fiberflux_z_max = AB2nMgy(self.parameters["fibermag_z_min"])
fiberflux_r_min = AB2nMgy(self.parameters["fibermag_r_max"])
Expand Down Expand Up @@ -169,6 +171,7 @@ def build_query(self, version_id, query_region=None):
)

# compute the abs(Galactic latitude):
# we return this in the query result, and then test against it in post_process
gal_lat = peewee.fn.abs(
90.0 - peewee.fn.q3c_dist(north_gal_pole_ra, north_gal_pole_dec, c.ra, c.dec)
)
Expand Down Expand Up @@ -206,10 +209,12 @@ def build_query(self, version_id, query_region=None):
ls.flux_r.alias("ls10_flux_r"), # extra
ls.flux_i.alias("ls10_flux_i"), # extra
ls.flux_z.alias("ls10_flux_z"), # extra
ls.fiberflux_z.alias("ls10_fiberflux_z"), # extra
ls.ebv.alias("ls10_ebv"), # extra
ls.mw_transmission_z.alias("ls10_mw_transmission_z"), # extra
ls.shape_r.alias("ls10_shape_r"), # extra
ls.maskbits.alias("ls10_maskbits"), # extra
ls.fitbits.alias("ls10_fitbits"), # extra
ls.mw_transmission_z.alias("ls10_mw_transmission_z"), # extra
gal_lat.alias("abs_gal_lat"), # extra
)
.join(c2ls)
Expand All @@ -219,28 +224,30 @@ def build_query(self, version_id, query_region=None):
c2ls.version_id == version_id,
ls.type != "PSF",
ls.parallax <= 0.0,
ls.flux_z > dered_flux_z_min * ls.mw_transmission_z,
# ls.flux_z > dered_flux_z_min * ls.mw_transmission_z, # moved to post_process
# ls.fiberflux_z > dered_fiberflux_z_min * ls.mw_transmission_z, # ditto
ls.fiberflux_g > fiberflux_g_min,
ls.fiberflux_r > fiberflux_r_min,
ls.fiberflux_z > fiberflux_z_min,
ls.fiberflux_z > dered_fiberflux_z_min * ls.mw_transmission_z,
ls.fiberflux_g < fiberflux_g_max,
ls.fiberflux_r < fiberflux_r_max,
ls.fiberflux_z < fiberflux_z_max,
# safety check using Gaia mags to avoid bad ls photometry
~(ls.gaia_phot_g_mean_mag.between(0.1, self.parameters["gaia_g_mag_limit"])),
~(ls.gaia_phot_rp_mean_mag.between(0.1, self.parameters["gaia_rp_mag_limit"])),
ls.shape_r >= self.parameters["shape_r_min"],
gal_lat > self.parameters["min_gal_lat"],
# gal_lat > self.parameters["min_gal_lat"], # moved to post_process
ls.ebv < self.parameters["max_ebv"],
(ls.maskbits.bin_and(maskbits_mask) == 0), # avoid bad ls data
# (ls.fitbits.bin_and(fitbits_mask) == 0), # avoid bad ls fits
)
# .where(c.catalogid.between(63050396500000000, # debug
# 63050396550000000)) # debug
.distinct(c.catalogid)
)

if self.only_faintest_cadence:
query = query.where(cadence == cadence3)
query = query.where(ls.fiberflux_r <= fiberflux_r_min_for_cadence2)

# query_region[0] is ra of center of the region, degrees
# query_region[1] is dec of center of the region, degrees
Expand All @@ -254,6 +261,54 @@ def build_query(self, version_id, query_region=None):

return query

def post_process(self, model, **kwargs):
    """Apply the cuts that were deferred from ``build_query``.

    Reads the rows of the table at ``self.path``, keeps those passing the
    absolute Galactic latitude cut and the two z-band flux cuts (each
    threshold scaled by the row's ``ls10_mw_transmission_z``), and then
    sets ``selected`` to true for the surviving catalogids and to false
    for every other row.

    Parameters
    ----------
    model
        Forwarded unchanged to the parent class ``post_process``.
    **kwargs
        Forwarded unchanged to the parent class ``post_process``.

    Returns
    -------
    The return value of the parent class ``post_process``.
    """
    min_gal_lat = self.parameters["min_gal_lat"]
    # NOTE(review): AB2nMgy presumably converts an AB magnitude limit into a
    # flux limit (a *max* magnitude becomes a *min* flux) -- confirm.
    dered_flux_z_min = AB2nMgy(self.parameters["dered_mag_z_max"])
    dered_fiberflux_z_min = AB2nMgy(self.parameters["dered_fibermag_z_max"])

    data = pandas.read_sql(
        (
            "SELECT catalogid,abs_gal_lat,ls10_flux_z,"
            "ls10_fiberflux_z,ls10_mw_transmission_z "
            f"from {self.path}"
        ),
        self.database,
    )

    # The comparisons already yield a boolean mask; no numpy.where() needed.
    transmission_z = data["ls10_mw_transmission_z"]
    valid = (
        (data["abs_gal_lat"] > min_gal_lat)
        & (data["ls10_flux_z"] > dered_flux_z_min * transmission_z)
        & (data["ls10_fiberflux_z"] > dered_fiberflux_z_min * transmission_z)
    )

    print(
        "During post-processing we down-selected "
        f"{numpy.count_nonzero(valid)}/{len(data)} rows"
    )

    valid_cids = data.loc[valid, "catalogid"].to_numpy()

    if len(valid_cids) == 0:
        # An empty "VALUES" list is a SQL syntax error, so when nothing
        # survives the cuts just deselect every row and skip the temp table.
        self.database.execute_sql(f"UPDATE {self.path} SET selected = false")
        return super().post_process(model, **kwargs)

    # A temp table left over from an earlier call on this connection would
    # make CREATE TEMP TABLE fail; pg_temp restricts the drop to the
    # session's temporary schema so no permanent table can be affected.
    self.database.execute_sql("DROP TABLE IF EXISTS pg_temp.valid_cids")

    # This way seems faster than updating from a list of values.
    values_cids = ",".join(f"({vc})" for vc in valid_cids)
    self.database.execute_sql(
        "CREATE TEMP TABLE valid_cids AS SELECT * "
        f"FROM (VALUES {values_cids}) "
        "AS t (catalogid)"
    )
    self.database.execute_sql("CREATE INDEX ON valid_cids (catalogid);")
    self.database.execute_sql(f"UPDATE {self.path} SET selected = false")
    self.database.execute_sql(
        f"UPDATE {self.path} SET selected = true "
        "FROM valid_cids vc "
        f"WHERE {self.path}.catalogid = vc.catalogid"
    )
    # Clean up so later post-processing on the same connection can reuse
    # the table name.
    self.database.execute_sql("DROP TABLE IF EXISTS pg_temp.valid_cids")

    return super().post_process(model, **kwargs)


class BhmColrGalaxiesLsdr10D3Carton(BhmColrGalaxiesLsdr10Carton):
name = "bhm_colr_galaxies_lsdr10_d3"
Expand Down
38 changes: 38 additions & 0 deletions python/target_selection/config/target_selection.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,41 @@
'1.2.15':
xmatch_plan: 1.0.0
cartons:
- bhm_colr_galaxies_lsdr10_d3
open_fiber_path: /uufs/chpc.utah.edu/common/home/sdss50/sdsswork/target/open_fiber/postv1/draft2/
schema: sandbox
magnitudes:
h: [catalog_to_twomass_psc, twomass_psc, twomass_psc.h_m]
j: [catalog_to_twomass_psc, twomass_psc, twomass_psc.j_m]
k: [catalog_to_twomass_psc, twomass_psc, twomass_psc.k_m]
bp: [catalog_to_gaia_dr3_source, gaia_dr3_source, gaia_dr3_source.phot_bp_mean_mag]
rp: [catalog_to_gaia_dr3_source, gaia_dr3_source, gaia_dr3_source.phot_rp_mean_mag]
gaia_g: [catalog_to_gaia_dr3_source, gaia_dr3_source, gaia_dr3_source.phot_g_mean_mag]
database_options:
work_mem: '2000MB'
parameters:
bhm_colr_galaxies_lsdr10_d3:
priority: 7101
value: 0.0
cadence1: 'bright_1x1'
cadence2: 'dark_1x1'
cadence3: 'dark_flexible_3x1'
fibermag_r_for_cadence1: 17.0
fibermag_r_for_cadence2: 18.0
fibermag_r_min: 16.0
fibermag_g_min: 16.0
fibermag_z_min: 16.0
dered_mag_z_max: 19.0
dered_fibermag_z_max: 19.5
fibermag_g_max: 22.5
fibermag_r_max: 21.5
fibermag_z_max: 20.0
gaia_g_mag_limit: 15.0
gaia_rp_mag_limit: 15.0
shape_r_min: 1.0
min_gal_lat: 18.0
max_ebv: 0.2

'1.2.14':
xmatch_plan: 1.0.0
cartons:
Expand Down

0 comments on commit a650a1a

Please sign in to comment.