diff --git a/changes/302.general.rst b/changes/302.general.rst
new file mode 100644
index 00000000..4be32450
--- /dev/null
+++ b/changes/302.general.rst
@@ -0,0 +1 @@
+Remove redundant calculations and unneeded array allocations.
diff --git a/src/stcal/jump/twopoint_difference.py b/src/stcal/jump/twopoint_difference.py
index 5be6b595..728de733 100644
--- a/src/stcal/jump/twopoint_difference.py
+++ b/src/stcal/jump/twopoint_difference.py
@@ -4,6 +4,7 @@ import numpy as np
 import warnings
 from astropy import stats
+from astropy.utils.exceptions import AstropyUserWarning

 log = logging.getLogger(__name__)
 log.setLevel(logging.DEBUG)
@@ -128,12 +129,11 @@ def find_crs(
         pixels above current row also to be flagged as a CR

     """
-    # copy data and group DQ array
+    # copy group DQ array. data array never gets modified.
+    dat = dataa
     if copy_arrs:
-        dat = dataa.copy()
         gdq = group_dq.copy()
     else:
-        dat = dataa
         gdq = group_dq
     # Get data characteristics
     nints, ngroups, nrows, ncols = dataa.shape
@@ -172,30 +172,24 @@ def find_crs(
                          dtype=np.float32)
         return gdq, row_below_gdq, row_above_gdq, 0, dummy
     else:
-        # set 'saturated' or 'do not use' pixels to nan in data
-        dat[np.where(np.bitwise_and(gdq, sat_flag))] = np.nan
-        dat[np.where(np.bitwise_and(gdq, dnu_flag))] = np.nan
-        dat[np.where(np.bitwise_and(gdq, dnu_flag + sat_flag))] = np.nan
+        # boolean array for 'saturated' or 'do not use' pixels
+        set_to_nan = gdq & (dnu_flag | sat_flag) != 0

         # calculate the differences between adjacent groups (first diffs)
         # use mask on data, so the results will have sat/donotuse groups masked
         first_diffs = np.diff(dat, axis=1)
+        first_diffs[set_to_nan[:, 1:] | set_to_nan[:, :-1]] = np.nan
+        del set_to_nan
+        first_diffs_finite = np.isfinite(first_diffs)

         # calc. the median of first_diffs for each pixel along the group axis
-        first_diffs_masked = np.ma.masked_array(first_diffs, mask=np.isnan(first_diffs))
-        median_diffs = np.ma.median(first_diffs_masked, axis=(0, 1))
+        median_diffs = np.nanmedian(first_diffs.reshape((-1, nrows, ncols)), axis=0)

         # calculate sigma for each pixel
         sigma = np.sqrt(np.abs(median_diffs) + read_noise_2 / nframes)
-        # reset sigma so pxels with 0 readnoise are not flagged as jumps
-        sigma[np.where(sigma == 0.)] = np.nan
-
-        # compute 'ratio' for each group. this is the value that will be
-        # compared to 'threshold' to classify jumps. subtract the median of
-        # first_diffs from first_diffs, take the absolute value and divide by sigma.
-        e_jump_4d = first_diffs - median_diffs[np.newaxis, :, :]
-        ratio_all = np.abs(first_diffs - median_diffs[np.newaxis, np.newaxis, :, :]) / \
-                    sigma[np.newaxis, np.newaxis, :, :]
+        # reset sigma so pixels with 0 readnoise are not flagged as jumps
+        sigma[sigma == 0.] = np.nan
+
         # Test to see if there are enough groups to use sigma clipping
         if (only_use_ints and nints >= minimum_sigclip_groups) or \
                 (not only_use_ints and total_groups >= minimum_sigclip_groups):
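Reviewer note, illustrative sketch and not part of the patch: the hunk above replaces three fancy-index writes into `dat` with one boolean mask applied to the first differences. A difference is unusable when either of the two groups bounding it is saturated or do-not-use, hence the OR of the mask offset by one group, and the per-pixel median then comes from `np.nanmedian` on a view that pools integrations and diffs along one axis. Toy shapes and flag values below are made up:

    import numpy as np

    SAT, DNU = 2, 1                                  # illustrative flag values, not the real dqflags
    nints, ngroups, nrows, ncols = 1, 5, 2, 2
    gdq = np.zeros((nints, ngroups, nrows, ncols), dtype=np.uint8)
    gdq[0, 3, 0, 0] = SAT                            # one saturated group
    dat = np.random.rand(nints, ngroups, nrows, ncols).astype(np.float32)

    bad = (gdq & (DNU | SAT)) != 0                   # True wherever a group is unusable
    diffs = np.diff(dat, axis=1)                     # shape (1, 4, 2, 2)
    diffs[bad[:, 1:] | bad[:, :-1]] = np.nan         # a diff is bad if either bounding group is bad

    # one median per pixel, pooling integrations and group-to-group diffs
    median_diffs = np.nanmedian(diffs.reshape((-1, nrows, ncols)), axis=0)
    print(median_diffs.shape)                        # (2, 2)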
@@ -204,23 +198,24 @@ def find_crs(
             warnings.filterwarnings("ignore", ".*All-NaN slice encountered.*", RuntimeWarning)
             warnings.filterwarnings("ignore", ".*Mean of empty slice.*", RuntimeWarning)
             warnings.filterwarnings("ignore", ".*Degrees of freedom <= 0.*", RuntimeWarning)
+            warnings.filterwarnings("ignore", ".*Input data contains invalid values*", AstropyUserWarning)
             if only_use_ints:
-                mean, median, stddev = stats.sigma_clipped_stats(first_diffs_masked, sigma=normal_rej_thresh,
-                                                                 axis=0)
-                clipped_diffs = stats.sigma_clip(first_diffs_masked, sigma=normal_rej_thresh,
-                                                 axis=0, masked=True)
+                clipped_diffs, alow, ahigh = stats.sigma_clip(first_diffs, sigma=normal_rej_thresh,
+                                                              axis=0, masked=True, return_bounds=True)
             else:
-                mean, median, stddev = stats.sigma_clipped_stats(first_diffs_masked, sigma=normal_rej_thresh,
-                                                                 axis=(0, 1))
-                clipped_diffs = stats.sigma_clip(first_diffs_masked, sigma=normal_rej_thresh,
-                                                 axis=(0, 1), masked=True)
-            jump_mask = np.logical_and(clipped_diffs.mask, np.logical_not(first_diffs_masked.mask))
-            jump_mask[np.bitwise_and(jump_mask, gdq[:, 1:, :, :] == sat_flag)] = False
-            jump_mask[np.bitwise_and(jump_mask, gdq[:, 1:, :, :] == dnu_flag)] = False
-            jump_mask[np.bitwise_and(jump_mask, gdq[:, 1:, :, :] == (dnu_flag + sat_flag))] = False
-            gdq[:, 1:, :, :] = np.bitwise_or(gdq[:, 1:, :, :], jump_mask *
-                                             np.uint8(dqflags["JUMP_DET"]))
+                clipped_diffs, alow, ahigh = stats.sigma_clip(first_diffs, sigma=normal_rej_thresh,
+                                                              axis=(0, 1), masked=True, return_bounds=True)
+
+            # get the standard deviation from the bounds of sigma clipping
+            stddev = 0.5*(ahigh - alow)/normal_rej_thresh
+
+            jump_candidates = clipped_diffs.mask
+            sat_or_dnu_not_set = gdq[:, 1:] & (sat_flag | dnu_flag) == 0
+            jump_mask = jump_candidates & first_diffs_finite & sat_or_dnu_not_set
+            del clipped_diffs
+            gdq[:, 1:] |= jump_mask * np.uint8(jump_flag)
+
             # if grp is all jump set to do not use
             for integ in range(nints):
                 for grp in range(ngrps):
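Reviewer note, illustrative sketch and not part of the patch: `sigma_clipped_stats` can be dropped above because the clip limits already encode the clipped standard deviation. With `return_bounds=True`, `astropy.stats.sigma_clip` also returns the final lower and upper cut levels, which sit at center minus N*std and center plus N*std of the surviving samples, so half their separation divided by N recovers std without a second pass. A quick check on synthetic data, assuming the default cenfunc and stdfunc:

    import numpy as np
    from astropy import stats

    rng = np.random.default_rng(0)
    diffs = rng.normal(0.0, 2.0, size=(50, 8, 8)).astype(np.float32)   # toy first differences

    nsig = 3.0
    clipped, alow, ahigh = stats.sigma_clip(diffs, sigma=nsig, axis=0,
                                            masked=True, return_bounds=True)

    # bounds are center +/- nsig * std of the kept samples, so this is close to 2.0 per pixel
    stddev = 0.5 * (ahigh - alow) / nsig
    print(float(stddev.mean()))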
@@ -231,52 +226,41 @@ def find_crs(
             warnings.resetwarnings()
         else:  # There are not enough groups for sigma clipping
-            # set 'saturated' or 'do not use' pixels to nan in data
-            dat[np.where(np.bitwise_and(gdq, sat_flag))] = np.nan
-            dat[np.where(np.bitwise_and(gdq, dnu_flag))] = np.nan
-
-            # calculate the differences between adjacent groups (first diffs)
-            # use mask on data, so the results will have sat/donotuse groups masked
-            first_diffs = np.diff(dat, axis=1)
-
             if total_usable_diffs >= min_diffs_single_pass:
                 warnings.filterwarnings("ignore", ".*All-NaN slice encountered.*", RuntimeWarning)
-                median_diffs = np.nanmedian(first_diffs, axis=(0, 1))
                 warnings.resetwarnings()
-                # calculate sigma for each pixel
-                sigma = np.sqrt(np.abs(median_diffs) + read_noise_2 / nframes)
-                # reset sigma so pixels with 0 read noise are not flagged as jumps
-                sigma[np.where(sigma == 0.)] = np.nan
                 # compute 'ratio' for each group. this is the value that will be
                 # compared to 'threshold' to classify jumps. subtract the median of
                 # first_diffs from first_diffs, take the abs. value and divide by sigma.
-                e_jump = first_diffs - median_diffs[np.newaxis, np.newaxis, :, :]
-
-                ratio = np.abs(e_jump) / sigma[np.newaxis, np.newaxis, :, :]
-                masked_ratio = np.ma.masked_greater(ratio, normal_rej_thresh)
-                # The jump mask is the ratio greater than the threshold and the difference is usable
-                jump_mask = np.logical_and(masked_ratio.mask, np.logical_not(first_diffs_masked.mask))
-                gdq[:, 1:, :, :] = np.bitwise_or(gdq[:, 1:, :, :], jump_mask *
-                                                 np.uint8(dqflags["JUMP_DET"]))
+                # The jump mask is the ratio greater than the threshold and the
+                # difference is usable.  Loop over integrations to minimize the memory
+                # footprint.
+
+                jump_mask = np.zeros(first_diffs.shape, dtype=bool)
+                for i in range(nints):
+                    jump_candidates = np.abs(first_diffs[i] - median_diffs[np.newaxis, :]) / sigma[np.newaxis, :] > normal_rej_thresh
+                    jump_mask = jump_candidates & first_diffs_finite[i]
+                    gdq[i, 1:] |= jump_mask * np.uint8(jump_flag)
+
             else:  # low number of diffs requires iterative flagging
                 # calculate the differences between adjacent groups (first diffs)
                 # use mask on data, so the results will have sat/donotuse groups masked
-                first_diffs = np.abs(np.diff(dat, axis=1))
+                first_diffs_abs = np.abs(first_diffs)

                 # calc. the median of first_diffs for each pixel along the group axis
-                median_diffs = calc_med_first_diffs(first_diffs)
+                median_diffs_abs = calc_med_first_diffs(first_diffs_abs)

                 # calculate sigma for each pixel
-                sigma = np.sqrt(np.abs(median_diffs) + read_noise_2 / nframes)
+                sigma_abs = np.sqrt(np.abs(median_diffs_abs) + read_noise_2 / nframes)
                 # reset sigma so pxels with 0 readnoise are not flagged as jumps
-                sigma[np.where(sigma == 0.0)] = np.nan
+                sigma_abs[np.where(sigma_abs == 0.0)] = np.nan

                 # compute 'ratio' for each group. this is the value that will be
                 # compared to 'threshold' to classify jumps. subtract the median of
                 # first_diffs from first_diffs, take the abs. value and divide by sigma.
-                e_jump = first_diffs - median_diffs[np.newaxis, :, :]
-                ratio = np.abs(e_jump) / sigma[np.newaxis, :, :]
+                e_jump = first_diffs_abs - median_diffs_abs[np.newaxis, :, :]
+                ratio = np.abs(e_jump) / sigma_abs[np.newaxis, :, :]

                 # create a 2d array containing the value of the largest 'ratio' for each pixel
                 warnings.filterwarnings("ignore", ".*All-NaN slice encountered.*", RuntimeWarning)
@@ -284,7 +268,7 @@ def find_crs(
                 warnings.resetwarnings()
                 # now see if the largest ratio of all groups for each pixel exceeds the threshold.
                 # there are different threshold for 4+, 3, and 2 usable groups
-                num_unusable_groups = np.sum(np.isnan(first_diffs), axis=(0, 1))
+                num_unusable_groups = np.sum(np.isnan(first_diffs_abs), axis=(0, 1))
                 int4cr, row4cr, col4cr = np.where(
                     np.logical_and(ndiffs - num_unusable_groups >= 4, max_ratio > normal_rej_thresh)
                 )
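Reviewer note, illustrative sketch and not part of the patch: the single-pass branch above now evaluates the ratio test one integration at a time instead of materializing a full 4-D ratio array; each 3-D boolean mask is OR-ed into the group DQ and then dropped. The pattern in isolation, with toy shapes, an illustrative flag value standing in for `jump_flag`, and a constant standing in for the readnoise term:

    import numpy as np

    JUMP = 4                                         # illustrative flag value
    nints, ngroups, nrows, ncols = 2, 6, 4, 4
    thresh = 4.0

    diffs = np.random.rand(nints, ngroups - 1, nrows, ncols).astype(np.float32)
    gdq = np.zeros((nints, ngroups, nrows, ncols), dtype=np.uint8)
    median_diffs = np.nanmedian(diffs.reshape((-1, nrows, ncols)), axis=0)
    sigma = np.sqrt(np.abs(median_diffs) + 0.01)     # 0.01 stands in for readnoise**2 / nframes

    for i in range(nints):                           # one integration at a time
        ratio = np.abs(diffs[i] - median_diffs[np.newaxis, :]) / sigma[np.newaxis, :]
        mask = np.isfinite(diffs[i]) & (ratio > thresh)
        gdq[i, 1:] |= mask * np.uint8(JUMP)          # diff k belongs to group k + 1
    print(int(np.sum((gdq & JUMP) == JUMP)), "group flags set")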
@@ -306,7 +290,7 @@ def find_crs(
                 # repeat this process until no more CRs are found.
                 for j in range(len(all_crs_row)):
                     # get arrays of abs(diffs), ratio, readnoise for this pixel
-                    pix_first_diffs = first_diffs[:, :, all_crs_row[j], all_crs_col[j]]
+                    pix_first_diffs = first_diffs_abs[:, :, all_crs_row[j], all_crs_col[j]]
                     pix_ratio = ratio[:, :, all_crs_row[j], all_crs_col[j]]
                     pix_rn2 = read_noise_2[all_crs_row[j], all_crs_col[j]]
@@ -354,89 +338,64 @@ def find_crs(
                             gdq[:, 1:, all_crs_row[j], all_crs_col[j]],
                             dqflags["JUMP_DET"] * np.invert(pix_cr_mask),
                         )
-    cr_integ, cr_group, cr_row, cr_col = np.where(np.bitwise_and(gdq, jump_flag))
-    num_primary_crs = len(cr_group)
-    if flag_4_neighbors:  # iterate over each 'jump' pixel
-        for j in range(len(cr_group)):
-            ratio_this_pix = ratio_all[cr_integ[j], cr_group[j] - 1, cr_row[j], cr_col[j]]
-
-            # Jumps must be in a certain range to have neighbors flagged
-            if (ratio_this_pix < max_jump_to_flag_neighbors) and (
-                ratio_this_pix > min_jump_to_flag_neighbors
-            ):
-                integ = cr_integ[j]
-                group = cr_group[j]
-                row = cr_row[j]
-                col = cr_col[j]
-
-                # This section saves flagged neighbors that are above or
-                # below the current range of row. If this method
-                # running in a single process, the row above and below are
-                # not used. If it is running in multiprocessing mode, then
-                # the rows above and below need to be returned to
-                # find_jumps to use when it reconstructs the full group dq
-                # array from the slices.
-
-                # Only flag adjacent pixels if they do not already have the
-                # 'SATURATION' or 'DONOTUSE' flag set
-                if row != 0:
-                    if (gdq[integ, group, row - 1, col] & sat_flag) == 0 and (
-                        gdq[integ, group, row - 1, col] & dnu_flag
-                    ) == 0:
-                        gdq[integ, group, row - 1, col] = np.bitwise_or(
-                            gdq[integ, group, row - 1, col], jump_flag
-                        )
-                else:
-                    row_below_gdq[integ, cr_group[j], cr_col[j]] = jump_flag
-
-                if row != nrows - 1:
-                    if (gdq[integ, group, row + 1, col] & sat_flag) == 0 and (
-                        gdq[integ, group, row + 1, col] & dnu_flag
-                    ) == 0:
-                        gdq[integ, group, row + 1, col] = np.bitwise_or(
-                            gdq[integ, group, row + 1, col], jump_flag
-                        )
-                else:
-                    row_above_gdq[integ, cr_group[j], cr_col[j]] = jump_flag
-
-                # Here we are just checking that we don't flag neighbors of
-                # jumps that are off the detector.
-                if (
-                    cr_col[j] != 0
-                    and (gdq[integ, group, row, col - 1] & sat_flag) == 0
-                    and (gdq[integ, group, row, col - 1] & dnu_flag) == 0
-                ):
-                    gdq[integ, group, row, col - 1] = np.bitwise_or(
-                        gdq[integ, group, row, col - 1], jump_flag
-                    )
-                if (
-                    cr_col[j] != ncols - 1
-                    and (gdq[integ, group, row, col + 1] & sat_flag) == 0
-                    and (gdq[integ, group, row, col + 1] & dnu_flag) == 0
-                ):
-                    gdq[integ, group, row, col + 1] = np.bitwise_or(
-                        gdq[integ, group, row, col + 1], jump_flag
-                    )
+    num_primary_crs = np.sum(gdq & jump_flag == jump_flag)
+
+    # Flag the four neighbors using bitwise or, shifting the reference
+    # boolean flag on pixel right, then left, then up, then down.
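Reviewer note, illustrative sketch and not part of the patch: the per-pixel neighbor loop removed above is replaced (next hunk) by OR-ing four shifted copies of a 2-D boolean mask for each group, a cross-shaped dilation in which edge handling falls out of the slicing. The shift pattern in isolation, on a toy mask:

    import numpy as np

    seed = np.zeros((5, 5), dtype=bool)
    seed[2, 2] = True                  # one pixel eligible for neighbor flagging

    flag = seed.copy()
    flag[1:, :] |= seed[:-1, :]        # neighbor in the next row
    flag[:-1, :] |= seed[1:, :]        # neighbor in the previous row
    flag[:, 1:] |= seed[:, :-1]        # neighbor in the next column
    flag[:, :-1] |= seed[:, 1:]        # neighbor in the previous column

    print(flag.astype(int))            # a plus-shaped patch of ones centered on (2, 2)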
+
+    if flag_4_neighbors:
+        for i in range(nints):
+            for j in range(ngroups - 1):
+
+                ratio = np.abs(first_diffs[i, j] - median_diffs)/sigma
+                jump_set = gdq[i, j + 1] & jump_flag != 0
+
+                flag = (ratio < max_jump_to_flag_neighbors) & \
+                    (ratio > min_jump_to_flag_neighbors) & \
+                    (jump_set)
+
+                flagsave = flag.copy()
+                flag[1:] |= flagsave[:-1]
+                flag[:-1] |= flagsave[1:]
+                flag[:, 1:] |= flagsave[:, :-1]
+                flag[:, :-1] |= flagsave[:, 1:]
+
+                sat_or_dnu_notset = gdq[i, j + 1] & (sat_flag | dnu_flag) == 0
+
+                gdq[i, j + 1][sat_or_dnu_notset & flag] |= jump_flag
+
+                row_below_gdq[i, j + 1] = np.uint8(jump_flag) * flag[0]
+                row_above_gdq[i, j + 1] = np.uint8(jump_flag) * flag[-1]
+
+    # flag n groups after jumps above the specified thresholds to
+    # account for the transient seen after ramp jumps.  Again, use
+    # boolean arrays; the propagation happens in a separate function.
-    # flag n groups after jumps above the specified thresholds to account for
-    # the transient seen after ramp jumps
-    flag_e_threshold = [after_jump_flag_e1, after_jump_flag_e2]
-    flag_groups = [after_jump_flag_n1, after_jump_flag_n2]
-    for cthres, cgroup in zip(flag_e_threshold, flag_groups):
-        if cgroup > 0:
-            cr_intg, cr_group, cr_row, cr_col = np.where(np.bitwise_and(gdq, jump_flag))
-            for j in range(len(cr_group)):
-                intg = cr_intg[j]
-                group = cr_group[j]
-                row = cr_row[j]
-                col = cr_col[j]
-                if e_jump_4d[intg, group - 1, row, col] >= cthres:
-                    for kk in range(group, min(group + cgroup + 1, ngroups)):
-                        if (gdq[intg, kk, row, col] & sat_flag) == 0 and (
-                            gdq[intg, kk, row, col] & dnu_flag
-                        ) == 0:
-                            gdq[intg, kk, row, col] = np.bitwise_or(gdq[intg, kk, row, col], jump_flag)
+    if after_jump_flag_n1 > 0 or after_jump_flag_n2 > 0:
+        for i in range(nints):
+
+            ejump = first_diffs[i] - median_diffs[np.newaxis, :]
+            jump_set = gdq[i] & jump_flag != 0
+
+            bigjump = np.zeros(jump_set.shape, dtype=bool)
+            verybigjump = np.zeros(jump_set.shape, dtype=bool)
+
+            bigjump[1:] = (ejump >= after_jump_flag_e1) & jump_set[1:]
+            verybigjump[1:] = (ejump >= after_jump_flag_e2) & jump_set[1:]
+
+            # Propagate flags forward
+
+            propagate_flags(bigjump, after_jump_flag_n1)
+            propagate_flags(verybigjump, after_jump_flag_n2)
+
+            # Set the flags for pixels after these jumps that are not
+            # already flagged as saturated or do not use.
+
+            sat_or_dnu_notset = gdq[i] & (sat_flag | dnu_flag) == 0
+
+            addflag = (bigjump | verybigjump) & sat_or_dnu_notset
+            gdq[i][addflag] |= jump_flag

     if "stddev" in locals():
         return gdq, row_below_gdq, row_above_gdq, num_primary_crs, stddev
@@ -445,10 +404,47 @@ def find_crs(
         dummy = np.zeros((dataa.shape[1] - 1, dataa.shape[2], dataa.shape[3]), dtype=np.float32)
     else:
         dummy = np.zeros((dataa.shape[2], dataa.shape[3]), dtype=np.float32)
-
     return gdq, row_below_gdq, row_above_gdq, num_primary_crs, dummy
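Reviewer note, illustrative sketch and not part of the patch: `propagate_flags` (added below) extends each after-jump flag forward by up to `n_groups_flag` groups by repeatedly OR-ing the cube with itself shifted one group ahead (its Option A). Expected behaviour on a one-pixel toy cube:

    import numpy as np

    # (ngroups, nrows, ncols) boolean cube with a single jump flagged at group 2
    flags = np.zeros((8, 1, 1), dtype=bool)
    flags[2, 0, 0] = True

    n_after = 3                                   # number of trailing groups to flag
    for j in range(n_after):                      # same one-group-at-a-time pattern as Option A
        flags[j + 1:] |= flags[j:-1]

    print(np.where(flags[:, 0, 0])[0])            # [2 3 4 5]: the jump plus three trailing groups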

+def propagate_flags(boolean_flag, n_groups_flag):
+    """Propagate a boolean flag array n_groups_flag groups along the first axis.
+
+    If the number of groups to propagate is not too large, or if a
+    high percentage of pixels are flagged, use boolean or on the
+    array.  Otherwise use np.where.  In both cases operate on the
+    array in-place.
+
+    Parameters
+    ----------
+    boolean_flag : 3D boolean array
+        Should be True where the flag is to be propagated.
+    n_groups_flag : int
+        Number of groups to propagate flags forward.
+
+    Returns
+    -------
+    None
+
+    """
+    ngroups = boolean_flag.shape[0]
+    jmax = min(n_groups_flag, ngroups - 2)
+
+    # Option A: iteratively propagate all flags forward by one
+    # group at a time.
+    if (jmax <= 50 and jmax > 0) or np.mean(boolean_flag) > 1e-3:
+        for j in range(jmax):
+            boolean_flag[j + 1:] |= boolean_flag[j:-1]
+
+    # Option B: find the flags and propagate them individually.
+    elif jmax > 0:
+        igrp, icol, irow = np.where(boolean_flag)
+        for j in range(len(igrp)):
+            boolean_flag[igrp[j]:igrp[j] + n_groups_flag + 1, icol[j], irow[j]] = True
+
+    return
+
+
 def calc_med_first_diffs(in_first_diffs):
     """Calculate the median of `first diffs` along the group axis.