Skip to content

Commit

Permalink
various bug fixes revealed by > vs == problem
Browse files Browse the repository at this point in the history
  • Loading branch information
jsdillon committed May 28, 2024
1 parent 23bff6b commit a895496
Showing 1 changed file with 22 additions and 17 deletions.
39 changes: 22 additions & 17 deletions notebooks/full_day_auto_checker.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"source": [
"# Full-Day Autocorrelation Checking\n",
"\n",
"**by Josh Dillon and Steven Murray**, last updated May 27, 2024\n",
"**by Josh Dillon and Steven Murray**, last updated May 28, 2024\n",
"\n",
"This notebook is designed to assess per-day data quality from just autocorrelations, enabling a quick assessment of whether the day is worth pushing through further analysis. In particular, it is designed to look for times that are particularly contaminated by broadband RFI (e.g. from lightning), picking out the fraction of the day worth analyzing. Its output is an a priori flag yaml file readable by `hera_qm.metrics_io` functions `read_a_priori_chan_flags()`, `read_a_priori_int_flags()`, and `read_a_priori_ant_flags()`.\n",
"\n",
Expand Down Expand Up @@ -79,7 +79,7 @@
"source": [
"# get filenames\n",
"AUTO_FILE = os.environ.get(\"AUTO_FILE\", None)\n",
"# AUTO_FILE = '/mnt/sn1/data1/2460457/zen.2460457.16886.sum.autos.uvh5'\n",
"# AUTO_FILE = '/mnt/sn1/data2/2460458/zen.2460458.16881.sum.uvh5'\n",
"\n",
"SUM_AUTOS_SUFFIX = os.environ.get(\"SUM_AUTOS_SUFFIX\", 'sum.autos.uvh5')\n",
"DIFF_AUTOS_SUFFIX = os.environ.get(\"DIFF_AUTOS_SUFFIX\", 'diff.autos.uvh5')\n",
Expand Down Expand Up @@ -197,18 +197,23 @@
"metadata": {},
"outputs": [],
"source": [
"def auto_bl_zscores(data, flag_array, cache={}):\n",
"def auto_bl_zscores(data, flag_array, prior_class=None, cache={}):\n",
" '''This function computes z-score arrays for each delay-filtered autocorrelation, normalized by the expected noise. \n",
" Flagged times/channels for the whole array are given 0 weight in filtering and are np.nan in the z-score.'''\n",
" zscores = {}\n",
" for bl in auto_bls:\n",
" int_time = 24 * 3600 * np.median(np.diff(data.times))\n",
" chan_res = np.median(np.diff(data.freqs))\n",
" int_count = int(int_time * chan_res) \n",
" for bl in data:\n",
" if utils.split_bl(bl)[0] != utils.split_bl(bl)[1]:\n",
" continue\n",
" if prior_class is not None:\n",
" if (prior_class[utils.split_bl(bl)[0]] == 'bad'):\n",
" continue\n",
" wgts = np.array(np.logical_not(flag_array), dtype=np.float64)\n",
" model, _, _ = dspec.fourier_filter(hd.freqs, data[bl], wgts, filter_centers=[0], filter_half_widths=[RFI_DPSS_HALFWIDTH], mode='dpss_solve',\n",
" model, _, _ = dspec.fourier_filter(data.freqs, data[bl], wgts, filter_centers=[0], filter_half_widths=[RFI_DPSS_HALFWIDTH], mode='dpss_solve',\n",
" suppression_factors=[1e-9], eigenval_cutoff=[1e-9], cache=cache)\n",
" res = data[bl] - model\n",
" int_time = 24 * 3600 * np.median(np.diff(data.times))\n",
" chan_res = np.median(np.diff(data.freqs))\n",
" int_count = int(int_time * chan_res)\n",
" sigma = np.abs(model) / np.sqrt(int_count / 2)\n",
" zscores[bl] = res / sigma \n",
" zscores[bl][flag_array] = np.nan\n",
Expand All @@ -227,7 +232,7 @@
" '''Average together all baselines in auto_bls_to_use, then find an RFI mask by looking for outliers after DPSS filtering.'''\n",
" \n",
" # Compute int_count for all unflagged autocorrelations averaged together\n",
" int_time = 24 * 3600 * np.median(np.diff(data.times_by_bl[auto_bls[0][0:2]]))\n",
" int_time = 24 * 3600 * np.median(np.diff(data.times))\n",
" chan_res = np.median(np.diff(data.freqs))\n",
" int_count = int(int_time * chan_res) * len(auto_bls_to_use)\n",
" avg_auto = {(-1, -1, 'ee'): np.mean([data[bl] for bl in auto_bls_to_use], axis=0)}\n",
Expand Down Expand Up @@ -266,15 +271,14 @@
" auto_slope_class = ant_class.auto_slope_checker(sum_autos, good=auto_slope_good, suspect=auto_slope_suspect, edge_cut=100, filt_size=17) \n",
" \n",
" overall_class = zeros_class + auto_power_class + auto_slope_class\n",
" if len(overall_class.good_ants) + len(overall_class.suspect_ants) > 0:\n",
" if (len(overall_class.good_ants) + len(overall_class.suspect_ants)) == 0:\n",
" return overall_class, np.ones((len(sum_autos.times), len(sum_autos.freqs)), dtype=bool)\n",
" \n",
" # find initial set of flags\n",
" antenna_flags, array_flags = xrfi.flag_autos(sum_autos, flag_method=\"channel_diff_flagger\", nsig=RFI_NSIG * 5, \n",
" antenna_class=overall_class, flag_broadcast_thresh=.5)\n",
" for key in antenna_flags:\n",
" antenna_flags[key] = array_flags\n",
" cache = {}\n",
" _, array_flags = xrfi.flag_autos(sum_autos, freqs=sum_autos.freqs, flag_method=\"dpss_flagger\",\n",
" nsig=RFI_NSIG, antenna_class=overall_class,\n",
" filter_centers=[0], filter_half_widths=[RFI_DPSS_HALFWIDTH],\n",
Expand All @@ -284,11 +288,11 @@
" # check for non-noiselike x-engine diffs\n",
" xengine_diff_class = ant_class.non_noiselike_diff_by_xengine_checker(sum_autos, diff_autos, flag_waterfall=array_flags, \n",
" antenna_class=overall_class, \n",
" xengine_chans=96, bad_xengine_zcut=bad_xengine_zcut)\n",
" xengine_chans=96, bad_xengine_zcut=BAD_XENGINE_ZCUT)\n",
"\n",
" # update overall_class and return if all antennas are bad\n",
" overall_class += xengine_diff_class\n",
" if len(overall_class.good_ants) + len(overall_class.suspect_ants) > 0:\n",
" if (len(overall_class.good_ants) + len(overall_class.suspect_ants)) == 0:\n",
" return overall_class, np.ones((len(sum_autos.times), len(sum_autos.freqs)), dtype=bool)\n",
" \n",
" \n",
Expand All @@ -298,11 +302,12 @@
" prior_end_states = set()\n",
" while True:\n",
" # compute DPSS-filtered z-scores with current array-wide RFI mask\n",
" zscores = auto_bl_zscores(sum_autos, rfi_flags)\n",
" zscores = auto_bl_zscores(sum_autos, rfi_flags, cache=cache,\n",
" prior_class = auto_power_class + auto_slope_class + zeros_class + xengine_diff_class)\n",
" rms = {bl: np.nanmean(zscores[bl]**2)**.5 if np.any(np.isfinite(zscores[bl])) else np.inf for bl in zscores}\n",
"\n",
" # figure out which autos to use for finding new set of flags\n",
" candidate_autos = [bl for bl in auto_bls if overall_class[utils.split_bl(bl)[0]] != 'bad']\n",
" candidate_autos = [bl for bl in sum_autos if overall_class[utils.split_bl(bl)[0]] != 'bad']\n",
" if stage == 1:\n",
" # use best half of the unflagged antennas\n",
" med_rms = np.nanmedian([rms[bl] for bl in candidate_autos])\n",
Expand Down Expand Up @@ -339,10 +344,10 @@
" stage += 1 \n",
" \n",
" # return all flagged if all antennnas are bad, otherwise return overall class and rfi_flags\n",
" if len(overall_class.good_ants) + len(overall_class.suspect_ants) > 0:\n",
" if (len(overall_class.good_ants) + len(overall_class.suspect_ants)) == 0:\n",
" return overall_class, np.ones((len(sum_autos.times), len(sum_autos.freqs)), dtype=bool)\n",
" else:\n",
" overall_class, rfi_flags\n",
" return overall_class, rfi_flags\n",
" "
]
},
Expand Down

0 comments on commit a895496

Please sign in to comment.