Skip to content

Commit

Permalink
various bug fixes revealed by > vs == problem
Browse files Browse the repository at this point in the history
  • Loading branch information
jsdillon committed May 28, 2024
1 parent 23bff6b commit a895496
Showing 1 changed file with 22 additions and 17 deletions.
39 changes: 22 additions & 17 deletions notebooks/full_day_auto_checker.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"source": [
"# Full-Day Autocorrelation Checking\n",
"\n",
"**by Josh Dillon and Steven Murray**, last updated May 27, 2024\n",
"**by Josh Dillon and Steven Murray**, last updated May 28, 2024\n",
"\n",
"This notebook is designed to assess per-day data quality from just autocorrelations, enabling a quick assessment of whether the day is worth pushing through further analysis. In particular, it is designed to look for times that are particularly contaminated by broadband RFI (e.g. from lightning), picking out the fraction of the day worth analyzing. Its output is an a priori flag yaml file readable by `hera_qm.metrics_io` functions `read_a_priori_chan_flags()`, `read_a_priori_int_flags()`, and `read_a_priori_ant_flags()`.\n",
"\n",
Expand Down Expand Up @@ -79,7 +79,7 @@
"source": [
"# get filenames\n",
"AUTO_FILE = os.environ.get(\"AUTO_FILE\", None)\n",
"# AUTO_FILE = '/mnt/sn1/data1/2460457/zen.2460457.16886.sum.autos.uvh5'\n",
"# AUTO_FILE = '/mnt/sn1/data2/2460458/zen.2460458.16881.sum.uvh5'\n",
"\n",
"SUM_AUTOS_SUFFIX = os.environ.get(\"SUM_AUTOS_SUFFIX\", 'sum.autos.uvh5')\n",
"DIFF_AUTOS_SUFFIX = os.environ.get(\"DIFF_AUTOS_SUFFIX\", 'diff.autos.uvh5')\n",
Expand Down Expand Up @@ -197,18 +197,23 @@
"metadata": {},
"outputs": [],
"source": [
"def auto_bl_zscores(data, flag_array, cache={}):\n",
"def auto_bl_zscores(data, flag_array, prior_class=None, cache={}):\n",
" '''This function computes z-score arrays for each delay-filtered autocorrelation, normalized by the expected noise. \n",
" Flagged times/channels for the whole array are given 0 weight in filtering and are np.nan in the z-score.'''\n",
" zscores = {}\n",
" for bl in auto_bls:\n",
" int_time = 24 * 3600 * np.median(np.diff(data.times))\n",
" chan_res = np.median(np.diff(data.freqs))\n",
" int_count = int(int_time * chan_res) \n",
" for bl in data:\n",
" if utils.split_bl(bl)[0] != utils.split_bl(bl)[1]:\n",
" continue\n",
" if prior_class is not None:\n",
" if (prior_class[utils.split_bl(bl)[0]] == 'bad'):\n",
" continue\n",
" wgts = np.array(np.logical_not(flag_array), dtype=np.float64)\n",
" model, _, _ = dspec.fourier_filter(hd.freqs, data[bl], wgts, filter_centers=[0], filter_half_widths=[RFI_DPSS_HALFWIDTH], mode='dpss_solve',\n",
" model, _, _ = dspec.fourier_filter(data.freqs, data[bl], wgts, filter_centers=[0], filter_half_widths=[RFI_DPSS_HALFWIDTH], mode='dpss_solve',\n",
" suppression_factors=[1e-9], eigenval_cutoff=[1e-9], cache=cache)\n",
" res = data[bl] - model\n",
" int_time = 24 * 3600 * np.median(np.diff(data.times))\n",
" chan_res = np.median(np.diff(data.freqs))\n",
" int_count = int(int_time * chan_res)\n",
" sigma = np.abs(model) / np.sqrt(int_count / 2)\n",
" zscores[bl] = res / sigma \n",
" zscores[bl][flag_array] = np.nan\n",
Expand All @@ -227,7 +232,7 @@
" '''Average together all baselines in auto_bls_to_use, then find an RFI mask by looking for outliers after DPSS filtering.'''\n",
" \n",
" # Compute int_count for all unflagged autocorrelations averaged together\n",
" int_time = 24 * 3600 * np.median(np.diff(data.times_by_bl[auto_bls[0][0:2]]))\n",
" int_time = 24 * 3600 * np.median(np.diff(data.times))\n",
" chan_res = np.median(np.diff(data.freqs))\n",
" int_count = int(int_time * chan_res) * len(auto_bls_to_use)\n",
" avg_auto = {(-1, -1, 'ee'): np.mean([data[bl] for bl in auto_bls_to_use], axis=0)}\n",
Expand Down Expand Up @@ -266,15 +271,14 @@
" auto_slope_class = ant_class.auto_slope_checker(sum_autos, good=auto_slope_good, suspect=auto_slope_suspect, edge_cut=100, filt_size=17) \n",
" \n",
" overall_class = zeros_class + auto_power_class + auto_slope_class\n",
" if len(overall_class.good_ants) + len(overall_class.suspect_ants) > 0:\n",
" if (len(overall_class.good_ants) + len(overall_class.suspect_ants)) == 0:\n",
" return overall_class, np.ones((len(sum_autos.times), len(sum_autos.freqs)), dtype=bool)\n",
" \n",
" # find initial set of flags\n",
" antenna_flags, array_flags = xrfi.flag_autos(sum_autos, flag_method=\"channel_diff_flagger\", nsig=RFI_NSIG * 5, \n",
" antenna_class=overall_class, flag_broadcast_thresh=.5)\n",
" for key in antenna_flags:\n",
" antenna_flags[key] = array_flags\n",
" cache = {}\n",
" _, array_flags = xrfi.flag_autos(sum_autos, freqs=sum_autos.freqs, flag_method=\"dpss_flagger\",\n",
" nsig=RFI_NSIG, antenna_class=overall_class,\n",
" filter_centers=[0], filter_half_widths=[RFI_DPSS_HALFWIDTH],\n",
Expand All @@ -284,11 +288,11 @@
" # check for non-noiselike x-engine diffs\n",
" xengine_diff_class = ant_class.non_noiselike_diff_by_xengine_checker(sum_autos, diff_autos, flag_waterfall=array_flags, \n",
" antenna_class=overall_class, \n",
" xengine_chans=96, bad_xengine_zcut=bad_xengine_zcut)\n",
" xengine_chans=96, bad_xengine_zcut=BAD_XENGINE_ZCUT)\n",
"\n",
" # update overall_class and return if all antennas are bad\n",
" overall_class += xengine_diff_class\n",
" if len(overall_class.good_ants) + len(overall_class.suspect_ants) > 0:\n",
" if (len(overall_class.good_ants) + len(overall_class.suspect_ants)) == 0:\n",
" return overall_class, np.ones((len(sum_autos.times), len(sum_autos.freqs)), dtype=bool)\n",
" \n",
" \n",
Expand All @@ -298,11 +302,12 @@
" prior_end_states = set()\n",
" while True:\n",
" # compute DPSS-filtered z-scores with current array-wide RFI mask\n",
" zscores = auto_bl_zscores(sum_autos, rfi_flags)\n",
" zscores = auto_bl_zscores(sum_autos, rfi_flags, cache=cache,\n",
" prior_class = auto_power_class + auto_slope_class + zeros_class + xengine_diff_class)\n",
" rms = {bl: np.nanmean(zscores[bl]**2)**.5 if np.any(np.isfinite(zscores[bl])) else np.inf for bl in zscores}\n",
"\n",
" # figure out which autos to use for finding new set of flags\n",
" candidate_autos = [bl for bl in auto_bls if overall_class[utils.split_bl(bl)[0]] != 'bad']\n",
" candidate_autos = [bl for bl in sum_autos if overall_class[utils.split_bl(bl)[0]] != 'bad']\n",
" if stage == 1:\n",
" # use best half of the unflagged antennas\n",
" med_rms = np.nanmedian([rms[bl] for bl in candidate_autos])\n",
Expand Down Expand Up @@ -339,10 +344,10 @@
" stage += 1 \n",
" \n",
" # return all flagged if all antennnas are bad, otherwise return overall class and rfi_flags\n",
" if len(overall_class.good_ants) + len(overall_class.suspect_ants) > 0:\n",
" if (len(overall_class.good_ants) + len(overall_class.suspect_ants)) == 0:\n",
" return overall_class, np.ones((len(sum_autos.times), len(sum_autos.freqs)), dtype=bool)\n",
" else:\n",
" overall_class, rfi_flags\n",
" return overall_class, rfi_flags\n",
" "
]
},
Expand Down

0 comments on commit a895496

Please sign in to comment.