Skip to content

Commit

Permalink
implement new RFI finding and classification algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
jsdillon committed Apr 18, 2024
1 parent 80f2c3d commit db4ab86
Showing 1 changed file with 146 additions and 72 deletions.
218 changes: 146 additions & 72 deletions notebooks/file_calibration.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
"from uvtools.plot import plot_antpos, plot_antclass\n",
"from hera_qm import ant_metrics, ant_class, xrfi\n",
"from hera_cal import io, utils, redcal, apply_cal, datacontainer, abscal\n",
"from hera_filters import dspec\n",
"from hera_notebook_templates.data import DATA_PATH as HNBT_DATA\n",
"from IPython.display import display, HTML\n",
"import linsolve\n",
Expand Down Expand Up @@ -155,7 +156,7 @@
"\n",
"# parse RFI settings\n",
"RFI_DPSS_HALFWIDTH = float(os.environ.get(\"RFI_DPSS_HALFWIDTH\", 300e-9))\n",
"RFI_NSIG = float(os.environ.get(\"RFI_NSIG\", 6))\n",
"RFI_NSIG = float(os.environ.get(\"RFI_NSIG\", 4))\n",
"\n",
"# parse abscal settings\n",
"ABSCAL_MODEL_FILES_GLOB = os.environ.get(\"ABSCAL_MODEL_FILES_GLOB\", None)\n",
Expand Down Expand Up @@ -211,8 +212,8 @@
"auto_slope_suspect = (float(os.environ.get(\"AUTO_SLOPE_SUSPECT_LOW\", -0.6)), float(os.environ.get(\"AUTO_SLOPE_SUSPECT_HIGH\", 0.6)))\n",
"\n",
"# bounds on autocorrelation RFI\n",
"auto_rfi_good = (0, float(os.environ.get(\"AUTO_RFI_GOOD\", 0.015)))\n",
"auto_rfi_suspect = (0, float(os.environ.get(\"AUTO_RFI_SUSPECT\", 0.03)))\n",
"auto_rfi_good = (0, float(os.environ.get(\"AUTO_RFI_GOOD\", 1.5)))\n",
"auto_rfi_suspect = (0, float(os.environ.get(\"AUTO_RFI_SUSPECT\", 2)))\n",
"\n",
"# bounds on autocorrelation shape\n",
"auto_shape_good = (0, float(os.environ.get(\"AUTO_SHAPE_GOOD\", 0.1)))\n",
Expand Down Expand Up @@ -380,81 +381,186 @@
"id": "102587ce",
"metadata": {},
"source": [
"### Examine and classify autocorrelation power, slope, and RFI occpancy\n",
"### Examine and classify autocorrelation power and slope\n",
"\n",
"These classifiers look for antennas with too high or low power, to steep a slope, or too much excess RFI."
"These classifiers look for antennas with too high or low power or to steep a slope."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bca07198",
"id": "ba1b4d4f-633e-46a8-a6f9-b7d600845ae0",
"metadata": {},
"outputs": [],
"source": [
"auto_power_class = ant_class.auto_power_checker(data, good=auto_power_good, suspect=auto_power_suspect)\n",
"auto_slope_class = ant_class.auto_slope_checker(data, good=auto_slope_good, suspect=auto_slope_suspect, edge_cut=100, filt_size=17)\n",
"cache = {}\n",
"auto_rfi_class = ant_class.auto_rfi_checker(data, good=auto_rfi_good, suspect=auto_rfi_suspect, \n",
" filter_half_widths=[RFI_DPSS_HALFWIDTH], nsig=RFI_NSIG, cache=cache)\n",
"auto_class = auto_power_class + auto_slope_class + auto_rfi_class\n",
"if np.all([auto_class[utils.split_bl(bl)[0]] == 'bad' for bl in auto_bls]):\n",
" raise ValueError('All antennas are flagged for bad autocorrelation power/slope/RFI.')"
"if np.all([(auto_power_class + auto_slope_class)[utils.split_bl(bl)[0]] == 'bad' for bl in auto_bls]):\n",
" raise ValueError('All antennas are flagged for bad autocorrelation power/slope.')\n",
"overall_class = auto_power_class + auto_slope_class + zeros_class + ant_metrics_class + solar_class"
]
},
{
"cell_type": "markdown",
"id": "fd79bfe5-8ca6-4d6d-87ff-740e4c3fa517",
"metadata": {},
"source": [
"### Examine and classify autocorrelation excess RFI and shape, finding consensus RFI mask along the way\n",
"\n",
"This classifier iteratively identifies antennas for excess RFI (characterized by RMS of DPSS-filtered autocorrelations after RFI flagging) and bad shape, as determined by a discrepancy with the mean good normalized autocorrelation's shape. Along the way, it iteratively discovers a conensus array-wide RFI mask."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b3e755bf",
"id": "6fdc4209-6fe0-4cb7-8564-dda570adecdf",
"metadata": {},
"outputs": [],
"source": [
"del cache\n",
"malloc_trim()"
"def auto_bl_zscores(data, flag_array, cache={}):\n",
" '''This function computes z-score arrays for each delay-filtered autocorrelation, normalized by the expected noise. \n",
" Flagged times/channels for the whole array are given 0 weight in filtering and are np.nan in the z-score.'''\n",
" zscores = {}\n",
" for bl in auto_bls:\n",
" wgts = np.array(np.logical_not(flag_array), dtype=np.float64)\n",
" model, _, _ = dspec.fourier_filter(hd.freqs, data[bl], wgts, filter_centers=[0], filter_half_widths=[RFI_DPSS_HALFWIDTH], mode='dpss_solve',\n",
" suppression_factors=[1e-9], eigenval_cutoff=[1e-9], cache=cache)\n",
" res = data[bl] - model\n",
" int_time = 24 * 3600 * np.median(np.diff(data.times))\n",
" chan_res = np.median(np.diff(data.freqs))\n",
" int_count = int(int_time * chan_res)\n",
" sigma = np.abs(model) / np.sqrt(int_count / 2)\n",
" zscores[bl] = res / sigma \n",
" zscores[bl][flag_array] = np.nan\n",
"\n",
" return zscores"
]
},
{
"cell_type": "markdown",
"id": "9464e41e",
"cell_type": "code",
"execution_count": null,
"id": "4d7f374a-8936-43b2-b521-cc68a256a932",
"metadata": {},
"outputs": [],
"source": [
"### Find and flag RFI"
"def rfi_from_avg_autos(data, auto_bls_to_use, prior_flags=None, nsig=RFI_NSIG):\n",
" '''Average together all baselines in auto_bls_to_use, then find an RFI mask by looking for outliers after DPSS filtering.'''\n",
" \n",
" # Compute int_count for all unflagged autocorrelations averaged together\n",
" int_time = 24 * 3600 * np.median(np.diff(data.times_by_bl[auto_bls[0][0:2]]))\n",
" chan_res = np.median(np.diff(data.freqs))\n",
" int_count = int(int_time * chan_res) * len(auto_bls_to_use)\n",
" avg_auto = {(-1, -1, 'ee'): np.mean([data[bl] for bl in auto_bls_to_use], axis=0)}\n",
" \n",
" # Flag RFI first with channel differences and then with DPSS\n",
" antenna_flags, _ = xrfi.flag_autos(avg_auto, int_count=int_count, nsig=(RFI_NSIG * 5))\n",
" if prior_flags is not None:\n",
" antenna_flags[(-1, -1, 'ee')] = prior_flags\n",
" _, rfi_flags = xrfi.flag_autos(avg_auto, int_count=int_count, flag_method='dpss_flagger',\n",
" flags=antenna_flags, freqs=data.freqs, filter_centers=[0],\n",
" filter_half_widths=[RFI_DPSS_HALFWIDTH], eigenval_cutoff=[1e-9], nsig=nsig)\n",
"\n",
" return rfi_flags"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c321e1d7",
"id": "3252ed36-41e8-4309-be23-80aeb8b0c234",
"metadata": {},
"outputs": [],
"source": [
"# Compute int_count for all unflagged autocorrelations averaged together\n",
"int_time = 24 * 3600 * np.median(np.diff(data.times_by_bl[auto_bls[0][0:2]]))\n",
"chan_res = np.median(np.diff(data.freqs))\n",
"final_class = ant_metrics_class + zeros_class + auto_class\n",
"if np.all([final_class[utils.split_bl(bl)[0]] == 'bad' for bl in auto_bls]):\n",
" raise ValueError('All antennas are flagged for ant_metrics, zeros, or autos.') \n",
"int_count = int(int_time * chan_res) * (len(final_class.good_ants) + len(final_class.suspect_ants))\n",
"avg_auto = {(-1, -1, 'ee'): np.mean([data[bl] for bl in auto_bls if final_class[utils.split_bl(bl)[0]] != 'bad'], axis=0)}\n",
"# Flag RFI first with channel differences and then with DPSS\n",
"antenna_flags, _ = xrfi.flag_autos(avg_auto, int_count=int_count, nsig=(RFI_NSIG * 5))\n",
"_, rfi_flags = xrfi.flag_autos(avg_auto, int_count=int_count, flag_method='dpss_flagger',\n",
" flags=antenna_flags, freqs=data.freqs, filter_centers=[0],\n",
" filter_half_widths=[RFI_DPSS_HALFWIDTH], eigenval_cutoff=[1e-9], nsig=RFI_NSIG)\n",
"malloc_trim()"
"# Find starting set of array flags\n",
"antenna_flags, array_flags = xrfi.flag_autos(data, flag_method=\"channel_diff_flagger\", nsig=RFI_NSIG * 5, \n",
" antenna_class=overall_class, flag_broadcast_thresh=.5)\n",
"for key in antenna_flags:\n",
" antenna_flags[key] = array_flags\n",
"cache = {}\n",
"_, array_flags = xrfi.flag_autos(data, freqs=data.freqs, flag_method=\"dpss_flagger\",\n",
" nsig=RFI_NSIG, antenna_class=overall_class,\n",
" filter_centers=[0], filter_half_widths=[RFI_DPSS_HALFWIDTH],\n",
" eigenval_cutoff=[1e-9], flags=antenna_flags, mode='dpss_matrix', \n",
" cache=cache, flag_broadcast_thresh=.5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "75531f17-befd-41f1-aaf5-20ee2a18cacb",
"metadata": {},
"outputs": [],
"source": [
"# Iteratively develop RFI mask, excess RFI classification, and autocorrelation shape classification\n",
"stage = 1\n",
"rfi_flags = np.array(array_flags)\n",
"while True:\n",
" # compute DPSS-filtered z-scores with current array-wide RFI mask\n",
" zscores = auto_bl_zscores(data, rfi_flags)\n",
" rms = {bl: np.nanmean(zscores[bl]**2)**.5 if np.any(np.isfinite(zscores[bl])) else np.inf for bl in zscores}\n",
" \n",
" # figure out which autos to use for finding new set of flags\n",
" candidate_autos = [bl for bl in auto_bls if overall_class[utils.split_bl(bl)[0]] != 'bad']\n",
" if stage == 1:\n",
" # use best half of the unflagged antennas\n",
" med_rms = np.nanmedian([rms[bl] for bl in candidate_autos])\n",
" autos_to_use = [bl for bl in candidate_autos if rms[bl] <= med_rms]\n",
" stage += 1 # advance to stage 2\n",
" elif stage == 2:\n",
" # use all unflagged antennas which are auto RFI good, or the best half, whichever is larger\n",
" med_rms = np.nanmedian([rms[bl] for bl in candidate_autos])\n",
" best_half_autos = [bl for bl in candidate_autos if rms[bl] <= med_rms]\n",
" good_autos = [bl for bl in candidate_autos if (overall_class[utils.split_bl(bl)[0]] != 'bad')\n",
" and (auto_rfi_class[utils.split_bl(bl)[0]] == 'good')]\n",
" autos_to_use = (best_half_autos if len(best_half_autos) > len(good_autos) else good_autos)\n",
" stage += 1 # advance to stage 3\n",
" elif stage == 3:\n",
" # use all unflagged antennas which are auto RFI good or suspect\n",
" autos_to_use = [bl for bl in candidate_autos if (overall_class[utils.split_bl(bl)[0]] != 'bad')]\n",
"\n",
" # compute new RFI flags\n",
" rfi_flags = rfi_from_avg_autos(data, autos_to_use)\n",
"\n",
" # perform auto shape and RFI classification\n",
" overall_class = auto_power_class + auto_slope_class + zeros_class + ant_metrics_class + solar_class\n",
" auto_rfi_class = ant_class.antenna_bounds_checker(rms, good=auto_rfi_good, suspect=auto_rfi_suspect, bad=(0, np.inf))\n",
" overall_class += auto_rfi_class\n",
" auto_shape_class = ant_class.auto_shape_checker(data, good=auto_shape_good, suspect=auto_shape_suspect,\n",
" flag_spectrum=np.sum(rfi_flags, axis=0).astype(bool), \n",
" antenna_class=overall_class)\n",
" overall_class += auto_shape_class\n",
" \n",
" # check for convergence on flagged antennas and channels \n",
" if stage == 3:\n",
" if (len(overall_class.bad_ants) == n_current_bad_ants) and (np.sum(rfi_flags) == current_flagged_chan_times):\n",
" break\n",
" current_flagged_chan_times = np.sum(rfi_flags)\n",
" n_current_bad_ants = len(overall_class.bad_ants)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8eae3de",
"id": "1b3f992f-402b-4fb8-aac9-4e82149cb4ad",
"metadata": {},
"outputs": [],
"source": [
"def rfi_plot():\n",
"auto_class = auto_power_class + auto_slope_class + auto_rfi_class + auto_shape_class\n",
"if np.all([auto_class[utils.split_bl(bl)[0]] == 'bad' for bl in auto_bls]):\n",
" raise ValueError('All antennas are flagged for bad autos power/slope/rfi/shape.')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b2e3af97-94bc-4ed1-abb4-1fbf03ab1b0d",
"metadata": {},
"outputs": [],
"source": [
"def rfi_plot(cls, flags=rfi_flags):\n",
" avg_auto = {(-1, -1, 'ee'): np.mean([data[bl] for bl in auto_bls if not cls[utils.split_bl(bl)[0]] == 'bad'], axis=0)}\n",
" plt.figure(figsize=(12, 5), dpi=100)\n",
" plt.semilogy(hd.freqs / 1e6, np.where(rfi_flags, np.nan, avg_auto[(-1, -1, 'ee')])[1], label = 'Average Good or Suspect Autocorrelation', zorder=100)\n",
" plt.semilogy(hd.freqs / 1e6, np.where(False, np.nan, avg_auto[(-1, -1, 'ee')])[1], 'r', lw=.5, label=f'{np.sum(rfi_flags[0])} Channels Flagged for RFI')\n",
" plt.semilogy(hd.freqs / 1e6, np.where(flags, np.nan, avg_auto[(-1, -1, 'ee')])[0], label = 'Average Good or Suspect Autocorrelation', zorder=100)\n",
" plt.semilogy(hd.freqs / 1e6, np.where(False, np.nan, avg_auto[(-1, -1, 'ee')])[0], 'r', lw=.5, label=f'{np.sum(flags[0])} Channels Flagged for RFI')\n",
" plt.legend()\n",
" plt.xlabel('Frequency (MHz)')\n",
" plt.ylabel('Uncalibrated Autocorrelation')\n",
Expand All @@ -478,7 +584,7 @@
"metadata": {},
"outputs": [],
"source": [
"rfi_plot()"
"rfi_plot(overall_class)"
]
},
{
Expand All @@ -502,45 +608,13 @@
"\n",
" axes[0].set_ylabel('Raw Autocorrelation', fontsize=12)\n",
" axes[1].legend([matplotlib.lines.Line2D([0], [0], color=color) for color in colors], \n",
" [cls.capitalize() for cls in auto_class.quality_classes], ncol=1, fontsize=12, loc='upper right', framealpha=1)\n",
" [cl.capitalize() for cl in cls.quality_classes], ncol=1, fontsize=12, loc='upper right', framealpha=1)\n",
" plt.tight_layout()"
]
},
{
"cell_type": "markdown",
"id": "32f9153c",
"metadata": {},
"source": [
"### Classify antennas based on shapes, excluding RFI-contamined channels"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e005cb17",
"metadata": {
"code_folding": []
},
"outputs": [],
"source": [
"auto_shape_class = ant_class.auto_shape_checker(data, good=auto_shape_good, suspect=auto_shape_suspect,\n",
" flag_spectrum=np.sum(rfi_flags, axis=0).astype(bool), antenna_class=final_class)\n",
"auto_class += auto_shape_class"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a13a063",
"metadata": {},
"outputs": [],
"source": [
"final_class = ant_metrics_class + solar_class + zeros_class + auto_class"
]
},
{
"cell_type": "markdown",
"id": "d99ad916",
"id": "5066f32a-5831-4dce-be8a-b28fad7584dc",
"metadata": {},
"source": [
"# *Figure 2: Plot of autocorrelations with classifications*\n",
Expand Down

0 comments on commit db4ab86

Please sign in to comment.