diff --git a/hera_notebook_templates/notebooks/lststack.ipynb b/hera_notebook_templates/notebooks/lststack.ipynb index 827a2c1..b18e0b9 100644 --- a/hera_notebook_templates/notebooks/lststack.ipynb +++ b/hera_notebook_templates/notebooks/lststack.ipynb @@ -24,7 +24,7 @@ "id": "543efff3", "metadata": {}, "source": [ - "**by Steven Murray, Tyler Cox and Josh Dillon**, last updated 4th Jun, 2024." + "**by Steven Murray, Tyler Cox and Josh Dillon**, last updated 31st July, 2024." ] }, { @@ -144,7 +144,7 @@ "do_extra_flagging: bool = False\n", "do_lstcal: bool = True\n", "\n", - "outdir: str = \".\"\n", + "outdir: str = Path(\"~/lststack-outputs\").expanduser()\n", "bl_chunk_size: int = 0\n", "rephase: bool = True\n", "fname_format: str = '{inpaint_mode}/zen.{kind}.{lst:7.5f}.{blchunk}.sum.uvh5'\n", @@ -673,10 +673,10 @@ "source": [ "all_ee = lambda bl: bl[2] == 'ee'\n", "all_nn = lambda bl: bl[2] == 'nn'\n", - "short_bls = lambda bl: getbllen(bl[0], bl[1])<=60.0\n", - "long_bls = lambda bl: getbllen(bl[0], bl[1])>60.0\n", - "intersector_bls = lambda bl: sectors[bl[0]] != sectors[bl[1]]\n", - "intrasector_bls = lambda bl: sectors[bl[0]] == sectors[bl[1]]\n", + "short_bls = lambda bl: getbllen(bl[0], bl[1])<=60.0 and len(set(bl[2]))==1\n", + "long_bls = lambda bl: getbllen(bl[0], bl[1])>60.0 and len(set(bl[2]))==1\n", + "intersector_bls = lambda bl: sectors[bl[0]] != sectors[bl[1]] and len(set(bl[2]))==1\n", + "intrasector_bls = lambda bl: sectors[bl[0]] == sectors[bl[1]] and len(set(bl[2]))==1\n", "\n", "subsets = {\n", " 'all': lambda bl: True,\n", @@ -924,7 +924,7 @@ " \n", " fig, ax = plt.subplots(\n", " len(stackconf.autopairs)*len(stackconf.pols), len(auto_stacks), \n", - " sharex=True, sharey=True, squeeze=False, constrained_layout=True,\n", + " sharex=True, squeeze=False, constrained_layout=True,\n", " figsize=(12, 6)\n", " )\n", "\n", @@ -933,13 +933,12 @@ " for p, pol in enumerate(stackconf.pols):\n", " axx = ax[j*len(stackconf.pols) + p, i]\n", " \n", - " for k, t in enumerate(stack.time_array[::stack.Nbls]):\n", + " for k, t in enumerate(stack.times):\n", " flg = stack.get_flags(autopair + (pol,))[k]\n", " d = stack.get_data(autopair+(pol,))[k]\n", - " \n", " axx.plot(\n", " stack.freq_array / 1e6,\n", - " np.where(flg, np.nan, d.real),\n", + " np.where(flg, np.nan, np.abs(d)),\n", " label=f\"{int(t)}\" if not p else None,\n", " **styles[int(t)]\n", " )\n", @@ -949,7 +948,7 @@ " # plot the mean\n", " axx.plot(\n", " stack.freq_array / 1e6,\n", - " np.where(avg['flags'][j, :, p], np.nan, avg['data'][j, :, p].real),\n", + " np.where(avg['flags'][j, :, p], np.nan, np.abs(avg['data'][j, :, p])),\n", " label='LSTBIN',\n", " color='k', lw=2\n", " )\n", @@ -1132,6 +1131,72 @@ " original_data_mean = [lstavg['data'].copy() for lstavg in cross_lstavg]" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "13f99665-3f4b-4e13-b306-6836d64f89b1", + "metadata": {}, + "outputs": [], + "source": [ + "# from copy import deepcopy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d877cd3b-0a15-46da-bbd8-f89103abf1ce", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# FILLED_CACHE = deepcopy(_INPAINT_CACHE_)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0fead3f-be97-4ae1-9bfc-d0315f323a98", + "metadata": {}, + "outputs": [], + "source": [ + "# from hera_cal.lst_stack import LSTStack" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96cbe153-186e-42ad-82c6-4b82d63da2c3", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# smallstack = LSTStack(stack.select(bls=stack.antpairs[:30], inplace=False))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a9e0899-6786-40ea-a44e-51b693ed6b94", + "metadata": {}, + "outputs": [], + "source": [ + "# %load_ext line_profiler" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "293728f0-2894-4404-8bf9-4739a0b0441b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# %lprun -f avg.average_and_inpaint_simultaneously_single_bl avg.average_and_inpaint_simultaneously(smallstack, auto_stacks[0], inpaint_bands = inpaint_bands, return_models = make_plots, cache = {}, filter_properties = {\"min_dly\": inpaint_mindelay, \"horizon\": inpaint_horizon, \"standoff\": inpaint_standoff},eigenval_cutoff=[inpaint_eigencutoff], max_gap_factor=inpaint_max_gap_factor,max_convolved_flag_frac=inpaint_max_convolved_flag_frac,use_unbiased_estimator=inpaint_use_unbiased_estimator,sample_cov_fraction=inpaint_sample_cov_fraction)\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -1353,7 +1418,6 @@ " lstbin.io.format_outfile_name,\n", " lst=stackconf.lst_grid_edges[0],\n", " pols=stackconf.pols,\n", - " #fname_format=fname_format,\n", " inpaint_mode=True,\n", " lst_branch_cut=stackconf.properties[\"lst_branch_cut\"],\n", ")" @@ -1539,9 +1603,10 @@ " dndzsq = zdist_pred.pdf(xc)\n", "\n", " for i, zsq in enumerate(zsquare):\n", - " ax[0].hist(zsq.metrics.flatten(), bins=x, label=f'LST Bin {i}', density=True, histtype='step')\n", + " for j, pol in enumerate(zsq.pols):\n", + " ax[0].hist(zsq.metrics[..., j].flatten(), bins=x, label=f\"LST Bin {i}. Pol '{pol}'\", density=True, histtype='step', lw=3 if pol[0]==pol[1] else 1, ls=['-', ':'][i], color=f'C{j}', alpha=0.7)\n", "\n", - " ax[0].plot(xc, dndzsq, label='Predicted')\n", + " ax[0].plot(xc, dndzsq, color='k', ls ='--', label='Predicted')\n", " ax[0].set_xscale('log')\n", " ax[0].set_yscale('log')\n", " ax[0].set_ylim(1e-12, 1e4)\n", @@ -1553,13 +1618,15 @@ " x = np.logspace(-5, np.log10(max(np.nanmax(zsq.metrics) for zsq in zsquare)), 100)\n", "\n", " for zsq in zsquare:\n", - " zsq = zsq.metrics[np.isfinite(zsq.metrics)]\n", - " size = zsq.size\n", - " \n", - " cdf_data = [np.sum(zsq < c)/size for c in x]\n", - " ax[1].plot(x, cdf_data)\n", + " for j, pol in enumerate(zsq.pols):\n", + " zsqm = zsq.metrics[..., j]\n", + " zsqm = zsqm[np.isfinite(zsqm)]\n", + " size = zsqm.size\n", + "\n", + " cdf_data = [np.sum(zsqm < c)/size for c in x]\n", + " ax[1].plot(x, cdf_data, lw=3 if pol[0]==pol[1] else 1, ls=['-', ':'][i], color=f'C{j}', alpha=0.7)\n", " \n", - " ax[1].plot(x, zdist_pred.cdf(x))\n", + " ax[1].plot(x, zdist_pred.cdf(x), color='k', ls='--')\n", " ax[1].set_xlabel(r\"$Z^2$\")\n", " ax[1].set_title(\"CDF of $Z^2$\")\n", " ax[1].set_xscale('log')" @@ -1625,10 +1692,10 @@ " inpaint_flags = stack.inpainted()\n", " for iap, (a, b) in enumerate(zuv.antpairs):\n", " for ipol, pol in enumerate(zuv.pols):\n", - " pol = {'ee': 0, 'nn': 1}[pol]\n", - "\n", "\n", " for night, zsqn in enumerate(zuv.metrics[:, iap, :, ipol]):\n", + " key = (lstidx, a, b, ipol, jdint)\n", + "\n", " jdint = zuv.nights[night]\n", "\n", " # Get contiguous regions of bad (|Z| > 3) data\n", @@ -1637,7 +1704,7 @@ " ranges = consecutive(badfreqs)\n", "\n", " for rng in ranges:\n", - " allbad[(lstidx, a, b, pol, jdint, rng[0], rng[1])] = zsqn[rng[0]:rng[1]]\n", + " allbad[key+(rng[0], rng[1])] = zsqn[rng[0]:rng[1]]\n", "\n", " if stackconf.inpaint_files is not None:\n", " badfreqs = np.nonzero(inpaint_flags[night, iap, :, ipol])\n", @@ -1645,7 +1712,7 @@ " ranges = consecutive(badfreqs)\n", "\n", " for rng in ranges:\n", - " allbad[(lstidx, a, b, pol, jdint, rng[0], rng[1])] = zsqn[rng[0]:rng[1]]\n", + " allbad[key+(rng[0], rng[1])] = zsqn[rng[0]:rng[1]]\n", " \n", " else:\n", " # Get contiguous regions of inpainted data (anything that is flagged outside FM)\n", @@ -1656,40 +1723,45 @@ " ranges = consecutive(badfreqs)\n", "\n", " for rng in ranges:\n", - " inpainted_regions[(lstidx, a, b, pol, jdint, rng[0], rng[1])] = zsqn[band][rng[0]:rng[1]]\n", + " inpainted_regions[key+(rng[0], rng[1])] = zsqn[band][rng[0]:rng[1]]\n", " " ] }, { "cell_type": "code", "execution_count": null, - "id": "76f321de-8fd9-41b5-a2fb-92d5ff4d94db", + "id": "61998a93-638a-4f4b-9e54-161ecc543691", "metadata": { "tags": [] }, "outputs": [], "source": [ - "chunk_lengths = [b - a for _, _, _, _, _, a, b in allbad.keys()]\n", - "if len(chunk_lengths) > 0:\n", - " print(\"Biggest Frequency Chunk With |Z|>3: \", np.max(chunk_lengths))" + "if save_metric_data:\n", + " # Write out the \"bad\" data\n", + " fname = out_fname.format(kind=\"HIGHZ\")\n", + "\n", + " with h5py.File(outdir / fname, 'w') as fl:\n", + " fl['indices'] = np.array(list(allbad.keys())) # integer array\n", + " fl['zsq'] = np.concatenate(tuple(allbad.values()))\n" ] }, { "cell_type": "code", "execution_count": null, - "id": "61998a93-638a-4f4b-9e54-161ecc543691", + "id": "76f321de-8fd9-41b5-a2fb-92d5ff4d94db", "metadata": { "tags": [] }, "outputs": [], "source": [ - "if save_metric_data:\n", - " # Write out the \"bad\" data\n", - " fname = out_fname.format(kind=\"HIGHZ\")\n", + "chunk_lengths_all = [\n", + " [b - a for _, _, _, polidx, _, a, b in allbad.keys() if polidx==i]\n", + " for i in range(len(cross_stacks[0].pols))\n", + "]\n", "\n", - " with h5py.File(outdir / fname, 'w') as fl:\n", - " fl['indices'] = np.array(list(allbad.keys())) # integer array\n", - " fl['zsq'] = np.concatenate(tuple(allbad.values()))\n" + "for i, chunk_lengths in enumerate(chunk_lengths_all):\n", + " if len(chunk_lengths) > 0:\n", + " print(f\"Biggest Frequency Chunk With |Z|>3 for Pol {cross_stacks[0].pols[i]}: \", np.max(chunk_lengths))\n" ] }, { @@ -1717,13 +1789,16 @@ }, "outputs": [], "source": [ - "if make_plots and len(chunk_lengths) > 0:\n", - " plt.hist(chunk_lengths, bins=np.arange(np.min(chunk_lengths), np.max(chunk_lengths)+1))\n", - " plt.yscale('log')\n", - " plt.xlabel(\"Channel-Chunk Length with |Z|>3\")\n", - " plt.ylabel(\"Number of Occurences\");\n", - "elif make_plots:\n", - " print(\"No |Z| > 3 data found\")" + "if make_plots:\n", + " for i, chunk_lengths in enumerate(chunk_lengths_all):\n", + " if len(chunk_lengths) > 0:\n", + " plt.hist(chunk_lengths, bins=np.arange(np.min(chunk_lengths), np.max(chunk_lengths)+1), label=cross_stacks[0].pols[i], histtype='step')\n", + " plt.yscale('log')\n", + " plt.xlabel(\"Channel-Chunk Length with |Z|>3\")\n", + " plt.ylabel(\"Number of Occurences\");\n", + " elif make_plots:\n", + " print(f\"No |Z| > 3 data found for pol {cross_stacks[0].pols[i]}\")\n", + " plt.legend()" ] }, { @@ -1795,7 +1870,7 @@ " bplot = ax.boxplot(\n", " allz, positions = [i-0.3 + 0.05*n], \n", " showfliers=False, whis=(0,100), showmeans=True,\n", - " labels=[f\"chs {band[0]}-{band[1]}\" if (n==len(data_jd_ints)//2 and j==(len(subsets)-1)) else \"\"], \n", + " tick_labels=[f\"chs {band[0]}-{band[1]}\" if (n==len(data_jd_ints)//2 and j==(len(subsets)-1)) else \"\"], \n", " )\n", " bplot['boxes'][0].set_color(styles[night]['color'])\n", " bplot['boxes'][0].set_linestyle(styles[night]['ls'])\n", @@ -2047,7 +2122,7 @@ "def make_baseline_zsq_plot(zscores, stack):\n", " # TODO: need a better cmap to easily see what's \"good\" and \"bad\"\n", " \n", - " fig, axx = plt.subplots(len(bands_considered)-3, 2, sharey=True, figsize=(24, 5*(len(bands_considered)-3)), layout='constrained')\n", + " fig, axx = plt.subplots(len(bands_considered)-3, len(stackconf.pols), sharey=True, figsize=(24, 5*(len(bands_considered)-3)), layout='constrained')\n", " \n", " cmap = mpl.colors.ListedColormap([\"C0\", f\"C1\", f\"C3\"])\n", " for i, band in enumerate(bands_considered):\n", @@ -2061,17 +2136,12 @@ " uvws = stack.uvw_array[:stack.Nbls][:, :2]\n", " uvws[uvws[:, 1] < 0] *= -1\n", "\n", - " ax[0].scatter(uvws[:, 0], uvws[:, 1], c=mean_zsq[:, 0], norm=mpl.colors.LogNorm( vmin=1, vmax=1000), marker='H', s=60, cmap=cmap)\n", - " ax[0].set_title(stackconf.pols[0])\n", - " ax[0].set_aspect(\"equal\", 'datalim')\n", - " ax[0].set_xlim(-200, 200)\n", - "\n", - " cbar = ax[1].scatter(uvws[:, 0], uvws[:, 1], c=mean_zsq[:, 1], norm=mpl.colors.LogNorm( vmin=1, vmax=1000), marker='H', s=60, cmap=cmap)\n", - " ax[1].set_title(stackconf.pols[1])\n", - " ax[1].set_aspect(\"equal\", 'datalim')\n", - " ax[1].set_xlim(-200, 200)\n", - " ax[0].grid(True)\n", - " ax[1].grid(True)\n", + " for ipol, pol in enumerate(stackconf.pols):\n", + " cbar = ax[ipol].scatter(uvws[:, 0], uvws[:, 1], c=mean_zsq[:, ipol], norm=mpl.colors.LogNorm( vmin=1, vmax=1000), marker='H', s=60, cmap=cmap)\n", + " ax[ipol].set_title(pol)\n", + " ax[ipol].set_aspect(\"equal\", 'datalim')\n", + " ax[ipol].set_xlim(-200, 200)\n", + " ax[ipol].grid(True)\n", " \n", " ax[0].set_ylabel(str(band))\n", " \n", @@ -2426,21 +2496,28 @@ }, "outputs": [], "source": [ - "def get_worst_mean_over_each_band(zscores, n=1):\n", + "def get_worst_mean_over_each_band(zscores, n=1, autopols_only=True):\n", " bad_fellas = {}\n", "\n", " nights0 = [data_jd_ints.index(jd) for jd in zscores[0].time_array[::zscores[0].Nbls].astype(int)]\n", " nights1 = [data_jd_ints.index(jd) for jd in zscores[1].time_array[::zscores[1].Nbls].astype(int)]\n", + " \n", + " if autopols_only:\n", + " polidx = [i for i, p in enumerate(stackconf.pols) if len(set(p))==1]\n", + " else:\n", + " polidx = np.arange(len(stackconf.pols))\n", " \n", - " newmeans = {band: np.ones((len(zscores), len(data_jd_ints), len(stackconf.antpairs), len(stackconf.pols)))*np.nan for band in metrics['band_reduced_mean']}\n", + " npols = len(polidx)\n", + " \n", + " newmeans = {band: np.ones((len(zscores), len(data_jd_ints), len(stackconf.antpairs), npols))*np.nan for band in metrics['band_reduced_mean']}\n", " \n", " for band, zsqs in metrics['band_reduced_mean'].items():\n", " # zsqs is length(lstbins), where each is an array of shape (nights, antpairs, pols)\n", " # however, the number of nights for each lstbin could be different, so make them the same here....\n", - " newmeans[band][0, nights0] = zsqs[0]\n", - " newmeans[band][1, nights1] = zsqs[1]\n", + " newmeans[band][0, nights0] = zsqs[0][..., polidx]\n", + " newmeans[band][1, nights1] = zsqs[1][..., polidx]\n", " \n", - " lst_night_bl_pols = [(lst, jd, bl + (pol,)) for lst in range(len(zscores)) for jd in data_jd_ints for bl in stackconf.antpairs for pol in stackconf.pols]\n", + " lst_night_bl_pols = [(lst, jd, bl + (pol,)) for lst in range(len(zscores)) for jd in data_jd_ints for bl in stackconf.antpairs for j, pol in enumerate(stackconf.pols) if j in polidx]\n", " \n", " for band, zsq in newmeans.items():\n", " zsq = np.where(np.isnan(zsq.flatten()), -1, zsq.flatten())\n", @@ -2469,7 +2546,7 @@ }, "outputs": [], "source": [ - "def get_worst_mean_for_continuously_bad_stuff(zscores, n=1):\n", + "def get_worst_mean_for_continuously_bad_stuff(zscores, n=1, autopols_only=True):\n", " \n", " bad_fellas = {}\n", " \n", @@ -2484,12 +2561,17 @@ " if ch[0] <= s < ch[1]:\n", " sized[ch][k] = v\n", "\n", + " if autopols_only:\n", + " polidx = set([i for i, p in enumerate(stackconf.pols) if len(set(p))==1])\n", + " else:\n", + " polidx = set(range(len(stackconf.pols)))\n", + " \n", " for chsize, thesebads in sized.items():\n", " if not thesebads:\n", " continue\n", " \n", - " keys = list(thesebads.keys())\n", - " meanz = np.array([np.nanmean(v) for v in thesebads.values()])\n", + " keys = [k for k in thesebads.keys() if k[3] in polidx]\n", + " meanz = np.array([np.nanmean(v) for k, v in thesebads.items() if k[3] in polidx])\n", " \n", " nn = min(n, len(meanz))\n", " worst_idx = np.argpartition(meanz, -nn)[-nn:]\n", @@ -2517,25 +2599,32 @@ }, "outputs": [], "source": [ - "def get_worst_continuous_bad_zscore(zscores, n=1):\n", + "def get_worst_continuous_bad_zscore(zscores, n=1, autopols_only=True):\n", " bad_fellas = {}\n", " nights0 = [data_jd_ints.index(jd) for jd in zscores[0].time_array[::zscores[0].Nbls].astype(int)]\n", " nights1 = [data_jd_ints.index(jd) for jd in zscores[1].time_array[::zscores[1].Nbls].astype(int)]\n", " \n", " smallbands = [b for b in bands_considered if b[1] - b[0] <= 200]\n", " \n", + " if autopols_only:\n", + " polidx = [i for i, p in enumerate(stackconf.pols) if len(set(p))==1]\n", + " else:\n", + " polidx = np.arange(len(stackconf.pols))\n", + " \n", + " npols = len(polidx)\n", + "\n", " newmeans = np.ones(\n", - " (len(smallbands), len(zscores), len(data_jd_ints), len(stackconf.antpairs), len(stackconf.pols))\n", + " (len(smallbands), len(zscores), len(data_jd_ints), len(stackconf.antpairs), npols)\n", " )*np.nan\n", " \n", " for i, band in enumerate(smallbands):\n", " zsqs = metrics['band_reduced_mean'][band]\n", " # zsqs is length(lstbins), where each is an array of shape (nights, antpairs, pols)\n", " # however, the number of nights for each lstbin could be different, so make them the same here....\n", - " newmeans[i, 0, nights0] = zsqs[0]\n", - " newmeans[i, 1, nights1] = zsqs[1]\n", + " newmeans[i, 0, nights0] = zsqs[0][..., polidx]\n", + " newmeans[i, 1, nights1] = zsqs[1][..., polidx]\n", "\n", - " lst_night_bl_pols = [(lst, jd, bl + (pol,)) for lst in range(len(zscores)) for jd in data_jd_ints for bl in stackconf.antpairs for pol in stackconf.pols]\n", + " lst_night_bl_pols = [(lst, jd, bl + (pol,)) for lst in range(len(zscores)) for jd in data_jd_ints for bl in stackconf.antpairs for j, pol in enumerate(stackconf.pols) if j in polidx]\n", "\n", " zsq = np.nanmin(newmeans, axis=0)\n", " \n", @@ -2566,12 +2655,17 @@ }, "outputs": [], "source": [ - "def get_bad_inpaints(zscores, n=1):\n", + "def get_bad_inpaints(zscores, n=1, autopols_only=True):\n", " \n", " bad_fellas = {}\n", " \n", " nights = [zsq.nights.tolist() for zsq in zscores]\n", "\n", + " if autopols_only:\n", + " polidx = set([i for i, p in enumerate(stackconf.pols) if len(set(p))==1])\n", + " else:\n", + " polidx = set(range(len(stackconf.pols)))\n", + "\n", " chsizes = [(2, 5), (5, 10), (10, 20)] \n", " sized = {ch: {} for ch in chsizes}\n", " for k, v in inpainted_regions.items():\n", @@ -2586,11 +2680,11 @@ " for chsize, bads in sized.items():\n", " \n", " \n", - " keys = list(bads.keys())\n", + " keys = [k for k in bads.keys() if k[3] in polidx]\n", " \n", " meanz = np.array([\n", " np.nanmean(zscores[lst].metrics[nights[lst].index(jdint), zscores[lst].antpairs.index((a,b)), low:high, pol]) \n", - " for (lst, a, b, pol, jdint, low, high) in bads.keys()\n", + " for (lst, a, b, pol, jdint, low, high) in bads.keys() if pol in polidx\n", " ])\n", " \n", " nn = min(n, len(meanz))\n", @@ -2759,9 +2853,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "h6c", "language": "python", - "name": "python3" + "name": "h6c" }, "language_info": { "codemirror_mode": { @@ -2773,7 +2867,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.10.6" }, "toc": { "base_numbering": 1,