Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
198 changes: 150 additions & 48 deletions notebooks/cross_seg_comp_user_guide.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,24 @@
"from scanpy import read_h5ad\n",
"from IPython.display import HTML\n",
"\n",
"barcode = '1370519421'\n",
"seg_name_a = 'XEN'\n",
"seg_name_b = 'SIS'\n",
"base_path = '/allen/programs/celltypes/workgroups/hct/emilyg/mapping/xen_seg_comp/'\n",
"anndata_a = read_h5ad(base_path+'xenium_1370519421_mapped_orig_seg_subset_genes_adj.h5ad') #xenium\n",
"anndata_b = read_h5ad(base_path+'xenium_1370519421_mapped_new_seg_subset_genes_adj.h5ad') #sis\n",
"barcode = \"1370519421\"\n",
"seg_name_a = \"XEN\"\n",
"seg_name_b = \"SIS\"\n",
"base_path = \"/allen/programs/celltypes/workgroups/hct/emilyg/mapping/xen_seg_comp/\"\n",
"anndata_a = read_h5ad(\n",
" base_path + \"xenium_1370519421_mapped_orig_seg_subset_genes_adj.h5ad\"\n",
") # xenium\n",
"anndata_b = read_h5ad(\n",
" base_path + \"xenium_1370519421_mapped_new_seg_subset_genes_adj.h5ad\"\n",
") # sis\n",
"\n",
"#Run segmentation comparison\n",
"sc = SpatialCompare(anndata_a, anndata_b, data_names=[seg_name_a, seg_name_b], obsm_key='spatial')\n",
"seg_comp_df = sc.collect_mutual_match_and_doublets(bc=barcode, save=True, reuse_saved=False, savepath=base_path, min_transcripts=40)"
"# Run segmentation comparison\n",
"sc = SpatialCompare(\n",
" anndata_a, anndata_b, data_names=[seg_name_a, seg_name_b], obsm_key=\"spatial\"\n",
")\n",
"seg_comp_df = sc.collect_mutual_match_and_doublets(\n",
" bc=barcode, save=True, reuse_saved=False, savepath=base_path, min_transcripts=40\n",
")"
]
},
{
Expand Down Expand Up @@ -185,7 +193,7 @@
}
],
"source": [
"#check that both sections seem (visually) to be scaled correctly \n",
"# check that both sections seem (visually) to be scaled correctly\n",
"sc.scaling_check(seg_comp_df)"
]
},
Expand Down Expand Up @@ -228,11 +236,32 @@
],
"source": [
"fig, ax = plt.subplots()\n",
"filtered_seg_a_df = seg_comp_df[(seg_comp_df['source']==seg_name_a)&(seg_comp_df['low_quality_cells']==True)]\n",
"filtered_seg_a_df.plot('center_x', 'center_y', kind='scatter', s=.1, ax=ax, label='low quality cells '+seg_name_a, color='red')\n",
"seg_comp_df[(seg_comp_df['source']==seg_name_a)&(seg_comp_df['low_quality_cells']==False)].plot('center_x', 'center_y', kind='scatter', s=.1, label='high quality cells '+seg_name_a, ax=ax, color='blue', alpha=.5)\n",
"plt.axis('equal')\n",
"plt.title(barcode+' all cells overview '+seg_name_a)\n"
"filtered_seg_a_df = seg_comp_df[\n",
" (seg_comp_df[\"source\"] == seg_name_a) & (seg_comp_df[\"low_quality_cells\"] == True)\n",
"]\n",
"filtered_seg_a_df.plot(\n",
" \"center_x\",\n",
" \"center_y\",\n",
" kind=\"scatter\",\n",
" s=0.1,\n",
" ax=ax,\n",
" label=\"low quality cells \" + seg_name_a,\n",
" color=\"red\",\n",
")\n",
"seg_comp_df[\n",
" (seg_comp_df[\"source\"] == seg_name_a) & (seg_comp_df[\"low_quality_cells\"] == False)\n",
"].plot(\n",
" \"center_x\",\n",
" \"center_y\",\n",
" kind=\"scatter\",\n",
" s=0.1,\n",
" label=\"high quality cells \" + seg_name_a,\n",
" ax=ax,\n",
" color=\"blue\",\n",
" alpha=0.5,\n",
")\n",
"plt.axis(\"equal\")\n",
"plt.title(barcode + \" all cells overview \" + seg_name_a)"
]
},
{
Expand Down Expand Up @@ -275,19 +304,26 @@
}
],
"source": [
"#Nearest neighbor distance cutoff\n",
"# Nearest neighbor distance cutoff\n",
"from scipy.spatial import cKDTree\n",
"dfa = seg_comp_df[seg_comp_df['source']==seg_name_a]\n",
"dfb = seg_comp_df[seg_comp_df['source']==seg_name_b]\n",
"\n",
"dfa = seg_comp_df[seg_comp_df[\"source\"] == seg_name_a]\n",
"dfb = seg_comp_df[seg_comp_df[\"source\"] == seg_name_b]\n",
"tree = cKDTree(dfa[[\"center_x\", \"center_y\"]].copy())\n",
"dists, inds = tree.query(dfb[['center_x', 'center_y']].copy(), 1)\n",
"lim_dists = dists[dists<12] #max is around 350, cutting this off for ease of viewing \n",
"dists, inds = tree.query(dfb[[\"center_x\", \"center_y\"]].copy(), 1)\n",
"lim_dists = dists[dists < 12] # max is around 350, cutting this off for ease of viewing\n",
"plt.hist(lim_dists, bins=100)\n",
"plt.axvline(2.5, c='red', label = 'nn dist cutoff', linestyle='--', alpha=0.5)\n",
"plt.axvline(4.0, c='blue', label = 'potentially distinct cell', linestyle='--', alpha=0.5)\n",
"plt.title('nearest neighbor distances of '+seg_name_a+' and '+seg_name_b+' cells - zoomed in')\n",
"plt.xlabel('nearest neighbor distance')\n",
"plt.ylabel('counts of cells')\n",
"plt.axvline(2.5, c=\"red\", label=\"nn dist cutoff\", linestyle=\"--\", alpha=0.5)\n",
"plt.axvline(4.0, c=\"blue\", label=\"potentially distinct cell\", linestyle=\"--\", alpha=0.5)\n",
"plt.title(\n",
" \"nearest neighbor distances of \"\n",
" + seg_name_a\n",
" + \" and \"\n",
" + seg_name_b\n",
" + \" cells - zoomed in\"\n",
")\n",
"plt.xlabel(\"nearest neighbor distance\")\n",
"plt.ylabel(\"counts of cells\")\n",
"plt.legend()"
]
},
Expand Down Expand Up @@ -323,12 +359,36 @@
"seg_names = [seg_name_a, seg_name_b]\n",
"\n",
"for i, name in enumerate(seg_names):\n",
" filtered_seg_df = seg_comp_df[(seg_comp_df['source']==name)&(seg_comp_df['low_quality_cells']==False)]\n",
" filtered_seg_df.plot('center_x', 'center_y', kind='scatter', s=.1, ax=ax[i], label='all '+name+' cells', color='red', alpha=.5)\n",
" filtered_seg_df[filtered_seg_df['match_'+seg_name_a+'_filt_'+seg_name_b+'_filt'].isna()==False].plot('center_x', 'center_y', kind='scatter', s=.1, ax=ax[i], label='matched '+name+' cells', color='blue', alpha=0.5, title=name)\n",
" ax[i].set_aspect('equal', 'box')\n",
" filtered_seg_df = seg_comp_df[\n",
" (seg_comp_df[\"source\"] == name) & (seg_comp_df[\"low_quality_cells\"] == False)\n",
" ]\n",
" filtered_seg_df.plot(\n",
" \"center_x\",\n",
" \"center_y\",\n",
" kind=\"scatter\",\n",
" s=0.1,\n",
" ax=ax[i],\n",
" label=\"all \" + name + \" cells\",\n",
" color=\"red\",\n",
" alpha=0.5,\n",
" )\n",
" filtered_seg_df[\n",
" filtered_seg_df[\"match_\" + seg_name_a + \"_filt_\" + seg_name_b + \"_filt\"].isna()\n",
" == False\n",
" ].plot(\n",
" \"center_x\",\n",
" \"center_y\",\n",
" kind=\"scatter\",\n",
" s=0.1,\n",
" ax=ax[i],\n",
" label=\"matched \" + name + \" cells\",\n",
" color=\"blue\",\n",
" alpha=0.5,\n",
" title=name,\n",
" )\n",
" ax[i].set_aspect(\"equal\", \"box\")\n",
"\n",
"plt.suptitle(barcode+' all matched cells, both segmentations')\n",
"plt.suptitle(barcode + \" all matched cells, both segmentations\")\n",
"fig.tight_layout()"
]
},
Expand All @@ -351,20 +411,46 @@
],
"source": [
"import numpy as np\n",
"\n",
"fig, ax = plt.subplots()\n",
"\n",
"filtered_seg_df = seg_comp_df[(seg_comp_df['source']==seg_name_a)&(seg_comp_df['low_quality_cells']==False)]\n",
"arr = filtered_seg_df[['center_x', 'center_y']].values\n",
"center = [np.mean(arr[:,0]), np.mean(arr[:,1])]\n",
"x_range = [center[0] - center[0]*.05, center[0]+ center[0]*.05]\n",
"y_range = [center[1] - center[1]*.05, center[1]+ center[1]*.05]\n",
"subs_a = filtered_seg_df[(filtered_seg_df['center_x'].between(x_range[0], x_range[1]))&(filtered_seg_df['center_y'].between(y_range[0], y_range[1]))]\n",
"filtered_seg_df = seg_comp_df[\n",
" (seg_comp_df[\"source\"] == seg_name_a) & (seg_comp_df[\"low_quality_cells\"] == False)\n",
"]\n",
"arr = filtered_seg_df[[\"center_x\", \"center_y\"]].values\n",
"center = [np.mean(arr[:, 0]), np.mean(arr[:, 1])]\n",
"x_range = [center[0] - center[0] * 0.05, center[0] + center[0] * 0.05]\n",
"y_range = [center[1] - center[1] * 0.05, center[1] + center[1] * 0.05]\n",
"subs_a = filtered_seg_df[\n",
" (filtered_seg_df[\"center_x\"].between(x_range[0], x_range[1]))\n",
" & (filtered_seg_df[\"center_y\"].between(y_range[0], y_range[1]))\n",
"]\n",
"\n",
"subs_a.plot('center_x', 'center_y', kind='scatter', s=2,ax=ax, label='all '+seg_name_a+' cells', color='red')\n",
"subs_a[subs_a['match_'+seg_name_a+'_filt_'+seg_name_b+'_filt'].isna()==False].plot('center_x', 'center_y', kind='scatter', s=20, ax=ax, label='matched '+seg_name_a+' cells', facecolors='none', edgecolors='blue', alpha=0.5)\n",
"ax.set_aspect('equal', 'box')\n",
"subs_a.plot(\n",
" \"center_x\",\n",
" \"center_y\",\n",
" kind=\"scatter\",\n",
" s=2,\n",
" ax=ax,\n",
" label=\"all \" + seg_name_a + \" cells\",\n",
" color=\"red\",\n",
")\n",
"subs_a[\n",
" subs_a[\"match_\" + seg_name_a + \"_filt_\" + seg_name_b + \"_filt\"].isna() == False\n",
"].plot(\n",
" \"center_x\",\n",
" \"center_y\",\n",
" kind=\"scatter\",\n",
" s=20,\n",
" ax=ax,\n",
" label=\"matched \" + seg_name_a + \" cells\",\n",
" facecolors=\"none\",\n",
" edgecolors=\"blue\",\n",
" alpha=0.5,\n",
")\n",
"ax.set_aspect(\"equal\", \"box\")\n",
"\n",
"plt.title(barcode+' matched cells zoom, '+seg_name_a)\n",
"plt.title(barcode + \" matched cells zoom, \" + seg_name_a)\n",
"plt.legend(bbox_to_anchor=[1.05, 1])\n",
"fig.tight_layout()"
]
Expand Down Expand Up @@ -610,11 +696,25 @@
}
],
"source": [
"display(\n",
" seg_comp_df[\n",
" (seg_comp_df[\"source\"] == seg_name_a)\n",
" & (\n",
" seg_comp_df[\n",
" \"match_\" + seg_name_a + \"_filt_\" + seg_name_b + \"_unfilt\"\n",
" ].isna()\n",
" == False\n",
" )\n",
" ].head()\n",
")\n",
"\n",
"display(seg_comp_df[(seg_comp_df['source']==seg_name_a)&(seg_comp_df['match_'+seg_name_a+'_filt_'+seg_name_b+'_unfilt'].isna()==False)].head())\n",
"\n",
"seg_comp_df[(seg_comp_df['source']==seg_name_b)&(seg_comp_df['match_'+seg_name_a+'_unfilt_'+seg_name_b+'_filt'].isna()==False)].head()\n",
"\n"
"seg_comp_df[\n",
" (seg_comp_df[\"source\"] == seg_name_b)\n",
" & (\n",
" seg_comp_df[\"match_\" + seg_name_a + \"_unfilt_\" + seg_name_b + \"_filt\"].isna()\n",
" == False\n",
" )\n",
"].head()"
]
},
{
Expand Down Expand Up @@ -1604,7 +1704,9 @@
}
],
"source": [
"unknown_unmatched_cells, filepath = sc.generate_sankey_diagram(seg_comp_df, barcode, save=True, savepath='/home/emilyg/spatial_compare')"
"unknown_unmatched_cells, filepath = sc.generate_sankey_diagram(\n",
" seg_comp_df, barcode, save=True, savepath=\"/home/emilyg/spatial_compare\"\n",
")"
]
},
{
Expand All @@ -1625,9 +1727,9 @@
}
],
"source": [
"plt.figure(figsize = (30, 20))\n",
"plt.figure(figsize=(30, 20))\n",
"test = plt.imread(filepath)\n",
"plt.imshow(test) \n",
"plt.imshow(test)\n",
"plt.show()"
]
},
Expand All @@ -1649,7 +1751,7 @@
}
],
"source": [
"unknown_unmatched_cells #view indexes of cells which have no match or cause for lack thereof, along with source"
"unknown_unmatched_cells # view indexes of cells which have no match or cause for lack thereof, along with source"
]
},
{
Expand Down
Loading
Loading