|
2 | 2 | "cells": [
|
3 | 3 | {
|
4 | 4 | "cell_type": "code",
|
5 |
| - "execution_count": 1, |
| 5 | + "execution_count": null, |
6 | 6 | "metadata": {},
|
7 | 7 | "outputs": [
|
8 | 8 | {
|
|
31 | 31 | "from orthologue_analysis.orthogroups import init_orthogroup_df\n",
|
32 | 32 | "from orthologue_analysis.species import PristionchusFromTool, SpeciesList\n",
|
33 | 33 | "from orthologue_analysis.utils import SequenceIDMapping, orthofinder_paths\n",
|
34 |
| - "from ppac_merged_split_run_utils import pickle_cache_suspicious_orthologue_pipeline\n", |
35 | 34 | "from reannotation.analysis import (\n",
|
36 | 35 | " interpro_accessions_frequently_missed_by_all_tools,\n",
|
37 | 36 | " interpro_accessions_in_novel_transcripts,\n",
|
38 | 37 | " interpro_accessions_in_missed_transcripts,\n",
|
39 | 38 | " missed_transcripts_with_significantly_more_frequent_accessions\n",
|
40 | 39 | ")\n",
|
41 |
| - "from reannotation.pipelines import interpro_accession_pipeline, suspicious_orthologue_pipeline, novel_orthologue_pipeline\n", |
| 40 | + "from reannotation.pipelines import (\n", |
| 41 | + " interpro_accession_pipeline,\n", |
| 42 | + " suspicious_orthologue_pipeline,\n", |
| 43 | + " pickle_cache_suspicious_orthologue_pipeline,\n", |
| 44 | + " novel_orthologue_pipeline\n", |
| 45 | + ")\n", |
42 | 46 | "from reannotation.statistics import fisher_exact_for_two_lists_of_accessions\n",
|
43 | 47 | "from reannotation.utils import extract_accessions_from_transcript\n",
|
44 | 48 | "from utils.esm import extract_esm_means\n",
|
|
129 | 133 | "metadata": {},
|
130 | 134 | "outputs": [],
|
131 | 135 | "source": [
|
132 |
| - "braker_merged, braker_split = pickle_cache_suspicious_orthologue_pipeline(\"braker\", og_df, wbps_col, braker_col, species_list, seq_id_map, wbps_prefix=\"Transcript\")\n", |
133 |
| - "anno_merged, anno_split = pickle_cache_suspicious_orthologue_pipeline(\"anno\", og_df, wbps_col, anno_col, species_list, seq_id_map, wbps_prefix=\"Transcript\")\n", |
134 |
| - "helixer_merged, helixer_split = pickle_cache_suspicious_orthologue_pipeline(\"helixer\", og_df, wbps_col, helixer_col, species_list, seq_id_map, wbps_prefix=\"Transcript\")" |
| 136 | + "braker_merged, braker_split = pickle_cache_suspicious_orthologue_pipeline(\"braker\", \"ppac\", og_df, wbps_col, braker_col, species_list, seq_id_map, wbps_prefix=\"Transcript\")\n", |
| 137 | + "anno_merged, anno_split = pickle_cache_suspicious_orthologue_pipeline(\"anno\", \"ppac\", og_df, wbps_col, anno_col, species_list, seq_id_map, wbps_prefix=\"Transcript\")\n", |
| 138 | + "helixer_merged, helixer_split = pickle_cache_suspicious_orthologue_pipeline(\"helixer\", \"ppac\", og_df, wbps_col, helixer_col, species_list, seq_id_map, wbps_prefix=\"Transcript\")" |
135 | 139 | ]
|
136 | 140 | },
|
137 | 141 | {
|
138 | 142 | "cell_type": "code",
|
139 |
| - "execution_count": 5, |
| 143 | + "execution_count": 6, |
140 | 144 | "metadata": {},
|
141 | 145 | "outputs": [
|
142 | 146 | {
|
143 | 147 | "name": "stdout",
|
144 | 148 | "output_type": "stream",
|
145 | 149 | "text": [
|
146 |
| - "BRAKER3: merged=19, split=96, total=0.56\n", |
147 |
| - "Helixer: merged=349, split=591, total=4.0\n", |
148 |
| - "Anno: merged=958, split=100, total=7.13\n" |
| 150 | + "BRAKER3: merged=708, split=102, total=6.3%\n", |
| 151 | + "\tTotal genes: 24077\n", |
| 152 | + "Helixer: merged=351, split=533, total=3.83%\n", |
| 153 | + "\tTotal genes: 32221\n", |
| 154 | + "Anno: merged=1009, split=96, total=7.47%\n", |
| 155 | + "\tTotal genes: 28283\n" |
149 | 156 | ]
|
150 | 157 | }
|
151 | 158 | ],
|
152 | 159 | "source": [
|
153 | 160 | "num_genes = len(list(braker_species.db.all_features(featuretype=\"gene\")))\n",
|
154 |
| - "print(f\"BRAKER3: merged={len(braker_merged)}, split={len(braker_split)}, total={round(100*(len(braker_split) + len(braker_merged)*2)/num_genes, 2)}\")\n", |
| 161 | + "print(f\"BRAKER3: merged={len(braker_merged)}, split={len(braker_split)}, total={round(100*(len(braker_split) + len(braker_merged)*2)/num_genes, 2)}%\")\n", |
| 162 | + "print(f\"\\tTotal genes: {num_genes}\")\n", |
155 | 163 | "num_genes = len(list(helixer_species.db.all_features(featuretype=\"gene\")))\n",
|
156 |
| - "print(f\"Helixer: merged={len(helixer_merged)}, split={len(helixer_split)}, total={round(100*(len(helixer_split) + len(helixer_merged)*2)/num_genes, 2)}\")\n", |
| 164 | + "print(f\"Helixer: merged={len(helixer_merged)}, split={len(helixer_split)}, total={round(100*(len(helixer_split) + len(helixer_merged)*2)/num_genes, 2)}%\")\n", |
| 165 | + "print(f\"\\tTotal genes: {num_genes}\")\n", |
157 | 166 | "num_genes = len(list(anno_species.db.all_features(featuretype=\"gene\")))\n",
|
158 |
| - "print(f\"Anno: merged={len(anno_merged)}, split={len(anno_split)}, total={round(100*(len(anno_split) + len(anno_merged)*2)/num_genes, 2)}\")" |
| 167 | + "print(f\"Anno: merged={len(anno_merged)}, split={len(anno_split)}, total={round(100*(len(anno_split) + len(anno_merged)*2)/num_genes, 2)}%\")\n", |
| 168 | + "print(f\"\\tTotal genes: {num_genes}\")" |
159 | 169 | ]
|
160 | 170 | },
|
161 | 171 | {
|
|
0 commit comments