Skip to content

Commit 4e7d0c6

Browse files
authored
Take the strain, disentangle clustersets (#1093)
1 parent 1550010 commit 4e7d0c6

File tree

13 files changed

+277
-113
lines changed

13 files changed

+277
-113
lines changed

modules/EnsEMBL/Web/Component/Gene/ComparaOrthologs.pm

Lines changed: 64 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ use strict;
2323

2424
use HTML::Entities qw(encode_entities);
2525

26+
use EnsEMBL::Web::Utils::Compara qw(orthoset_prod_names);
2627
use EnsEMBL::Web::Utils::FormatText qw(glossary_helptip get_glossary_entry pluralise);
2728

2829
use base qw(EnsEMBL::Web::Component::Gene);
@@ -46,8 +47,9 @@ sub content {
4647
my $biotype = $object->Obj->get_Biotype; # We expect a Biotype object, though it could be a biotype name.
4748
my $is_ncrna = ( ref $biotype eq 'Bio::EnsEMBL::Biotype' ? $biotype->biotype_group =~ /noncoding$/ : $biotype =~ /RNA/ );
4849
my $species_name = $species_defs->GROUP_DISPLAY_NAME;
49-
my $strain_url = $hub->is_strain ? "Strain_" : "";
50-
my $strain_param = $hub->is_strain ? ";strain=1" : ""; # initialize variable even if is_strain is false, to avoid warnings
50+
my $is_strain_view = $hub->action =~ /^Strain_/ ? 1 : 0;
51+
my $strain_url = $is_strain_view ? "Strain_" : "";
52+
my $strain_param = $is_strain_view ? ";strain=1" : ""; # initialize variable even if is_strain_view is false, to avoid warnings
5153

5254
my @orthologues = (
5355
$object->get_homology_matches('ENSEMBL_ORTHOLOGUES', undef, undef, $cdb),
@@ -78,11 +80,14 @@ sub content {
7880
delete $compara_species->{'ancestral_sequences'};
7981
}
8082

83+
my $orthoset_prod_names = EnsEMBL::Web::Utils::Compara::orthoset_prod_names($hub, $cdb, $is_strain_view);
84+
my $orthoset_prod_name_set = {map {$_ => 1} @$orthoset_prod_names};
85+
8186
## Work out which species we want to skip over, based on page type and user's configuration
8287
my $this_group = $species_defs->STRAIN_GROUP;
8388
my $species_not_shown = {};
84-
my $strains_not_shown = {};
85-
my $strain_refs = {};
89+
my $species_not_relevant = {};
90+
my $unshown_strain_types = {};
8691
my $hidden = {};
8792

8893
foreach my $prod_name (keys %$compara_species) {
@@ -92,30 +97,23 @@ sub content {
9297
next if $species eq $hub->species; ## Ignore current species
9398
my $label = $is_pan ? $pan_lookup->{$prod_name}{'display_name'} : $species_defs->species_label($species);
9499

95-
## Should we be showing this orthologue on this pagpe by default?
96-
my $strain_group = $species_defs->get_config($species, 'STRAIN_GROUP');
97-
my $related_taxon = $species_defs->get_config($species, 'RELATED_TAXON');
98-
if ($hub->action =~ /^Strain_/) {
99-
unless (($strain_group && $strain_group eq $this_group) || ($related_taxon && $related_taxon eq $species_defs->RELATED_TAXON)) {
100-
$species_not_shown->{$species} = $label;
101-
next;
102-
}
103-
}
104-
else {
105-
if ($strain_group) {
106-
if ($strain_group eq $prod_name) {
107-
$strain_refs->{$species} = $label;
108-
}
109-
else {
110-
## Do not show any strain species on main species view
111-
$strains_not_shown->{$species} = $label;
112-
next;
113-
}
114-
}
100+
# Should we be showing this orthologue on this page by default?
101+
unless ($orthoset_prod_name_set->{$prod_name}) {
102+
$species_not_relevant->{$species} = 1;
103+
next;
115104
}
116105

117106
## Do we even have an orthologue for this species?
118107
unless ($orthologue_list{$species}) {
108+
109+
my $strain_group = $species_defs->get_config($species, 'STRAIN_GROUP');
110+
my $strain_type = $strain_group && $prod_name ne $strain_group
111+
? $species_defs->get_config($species, 'STRAIN_TYPE')
112+
: 'species'
113+
;
114+
115+
$unshown_strain_types->{$strain_type} += 1;
116+
119117
$species_not_shown->{$species} = $label;
120118
next;
121119
}
@@ -200,8 +198,8 @@ sub content {
200198
my $anc_node_ids = $self->fetch_anc_node_ids($cdb);
201199
foreach my $species (sort { ($a =~ /^<.*?>(.+)/ ? $1 : $a) cmp ($b =~ /^<.*?>(.+)/ ? $1 : $b) } keys %orthologue_list) {
202200
next unless $species;
201+
next if $species_not_relevant->{$species};
203202
next if $species_not_shown->{$species};
204-
next if $strains_not_shown->{$species};
205203
next if $hidden->{$species};
206204

207205
my ($species_label, $prodname);
@@ -367,29 +365,26 @@ sub content {
367365
);
368366
}
369367

370-
if (($hub->action =~ /^Strain_/ && keys %$strains_not_shown)
371-
|| ($hub->action !~ /^Strain_/ && keys %$species_not_shown)) {
372-
my ($total, $no_ortho_species, $strain_refs_html);
373-
if ($hub->action =~ /^Strain_/) {
374-
$total = scalar keys %$strains_not_shown;
375-
$no_ortho_species = $self->get_no_ortho_species_html($strains_not_shown, $sets_by_species);
376-
$strain_refs_html = '';
377-
}
378-
else {
379-
$total = scalar keys %$species_not_shown;
380-
unless ($is_pan) {
381-
$no_ortho_species = $self->get_no_ortho_species_html($species_not_shown, $sets_by_species);
382-
$strain_refs_html = $self->get_strain_refs_html($strain_refs, $species_not_shown);
383-
}
368+
if (keys %$species_not_shown) {
369+
my $no_ortho_species;
370+
my $total = scalar keys %$species_not_shown;
371+
unless ($is_pan) {
372+
$no_ortho_species = $self->get_no_ortho_species_html($species_not_shown, $sets_by_species);
384373
}
385374
my $not_shown_list = $is_pan ? '' : sprintf('<ul id="no_ortho_species">%s</ul>', $no_ortho_species);
375+
my $strain_type_breakdown = $self->get_strain_type_breakdown($unshown_strain_types, $total);
376+
377+
my $not_shown_desc = $total > 1
378+
? "are not shown in the table above because they don't have any orthologue with"
379+
: "is not shown in the table above because it doesn't have any orthologue with"
380+
;
381+
386382
$html .= '<br /><a name="list_no_ortho"/>' . $self->_info(
387383
'Species without orthologues',
388384
sprintf(
389-
qq(<p><span class="no_ortho_count">%d</span> species are not shown in the table above because they don't have any orthologue with %s.</p>
385+
qq(<p><span class="no_ortho_count">%d</span> %s %s %s.</p>
390386
%s
391-
%s
392-
</p> <input type="hidden" class="panel_type" value="ComparaOrtholog" />), $total, $self->object->Obj->stable_id, $not_shown_list, $strain_refs_html),
387+
</p> <input type="hidden" class="panel_type" value="ComparaOrtholog" />), $total, $strain_type_breakdown, $not_shown_desc, $self->object->Obj->stable_id, $not_shown_list),
393388
undef,
394389
'no_ortho_message_pad'
395390
);
@@ -410,7 +405,7 @@ sub create_gene_tree_links {
410405
my $orthologue = $params->{orthologue};
411406

412407
my $hub = $self->hub;
413-
my $strain_url = $hub->is_strain ? "Strain_" : "";
408+
my $strain_url = $hub->action =~ /^Strain_/ ? "Strain_" : "";
414409

415410
my $tree_url = $hub->url({
416411
type => 'Gene',
@@ -432,32 +427,23 @@ sub species_sets {
432427
my $hub = $self->hub;
433428
my $species_defs = $hub->species_defs;
434429
my $is_pan = $cdb =~ /compara_pan_ensembl/;
430+
my $is_strain_view = $self->hub->action =~ /^Strain_/;
435431

436-
return "" if $self->hub->action =~ /^Strain/; #No summary table needed for strains
432+
return "" if $is_strain_view; #No summary table needed for strains
437433

438434
my ($set_order, $species_sets, $set_mappings) = $self->species_set_config($cdb); #setting $cdb enables us to fetch Pan species sets
439435

440436
return "" unless $set_order;
441437

442-
my $compara_spp = {};
438+
my $compara_spp = EnsEMBL::Web::Utils::Compara::orthoset_prod_names($hub, $cdb, $is_strain_view);
443439
my $lookup = $species_defs->prodnames_to_urls_lookup($cdb);
444-
my $pan_info = {};
445-
if ($is_pan) {
446-
$pan_info = $species_defs->multi_val('PAN_COMPARA_LOOKUP');
447-
$compara_spp = {map { $_ => 1} keys %$pan_info};
448-
}
449-
else {
450-
$compara_spp = { %{$species_defs->multi_hash->{'DATABASE_COMPARA'}{'COMPARA_SPECIES'}} };
451-
delete $compara_spp->{'ancestral_sequences'};
452-
}
440+
my $pan_info = $is_pan ? $species_defs->multi_val('PAN_COMPARA_LOOKUP') : {};
453441
my %orthologue_map = qw(SEED BRH PIP RHS);
454442
my $sets_by_species = {};
455443
my $ortho_type = {};
456444

457-
foreach (keys %$compara_spp) {
445+
foreach (@$compara_spp) {
458446
my $species = $lookup->{$_};
459-
next unless $species; #skip species absent from URL lookup (e.g. Human in Ensembl Plants)
460-
next if $self->hub->is_strain($species); #skip strain species
461447

462448
my $orthologues = $orthologue_list->{$species} || {};
463449
my $no_ortho = 0;
@@ -503,7 +489,7 @@ sub species_set_config {} # Stub, as it's clade-specific - implement in plugins
503489

504490
## Stub hook: this base implementation has an empty body, so it returns nothing.
## content() calls it as $self->fetch_anc_node_ids($cdb) before building the table rows.
## NOTE(review): presumably overridden elsewhere for the divisions that need it - confirm.
sub fetch_anc_node_ids {} # Another stub, only for specific divisions (e.g. Metazoa)
505491

506-
sub get_strain_refs_html {
492+
sub get_strain_refs_html { # not in use as of 2025-06
507493
my ($self, $strain_refs, $species_not_shown) = @_;
508494
return '' unless keys %{$strain_refs||{}};
509495

@@ -532,14 +518,33 @@ sub get_no_ortho_species_html {
532518
my $hub = $self->hub;
533519
my $html = '';
534520

535-
foreach (sort {lc $a cmp lc $b} keys %$species_not_shown) {
521+
# Species will be easier to find if we sort them by display name.
522+
foreach (sort {lc $species_not_shown->{$a} cmp lc $species_not_shown->{$b}} keys %$species_not_shown) {
536523
my $class = $sets_by_species->{$_} ? sprintf(' class="%s"', join(' ', @{$sets_by_species->{$_}})) : '';
537524
$html .= sprintf '<li%s>%s</li>', $class, $species_not_shown->{$_};
538525
}
539526

540527
return $html;
541528
}
542529

530+
sub get_strain_type_breakdown {
  ## Get text listing strain types in order of decreasing frequency.
  ## Ties in frequency are broken alphabetically by type name.
  ## @param  $strain_types - hashref of strain type name => number of genomes of that type
  ## @param  $num_genomes  - total number of genomes being described; type names
  ##                         are pluralised when this is greater than one
  ## @return String, e.g. "species", "species and strain" or "species, breed and strain";
  ##         empty string if there are no usable strain types
  my ($self, $strain_types, $num_genomes) = @_;

  $strain_types ||= {};

  # Blank type names are ignored so that the joined text never ends up
  # with a dangling separator such as "species and ".
  my @ordered_strain_types =
    sort { $strain_types->{$b} <=> $strain_types->{$a} || $a cmp $b }
    grep { length }
    keys %$strain_types;

  # Always hand back a defined string, so callers can interpolate it safely.
  return '' unless @ordered_strain_types;

  if (defined $num_genomes && $num_genomes > 1) {
    @ordered_strain_types = map { pluralise($_) } @ordered_strain_types;
  }

  return $ordered_strain_types[0] if @ordered_strain_types == 1;

  # Two or more types: "a and b", "a, b and c", ...
  my $last_strain_type = pop @ordered_strain_types;
  return join(', ', @ordered_strain_types) . ' and ' . $last_strain_type;
}
547+
543548
sub get_export_data {
544549
## Get data for export
545550
my ($self, $flag) = @_;

modules/EnsEMBL/Web/Component/Gene/ComparaParalogs.pm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ sub content {
4242
my $cdb = shift || $hub->param('cdb') || 'compara';
4343
my $biotype = $self->object->Obj->get_Biotype; # We expect a Biotype object, though it could be a biotype name.
4444
my $is_ncrna = ( ref $biotype eq 'Bio::EnsEMBL::Biotype' ? $biotype->biotype_group =~ /noncoding$/ : $biotype =~ /RNA/ );
45-
my $strain_url = $hub->is_strain ? 'Strain_' : '';
45+
my $strain_url = $hub->action =~ /^Strain_/ ? 'Strain_' : '';
4646
my %paralogue_list = %{$self->object->get_homology_matches('ENSEMBL_PARALOGUES', 'paralog|gene_split', undef, $cdb)};
4747

4848
return '<p>No paralogues have been identified for this gene</p>' unless keys %paralogue_list;

modules/EnsEMBL/Web/Component/Gene/ComparaTree.pm

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ sub get_details {
3838
my $member = $object->get_compara_Member({'stable_id' => $object->stable_id, 'cdb' => $cdb});
3939
return (undef, '<strong>Gene is not in the compara database</strong>') unless $member;
4040

41-
my $strain_tree = $self->hub->species_defs->get_config($self->hub->species,'RELATED_TAXON') if ($self->hub->is_strain || $self->hub->param('strain') || $self->hub->action =~ /Strain_/);
41+
my $strain_tree = $self->hub->species_defs->get_config($self->hub->species,'RELATED_TAXON') if ($self->hub->param('strain') || $self->hub->action =~ /^Strain_/);
42+
4243
my $tree = $object->get_GeneTree($cdb,"", $strain_tree);
4344
return (undef, '<strong>Gene is not in a compara tree</strong>') unless $tree;
4445

@@ -94,7 +95,7 @@ sub content {
9495
my $hub = $self->hub;
9596
my $object = $self->object || $self->hub->core_object('gene');
9697
my $is_genetree = $object && $object->isa('EnsEMBL::Web::Object::GeneTree') ? 1 : 0;
97-
my $is_strain = $hub->is_strain || $hub->param('strain') || $hub->action =~ /Strain_/;
98+
my $is_strain_view = $hub->param('strain') || $hub->action =~ /^Strain_/;
9899
my ($gene, $member, $tree, $node);
99100

100101
my $type = $self->param('data_type') || $hub->type;
@@ -148,7 +149,7 @@ sub content {
148149
if (defined $parent) {
149150

150151
if ($vc->get('super_tree') eq 'on' || $self->param('super_tree') eq 'on') {
151-
my $super_url = $self->ajax_url('sub_supertree',{ cdb => $cdb, update_panel => undef, strain => $is_strain });
152+
my $super_url = $self->ajax_url('sub_supertree',{ cdb => $cdb, update_panel => undef, strain => $is_strain_view });
152153
$html .= qq(<div class="ajax"><input type="hidden" class="ajax_load" value="$super_url" /></div>);
153154
} else {
154155
$html .= $self->_info(
@@ -163,10 +164,10 @@ sub content {
163164
}
164165

165166
if ($hub->type eq 'Gene') {
166-
if ($tree->tree->clusterset_id ne $clusterset_id && !$self->is_strain) {
167+
if ($tree->tree->clusterset_id ne $clusterset_id) {
167168
$html .= $self->_info('Phylogenetic model selection',
168169
sprintf(
169-
'The phylogenetic model <I>%s</I> is not available for this tree. Showing the default (consensus) tree instead.', $clusterset_id
170+
'The phylogenetic model <i>%s</i> is not available for this tree. Showing the <i>%s</i> tree instead.', $clusterset_id, $tree->tree->clusterset_id,
170171
)
171172
);
172173
} elsif ($tree->tree->ref_root_id) {
@@ -280,7 +281,7 @@ sub content {
280281
image_width => $image_width,
281282
slice_number => '1|1',
282283
cdb => $cdb,
283-
strain => $is_strain,
284+
strain => $is_strain_view,
284285
});
285286

286287
# Keep track of collapsed nodes
@@ -393,7 +394,11 @@ sub content {
393394
my $collapsed_to_rank = $self->collapsed_nodes($tree, $node, "rank_$rank", $highlight_genome_db_id, $highlight_gene);
394395
push @rank_options, sprintf qq{<option value="%s" %s>%s</option>\n}, $hub->url({ collapse => $collapsed_to_rank, g1 => $highlight_gene, s1 => $highlight_species_url, gtr => $rank }), $rank eq $selected_rank ? 'selected' : '', ucfirst $rank;
395396
}
396-
push @view_links, sprintf qq{<li>Collapse all the nodes at the taxonomic rank <select onchange="Ensembl.redirect(this.value)">%s</select></li>}, join("\n", @rank_options) if(!$self->is_strain);
397+
# The ability to collapse by taxonomic rank was not seen as
398+
# particularly useful in a strain gene-tree view ( ENSWEB-3037 ).
399+
if(!$is_strain_view) {
400+
push @view_links, sprintf qq{<li>Collapse all the nodes at the taxonomic rank <select onchange="Ensembl.redirect(this.value)">%s</select></li>}, join("\n", @rank_options);
401+
}
397402
}
398403

399404
$html .= $image->render;

modules/EnsEMBL/Web/Component/Gene/Compara_Portal.pm

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,9 @@ sub content {
3535
my $hub = $self->hub;
3636
my $availability = $self->object->availability;
3737
my $location = $hub->url({ type => 'Location', action => 'Compara' });
38-
my $strain_url = ($self->is_strain || $hub->action =~ /^Strain_/) ? "Strain_" : "";
39-
my $strain_avail = ($self->is_strain || $hub->action =~ /^Strain_/) ? "strain_" : "";
38+
my $is_strain_view = $hub->action =~ /^Strain_/;
39+
my $strain_url = $is_strain_view ? "Strain_" : "";
40+
my $strain_avail = $is_strain_view ? "strain_" : "";
4041

4142
my $ortho_image = $strain_avail ? 'strain_ortho.gif' : 'compara_ortho.gif';
4243
my $para_image = $strain_avail ? 'strain_para.gif' : 'compara_para.gif';
@@ -49,7 +50,7 @@ sub content {
4950
{ title => 'Families', img => '80/compara_fam.gif', url => $availability->{'family'} ? $hub->url({ action => 'Family' }) : '' },
5051
];
5152

52-
@$buttons = grep { $_->{title} !~ /^Families$|^Genomic alignments$/ } @$buttons if($self->is_strain); #remove the one we dont show for strains species
53+
@$buttons = grep { $_->{title} !~ /^Families$|^Genomic alignments$/ } @$buttons if($is_strain_view); #remove the one we dont show for strain views
5354
my $html = $self->button_portal($buttons, 'portal-small');
5455
$html .= qq{<p>More views of comparative genomics data, such as multiple alignments and synteny, are available on the <a href="$location">Location</a> page for this gene.</p>};
5556

modules/EnsEMBL/Web/Component/Gene/HomologAlignment.pm

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,13 @@ sub content {
8585
my $label = $external_species ? $pan_lookup->{$prodname}{'display_name'} : $species_defs->species_label($member_species);
8686
my $location = sprintf '%s:%d-%d', $gene->dnafrag->name, $gene->dnafrag_start, $gene->dnafrag_end;
8787

88-
if (!$second_gene && $member_species ne $species && $hub->param('species_' .$prodname) eq 'off') {
89-
$flag = 0;
90-
$skipped{$label}++;
91-
next;
88+
if (!$second_gene && $member_species ne $species) {
89+
my $species_toggle = $hub->param('species_' .$prodname);
90+
if (!defined $species_toggle || $species_toggle eq 'off') {
91+
$skipped{$label}++ if defined $species_toggle;
92+
$flag = 0;
93+
next;
94+
}
9295
}
9396

9497
if ($gene->stable_id eq $gene_id) {

modules/EnsEMBL/Web/ConfigPacker.pm

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1387,11 +1387,30 @@ sub _summarise_compara_db {
13871387
$sth->execute;
13881388

13891389
while (my ($sp, $clusterset_id, $strain_type) = $sth->fetchrow_array) {
1390+
$self->db_tree->{$db_name}{'CLUSTERSET_PRODNAMES'}{$clusterset_id}{$sp} = 1;
13901391
next if exists $preferred_clusterset_id{$sp} && $clusterset_id ne $preferred_clusterset_id{$sp};
13911392
$self->db_tree->{$db_name}{'CLUSTERSETS'}{$sp} = $clusterset_id;
13921393
$self->db_tree->{$db_name}{'STRAIN_TYPES'}{$sp} = $strain_type;
13931394
}
13941395

1396+
if (exists $self->db_tree->{$db_name}{'CLUSTERSET_PRODNAMES'}) {
1397+
1398+
my $default_oset_spp_aref = $dbh->selectcol_arrayref('
1399+
select distinct gd.name
1400+
from method_link_species_set mlss
1401+
join method_link ml using(method_link_id)
1402+
join species_set ss using(species_set_id)
1403+
join species_set_header ssh using(species_set_id)
1404+
join genome_db gd using(genome_db_id)
1405+
where ml.type in ("PROTEIN_TREES", "NC_TREES")
1406+
and trim(leading "collection-" from ssh.name) = "default";
1407+
');
1408+
1409+
foreach my $sp (@$default_oset_spp_aref) {
1410+
$self->db_tree->{$db_name}{'CLUSTERSET_PRODNAMES'}{'default'}{$sp} = 1;
1411+
}
1412+
}
1413+
13951414
###################################################################
13961415
## Cache MLSS for quick lookup in ImageConfig
13971416

0 commit comments

Comments
 (0)