Skip to content

Commit 6871a1e

Browse files
authored
Merge pull request #3 from jeizenga/eval-crash
Speed up XG construction when IDs don't start at 1
2 parents 4aaf17d + 0a4c45f commit 6871a1e

File tree

3 files changed

+26
-31
lines changed

3 files changed

+26
-31
lines changed

Diff for: CMakeLists.txt

+4 -3
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,7 @@ set(sdsl-lite-divsufsort_LIB "${INSTALL_DIR}/src/sdsl-lite-build/external/libdiv
7777
# mmmultimap (memory mapped multimap)
7878
ExternalProject_Add(mmmultimap
7979
GIT_REPOSITORY "https://github.com/ekg/mmmultimap.git"
80-
GIT_TAG "8a76cec0b819de6e2b855f4edc29993f75b1b26b"
80+
GIT_TAG "b92a5c8826141d61413546278719724e0f612c39"
8181
BUILD_COMMAND ""
8282
UPDATE_COMMAND ""
8383
INSTALL_COMMAND "")
@@ -109,8 +109,8 @@ set(gfakluge_tinyFA_INCLUDE "${INSTALL_DIR}/src/gfakluge/src/tinyFA")
109109

110110
# In-place Parallel Super Scalar Samplesort (IPS⁴o), header only
111111
ExternalProject_Add(ips4o
112-
GIT_REPOSITORY "https://github.com/SaschaWitt/ips4o.git"
113-
GIT_TAG "bff3ccf0bf349497f2bb10f825d160b792236367"
112+
GIT_REPOSITORY "https://github.com/vgteam/ips4o.git"
113+
GIT_TAG "22069381cc1bf2df07ee1ff47f6b6073fcfb4508"
114114
INSTALL_COMMAND ""
115115
BUILD_COMMAND ""
116116
CONFIGURE_COMMAND "")
@@ -121,6 +121,7 @@ set(CMAKE_BUILD_TYPE Release)
121121

122122
add_library(xg_objs OBJECT src/xg.cpp)
123123

124+
add_dependencies(xg_objs handlegraph)
124125
add_dependencies(xg_objs sdsl-lite)
125126
add_dependencies(xg_objs mmmultimap)
126127
add_dependencies(xg_objs tayweeargs)

Diff for: src/xg.cpp

+17 -17
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111

1212
#include "gfakluge.hpp"
1313

14-
#define VERBOSE_DEBUG
14+
//#define VERBOSE_DEBUG
1515
//#define debug_algorithms
1616
//#define debug_component_index
1717

@@ -383,31 +383,31 @@ void XG::from_gfa(const std::string& gfa_filename, bool validate, std::string ba
383383
// set up our enumerators
384384
auto for_each_sequence = [&](const std::function<void(const std::string& seq, const nid_t& node_id)>& lambda) {
385385
gfa.for_each_sequence_line_in_file(filename, [&](gfak::sequence_elem s) {
386-
nid_t node_id = std::stol(s.name);
387-
lambda(s.sequence, node_id);
388-
});
386+
nid_t node_id = std::stol(s.name);
387+
lambda(s.sequence, node_id);
388+
});
389389
};
390390
auto for_each_edge = [&](const std::function<void(const nid_t& from_id, const bool& from_rev,
391391
const nid_t& to_id, const bool& to_rev)>& lambda) {
392392
gfa.for_each_edge_line_in_file(filename, [&](gfak::edge_elem e) {
393-
if (e.source_name.empty()) return;
394-
nid_t from_id = std::stol(e.source_name);
395-
bool from_rev = !e.source_orientation_forward;
396-
nid_t to_id = std::stol(e.sink_name);
397-
bool to_rev = !e.sink_orientation_forward;
398-
lambda(from_id, from_rev, to_id, to_rev);
399-
});
393+
if (e.source_name.empty()) return;
394+
nid_t from_id = std::stol(e.source_name);
395+
bool from_rev = !e.source_orientation_forward;
396+
nid_t to_id = std::stol(e.sink_name);
397+
bool to_rev = !e.sink_orientation_forward;
398+
lambda(from_id, from_rev, to_id, to_rev);
399+
});
400400
};
401401
auto for_each_path_element = [&](const std::function<void(const std::string& path_name,
402402
const nid_t& node_id, const bool& is_rev,
403403
const std::string& cigar)>& lambda) {
404404
gfa.for_each_path_element_in_file(filename, [&](const std::string& path_name_raw, const std::string& node_id_str,
405405
bool is_rev, const std::string& cigar) {
406-
nid_t node_id = std::stol(node_id_str);
407-
std::string path_name = path_name_raw;
408-
path_name.erase(std::remove_if(path_name.begin(), path_name.end(), [](char c) { return std::isspace(c); }), path_name.end());
409-
lambda(path_name, node_id, is_rev, cigar);
410-
});
406+
nid_t node_id = std::stol(node_id_str);
407+
std::string path_name = path_name_raw;
408+
path_name.erase(std::remove_if(path_name.begin(), path_name.end(), [](char c) { return std::isspace(c); }), path_name.end());
409+
lambda(path_name, node_id, is_rev, cigar);
410+
});
411411
};
412412
from_enumerators(for_each_sequence, for_each_edge, for_each_path_element, validate, basename);
413413
}
@@ -587,7 +587,7 @@ void XG::from_enumerators(const std::function<void(const std::function<void(cons
587587
edge_from_to_mm.append(as_integer(from_handle), as_integer(to_handle));
588588
edge_to_from_mm.append(as_integer(to_handle), as_integer(from_handle));
589589
});
590-
handle_t max_handle = number_bool_packing::pack(max_id, true);
590+
handle_t max_handle = number_bool_packing::pack(r_iv.size(), true);
591591
edge_from_to_mm.index(as_integer(max_handle));
592592
edge_to_from_mm.index(as_integer(max_handle));
593593

Diff for: src/xg.hpp

+5 -11
Original file line number | Diff line number | Diff line change
@@ -22,17 +22,11 @@
2222
#include "sdsl/csa_wt.hpp"
2323
#include "sdsl/suffix_arrays.hpp"
2424

25-
#include <handlegraph/types.hpp>
26-
#include <handlegraph/iteratee.hpp>
27-
#include <handlegraph/util.hpp>
28-
#include <handlegraph/handle_graph.hpp>
29-
//#include <handlegraph/path_handle_graph.hpp>
30-
#include <handlegraph/path_position_handle_graph.hpp>
31-
//#include <handlegraph/mutable_handle_graph.hpp>
32-
//#include <handlegraph/mutable_path_handle_graph.hpp>
33-
//#include <handlegraph/mutable_path_mutable_handle_graph.hpp>
34-
//#include <handlegraph/deletable_handle_graph.hpp>
35-
//#include <handlegraph/mutable_path_deletable_handle_graph.hpp>
25+
#include "handlegraph/types.hpp"
26+
#include "handlegraph/iteratee.hpp"
27+
#include "handlegraph/util.hpp"
28+
#include "handlegraph/handle_graph.hpp"
29+
#include "handlegraph/path_position_handle_graph.hpp"
3630

3731
#include "mmmultimap.hpp"
3832

0 commit comments

Comments
 (0)