Skip to content

Commit

Permalink
Merge pull request #27 from Ibvt/segfault_issues
Browse files Browse the repository at this point in the history
changes to data structure
  • Loading branch information
riasc authored Mar 5, 2025
2 parents cd9e671 + a74c3fb commit ad340dd
Show file tree
Hide file tree
Showing 9 changed files with 89 additions and 20 deletions.
Empty file added .gitattributes
Empty file.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

# [0.2.4]

# Fix
- Fixed incompatibility when no control sample is provided
- Fixed bug when relative paths are provided for test data

# [0.2.3]

## Fix
Expand Down
7 changes: 5 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.22.1)
project(RNAnue VERSION 0.2.3)
project(RNAnue VERSION 0.2.4)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED True)
set(CMAKE_CXX_FLAGS -fopenmp)
Expand All @@ -11,7 +11,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/include/Config.hpp.in ${CMAKE_CURRENT

###### SeqAn #####
list (APPEND CMAKE_PREFIX_PATH "${CMAKE_CURRENT_SOURCE_DIR}/seqan3/build_system")
find_package (seqan3 3.0 REQUIRED)
find_package (seqan3 3.3.0 REQUIRED)
find_package(OpenMP)

###### Boost ######
Expand All @@ -38,6 +38,9 @@ target_link_libraries(RNAnue PkgConfig::HTSLIB)
cmake_print_properties(TARGETS RNAnue PROPERTIES TARGET_INCLUDE_DIRECTORIES)

###### Tests ######
# make tests optional
option(BUILD_TESTS "Building unit tests" OFF)

file(GLOB TEST_SOURCES "test/*.cpp")
file(GLOB SOURCES "src/*.cpp")
list(REMOVE_ITEM SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp)
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[![docker-release](https://github.com/Ibvt/RNAnue/actions/workflows/docker.yml/badge.svg)](https://github.com/Ibvt/RNAnue/actions/workflows/docker.yml)

# RNAnue - 0.2.3
# RNAnue - 0.2.4

## About
RNAnue is a comprehensive analysis to detect RNA-RNA interactions from Direct-Duplex-Detection (DDD) data.
Expand All @@ -13,7 +13,7 @@ to detect the Boost libraries system-wide. However, Seqan3 is expected to be loc
folder of RNAnue as specified in the CMakeLists.txt. Segemehl and the Vienna binaries need to be
located in $PATH.

* [Boost C++ Libraries](https://www.boost.org/) (v1.7.2)
* [Boost C++ Libraries](https://www.boost.org/) (v1.72.x)
* [SeqAn](https://github.com/seqan/seqan3) (v3.3.0)
* [Segemehl](http://www.bioinf.uni-leipzig.de/Software/segemehl/) (v0.3.4)
* [Vienna Package](https://www.tbi.univie.ac.at/RNA/#binary_packages) (>= v2.4.17)
Expand Down
2 changes: 2 additions & 0 deletions include/Utility.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <iomanip>
#include <chrono>
#include <random>
#include <filesystem>


// Boost
Expand All @@ -18,6 +19,7 @@
#include "DataTypes.hpp"

namespace fs = boost::filesystem;
namespace stdfs = std::filesystem;

// filesystem manipulation
namespace helper {
Expand Down
23 changes: 18 additions & 5 deletions src/Data.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "Data.hpp"

Data::Data(po::variables_map params) : params(params) {
std::cout << helper::getTime() << "Data object created\n";
std::string subcall = params["subcall"].as<std::string>();
fs::path outDir = fs::path(params["outdir"].as<std::string>());

Expand Down Expand Up @@ -31,7 +32,7 @@ Data::~Data() {

void Data::preprocDataPrep() {
// retrieve paths that contain the reads
fs::path ctrlsPath = fs::path(this->params["ctrls"].as<std::string>());
fs::path ctrlsPath = fs::path(params["ctrls"].as<std::string>());
fs::path trtmsPath = fs::path(this->params["trtms"].as<std::string>());

GroupsPath groups = getGroupsPath(ctrlsPath, trtmsPath);
Expand All @@ -45,7 +46,10 @@ void Data::alignDataPrep() {
} else {
// make sure that data has been preprocessed (or at least selected)
if(params["preproc"].as<std::bitset<1>>() == std::bitset<1>("1")) {
fs::path ctrlsPath = fs::path(params["outdir"].as<std::string>()) / "preproc/ctrls";
fs::path ctrlsPath = "";
if(params["ctrls"].as<std::string>() != "") {
fs::path ctrlsPath = fs::path(params["outdir"].as<std::string>()) / "preproc/ctrls";
}
fs::path trtmsPath = fs::path(params["outdir"].as<std::string>()) / "preproc/trtms";

/*
Expand All @@ -59,23 +63,32 @@ void Data::alignDataPrep() {
}

void Data::detectDataPrep() {
fs::path ctrlsPath = fs::path(params["outdir"].as<std::string>()) / "align/ctrls";
fs::path ctrlsPath = "";
if(params["ctrls"].as<std::string>() != "") {
fs::path ctrlsPath = fs::path(params["outdir"].as<std::string>()) / "align/ctrls";
}
fs::path trtmsPath = fs::path(params["outdir"].as<std::string>()) / "align/trtms";

GroupsPath groups = getGroupsPath(ctrlsPath, trtmsPath);
getCondition(groups);
}

void Data::clusteringDataPrep() {
fs::path ctrlsPath = fs::path(params["outdir"].as<std::string>()) / "detect/ctrls";
fs::path ctrlsPath = "";
if(params["ctrls"].as<std::string>() != "") {
fs::path ctrlsPath = fs::path(params["outdir"].as<std::string>()) / "detect/ctrls";
}
fs::path trtmsPath = fs::path(params["outdir"].as<std::string>()) / "detect/trtms";

GroupsPath groups = getGroupsPath(ctrlsPath, trtmsPath);
getCondition(groups);
}

void Data::analysisDataPrep() {
fs::path ctrlsPath = fs::path(params["outdir"].as<std::string>()) / "detect/ctrls";
fs::path ctrlsPath = "";
if(params["ctrls"].as<std::string>() != "") {
fs::path ctrlsPath = fs::path(params["outdir"].as<std::string>()) / "detect/ctrls";
}
fs::path trtmsPath = fs::path(params["outdir"].as<std::string>()) / "detect/trtms";

GroupsPath groups = getGroupsPath(ctrlsPath, trtmsPath);
Expand Down
32 changes: 26 additions & 6 deletions src/SplitReadCalling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ void SplitReadCalling::start(pt::ptree sample, pt::ptree condition) {
}

producer(inBam); // start the producer (and add the recordsreads to the queue)

for(auto& consumer : consumers) {
consumer.join();
}
Expand Down Expand Up @@ -144,6 +143,7 @@ void SplitReadCalling::producer(seqan3::sam_file_input<>& inputfile) {

void SplitReadCalling::consumer(dtp::BAMOut& singleOut, dtp::BAMOut& splitsOut, dtp::BAMOut& multsplitsOut,
std::mutex& singleOutMutex, std::mutex& splitsOutMutex, std::mutex& multsplitsOutMutex) {

using recordType = typename seqan3::sam_file_input<>::record_type;
std::vector<recordType> readrecords;
while(true) {
Expand All @@ -165,6 +165,7 @@ void SplitReadCalling::process(std::vector<T>& readrecords, auto& singleOut,
auto& singleOutMutex, auto& splitsOutMutex,
auto& multsplitsOutMutex) {


// define variables
std::string qname = "";
seqan3::sam_flag flag = seqan3::sam_flag::none;
Expand Down Expand Up @@ -195,6 +196,7 @@ void SplitReadCalling::process(std::vector<T>& readrecords, auto& singleOut,
std::map<int, std::vector<SAMrecord>> putative{};

for(auto& rec : readrecords) {

qname = rec.id();
flag = rec.flag();
ref_id = rec.reference_id();
Expand Down Expand Up @@ -227,6 +229,14 @@ void SplitReadCalling::process(std::vector<T>& readrecords, auto& singleOut,

cigarMatch = 0; // reset matches count in cigar
// check the cigar string for splits
// std::cout << "cigar: ";
// for(auto& cig: cigar) { // iterate over all cigar elements
// cigarSize = get<uint32_t>(cig); // determine the size and operator of the cigar element
// cigarOp = get<seqan3::cigar::operation>(cig); // print cigarOP
// std::cout << cigarSize << cigarOp.to_char();
// }
// std::cout << std::endl;

for(auto& cig: cigar) {
// determine the size and operator of the cigar element
cigarSize = get<uint32_t>(cig);
Expand All @@ -253,7 +263,9 @@ void SplitReadCalling::process(std::vector<T>& readrecords, auto& singleOut,
cigarSplit.clear(); // new split - new CIGAR
ref_offset.value() += cigarSize + endPosRead + 1; // adjust left-most mapping position
splitId++; // increase split ID
} else { // matches the splice junction
} else { // matches the splice junctiona
// endPosRead += cigarSize;
// cigarSplit.push_back(cig);
xjtag--; // decrease the number of splits (basically merge the splits)
}
} else {
Expand All @@ -277,6 +289,7 @@ void SplitReadCalling::process(std::vector<T>& readrecords, auto& singleOut,
}
}


auto subSeq = seq | seqan3::views::slice(startPosRead - 1, endPosRead);
seqan3::sam_tag_dictionary newTags{};
newTags.get<"XX"_tag>() = startPosSplit;
Expand All @@ -292,8 +305,10 @@ void SplitReadCalling::process(std::vector<T>& readrecords, auto& singleOut,
curated.clear();
segNum = 0;
++splitId;

}
decide(putative, splitsOut, multsplitsOut, splitsOutMutex, multsplitsOutMutex);

}

void SplitReadCalling::decide(std::map<int, std::vector<SAMrecord>>& putative, auto& splitsOut,
Expand Down Expand Up @@ -401,23 +416,28 @@ bool SplitReadCalling::filter(auto& sequence, uint32_t cigarmatch) {
return true;
}

// checks if the detected splice junction is within the (annotated) splice site
bool SplitReadCalling::matchSpliceSites(dtp::Interval& spliceSites, std::optional<uint32_t> refId) {
if(params["splicing"].as<std::bitset<1>>() == std::bitset<1>("1")) {
std::vector<std::pair<Node*,IntervalData*>> ovlps = features.search(this->refIds[refId.value()], spliceSites);
for(int i=0;i<ovlps.size();++i) { // iterate over all overlapping intervals
dtp::SpliceJunctions junctions = ovlps[i].second->getJunctions();
for(auto& [name, junc] : junctions) {
for(int j=1;j<junc.size()-1;++j) {
if(helper::withinRange(spliceSites.first, junc[j-1].second, 5) &&
helper::withinRange(spliceSites.second, junc[j].first, 5)) {
return true;
if(junc.size() > 1) {
for(int j=1;j<junc.size()-1;++j) {
if(helper::withinRange(spliceSites.first, junc[j-1].second, 5) &&
helper::withinRange(spliceSites.second, junc[j].first, 5)) {
return true;
}
}
}
}
}
return false;
} else {
return false;
}

}

void SplitReadCalling::storeSegments(auto& splitrecord, std::optional<int32_t> refPos, dtp::DNASpan& seq,
Expand Down
27 changes: 26 additions & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,27 @@ void showVersion(std::ostream& _str) {
_str << std::endl;
}

void makePathAbs(po::variables_map& params, std::string param, fs::path& configFileDir) {
if(params.count(param)) {
fs::path filePath = fs::path(params[param].as<std::string>());
if(filePath.empty()) { return; } // if file path is empty, do nothing
if(!filePath.is_absolute()) {
fs::path newPath = configFileDir / filePath;
params.erase(param); // remove param from variables_map
// add corrected path to variables_map
params.insert(std::make_pair(param, po::variable_value((configFileDir / filePath).string(), false)));
}
}
}

// correct the paths to absolute paths (if they are relative)
void correctPaths(po::variables_map& params, fs::path& configFileDir) {
std::vector<std::string> paramsToCheck = {"ctrls", "trtms", "outdir", "adpt3", "adpt5", "dbref", "features"};
for(auto& param : paramsToCheck) {
makePathAbs(params, param, configFileDir);
}
}

int main(int argc, char* argv[]) {
try {
std::string readType;
Expand Down Expand Up @@ -155,7 +176,6 @@ int main(int argc, char* argv[]) {
.add(clustering)
.add(output);


// translate all positional options into subcall options
po::positional_options_description p;
p.add("subcall", -1);
Expand Down Expand Up @@ -198,6 +218,11 @@ int main(int argc, char* argv[]) {
notify(params);
}
}

// correct the paths (if they are relative)
fs::path configFileDir = fs::path(configFile).parent_path();
correctPaths(params, configFileDir);

Base base(params);

} catch(po::error& e) {
Expand Down
8 changes: 4 additions & 4 deletions test/humanSE.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ readtype = SE # paired-end (PE) or single-end (SE)
# absolute path of dirs containing the raw reads (additional dir for each library)
trtms = data/human/trtms/ # treatments
ctrls = data/human/ctrls/ # controls
outdir = human-test-outdir # dir
outdir = data/human-test-outdir # dir

threads = 4 # number of threads
quality = 20 # lower limit for the average quality (Phred Quality Score) of the reads
Expand All @@ -18,15 +18,15 @@ modetrm = 1 # mode of the trimming: only 5' (=0) and 3' (=1) or both (=2)
# sequence preceeding 5'-end (N for arbitrary bp) in .fa format
adpt5 =
# sequence succeeding 3'-end (N for arbitrary bp) in fa. format
adpt3 = build/adapters3.fa
adpt3 = ../build/adapters3.fa
wtrim = 0 # on whether (=1) or not (=0) to include window quality trimming
# rate of mismatches allowed when aligning adapters with read sequence
mmrate = 0.1 # e.g., 0.1 on a sequence length of 10 results in
wsize = 3 # window size
minovlps = 10 # minimum overlaps required when merging paired-end reads

### ALIGNMENT (forwarded to segemehl.x)
dbref = GRCh38.primary_assembly.genome_20-22.fa
dbref = ref/GRCh38.primary_assembly.genome_20-22.fa
accuracy = 90 # min percentage of matches per read in semi-global alignment
minfragsco = 15 # min score of a spliced fragment
minfraglen = 15 # min length of a spliced fragment
Expand All @@ -46,7 +46,7 @@ clustdist = 0 # minimum distance between clusters

### ANALYSIS
# specify the annotations of the organism of interest (optional)
features = gencode.v45.annotation.gff3 # GFF3 feature file
features = ref/gencode.v45.annotation.gff3 # GFF3 feature file

# OUTPUT
stats = 1 # produce a statistics of the libraries
Expand Down

0 comments on commit ad340dd

Please sign in to comment.