From 60a160cbc046f9317d3bb4f4dc73f5088cf756ec Mon Sep 17 00:00:00 2001
From: riasc <richard.schaefer@zoho.com>
Date: Mon, 3 Jun 2024 01:01:16 -0500
Subject: [PATCH] bring repo back on track to have all changes included

---
 .github/workflows/docker.yml |   6 +-
 CHANGELOG.md                 |  19 +-
 CMakeLists.txt               |  12 +-
 Dockerfile                   |   4 +-
 README.md                    | 152 ++++++++++++--
 build/params.cfg             |   5 +-
 include/Analysis.hpp         |   7 +-
 include/FilterScores.hpp     |  42 ++++
 include/IBPTree.hpp          |   4 +
 include/Node.hpp             |   1 +
 include/SplitReadCalling.hpp |  22 +-
 src/Align.cpp                |   1 +
 src/Analysis.cpp             | 393 ++++++++++++++++++++++++++++++++++-
 src/Data.cpp                 |   1 -
 src/IBPTree.cpp              |  98 ++++-----
 src/Node.cpp                 |   1 +
 src/SplitReadCalling.cpp     | 187 +++++++++++------
 src/Stats.cpp                |  13 +-
 src/main.cpp                 |   2 +-
 19 files changed, 809 insertions(+), 161 deletions(-)
 create mode 100644 include/FilterScores.hpp

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 320b8584..be5f7a35 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -1,8 +1,8 @@
 name: docker-release
 on:
   push:
-    branches:
-      - "master"
+    tags:
+      - '*'
   pull_request:
     branches:
       - "master"
@@ -41,5 +41,5 @@ jobs:
           context: "{{defaultContext}}"
           platforms: linux/amd64, linux/arm64
           push: true
-          tags: cobirna/rnanue:${{ steps.extract_version.outputs.VERSION }}
+          tags: cobirna/rnanue:${{ steps.extract_version.outputs.VERSION }}, cobirna/rnanue:latest
 
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 10fbdcc3..97a1a91f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,17 +5,26 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+# [0.2.1]
+
+## Features
+
+- code cleanup 
+- fix in segmentation of the preprocessing
+
 # [0.2.0]
 
+## Features
+
+- update to C++20 and SeqAn 3.3.0
+- native support for concurrency 
+
 # [0.1.1]
 
 ## Fix
 
-
 # [0.1.0]
 
-# [0.0.1]
-
-## [0.0.0] 
-
+### Features
+- Initial implementation of RNAnue
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 32acd4b0..f2b2bc18 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.25)
+cmake_minimum_required(VERSION 3.22.1)
 project(RNAnue VERSION 0.2.0)
 set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED True)
@@ -9,7 +9,7 @@ include(CMakePrintHelpers)
 # configure header file to pass the version number to the source code
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/include/Config.hpp.in ${CMAKE_CURRENT_SOURCE_DIR}/include/Config.hpp)
 
-###### Seqan #####
+###### SeqAn #####
 list (APPEND CMAKE_PREFIX_PATH "${CMAKE_CURRENT_SOURCE_DIR}/seqan3/build_system")
 find_package (seqan3 3.0 REQUIRED)
 find_package(OpenMP)
@@ -22,11 +22,19 @@ if(Boost_FOUND)
     include_directories(${Boost_INCLUDE_DIRS})
 endif()
 
+###### HTSlib ######
+find_package(PkgConfig REQUIRED)
+pkg_check_modules(HTSLIB REQUIRED IMPORTED_TARGET htslib)
+include_directories(${HTSLIB_INCLUDE_DIRS})
+
 file(GLOB SOURCES "src/*.cpp")
 add_executable(RNAnue ${SOURCES})
 target_include_directories(RNAnue PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
 target_link_libraries(RNAnue seqan3::seqan3)
 target_link_libraries(RNAnue Boost::program_options Boost::filesystem)
+target_link_libraries(RNAnue PkgConfig::HTSLIB)
+#target_link_libraries(RNAnue -L/usr/local/include -lRNA -flto)
+
 cmake_print_properties(TARGETS RNAnue PROPERTIES TARGET_INCLUDE_DIRECTORIES)
 
 ###### Tests ######
diff --git a/Dockerfile b/Dockerfile
index e49e4913..4ecd85a5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,7 +15,7 @@ LABEL authors="Richard A. Schaefer"
 RUN apt-get -y update && apt-get -y upgrade
 RUN apt-get install -y curl build-essential cmake git pkg-config
 RUN apt-get install -y libbz2-dev zlib1g-dev libncurses5-dev liblzma-dev
-RUN apt-get install -y libboost-all-dev
+RUN apt-get install -y libboost-all-dev gdb
 
 # install htslib
 WORKDIR /
@@ -35,6 +35,8 @@ RUN export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig
 RUN make all
 RUN cp segemehl.x /usr/local/bin
 
+# install ViennaRNA
+
 # retrieve RNAnue
 WORKDIR /
 RUN git clone https://github.com/Ibvt/RNAnue.git
diff --git a/README.md b/README.md
index 45f2992d..f713cb18 100644
--- a/README.md
+++ b/README.md
@@ -9,8 +9,8 @@ RNAnue is a comprehensive analysis to detect RNA-RNA interactions from Direct-Du
 ### Dependencies
 RNAnue has the following dependencies, whereas the brackets indicate the version RNAnue has 
 been build and tested on. Make sure the requirements are satified by your system. cmake is able
-to detect the Boost libraries system-wide. However seqan is expected to be located in the current 
-folder of RNAnue as specified in the CMakeLists.txt. Segemehl and the Vienna binaries need to be
+to detect the Boost libraries system-wide. However Seqan3 is expected to be located in the current 
+folder of RNAnue as specified in the CMakeLists.txt. egemehl and the Vienna binaries need to be
 located in $PATH.
 
 * [Boost C++ Libraries](https://www.boost.org/) (v1.7.2)
@@ -18,8 +18,6 @@ located in $PATH.
 * [Segemehl](http://www.bioinf.uni-leipzig.de/Software/segemehl/) (v0.3.4)
 * [Vienna Package](https://www.tbi.univie.ac.at/RNA/#binary_packages) (v2.4.17)
 
-
-
 ### CMake
 RNAnue is build using CMake. At first, clone the repository and change into the source directory.
 ```
@@ -29,13 +27,6 @@ cd RNAnue
 In the next step, retrieve the SeqAn library and place it in the root folder of RNAnue
 ```
 
-```
-
-
-
-
-
-
 CMake is a cross-platform Makefile generator. For that, we provide the [CMakeLists](./source/CMakeLists.txt) 
 to simplify the build process. In particular, it utilizes the instructions given in the CMakeLists.
 It is recommended to create a "out-of-source build". For that, create a build folder (e.g., ./bin)
@@ -81,7 +72,6 @@ RNAnue <subcall> --config /path/to/params.cfg
 Here, subcall corresponds to positional arguments.In any case, the specifying parameters over the command lines has 
 precedence over the config file.
 
-
 ## Results
 
 In principle, the results of the analysis are stored in the specified output folder and its subfolders
@@ -90,7 +80,7 @@ and the RNA-RNA interactions. RNAnue reports the split reads in SAM format. Addi
 scores and hybridization energies are stored in the tags FC and FE, respectively. We report the clusters in a
 custom format that includes the IDs of the clusters, its length, size and genomic coordinates.
 
-### Split Reads (.SAM)
+### Split Reads (.BAM)
 
 RNAnue reports the detected splits in .SAM format (RNAnue `detect`). In this file, pairs of rows represent the
 split reads, consisting of the individual segments, e.g
@@ -114,9 +104,143 @@ duplex.
 
 ### Clustering results
 
-
+The `clustering` procedure reports a single clusters.tab file which is a tab-delimited file of the clustering results. 
+Here, each line represents a cluster that corresponds to overlapping split reads, defined by the two segments. The 
+columns are defined in the following:
+
+| Field | Description |
+| ----- | ----------- |
+| clustID | Unique identifier of the cluster |
+| fst_seg_chr | Chromosome (accession) of the first segment |
+| fst_seg_strd | Strand where the first segment is located |
+| fst_seg_strt | Start position of the first segment in the cluster |
+| fst_seg_end | End position of the first segment in the cluster |
+| sec_seg_chr | Chromosome (accession) of the second segment |
+| sec_seg_strd | Strand where the second segment is located |
+| sec_seg_strt | Start position of the second segment in the cluster |
+| sec_seg_end | End position of the second segment in the cluster |
+| no_splits | Number of split reads in the cluster |
+| fst_seg_len | Length of the first segment |
+| sec_seg_len | Length of the second segment |
 
 ### Interaction table
+
+The `analysis` procedure generates `_interactions` files for each library in 
+which each line represents an annotated split read that is mapped to a 
+transcript interaction. The fields are defined as follows:
+
+| Field | Description |
+| ----- | ----------- |
+| qname | read/template identifier |
+| fst_seg_strd | Strand where the first segment is located |
+| fst_seg_strt | Start position of the first segment |
+| fst_seg_end | End position of the first segment |
+| fst_seg_ref | Reference name of the first segment corresponding to the seqid in GFF and/or clusterID |
+| fst_seg_name | Name of the first segment that corresponds to gene name/symbol and/or clusterID |
+| first_seg_bt | Biotype of the annotation transcript (if available) |
+| fst_seg_anno_strd | Strand information of the transcript in the overlapping annotation |
+| fst_seg_prod | Description of the transcript (if available in annotation) |
+| fst_seg_ori | Orientation of the segment with respect to annotation (sense/antisense) |
+| sec_seg_strd | Strand where the second segment is located |
+| sec_seg_strt | Start position of the second segment |
+| sec_seg_end | End position of the second segment |
+| sec_seg_ref | Reference name of the second segment corresponding to the seqid in GFF and/or clusterID |
+| sec_seg_name | Name of the second segment that corresponds to gene name/symbol and/or clusterID |
+| sec_seg_bt | Biotype of the annotation transcript (if available) |
+| sec_seg_anno_strd | Strand information of the transcript in the overlapping annotation |
+| sec_seg_prod | Description of the transcript (if available in annotation) |
+| sec_seg_ori | Orientation of the segment with respect to annotation (sense/antisense) |
+| cmpl | Complementarity score of the interaction |
+| fst_seg_compl_aln | Alignment results in the complementarity calculation of the first segment |
+| sec_seg_cmpl_aln | Alignment results in the complementarity calculation of the second segment |
+| mfe | Hybridisation energy of the interaction |
+| mfe_struc | Minimum free energy (MFE) structure of interaction in dot-bracket notation |
+
+The main result of an RNAnue analysis are transcript interactions. 
+They are stored in  the file  `allints.txt` in the same directory. 
+Its entries are structured as described in the following where 
+columns with prefix <sample> are given for each sample specified in 
+the analysis (within the same file).
+
+| Field | Description |
+| ----- | ----------- |
+| fs_rna | Gene/Transcript name of the first interaction partner |
+| sec_rna | Gene/Transcript name of the second interaction partner |
+| fst_rna_ori | Orientation of the first interaction partner with respect to annotation (sense/antisense) |
+| sec_rna_ori | Orientation of the second interaction partner with respect to annotation (sense/antisense) |
+| <sample>_supp_reads | Number of (split)reads that support the interaction |
+| <sample>_ges | Global energy score (gcs) of the interaction |
+| <sample>_ghs | Global hybridisation score (ghs) of the interaction |
+| <sample>_pval | Statistical significance (p-value) of the interaction |
+| <library>_padj | Benjamini-Hochberg adjusted p-value among the samples |
+
+If the option –outcnt is set to 1 RNAnue generates the count table `counts.txt` in the output directory. 
+It contains the counts of each interaction for each sample and  can be used for differential expression 
+analysis. Similarly, –outjgf set to 1 generates a `graph.json` file that contains the detected interactions 
+in JSON graph format. Finally, –stats set to 1 creates a `stats.txt` file that contains basic statistics for 
+each step of the analysis. 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 
 
 ### Docker
diff --git a/build/params.cfg b/build/params.cfg
index 0eae0224..a72ab945 100644
--- a/build/params.cfg
+++ b/build/params.cfg
@@ -31,7 +31,10 @@ accuracy = 90 # min percentage of matches per read in semi-global alignment
 minfragsco = 15 # min score of a spliced fragment 
 minfraglen = 15 # min length of a spliced fragment
 minsplicecov = 80 # min coverage for spliced transcripts
-exclclipping = 0 # exclude soft clipping from 
+mapq = 20 # min mapping quality for spliced transcripts
+exclclipping = 0 # exclude soft clipping from
+unprd = 0 # only for paired-end reads: include unpaired reads in the subsequent analysis
+unmrg = 0 # only for paired-end reads: include unmerged reads in the subsequent analysis
 
 ### SPLIT READ CALLING 
 sitelenratio = 0.0
diff --git a/include/Analysis.hpp b/include/Analysis.hpp
index b3d50834..c3280c8a 100644
--- a/include/Analysis.hpp
+++ b/include/Analysis.hpp
@@ -4,9 +4,14 @@
 // Standard
 #include <iostream>
 
+// Boost
+#include <boost/program_options.hpp>
+
+namespace po = boost::program_options;
+
 class Analysis {
     public:
-        Analysis();
+        Analysis(po::variables_map params);
         ~Analysis();
 
         void start();
diff --git a/include/FilterScores.hpp b/include/FilterScores.hpp
new file mode 100644
index 00000000..648f9fb9
--- /dev/null
+++ b/include/FilterScores.hpp
@@ -0,0 +1,42 @@
+#ifndef RNANUE_FILTERSCORES_HPP
+#define RNANUE_FILTERSCORES_HPP
+
+#include "DataTypes.hpp"
+
+class Complementarity {
+    public:
+        Complementarity();
+        ~Complementarity();
+
+        void compute(dtp::DNAVector seq1, dtp::DNAVector seq2);
+
+    private:
+        double score;
+        int alignmentLength;
+        double siteLengthRatio;
+        int matches;
+};
+
+class Hybridization {
+    public:
+        Hybridization();
+        ~Hybridization();
+
+        void compute(dtp::DNAVector seq1, dtp::DNAVector seq2);
+
+    private:
+        double score;
+        int alignmentLength;
+        double siteLengthRatio;
+        int matches;
+};
+
+
+class FilterScores {
+    public:
+        FilterScores();
+        ~FilterScores();
+
+};
+
+#endif //RNANUE_FILTERSCORES_HPP
diff --git a/include/IBPTree.hpp b/include/IBPTree.hpp
index b6d49641..e60ae540 100644
--- a/include/IBPTree.hpp
+++ b/include/IBPTree.hpp
@@ -34,6 +34,10 @@ class IBPTree {
         void insert(IntervalData& data);
         void insertIter(Node* node, IntervalData& data);
         void splitNode(Node* node, int index);
+        std::vector<IntervalData*> search(std::string chrom, dtp::Interval interval);
+        void searchIter(Node* node, const dtp::Interval& interval, std::vector<IntervalData*> results);
+        bool isOverlapping(dtp::Interval intvl1, dtp::Interval intvl2);
+
 
         std::map<std::string, std::string> getAttributes(std::string& attributes);
         std::string getTag(std::map<std::string, std::string> attributes, const std::vector<std::string>& keys);
diff --git a/include/Node.hpp b/include/Node.hpp
index a4c50454..f2e53c7f 100644
--- a/include/Node.hpp
+++ b/include/Node.hpp
@@ -39,6 +39,7 @@ class IntervalData {
         // operations
         void addJunction(std::string junction);
         bool isSubset(int start, int end);
+        bool isOverlapping(dtp::Interval intvl1, dtp::Interval intvl2);
         void printNode();
 
     private:
diff --git a/include/SplitReadCalling.hpp b/include/SplitReadCalling.hpp
index b6814b93..9c9de585 100644
--- a/include/SplitReadCalling.hpp
+++ b/include/SplitReadCalling.hpp
@@ -38,7 +38,7 @@ using types = seqan3::type_list<
         std::optional<int32_t>,
         std::optional<uint8_t>,
         dtp::CigarVector,
-        dtp::DNAVector,
+        dtp::DNASpan,
         seqan3::sam_tag_dictionary>;
 
 using fields = seqan3::fields<
@@ -51,6 +51,19 @@ using fields = seqan3::fields<
         seqan3::field::seq,
         seqan3::field::tags>;
 
+using types2 = seqan3::type_list<
+        std::vector<seqan3::dna5>,
+        std::string,
+        std::vector<seqan3::cigar>>;
+using fields2 = seqan3::fields<
+        seqan3::field::seq,
+        seqan3::field::id,
+        seqan3::field::cigar>;
+using sam_record_type = seqan3::sam_record<types2, fields2>;
+
+
+
+
 // define tags
 using seqan3::operator""_tag;
 template <> struct seqan3::sam_tag_type<"XH"_tag> { using type = int32_t; }; // number of splits (SAM record)
@@ -63,7 +76,7 @@ template <> struct seqan3::sam_tag_type<"XN"_tag> { using type = int32_t; };
 
 
 
-using SAMrecord = seqan3::record<types, fields>;
+using SAMrecord = seqan3::sam_record<types, fields>;
 using namespace seqan3::literals;
 using seqan3::get;
 
@@ -103,7 +116,9 @@ class SplitReadCalling {
         template <typename T>
         void process(std::vector<T>& readrecords, auto& singleOut, auto& splitsOut, auto& multsplitsOut,
                      auto& singleOutMutex, auto& splitsOutMutex, auto& multsplitsOutMutex);
-        bool filter(auto& sequence, uint32_t cigarmatch);
+        void decide(std::map<int, std::vector<SAMrecord>>& putative, auto& splitsOut, auto& multsplitsOut,
+                    auto& splitsOutMutex, auto& multsplitsOutMutex);
+        bool filter(auto& sequence, uint32_t cigarmatch, std::string chrom, dtp::Interval interval);
         void storeSegments(auto& splitrecord, std::optional<int32_t>& refOffset, dtp::CigarVector& cigar,
                            dtp::DNAVector& seq, seqan3::sam_tag_dictionary &tags, std::vector<SAMrecord>& curated);
 
@@ -113,6 +128,7 @@ class SplitReadCalling {
         //Stats stats;
         std::shared_ptr<Stats> stats;
         std::string condition; // stores the current condition
+        std::deque<std::string> refIds; // stores the reference ids
 };
 
 #endif //RNANUE_DETECT_HPP
diff --git a/src/Align.cpp b/src/Align.cpp
index db9fb9bd..d6c1cde0 100644
--- a/src/Align.cpp
+++ b/src/Align.cpp
@@ -49,6 +49,7 @@ void Align::alignReads(std::string query, std::string mate, std::string matched)
         align += " -p " + mate;
     }
     align += " -o " + matched;
+    std::cout << align << "\n";
     const char* alignCallChar = align.c_str();
     int result = system(alignCallChar);
     if(result != 0) {
diff --git a/src/Analysis.cpp b/src/Analysis.cpp
index 13d9fa6a..f8111559 100644
--- a/src/Analysis.cpp
+++ b/src/Analysis.cpp
@@ -1,5 +1,394 @@
 #include "Analysis.hpp"
 
-Analysis::Analysis() {
-    std::cout << "Analysis object created" << std::endl;
+//
+Analysis::Analysis(po::variables_map _params) : params(_params) {
+    std::string line;
+    std::ifstream anno;
+
+    anno.open(params["features"].as<std::string>());
+    // parse annotations
+    if(!anno.is_open()) {
+        perror("Error open");
+        exit(EXIT_FAILURE);
+    }
+    while(getline(anno, line)) {
+        if(line[0] == '#') { continue; }
+
+        std::vector<std::string> tokens;
+        std::istringstream iss(line);
+        std::string token;
+        while(std::getline(iss, token, '\t')) {
+            tokens.push_back(token);
+        }
+
+        // boundaries
+        std::pair<int,int> bnds = std::make_pair(std::stoi(tokens[3]),std::stoi(tokens[4]));
+        std::pair<std::pair<int,int>,std::string> con = std::make_pair(bnds,"strand="+tokens[6]+";"+tokens[8]);
+
+        std::size_t start_position;
+        if(features.count(tokens[0]) == 0) { // check of RefName is in map
+            if(start_position != std::string::npos) {
+                features.insert(std::pair<std::string, std::vector<std::pair<std::pair<int,int>,std::string>>>(tokens[0],{con}));
+            }
+        } else {
+            features[tokens[0]].push_back(con);
+        }
+    }
+
+    // read file
+    fs::path freqFile = fs::path(params["outdir"].as<std::string>()) / fs::path("frequency.txt");
+    std::ifstream freqHandle;
+    freqHandle.open(freqFile.string());
+
+    // iterate and store into frquency map
+    while(getline(freqHandle, line)) {
+        std::vector<std::string> tokens;
+        std::istringstream iss(line);
+        std::string token;
+        while(std::getline(iss, token, '\t')) {
+            tokens.push_back(token);
+        }
+        frequency.insert(std::pair<std::string,int>(tokens[0],std::stoi(tokens[1])));
+    }
 }
+
+//
+void Analysis::createCountTable() {
+    std::map<std::tuple<std::string,std::string,std::string,std::string>,std::vector<std::tuple<int,std::vector<float>,std::vector<float>>>> counts;
+
+    std::ifstream intsfile;
+    for(unsigned i=0;i<interPaths.size();++i) {
+        intsfile.open(interPaths[i]);
+
+        std::cout << interPaths[i] << std::endl;
+
+        if(!intsfile.is_open()) {
+            perror("Error open");
+            exit(EXIT_FAILURE);
+        }
+
+        std::string line;
+        while(getline(intsfile, line)){  //read data from file object and put it into string.
+            if(line[0] == '#') { continue; }
+
+            std::vector<std::string> tokens;
+            std::istringstream iss(line);
+            std::string token;
+            while(std::getline(iss, token, '\t')) {
+                tokens.push_back(token);
+            }
+
+            std::tuple<std::string,std::string,std::string,std::string> key1;
+            std::tuple<std::string,std::string,std::string,std::string> key2;
+            key1 = std::make_tuple(tokens[5],tokens[8],tokens[13],tokens[16]);
+            key2 = std::make_tuple(tokens[5],tokens[8],tokens[13],tokens[16]);
+
+            //
+            if(counts.count(key1) == 0 && counts.count(key2) == 0) {
+                std::vector<std::tuple<int,std::vector<float>,std::vector<float>>> content;
+
+                for(unsigned j=0; j<interPaths.size();++j) {
+                    std::tuple<int,std::vector<float>,std::vector<float>> vals{0,{},{}};
+                    content.push_back(vals);
+                }
+                std::get<0>(content[i]) = 1;
+                std::vector<float> en = std::get<1>(content[i]);
+                std::vector<float> cm = std::get<2>(content[i]);
+
+                en.push_back(std::stof(tokens[17]));
+                cm.push_back(std::stof(tokens[18]));
+
+                std::get<1>(content[i]) = en;
+                std::get<2>(content[i]) = cm;
+
+                counts.insert(std::pair<std::tuple<std::string,std::string,std::string,std::string>,std::vector<std::tuple<int,std::vector<float>,std::vector<float>>>>(key1,content));
+
+            } else {
+                if(counts.count(key1) > 0) {
+                    std::tuple<int,std::vector<float>,std::vector<float>> oldVal;
+                    oldVal = counts[key1][i];
+                    std::get<0>(oldVal) = std::get<0>(oldVal) + 1;
+
+                    std::vector<float> oldValEnergy = std::get<1>(oldVal);
+                    std::vector<float> oldValCmpl = std::get<2>(oldVal);
+
+                    oldValEnergy.push_back(std::stof(tokens[17]));
+                    oldValCmpl.push_back(std::stof(tokens[18]));
+
+                    std::get<1>(oldVal) = oldValEnergy;
+                    std::get<2>(oldVal) = oldValCmpl;
+
+                    counts[key1][i] = oldVal;
+                } else {
+                    if(counts.count(key2) > 0) {
+                        std::tuple<int,std::vector<float>,std::vector<float>> oldVal;
+                        oldVal = counts[key2][i];
+                        std::get<0>(oldVal) = std::get<0>(oldVal) + 1;
+
+                        std::vector<float> oldValEnergy = std::get<1>(oldVal);
+                        std::vector<float> oldValCmpl = std::get<2>(oldVal);
+
+                        oldValEnergy.push_back(std::stof(tokens[17]));
+                        oldValCmpl.push_back(std::stof(tokens[18]));
+
+                        std::get<1>(oldVal) = oldValEnergy;
+                        std::get<2>(oldVal) = oldValCmpl;
+
+                        counts[key2][i] = oldVal;
+                    }
+                }
+            }
+        }
+    }
+
+
+    // write back to file
+    std::string outfile = params["outdir"].as<std::string>();
+    fs::path outPath = fs::path(outfile) / "allints.txt";
+
+    std::ofstream outFileHandle;
+    outFileHandle.open(outPath.string());
+
+    outFileHandle << "RNA1\tRNA2\tRNA1orientation\tRNA2orientation\t";
+
+    for(unsigned i=0;i<interPaths.size();i++) {
+        outFileHandle << fs::path(interPaths[i]).stem().string() << "_counts\t";
+        outFileHandle << fs::path(interPaths[i]).stem().string() << "_ges\t";
+        outFileHandle << fs::path(interPaths[i]).stem().string() << "_gcs\t";
+    }
+    outFileHandle << "\n";
+
+    for(auto it=counts.begin(); it!=counts.end(); ++it) {
+        outFileHandle << std::get<0>(it->first) << "\t";
+        outFileHandle << std::get<2>(it->first) << "\t";
+        outFileHandle << std::get<1>(it->first) << "\t";
+        outFileHandle << std::get<3>(it->first) << "\t";
+
+        //
+        for(unsigned z=0;z<it->second.size();++z) {
+            outFileHandle << std::get<0>(it->second[z]) << "\t";
+
+            std::vector<float> vecNrg = std::get<1>(it->second[z]);
+            std::vector<float> vecCpl = std::get<2>(it->second[z]);
+
+            // determine ges
+            float ges = 0.0;
+            if(vecNrg.size() == 1) {
+                ges = vecNrg[0];
+            } else {
+                if(vecNrg.size() > 1) {
+                    auto m = vecNrg.begin() + vecNrg.size()/2;
+                    std::nth_element(vecNrg.begin(), m, vecNrg.end());
+                    double min = *min_element(vecNrg.begin(), vecNrg.end());
+                    ges = vecNrg[vecNrg.size()/2];
+                }
+            }
+            outFileHandle << ges << "\t";
+
+            // determine gcs
+            float gcs = 0.0;
+            if(vecCpl.size() == 1) {
+                gcs = vecCpl[0];
+            } else {
+                if(vecCpl.size() > 1) {
+                    auto m = vecCpl.begin() + vecCpl.size()/2;
+                    std::nth_element(vecCpl.begin(), m, vecCpl.end());
+                    double min = *min_element(vecCpl.begin(), vecCpl.end());
+                    gcs = vecCpl[vecCpl.size()/2];
+                }
+            }
+            outFileHandle << gcs << "\t";
+        }
+        outFileHandle << "\n";
+    }
+    outFileHandle.close();
+}
+
+
+
+//
+std::string Analysis::retrieveTagValue(std::string tags, std::string tagName, std::string oldValue) {
+    std::size_t start_position = tags.find(tagName+"=");
+    // gene name
+    if(start_position != std::string::npos) {
+        std::string sub = tags.substr(start_position+tagName.size()+1,tags.length());
+        std::size_t end_position = sub.find(";");
+        oldValue = sub.substr(0,end_position);
+    }
+    return oldValue;
+}
+
+
+float Analysis::calc_pvalue(int x, int n, float p) {
+    // simulate draw from binomial distribution
+    std::binomial_distribution<int> binomial(n, p);
+    // assign p value
+    float pval = 1.0 - cdf(binomial, x);
+    return pval;
+}
+
+void Analysis::start(pt::ptree sample) {
+    // retrieve input and output files
+    pt::ptree input = sample.get_child("input");
+    std::string splits = input.get<std::string>("splits");
+    pt::ptree output = sample.get_child("output");
+    std::string interactions = output.get<std::string>("interactions");
+
+
+    interPaths.push_back(interactions);
+
+    // input .sam record
+    seqan3::alignment_file_input fin{splits,
+                                     seqan3::fields<seqan3::field::id,
+                                     seqan3::field::flag,
+                                     seqan3::field::ref_id,
+                                     seqan3::field::ref_offset,
+                                     seqan3::field::seq,
+                                     seqan3::field::tags>{}};
+
+    std::vector<seqan3::sam_flag> flags;
+    std::vector<std::string> refIDs;
+    std::vector<std::optional<int32_t>> refOffsets;
+
+    std::vector<size_t> ref_lengths{};
+    for(auto &info : fin.header().ref_id_info) {
+        ref_lengths.push_back(std::get<0>(info));
+    }
+    std::deque<std::string> ref_ids = fin.header().ref_ids();
+
+//    uint32_t flag, start, end;
+
+    // open file
+    std::ofstream outInts;
+    outInts.open(interactions);
+
+    std::string entry; // stores output to write to file
+
+    // variables for interactions file
+    std::string qNAME, flag;
+    uint32_t start, end;
+    std::string geneID, geneName, product, annoStrand;
+    float hybnrg, cmpl;
+
+    seqan3::sam_tag_dictionary tags;
+
+    outInts << "#QNAME\tSegment1Strand\tSegment1Start\tSegment1End\tSegment1RefName"
+    outInts << "\tSegment1Name\tSegment1AnnoStrand\tSegment1Product\tSegment1Orientation"
+    outInts << "\tSegment2Strand\tSegment2Start\tSegment2End\tSegment2RefName\tSegment2Name"
+    outInts << "\tSegment2AnnoStrand\tSegment2Product\tSegment1Orientation\tenergy\tcomplementarity\n";
+
+    int segCnt = 0;
+    int segCntMatch = 0;
+    int segFound = 0;
+
+    for(auto && rec : fin | seqan3::views::chunk(2)) {
+        entry = "";
+        hybnrg = 0.0;
+        cmpl = 0.0;
+        for(auto & split : rec) {
+            //           seqan3::debug_stream << split << std::endl;
+
+            qNAME = seqan3::get<seqan3::field::id>(split);
+            flag = "+";
+            if(static_cast<bool>(seqan3::get<seqan3::field::flag>(split)
+                                 & seqan3::sam_flag::on_reverse_strand)) {
+                flag = "-";
+            }
+
+            // start & end
+            start = seqan3::get<seqan3::field::ref_offset>(split).value();
+            end = start + seqan3::get<seqan3::field::seq>(split).size()-1;
+
+            // refID
+            std::optional<int32_t> refIDidx = seqan3::get<seqan3::field::ref_id>(split);
+            std::string refID = ref_ids[refIDidx.value()];
+
+            tags = seqan3::get<seqan3::field::tags>(split);
+            auto nrg = tags["XE"_tag];
+            auto cpl = tags["XC"_tag];
+            hybnrg = std::get<float>(nrg);
+            cmpl = std::get<float>(cpl);
+
+            std::vector<std::pair<std::pair<int,int>,std::string>> feat;
+            if(features.count(refID) > 0) {
+                feat = features[refID];
+
+                std::pair<std::string, std::string> geneNames;
+                geneID = ".";
+                geneName = ".";
+                product = ".";
+                annoStrand = ".";
+                for(unsigned i=0;i<feat.size();++i) {
+                    if((start >= feat[i].first.first && start <= feat[i].first.second) ||
+                       (end >= feat[i].first.first && end <= feat[i].first.second)) {
+
+                        // make sure that overlap is in exon
+                        if(params["splicing"].as<std::bitset<1>() && feat[i].second.find("exon") == std::string::npos) {
+                            continue;
+                        }
+                        geneID = retrieveTagValue(feat[i].second, "ID", geneName);
+                        geneName = retrieveTagValue(feat[i].second, "gene", geneName);
+                        product = retrieveTagValue(feat[i].second, "product", product);
+                        annoStrand = retrieveTagValue(feat[i].second, "strand", annoStrand);
+
+                        segFound = 1; // found a match (in annotation) for segment
+                    }
+                }
+
+                // search for geneNames in frequency map
+                geneNames = std::make_pair(retrieveTagValue(feat[0].second, "gene", geneName), retrieveTagValue(feat[1].second, "gene", geneName));
+
+                float value;
+                // check if geneNames first in frequency map
+                if(frequency.count(geneNames.first) > 0 && frequency.count(geneNames.second) > 0) {
+                    if (geneNames.first == geneNames.second) {
+                        value = frequency[geneNames.first] * frequency[geneNames.second];
+                    } else {
+                        value = 2 * (frequency[geneNames.first] * frequency[geneNames.second]);
+                    }
+                } else{
+                    value = 0;
+                }
+
+                // calculate p-value
+                int draws = frequency[geneNames.first] + frequency[geneNames.second];
+                float pval = calc_pvalue(segCntMatch, draws, value);
+
+
+                if(segFound == 1) {
+                    if(segCnt == 0) { entry += qNAME + "\t";}
+                    entry += flag + "\t";
+                    entry += std::to_string(start) + "\t";
+                    entry += std::to_string(end) + "\t";
+                    entry += refID + "\t";
+                    entry += geneName + "\t";
+                    entry += annoStrand + "\t";
+                    entry += "\"" + product  + "\"\t";
+
+                    if(flag == annoStrand) {
+                        entry += "sense\t";
+                    } else {
+                        entry += "antisense\t";
+                    }
+
+                    entry += pval + "\t";
+
+                    segCntMatch++;
+                    segFound = 0;
+                }
+            }
+            segCnt++; // on to the next segment
+        }
+
+        if(segCntMatch == 2) {
+            outInts << entry << hybnrg << "\t" << cmpl << "\n";
+        }
+
+        segCnt = 0; // reset segment
+        segCntMatch = 0;
+    }
+
+    outInts.close();
+}
+
+
diff --git a/src/Data.cpp b/src/Data.cpp
index bf28e92f..7aa705e4 100644
--- a/src/Data.cpp
+++ b/src/Data.cpp
@@ -57,7 +57,6 @@ void Data::alignDataPrep() {
             * (ctrls -> "/Users/.../crtls")
             */
             GroupsPath groups = getGroupsPath(ctrlsPath, trtmsPath);
-
             getCondition(groups);
         }
     }
diff --git a/src/IBPTree.cpp b/src/IBPTree.cpp
index b59c9449..2067d1da 100644
--- a/src/IBPTree.cpp
+++ b/src/IBPTree.cpp
@@ -201,6 +201,50 @@ void IBPTree::splitNode(Node* parent, int index) {
     }
 }
 
+std::vector<IntervalData*> IBPTree::search(std::string chrom, dtp::Interval interval) {
+    Node* root = this->rootnodes[chrom]; // search for the root node
+    std::vector<IntervalData*> result;
+    searchIter(root, interval, result);
+}
+
+void IBPTree::searchIter(Node* node, const dtp::Interval& interval, std::vector<IntervalData*> results) {
+    if(node->isLeaf) {
+        Node* current = node;
+        for(auto& intvl : current->keys) {
+            if(isOverlapping(intvl.first, interval)) {
+                results.push_back(intvl.second);
+            }
+            if(!current->next || !isOverlapping(current->next->keys[0].first, interval)) {
+                break;
+            }
+            node = node->next;
+        }
+    } else {
+        int i = 0;
+        while(i < node->keys.size() &&
+                (interval.first > node->keys[i].first.first &&
+                        (interval.first > node->keys[i].first.second))){
+            i++;
+        }
+        if(node->children[i] != nullptr) {
+            searchIter(node->children[i], interval, results);
+        }
+    }
+}
+
+bool IBPTree::isOverlapping(dtp::Interval intvl1, dtp::Interval intvl2) {
+    if((intvl1.first <= intvl2.first && intvl1.second >= intvl2.second) ||
+       (intvl1.first >= intvl2.first && intvl1.second <= intvl2.second) ||
+       (intvl1.first <= intvl2.first && intvl1.second > intvl2.first) ||
+       (intvl1.first > intvl2.first && intvl1.first < intvl2.second)) {
+        return true;
+    }
+}
+
+
+
+
+
 // get attribute from the attributes fields
 std::string IBPTree::getTag(std::map<std::string, std::string> attributes, const std::vector<std::string>& keys) {
     std::string element = "";
@@ -249,57 +293,3 @@ void IBPTree::traverse(Node* parent, Node* child, int link, std::ostream& ofs) c
         traverse(child, child->children[i], i, ofs);
     }
 }
-
-/*
-// Function to search for intervals overlapping with the given interval
-std::vector<Interval> IBPTree::search(const Interval& interval) {
-    std::vector<Interval> result;
-    if (root != nullptr)
-        searchHelper(root, interval, result);
-    return result;
-}
-
-// Helper function for searching intervals
-void IBPTree::searchHelper(Node* node, const Interval& interval, std::vector<Interval>& result) {
-    if (node->isLeaf()) {
-        LeafNode* leaf = dynamic_cast<LeafNode*>(node);
-        for (const Interval& key : leaf->keys) {
-            if (interval.low <= key.high && interval.high >= key.low)
-                result.push_back(key);
-        }
-    } else {
-        InternalNode* internal = dynamic_cast<InternalNode*>(node);
-        int i = 0;
-        while (i < internal->keys.size() && interval.low > internal->keys[i])
-            i++;
-        searchHelper(internal->children[i], interval, result);
-    }
-}
-
-
-
-*/
-/*
-if(root->getLeaf()) { // root is a leaf node
-    root->addInterval(data);
-    if(root->getKeys().size() > this->order-1) {
-        splitNode(root);
-    }
-} else {
-    dtp::Interval interval = std::make_pair(5300,5400);
-    Node* child = traverse(interval, root);
-    if(child == nullptr) {
-        std::cout << "nullptr" << std::endl;
-    } else {
-        std::cout << "new Child: " << child->keysToString() << std::endl;
-    }
-
-*/
-
-
-
-
-/*
-std::cout << "Child: " << child->keysToString() << std::endl;
- */
-
diff --git a/src/Node.cpp b/src/Node.cpp
index 6e9227f6..77b7bf91 100644
--- a/src/Node.cpp
+++ b/src/Node.cpp
@@ -35,6 +35,7 @@ bool IntervalData::isSubset(int start, int end) {
     return (start >= this->interval.first && end <= this->interval.second);
 }
 
+
 // create new node
 Node::Node(int k) : order{k}, keys{}, children{}, next{nullptr}, parent{nullptr}, isLeaf{false} {}
 
diff --git a/src/SplitReadCalling.cpp b/src/SplitReadCalling.cpp
index d46a5fa7..0e4f5c23 100644
--- a/src/SplitReadCalling.cpp
+++ b/src/SplitReadCalling.cpp
@@ -19,7 +19,6 @@ ThreadPool::ThreadPool(size_t num_threads) : stop(false) {
     }
 }
 
-
 ThreadPool::~ThreadPool() {
     {
         std::unique_lock<std::mutex> lock(queue_mutex);
@@ -47,13 +46,14 @@ auto ThreadPool::enqueue(F&& f, Args&&... args) -> std::future<typename std::inv
 SplitReadCalling::SplitReadCalling(po::variables_map params) {
     this->params = params;
     // create IBPTree (if splicing need to be considered)
-    if(params["splicing"].as<std::bitset<1>>() ==  std::bitset<1>("1")) {
+    if(params["splicing"].as<std::bitset<1>>() == std::bitset<1>("1")) {
         // order of the IBPTree
         int k = 7; // can be later extracted from config file
         // create IBPT
         this->features = IBPTree(params, k);
     }
     this->condition = ""; // stores the current condition
+    this->refIds = std::deque<std::string>(); // stores the reference ids
 
     // initialize stats
     this->stats = std::make_shared<Stats>();
@@ -61,7 +61,12 @@ SplitReadCalling::SplitReadCalling(po::variables_map params) {
     const char* seq = "GGGAAAUCC";
 }
 
-SplitReadCalling::~SplitReadCalling() {}
+SplitReadCalling::~SplitReadCalling() {
+    if(params["stats"].as<std::bitset<1>>() == std::bitset<1>("1")) {
+        fs::path outdir = fs::path(params["outdir"].as<std::string>());
+        stats->writeStats(outdir);
+    }
+}
 
 void SplitReadCalling::iterate(std::string& matched, std::string& single, std::string &splits, std::string &multsplits) {
     ThreadPool pool(params["threads"].as<int>()); // initialize thread pool
@@ -71,10 +76,11 @@ void SplitReadCalling::iterate(std::string& matched, std::string& single, std::s
     for(auto &info : fin.header().ref_id_info) {
         ref_lengths.push_back(std::get<0>(info));
     }
-    std::deque<std::string> ref_ids = fin.header().ref_ids();
-    seqan3::sam_file_output singleOut{single, ref_ids, ref_lengths}; // initialize output file
-    seqan3::sam_file_output splitsOut{splits, ref_ids, ref_lengths}; // initialize output file
-    seqan3::sam_file_output multsplitsOut{multsplits, ref_ids, ref_lengths}; // initialize output file
+    this->refIds = fin.header().ref_ids();
+
+    seqan3::sam_file_output singleOut{single, this->refIds, ref_lengths}; // initialize output file
+    seqan3::sam_file_output splitsOut{splits, this->refIds, ref_lengths}; // initialize output file
+    seqan3::sam_file_output multsplitsOut{multsplits, this->refIds, ref_lengths}; // initialize output file
 
     // create mutex objects for output - had to be done here as class variables cause errors w/ std::bind
     std::mutex singleOutMutex;
@@ -145,7 +151,7 @@ void SplitReadCalling::process(std::vector<T>& readrecords, auto& singleOut, aut
 
     // stores the putative splits/segments
     std::vector<SAMrecord> curated;
-    std::map<int, std::vector<SAMrecord>> segments;
+    std::map<int, std::vector<SAMrecord>> putative{};
 
     for(auto& rec : readrecords) {
         qname = rec.id();
@@ -153,6 +159,7 @@ void SplitReadCalling::process(std::vector<T>& readrecords, auto& singleOut, aut
         ref_id = rec.reference_id();
         ref_offset = rec.reference_position();
         mapq = rec.mapping_quality();
+        seq = rec.sequence();
 
         // CIGAR string
         cigar = rec.cigar_sequence();
@@ -164,17 +171,18 @@ void SplitReadCalling::process(std::vector<T>& readrecords, auto& singleOut, aut
         auto xjtag = tags.get<"XJ"_tag>();
         auto xhtag = tags.get<"XH"_tag>();
 
-        seqan3::debug_stream << "QNAME: " << qname << "\n";
-        seqan3::debug_stream << "FLAG: " << flag << "\n";
-
-        if(xjtag < 2) { continue; } // ignore reads with less than 2 splits
+        if(xjtag < 2 ) { // ignore reads with less than 2 splits (or no splits at all)
+            {
+                std::lock_guard<std::mutex> lock(singleOutMutex);
+                singleOut.push_back(rec);
+            }
+            continue;
+        }
         startPosRead = 1; // initialize start/end of read
         endPosRead = 0;
         startPosSplit = xxtag;
         endPosSplit = xytag;
 
-        seqan3::debug_stream << cigar << std::endl;
-
         cigarMatch = 0; // reset matches count in cigar
         // check the cigar string for splits
         for(auto& cig: cigar) {
@@ -190,15 +198,11 @@ void SplitReadCalling::process(std::vector<T>& readrecords, auto& singleOut, aut
                 newTags.get<"XY"_tag>() = endPosSplit;
                 newTags.get<"XN"_tag>() = splitId;
 
-                // filter segments
-                if(filter(subSeq, cigarMatch)) {
+                // filter & store the segments
+                dtp::Interval intvl = {ref_offset.value(), ref_offset.value() + endPosRead};
+                if(filter(subSeq, cigarMatch, this->refIds[ref_id.value()], intvl)) {
                     storeSegments(rec, ref_offset, cigarSplit, seq, tags, curated);
                 }
-
-
-
-
-
                 // settings to prepare for the next split
                 startPosSplit = endPosSplit+1;
                 startPosRead = endPosRead+1;
@@ -232,49 +236,102 @@ void SplitReadCalling::process(std::vector<T>& readrecords, auto& singleOut, aut
         newTags.get<"XY"_tag>() = endPosSplit;
         newTags.get<"XN"_tag>() = splitId;
 
-        // filter segements
-
-        if(segNum) {
-
+        // filter and store the segments
+        dtp::Interval intvl = {ref_offset.value(), ref_offset.value() + endPosRead};
+        if(filter(subSeq, cigarMatch, this->refIds[ref_id.value()], intvl)) {
+            storeSegments(rec, ref_offset, cigarSplit, seq, tags, curated);
         }
+        if(segNum == xjtag) {
+            putative.insert(std::make_pair(splitId, curated));
+            curated.clear();
+            segNum = 0;
+            ++splitId;
+        }
+    }
+    decide(putative, splitsOut, multsplitsOut, splitsOutMutex, multsplitsOutMutex);
+}
 
-
-        /*
-        std::string recid = rec.id();
-        dtp::DNAVector recseq = rec.sequence();
-        dtp::CigarVector reccigar = rec.cigar_sequence();
-
-        SAMRec outrec{recid, recseq, reccigar};*/
-
-
-
-
-
-        // single output
-        /*
-        {
-            std::lock_guard<std::mutex> lock(singleOutMutex);
-            singleOut.push_back(outrec);
-        }*/
-        /*
-        // splits output
-        {
-            std::lock_guard<std::mutex> lock(splitsOutMutex);
-            splitsOut.push_back(outrec);
+void SplitReadCalling::decide(std::map<int, std::vector<SAMrecord>>& putative, auto& splitsOut, auto& multsplitsOut,
+            auto& splitsOutMutex, auto& multsplitsOutMutex) {
+    std::vector<SAMrecord> p1, p2;
+    seqan3::sam_tag_dictionary p1Tags, p2Tags;
+    std::pair<double, double> scores;
+    std::string p1QNAME, p2QNAME;
+
+    if(putative.size() > 0) {
+        for(auto& [splitId, records] : putative) {
+            if(records.size() > 1) { // splits contain more than one segment
+                for(unsigned i=0;i<records.size();++i) {
+                    for(unsigned j=i+1;j<records.size();++j) {
+                        p1Tags = records[i].tags();
+                        p2Tags = records[j].tags();
+
+                        auto p1Start = p1Tags.get<"XX"_tag>();
+                        auto p1End = p1Tags.get<"XY"_tag>();
+                        auto p2Start = p2Tags.get<"XX"_tag>();
+                        auto p2End = p2Tags.get<"XY"_tag>();
+
+                        // prevent overlap between read position -> same segment of read
+                        if(p1.empty() && p2.empty()) {
+                            TracebackResult initCmpl = complementarity(seqan3::get<seqan3::field::seq>(splits[i]), seqan3::get<seqan3::field::seq>(splits[j]));
+                            double initHyb = hybridize(seqan3::get<seqan3::field::seq>(splits[i]), seqan3::get<seqan3::field::seq>(splits[j]));
+                            if(!initCmpl.a.empty()) { // split read survived complementarity & sitelenratio cutoff
+                                if(initHyb <= params["nrgmax"].as<double>()) {
+                                    filters = std::make_pair(initCmpl.cmpl, initHyb);
+                                    addComplementarityToSamRecord(splits[i], splits[j], initCmpl);
+                                    addHybEnergyToSamRecord(splits[i], splits[j], initHyb);
+                                    splitSegments.push_back(std::make_pair(splits[i],splits[j]));
+                                }
+                            }
+                        } else {
+                            TracebackResult cmpl = complementarity(seqan3::get<seqan3::field::seq>(splits[i]), seqan3::get<seqan3::field::seq>(splits[j]));
+                            double hyb = hybridize(seqan3::get<seqan3::field::seq>(splits[i]), seqan3::get<seqan3::field::seq>(splits[j]));
+
+                            if(!cmpl.a.empty()) { // check that there is data for complementarity / passed cutoffs
+                                if(cmpl.cmpl > filters.first) { // complementarity is higher
+                                    splitSegments.clear();
+                                    filters = std::make_pair(cmpl.cmpl, hyb);
+                                    addComplementarityToSamRecord(splits[i], splits[j], cmpl);
+                                    addHybEnergyToSamRecord(splits[i], splits[j], hyb);
+                                    splitSegments.push_back(std::make_pair(splits[i],splits[j]));
+                                } else{
+                                    if(cmpl.cmpl == filters.first) {
+                                        if(hyb > filters.second) {
+                                            splitSegments.clear();
+                                            filters = std::make_pair(cmpl.cmpl, hyb);
+                                            addComplementarityToSamRecord(splits[i], splits[j], cmpl);
+                                            addHybEnergyToSamRecord(splits[i], splits[j], hyb);
+                                            splitSegments.push_back(std::make_pair(splits[i],splits[j]));
+                                        }
+                                    } else {
+                                        if(hyb == filters.second) { // same cmpl & hyb -> ambigious read!
+                                            addComplementarityToSamRecord(splits[i], splits[j], cmpl);
+                                            addHybEnergyToSamRecord(splits[i], splits[j], hyb);
+                                            splitSegments.push_back(std::make_pair(splits[i],splits[j]));
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
         }
-        // multsplits output
-        {
-            std::lock_guard<std::mutex> lock(multsplitsOutMutex);
-            multsplitsOut.push_back(outrec);
-        }*/
+
     }
 }
 
-bool SplitReadCalling::filter(auto& sequence, uint32_t cigarmatch) {
-    // filter segments
+bool SplitReadCalling::filter(auto& sequence, uint32_t cigarmatch, std::string chrom, dtp::Interval intvl) {
+    //seqan3::debug_stream << sequence << std::endl;
     if(sequence.size() < params["minlen"].as<int>()) { return false; }
     double matchrate = static_cast<double>(cigarmatch) / static_cast<double>(sequence.size());
     if(matchrate < params["minfragmatches"].as<double>()) { return false; }
+    if(params["splicing"].as<std::bitset<1>>() == std::bitset<1>("1")) {
+        std::cout << "check for interval :" << intvl.first << " " << intvl.second << std::endl;
+        // check if the segment is spliced
+        //std::vector<IntervalData*> ovlps = features.search(chrom, intvl);
+        //std::cout << "overlap " << ovlps.size() << std::endl;
+    }
     return true;
 }
 
@@ -282,20 +339,12 @@ void SplitReadCalling::storeSegments(auto& splitrecord, std::optional<int32_t>&
                                      dtp::CigarVector& cigar, dtp::DNAVector& seq,
                                      seqan3::sam_tag_dictionary& tags, std::vector<SAMrecord>& curated) {
     SAMrecord segment{}; // create new SAM Record
-
-    using types = seqan3::type_list<std::vector<seqan3::dna5>, std::string, std::vector<seqan3::cigar>>;
-    using fields = seqan3::fields<seqan3::field::seq, seqan3::field::id, seqan3::field::cigar>;
-    using sam_record_type = seqan3::sam_record<types, fields>;
-
-
-    // write the following to the file
-    // r001 0   *   0   0   4M2I2M2D    *   0   0   ACGTACGT    *
-    sam_record_type record{};
-    record.id() = "r001";
-    record.sequence() = "ACGTACGT"_dna5;
-
-    seqan3::debug_stream << splitrecord.id() << std::endl;
-
+    segment.id() = splitrecord.id();
+    seqan3::sam_flag flag{0};
+    if(static_cast<bool>(splitrecord.flag() & seqan3::sam_flag::on_reverse_strand)) {
+        flag |= seqan3::sam_flag::on_reverse_strand;
+    }
+    segment.reference_id() = splitrecord.reference_id();
 }
 
 
diff --git a/src/Stats.cpp b/src/Stats.cpp
index da4f74c4..fb48de49 100644
--- a/src/Stats.cpp
+++ b/src/Stats.cpp
@@ -26,9 +26,14 @@ void Stats::setMultSplitsCount(std::string condition, int increment) {
         stats[condition].multSplitsCount += increment;
     }
 }
-void Stats::setNSurvivedCount(std::string condition, int nsurvivedcount) {
-    {
-        std::lock_guard<std::mutex> lock(statsMutex);
-        stats[condition].nSurvivedCount = nsurvivedcount;
+
+void Stats::writeStats(fs::path outdir) {
+    fs::path statsFile = outdir / "stats.tsv";
+    std::ofstream statsStream(statsFile);
+    statsStream << "Condition\tReads\tAligned\tSplits\tMultSplits\n";
+    for (const auto& [condition, stat] : stats) {
+        statsStream << condition << "\t" << stat.readsCount << "\t";
+        statsStream << stat.alignedCount << "\t" << stat.splitsCount << "\t";
+        statsStream << stat.multSplitsCount << "\n";
     }
 }
\ No newline at end of file
diff --git a/src/main.cpp b/src/main.cpp
index fa59d46b..2f35b4eb 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -85,7 +85,7 @@ int main(int argc, char* argv[]) {
                  "minimum score of a spliced fragment")
                 ("minfraglen", po::value<int>()->default_value(20),
                  "minimum length of a spliced fragment")
-                ("minfragmatches", po::value<double>()->default_value(0.7),
+                ("minfragmatches", po::value<double>()->default_value(0.5),
                         "minimum percentage matches of a spliced fragment")
                 ("minsplicecov", po::value<int>()->default_value(80),
                         "minimum coverage for spliced transcripts")