From 8c0cc0213a63f34374138a550e6bc09a9ade4491 Mon Sep 17 00:00:00 2001 From: riasc Date: Mon, 3 Mar 2025 15:06:10 -0600 Subject: [PATCH] changes to data structure --- CMakeLists.txt | 7 +++++-- include/Utility.hpp | 2 ++ src/Data.cpp | 3 ++- src/main.cpp | 27 ++++++++++++++++++++++++++- test/humanSE.cfg | 6 +++--- 5 files changed, 38 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e33a2dd6..4a6bf5bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.22.1) -project(RNAnue VERSION 0.2.3) +project(RNAnue VERSION 0.2.4) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED True) set(CMAKE_CXX_FLAGS -fopenmp) @@ -11,7 +11,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/include/Config.hpp.in ${CMAKE_CURRENT ###### SeqAn ##### list (APPEND CMAKE_PREFIX_PATH "${CMAKE_CURRENT_SOURCE_DIR}/seqan3/build_system") -find_package (seqan3 3.0 REQUIRED) +find_package (seqan3 3.3.0 REQUIRED) find_package(OpenMP) ###### Boost ###### @@ -38,6 +38,9 @@ target_link_libraries(RNAnue PkgConfig::HTSLIB) cmake_print_properties(TARGETS RNAnue PROPERTIES TARGET_INCLUDE_DIRECTORIES) ###### Tests ###### +# make tests optional +option(BUILD_TESTS "Building unit tests" OFF) + file(GLOB TEST_SOURCES "test/*.cpp") file(GLOB SOURCES "src/*.cpp") list(REMOVE_ITEM SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp) diff --git a/include/Utility.hpp b/include/Utility.hpp index f01f07d3..60002832 100644 --- a/include/Utility.hpp +++ b/include/Utility.hpp @@ -6,6 +6,7 @@ #include #include #include +#include // Boost @@ -18,6 +19,7 @@ #include "DataTypes.hpp" namespace fs = boost::filesystem; +namespace stdfs = std::filesystem; // filesystem manipulation namespace helper { diff --git a/src/Data.cpp b/src/Data.cpp index 5009ef7a..0ed345a6 100644 --- a/src/Data.cpp +++ b/src/Data.cpp @@ -1,6 +1,7 @@ #include "Data.hpp" Data::Data(po::variables_map params) : params(params) { + std::cout << helper::getTime() << "Data object created\n"; std::string subcall = params["subcall"].as(); fs::path outDir = fs::path(params["outdir"].as()); @@ -67,7 +68,7 @@ void Data::detectDataPrep() { } void Data::clusteringDataPrep() { -fs::path ctrlsPath = fs::path(params["outdir"].as()) / "detect/ctrls"; + fs::path ctrlsPath = fs::path(params["outdir"].as()) / "detect/ctrls"; fs::path trtmsPath = fs::path(params["outdir"].as()) / "detect/trtms"; GroupsPath groups = getGroupsPath(ctrlsPath, trtmsPath); diff --git a/src/main.cpp b/src/main.cpp index 6a053ec5..b972a239 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -25,6 +25,27 @@ void showVersion(std::ostream& _str) { _str << std::endl; } +void makePathAbs(po::variables_map& params, std::string param, fs::path& configFileDir) { + if(params.count(param)) { + fs::path filePath = fs::path(params[param].as()); + if(filePath.empty()) { return; } // if file path is empty, do nothing + if(!filePath.is_absolute()) { + fs::path newPath = configFileDir / filePath; + params.erase(param); // remove param from variables_map + // add corrected path to variables_map + params.insert(std::make_pair(param, po::variable_value((configFileDir / filePath).string(), false))); + } + } +} + +// correct the paths to absolute paths (if they are relative) +void correctPaths(po::variables_map& params, fs::path& configFileDir) { + std::vector paramsToCheck = {"ctrls", "trtms", "outdir", "adpt3", "adpt5", "dbref", "features"}; + for(auto& param : paramsToCheck) { + makePathAbs(params, param, configFileDir); + } +} + int main(int argc, char* argv[]) { try { std::string readType; @@ -155,7 +176,6 @@ int main(int argc, char* argv[]) { .add(clustering) .add(output); - // translate all positional options into subcall options po::positional_options_description p; p.add("subcall", -1); @@ -198,6 +218,11 @@ int main(int argc, char* argv[]) { notify(params); } } + + // correct the paths (if they are relative) + fs::path configFileDir = fs::path(configFile).parent_path(); + correctPaths(params, configFileDir); + Base base(params); } catch(po::error& e) { diff --git a/test/humanSE.cfg b/test/humanSE.cfg index 009c4c9c..a4ff7db9 100755 --- a/test/humanSE.cfg +++ b/test/humanSE.cfg @@ -4,7 +4,7 @@ readtype = SE # paired-end (PE) or single-end (SE) # absolute path of dirs containing the raw reads (additional dir for each library) trtms = data/human/trtms/ # treatments ctrls = data/human/ctrls/ # controls -outdir = human-test-outdir # dir +outdir = data/human-test-outdir # dir threads = 4 # number of threads quality = 20 # lower limit for the average quality (Phred Quality Score) of the reads @@ -18,7 +18,7 @@ modetrm = 1 # mode of the trimming: only 5' (=0) and 3' (=1) or both (=2) # sequence preceeding 5'-end (N for arbitrary bp) in .fa format adpt5 = # sequence succeeding 3'-end (N for arbitrary bp) in fa. format -adpt3 = build/adapters3.fa +adpt3 = ../build/adapters3.fa wtrim = 0 # on whether (=1) or not (=0) to include window quality trimming # rate of mismatches allowed when aligning adapters with read sequence mmrate = 0.1 # e.g., 0.1 on a sequence length of 10 results in @@ -26,7 +26,7 @@ wsize = 3 # window size minovlps = 10 # minimum overlaps required when merging paired-end reads ### ALIGNMENT (forwarded to segemehl.x) -dbref = GRCh38.primary_assembly.genome_20-22.fa +dbref = ref/GRCh38.primary_assembly.genome_20-22.fa accuracy = 90 # min percentage of matches per read in semi-global alignment minfragsco = 15 # min score of a spliced fragment minfraglen = 15 # min length of a spliced fragment