Skip to content

Commit

Permalink
Oct 18, 2024: Stable release v0.0.1
Browse files Browse the repository at this point in the history
  • Loading branch information
AldhairMedico committed Oct 18, 2024
1 parent 7066c14 commit 7249f56
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 28 deletions.
5 changes: 2 additions & 3 deletions include/input.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@ struct UserInputTeloscope : UserInput {
uint32_t windowSize = 1000;
uint8_t kmerLen = 21;
uint32_t step = 500;
unsigned short int minBlockLen = 24;
unsigned short int minBlockDist = 5;
unsigned short int maxBlockDist = 100;
unsigned short int minBlockLen = 500;
unsigned short int maxBlockDist = 50;

bool keepWindowData = false; // Memory intensive
bool modeMatch = true, modeEntropy = true, modeGC = true; // Change to: de novo, user-defined
Expand Down
8 changes: 0 additions & 8 deletions include/teloscope.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,14 +141,6 @@ class Teloscope {
std::ofstream& canonicalBlocksFile,
std::ofstream& noncanonicalBlocksFile);

// void writeBEDFile(std::ofstream& shannonFile, std::ofstream& gcContentFile,
// std::unordered_map<std::string, std::ofstream>& patternMatchFiles,
// std::unordered_map<std::string, std::ofstream>& patternCountFiles,
// std::unordered_map<std::string, std::ofstream>& patternDensityFiles,
// std::ofstream& telomereBlocksAllFile,
// std::ofstream& telomereBlocksCanonicalFile,
// std::ofstream& telomereBlocksNonCanonicalFile);

void handleBEDFile();

void printSummary();
Expand Down
3 changes: 3 additions & 0 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,10 +269,13 @@ int main(int argc, char **argv) {
printf("\nRequired Parameters:\n");
printf("\t'-f'\t--input-sequence\tInitiate tool with fasta file.\n");
printf("\t'-o'\t--output\tSet output route.\n");
printf("\t'-c'\t--canonical\tSet canonical pattern. [Default: TTAGGG]\n");
printf("\t'-p'\t--patterns\tSet patterns to explore, separate them by commas [Default: TTAGGG]\n");
printf("\t'-w'\t--window\tSet sliding window size. [Default: 1000]\n");
printf("\t'-s'\t--step\tSet sliding window step. [Default: 500]\n");
printf("\t'-j'\t--threads\tSet maximum number of threads. [Default: max. available]\n");
printf("\t'-l'\t--min-block-length\tSet minimum block length for merging. [Default: 500]\n");
printf("\t'-d'\t--max-block-distance\tSet maximum block distance for merging. [Default: 50]\n");

printf("\nOptional Parameters:\n");
printf("\t'-m'\t--mode\tSet analysis modes, separate them by commas. [Options: all,match,gc,entropy]\n");
Expand Down
36 changes: 19 additions & 17 deletions src/teloscope.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,15 +122,15 @@ std::vector<TelomereBlock> Teloscope::getTelomereBlocks(const std::vector<uint32

// Helper function
auto finalizeBlock = [&](uint64_t endPosition) {
if (blockCounts >= 2) {
if (blockCounts >= 2) { // Jack: CHECK
TelomereBlock block;
block.start = blockStart;
block.blockLen = (endPosition - blockStart) + patternSize;
winBlocks.push_back(block);
}
};

for (size_t i = 1; i <= inputMatches.size(); ++i) {
for (size_t i = 1; i <= inputMatches.size(); ++i) { // Iterate from second match
uint64_t currentPosition;
uint64_t distance;

Expand Down Expand Up @@ -162,22 +162,20 @@ std::vector<TelomereBlock> Teloscope::getTelomereBlocks(const std::vector<uint32
std::vector<TelomereBlock> Teloscope::mergeTelomereBlocks(const std::vector<TelomereBlock>& winBlocks) {
std::vector<TelomereBlock> mergedBlocks;
unsigned short int minBlockLen = userInput.minBlockLen;
unsigned short int minBlockDist = userInput.minBlockDist;
unsigned short int maxBlockDist = userInput.maxBlockDist;

if (winBlocks.empty()) {
return mergedBlocks; // No blocks to merge
}

TelomereBlock currentBlock = winBlocks[0]; // Initialize the first block as the current block
// uint16_t D = this->trie.getLongestPatternSize(); // Use D as the merging distance threshold

for (size_t i = 1; i < winBlocks.size(); ++i) {
const TelomereBlock& nextBlock = winBlocks[i];
uint64_t currentEnd = currentBlock.start + currentBlock.blockLen;
uint64_t distance = nextBlock.start - currentEnd;

if ((distance <= maxBlockDist && distance > minBlockDist) || distance == 0) {
if (distance <= maxBlockDist) {
uint64_t newEnd = nextBlock.start + nextBlock.blockLen;
currentBlock.blockLen = newEnd - currentBlock.start;

Expand All @@ -190,7 +188,9 @@ std::vector<TelomereBlock> Teloscope::mergeTelomereBlocks(const std::vector<Telo
}
}

mergedBlocks.push_back(currentBlock);
if (currentBlock.blockLen >= minBlockLen) {
mergedBlocks.push_back(currentBlock);
}

return mergedBlocks;
}
Expand Down Expand Up @@ -484,6 +484,10 @@ void Teloscope::handleBEDFile() {
}

void Teloscope::printSummary() {
if (!userInput.keepWindowData) { // If windowData is not stored, skip
return;
}

std::cout << "\n+++Summary Report+++\n";
std::cout << "Total windows analyzed:\t" << totalNWindows << "\n";
std::cout << "Total input patterns found:\n";
Expand All @@ -493,15 +497,13 @@ void Teloscope::printSummary() {

// TODO: for each pattern, print the path header with the highest number of matches.
// TODO: for each pattern, print the path header with the lowest number of matches.
if (userInput.keepWindowData) {
std::cout << "Max Shannon Entropy:\t" << getMax(entropyValues) << "\n";
std::cout << "Mean Shannon Entropy:\t" << getMean(entropyValues) << "\n";
std::cout << "Median Shannon Entropy:\t" << getMedian(entropyValues) << "\n";
std::cout << "Min Shannon Entropy:\t" << getMin(entropyValues) << "\n";

std::cout << "Max GC Content:\t" << getMax(gcContentValues) << "\n";
std::cout << "Mean GC Content:\t" << getMean(gcContentValues) << "\n";
std::cout << "Median GC Content:\t" << getMedian(gcContentValues) << "\n";
std::cout << "Min GC Content:\t" << getMin(gcContentValues) << "\n";
}
std::cout << "Max Shannon Entropy:\t" << getMax(entropyValues) << "\n";
std::cout << "Mean Shannon Entropy:\t" << getMean(entropyValues) << "\n";
std::cout << "Median Shannon Entropy:\t" << getMedian(entropyValues) << "\n";
std::cout << "Min Shannon Entropy:\t" << getMin(entropyValues) << "\n";

std::cout << "Max GC Content:\t" << getMax(gcContentValues) << "\n";
std::cout << "Mean GC Content:\t" << getMean(gcContentValues) << "\n";
std::cout << "Median GC Content:\t" << getMedian(gcContentValues) << "\n";
std::cout << "Min GC Content:\t" << getMin(gcContentValues) << "\n";
}

0 comments on commit 7249f56

Please sign in to comment.