Skip to content

Commit

Permalink
Nov 12, 2024: Better memory usage - dirt
Browse files Browse the repository at this point in the history
  • Loading branch information
AldhairMedico committed Nov 12, 2024
1 parent 8e94d76 commit 7dcfc57
Show file tree
Hide file tree
Showing 2 changed files with 249 additions and 128 deletions.
57 changes: 33 additions & 24 deletions include/teloscope.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ class Trie {
};


/// Statistics kept for a single pattern inside one window.
struct PatternData {
    /// Match indexes of the pattern in the window.
    std::vector<uint32_t> patMatches;

    /// Total number of times the pattern was counted; starts at zero.
    uint32_t count{0};

    /// Density of the pattern; starts at zero.
    float density{0.0f};
};
// struct PatternData {
// std::vector<uint32_t> patMatches; // Match indexes in window
// uint32_t count = 0; // Total pattern count
// float density = 0.0f; // Density of the pattern
// };


struct TelomereBlock {
Expand All @@ -61,21 +61,23 @@ struct TelomereBlock {
// Values tracked for one window: its start and size, nucleotide counts, GC content,
// Shannon entropy, telomere blocks, match positions, and per-category counts/densities.
// NOTE(review): nucleotideCounts and canonicalCounts are each declared twice below, and
// patternMap appears both live and commented out. This looks like removed and added diff
// lines shown together; confirm against the actual header, since duplicate members do not compile.
struct WindowData {
// Set to 0 by the constructor.
uint32_t windowStart;
// Set to 0 by the constructor.
uint32_t currentWindowSize;
// Four counters, all starting at zero. NOTE(review): presumably one per base (A/C/G/T); index order is not shown here.
uint32_t nucleotideCounts[4] = {0, 0, 0, 0};
// Set to 0.0f by the constructor.
float gcContent;
// Set to 0.0f by the constructor.
float shannonEntropy;
// uint32_t winHDistance = 0;

uint32_t nucleotideCounts[4] = {0, 0, 0, 0};
// Keyed by pattern string; each value is that pattern's PatternData.
std::unordered_map<std::string, PatternData> patternMap; // Condensed pattern data
// std::unordered_map<std::string, PatternData> patternMap; // Condensed pattern data
std::vector<TelomereBlock> winBlocks;
std::vector<uint8_t> hDistances;

// Match lists; default-constructed empty.
std::vector<uint32_t> canonicalMatches;
std::vector<uint32_t> nonCanonicalMatches;
std::vector<uint32_t> windowMatches;

// 16-bit counters, so each can hold at most 65535.
// NOTE(review): confirm a window can never produce more matches than that.
uint16_t canonicalCounts = 0; // JACK: For density
uint16_t canonicalCounts = 0;
uint16_t nonCanonicalCounts = 0;
uint16_t windowCounts = 0;
float canonicalDensity = 0.0f;
float nonCanonicalDensity = 0.0f;

// Zero-initializes the four members that have no in-class initializer; every other
// member uses its in-class initializer or default construction.
WindowData() : windowStart(0), currentWindowSize(0), gcContent(0.0f), shannonEntropy(0.0f) {}
};
Expand Down Expand Up @@ -123,19 +125,19 @@ class Teloscope {
}


// Recompute the density of every pattern stored in windowData.patternMap.
// For each pattern: density = (count * pattern length) / windowSize.
// windowSize is used as the divisor without a zero check.
inline void getPatternDensities(WindowData& windowData, uint32_t windowSize) {
    for (auto &kv : windowData.patternMap) {
        auto &stats = kv.second;
        // Bases covered by this pattern: number of matches times the pattern's length.
        const auto coveredBases = stats.count * kv.first.size();
        stats.density = static_cast<float>(coveredBases) / windowSize;
    }
}
// inline void getPatternDensities(WindowData& windowData, uint32_t windowSize) {
// for (auto &entry : windowData.patternMap) {
// auto &pattern = entry.first;
// auto &data = entry.second;
// data.density = static_cast<float>(data.count * pattern.size()) / windowSize;
// }
// }


// Helpers over a vector of floats (declarations only; the definitions are not in this view).
// getMean borrows its argument through a const reference.
float getMean(const std::vector<float>& values);
// The remaining helpers take the vector by value, so each call copies the caller's vector.
float getMedian(std::vector<float> values);
float getMin(std::vector<float> values);
float getMax(std::vector<float> values);
// NOTE(review): a top-level const on a by-value parameter is not part of the function's
// signature, so these two declare the same getMin/getMax as the two lines above and still
// copy the vector. They look like the old and new diff lines shown together; a const
// reference may have been the intent. Confirm against the header and the definitions.
float getMin(const std::vector<float> values);
float getMax(const std::vector<float> values);

public:

Expand All @@ -160,13 +162,20 @@ class Teloscope {

// Takes a list of TelomereBlock by const reference and returns a new vector of TelomereBlock.
// Declaration only; the merging rule lives in the definition, which is not in this view.
std::vector<TelomereBlock> mergeTelomereBlocks(const std::vector<TelomereBlock>& winBlocks);

// void writeBEDFile(std::ofstream& shannonFile, std::ofstream& gcContentFile,
// std::unordered_map<std::string, std::ofstream>& patternMatchFiles,
// std::unordered_map<std::string, std::ofstream>& patternCountFiles,
// std::unordered_map<std::string, std::ofstream>& patternDensityFiles,
// std::ofstream& allBlocksFile,
// std::ofstream& canonicalBlocksFile,
// std::ofstream& noncanonicalBlocksFile);

// Declaration of writeBEDFile; every parameter is an output stream, or a map of output
// streams keyed by string, passed by non-const reference.
// NOTE(review): two parameter tails follow the first line. One uses per-pattern stream maps;
// the other uses separate canonical/noncanonical match, count and density streams. Both end
// in `);`, so this looks like the removed and added diff lines shown together. Confirm which
// tail the actual header keeps.
void writeBEDFile(std::ofstream& shannonFile, std::ofstream& gcContentFile,
std::unordered_map<std::string, std::ofstream>& patternMatchFiles,
std::unordered_map<std::string, std::ofstream>& patternCountFiles,
std::unordered_map<std::string, std::ofstream>& patternDensityFiles,
std::ofstream& allBlocksFile,
std::ofstream& canonicalBlocksFile,
std::ofstream& noncanonicalBlocksFile);
std::ofstream& canonicalMatchFile, std::ofstream& noncanonicalMatchFile,
std::ofstream& canonicalCountFile, std::ofstream& noncanonicalCountFile,
std::ofstream& canonicalDensityFile, std::ofstream& noncanonicalDensityFile,
std::ofstream& allBlocksFile, std::ofstream& canonicalBlocksFile, std::ofstream& noncanonicalBlocksFile);


void handleBEDFile();

Expand Down
Loading

0 comments on commit 7dcfc57

Please sign in to comment.