Skip to content

Commit 837ec2a

Browse files
authored
Merge branch 'main' into Apriori
2 parents f94ed53 + 2483648 commit 837ec2a

File tree

6 files changed

+439
-62
lines changed

6 files changed

+439
-62
lines changed

CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,15 @@ add_executable(NeuralNetwork tests/neural_network/NeuralNetworkTest.cpp)
7777
target_compile_definitions(NeuralNetwork PRIVATE TEST_NEURAL_NETWORK)
7878
target_link_libraries(NeuralNetwork cpp_ml_library)
7979

80+
8081
add_executable(Apriori tests/association/AprioriTest.cpp)
8182
target_compile_definitions(Apriori PRIVATE TEST_APRIORI)
8283
target_link_libraries(Apriori cpp_ml_library)
8384

85+
add_executable(Eclat tests/association/EclatTest.cpp)
86+
target_compile_definitions(Eclat PRIVATE TEST_ECLAT)
87+
target_link_libraries(Eclat cpp_ml_library)
88+
8489
# Register individual tests
8590
add_test(NAME LogisticRegressionTest COMMAND LogisticRegressionTest)
8691
add_test(NAME PolynomialRegressionTest COMMAND PolynomialRegressionTest)
@@ -96,6 +101,7 @@ add_test(NAME HierarchicalClustering COMMAND HierarchicalClustering)
96101
add_test(NAME SupportVectorRegression COMMAND SupportVectorRegression)
97102
add_test(NAME NeuralNetwork COMMAND NeuralNetwork)
98103
add_test(NAME Apriori COMMAND Apriori)
104+
add_test(NAME Eclat COMMAND Eclat)
99105

100106

101107
# Add example executables if BUILD_EXAMPLES is ON
@@ -137,6 +143,9 @@ if(BUILD_EXAMPLES)
137143
target_compile_definitions(${EXAMPLE_TARGET} PRIVATE TEST_NEURAL_NETWORK)
138144
elseif(EXAMPLE_NAME STREQUAL "AprioriExample")
139145
target_compile_definitions(${EXAMPLE_TARGET} PRIVATE TEST_APRIORI)
146+
elseif(EXAMPLE_NAME STREQUAL "EclatExample")
147+
target_compile_definitions(${EXAMPLE_TARGET} PRIVATE TEST_ECLAT)
148+
140149
endif()
141150
endforeach()
142151
endif()

examples/EclatExample.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#include "../ml_library_include/ml/association/Eclat.hpp"
2+
#include <iostream>
3+
4+
void testEclat() {
5+
// Sample transactions
6+
std::vector<std::vector<int>> transactions = {
7+
{1, 2, 5},
8+
{2, 4},
9+
{2, 3},
10+
{1, 2, 4},
11+
{1, 3},
12+
{2, 3},
13+
{1, 3},
14+
{1, 2, 3, 5},
15+
{1, 2, 3}
16+
};
17+
18+
// Minimum support threshold (e.g., 22% of total transactions)
19+
double min_support = 0.22;
20+
21+
// Create Eclat object
22+
Eclat eclat(min_support);
23+
24+
// Run Eclat algorithm
25+
std::vector<std::vector<int>> frequent_itemsets = eclat.run(transactions);
26+
27+
// Get support counts
28+
auto support_counts = eclat.get_support_counts();
29+
30+
// Display frequent itemsets and their support counts
31+
std::cout << "Frequent Itemsets:\n";
32+
for (const auto& itemset : frequent_itemsets) {
33+
std::cout << "Itemset: { ";
34+
for (int item : itemset) {
35+
std::cout << item << " ";
36+
}
37+
std::cout << "} - Support: " << support_counts.at(itemset) << "\n";
38+
}
39+
}
40+
41+
/**
 * @brief Entry point: runs the Eclat demonstration.
 * @return 0 on success.
 */
int main() {
    testEclat();
    return 0;
}
ml_library_include/ml/association/Eclat.hpp

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
#ifndef ECLAT_HPP
2+
#define ECLAT_HPP
3+
4+
#include <map>
5+
#include <vector>
6+
#include <algorithm>
7+
#include <iostream>
8+
#include <string>
9+
#include <cmath>
10+
#include <stdexcept>
11+
12+
/**
13+
* @file Eclat.hpp
14+
* @brief Optimized Implementation of the Eclat algorithm for frequent itemset mining.
15+
*/
16+
17+
/**
18+
* @class Eclat
19+
* @brief Class to perform frequent itemset mining using the Eclat algorithm.
20+
*/
21+
/**
 * @class Eclat
 * @brief Class to perform frequent itemset mining using the Eclat algorithm.
 *
 * Uses a vertical data layout: each item is mapped to the sorted list of
 * transaction IDs (TIDs) that contain it; the support of a larger itemset is
 * obtained by intersecting TID lists.
 */
class Eclat {
public:
    /**
     * @brief Constructor for the Eclat class.
     * @param min_support Minimum support threshold (as a fraction between 0 and 1).
     * @throws std::invalid_argument if min_support is outside (0, 1].
     */
    Eclat(double min_support);

    /**
     * @brief Runs the Eclat algorithm on the provided dataset.
     * @param transactions A vector of transactions, each transaction is a vector of items.
     * @return A vector of frequent itemsets, where each itemset is represented as a vector of items.
     */
    std::vector<std::vector<int>> run(const std::vector<std::vector<int>>& transactions);

    /**
     * @brief Gets the support counts for all frequent itemsets found.
     * @return A map where keys are itemsets (as vectors) and values are support counts.
     */
    std::map<std::vector<int>, int> get_support_counts() const;

private:
    /**
     * @brief Recursively mines frequent itemsets using the Eclat algorithm.
     * @param prefix The current itemset prefix.
     * @param items Candidate items (each frequent together with the prefix).
     * @param tid_sets A map from items to their sorted transaction ID vectors.
     */
    void eclat_recursive(const std::vector<int>& prefix,
                         const std::vector<int>& items,
                         const std::map<int, std::vector<int>>& tid_sets);

    double min_support;       ///< Minimum support threshold (fraction of transactions).
    int min_support_count;    ///< Minimum support count (absolute number of transactions).
    int total_transactions;   ///< Total number of transactions in the last run.
    std::map<std::vector<int>, int> support_counts; ///< Support counts for itemsets.
};

Eclat::Eclat(double min_support)
    : min_support(min_support), min_support_count(0), total_transactions(0) {
    if (min_support <= 0.0 || min_support > 1.0) {
        throw std::invalid_argument("min_support must be between 0 and 1.");
    }
}

std::vector<std::vector<int>> Eclat::run(const std::vector<std::vector<int>>& transactions) {
    total_transactions = static_cast<int>(transactions.size());
    min_support_count = static_cast<int>(std::ceil(min_support * total_transactions));
    // Reset state so run() can be called more than once on the same object
    // without mixing stale counts from a previous dataset.
    support_counts.clear();

    if (total_transactions == 0) {
        return {};  // nothing to mine
    }

    // Build the vertical layout: item -> TID vector. TIDs are appended in
    // strictly increasing order, so each vector is sorted by construction.
    std::map<int, std::vector<int>> item_tidsets;
    for (int tid = 0; tid < total_transactions; ++tid) {
        for (int item : transactions[tid]) {
            std::vector<int>& tids = item_tidsets[item];
            // Skip duplicates of an item inside one transaction; otherwise the
            // same TID would be recorded twice and inflate the support count.
            if (tids.empty() || tids.back() != tid) {
                tids.push_back(tid);
            }
        }
    }

    // Keep only the items that meet the minimum support.
    std::vector<int> frequent_items;
    for (const auto& [item, tidset] : item_tidsets) {
        if (static_cast<int>(tidset.size()) >= min_support_count) {
            frequent_items.push_back(item);
        }
    }
    // std::map already iterates in ascending key order; sort anyway so the
    // ordering invariant does not depend on the container choice.
    std::sort(frequent_items.begin(), frequent_items.end());

    // Record support counts for single items.
    for (int item : frequent_items) {
        support_counts[{item}] = static_cast<int>(item_tidsets[item].size());
    }

    // Depth-first mining of larger itemsets.
    eclat_recursive({}, frequent_items, item_tidsets);

    // Collect every recorded itemset that meets the minimum support.
    std::vector<std::vector<int>> frequent_itemsets;
    for (const auto& [itemset, count] : support_counts) {
        if (count >= min_support_count) {
            frequent_itemsets.push_back(itemset);
        }
    }

    return frequent_itemsets;
}

void Eclat::eclat_recursive(const std::vector<int>& prefix,
                            const std::vector<int>& items,
                            const std::map<int, std::vector<int>>& tid_sets) {
    const size_t n = items.size();
    for (size_t i = 0; i < n; ++i) {
        const int item = items[i];
        std::vector<int> new_prefix = prefix;
        new_prefix.push_back(item);

        // The item's TID set is invariant over the inner loop; look it up once.
        const std::vector<int>& item_tids = tid_sets.at(item);
        support_counts[new_prefix] = static_cast<int>(item_tids.size());

        // Build the candidates that can extend new_prefix by one more item.
        std::vector<int> remaining_items;
        std::map<int, std::vector<int>> new_tid_sets;

        for (size_t j = i + 1; j < n; ++j) {
            const int next_item = items[j];
            const std::vector<int>& next_tids = tid_sets.at(next_item);

            // TID vectors are sorted, so a linear merge intersection applies.
            std::vector<int> intersect_tid_set;
            std::set_intersection(item_tids.begin(), item_tids.end(),
                                  next_tids.begin(), next_tids.end(),
                                  std::back_inserter(intersect_tid_set));

            if (static_cast<int>(intersect_tid_set.size()) >= min_support_count) {
                remaining_items.push_back(next_item);
                new_tid_sets[next_item] = std::move(intersect_tid_set);
            }
        }

        // Descend only while the prefix can still be extended.
        if (!remaining_items.empty()) {
            eclat_recursive(new_prefix, remaining_items, new_tid_sets);
        }
    }
}

std::map<std::vector<int>, int> Eclat::get_support_counts() const {
    return support_counts;
}
158+
159+
#endif // ECLAT_HPP

ml_library_include/ml/regression/MultiLinearRegression.hpp

Lines changed: 65 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@
88

99
/**
1010
* @file MultilinearRegression.hpp
11-
* @brief A simple implementation of Multi Linear Regression.
11+
* @brief A simple implementation of Multilinear Regression with improvements.
1212
*/
1313

1414
/**
15-
* @class Multilinear Regression
15+
* @class MultilinearRegression
1616
* @brief A class that implements Multilinear Regression for predicting values
1717
* based on multiple features.
1818
*/
@@ -23,9 +23,10 @@ class MultilinearRegression {
2323
*
2424
* @param learningRate The rate at which the model learns (default 0.01).
2525
* @param iterations The number of iterations for the gradient descent (default 1000).
26+
* @param regularizationParameter The regularization parameter lambda (default 0.0, no regularization).
2627
*/
27-
MultilinearRegression(double learningRate = 0.01, int iterations = 1000)
28-
: learningRate(learningRate), iterations(iterations) {}
28+
MultilinearRegression(double learningRate = 0.01, int iterations = 1000, double regularizationParameter = 0.0)
29+
: learningRate_(learningRate), iterations_(iterations), lambda_(regularizationParameter) {}
2930

3031
/**
3132
* @brief Trains the Multilinear Regression model on the provided data.
@@ -39,10 +40,23 @@ class MultilinearRegression {
3940
throw std::invalid_argument("Features and target data sizes do not match.");
4041
}
4142

42-
int numFeatures = features[0].size();
43-
weights.resize(numFeatures, 0.0); // Initialize weights
43+
size_t numSamples = features.size();
44+
size_t numFeatures = features[0].size();
4445

45-
for (int i = 0; i < iterations; ++i) {
46+
// Validate that all feature vectors have the same size
47+
for (const auto& feature : features) {
48+
if (feature.size() != numFeatures) {
49+
throw std::invalid_argument("All feature vectors must have the same number of elements.");
50+
}
51+
}
52+
53+
// Initialize weights and bias if they haven't been initialized yet
54+
if (weights_.empty()) {
55+
weights_.resize(numFeatures, 0.0);
56+
bias_ = 0.0;
57+
}
58+
59+
for (int iter = 0; iter < iterations_; ++iter) {
4660
gradientDescentStep(features, target);
4761
}
4862
}
@@ -54,13 +68,38 @@ class MultilinearRegression {
5468
* @return The predicted value.
5569
*/
5670
double predict(const std::vector<double>& features) const {
57-
return std::inner_product(weights.begin(), weights.end(), features.begin(), 0.0);
71+
if (features.size() != weights_.size()) {
72+
throw std::invalid_argument("Feature vector size does not match the number of weights.");
73+
}
74+
double result = std::inner_product(weights_.begin(), weights_.end(), features.begin(), 0.0);
75+
result += bias_;
76+
return result;
77+
}
78+
79+
/**
80+
* @brief Gets the current weights of the model.
81+
*
82+
* @return A vector containing the weights.
83+
*/
84+
std::vector<double> getWeights() const {
85+
return weights_;
86+
}
87+
88+
/**
89+
* @brief Gets the current bias of the model.
90+
*
91+
* @return The bias term.
92+
*/
93+
double getBias() const {
94+
return bias_;
5895
}
5996

6097
private:
61-
double learningRate; ///< The learning rate for gradient descent.
62-
int iterations; ///< The number of iterations for training.
63-
std::vector<double> weights; ///< The weights for the model.
98+
double learningRate_; ///< The learning rate for gradient descent.
99+
int iterations_; ///< The number of iterations for training.
100+
double lambda_; ///< Regularization parameter (lambda).
101+
std::vector<double> weights_; ///< The weights for the model.
102+
double bias_ = 0.0; ///< Bias term.
64103

65104
/**
66105
* @brief Performs a single iteration of gradient descent to update the model weights.
@@ -69,20 +108,29 @@ class MultilinearRegression {
69108
* @param target A vector containing the target values.
70109
*/
71110
void gradientDescentStep(const std::vector<std::vector<double>>& features, const std::vector<double>& target) {
72-
std::vector<double> gradients(weights.size(), 0.0);
111+
size_t numSamples = features.size();
112+
size_t numFeatures = weights_.size();
113+
114+
std::vector<double> gradients(numFeatures, 0.0);
115+
double biasGradient = 0.0;
73116

74-
for (size_t i = 0; i < features.size(); ++i) {
117+
for (size_t i = 0; i < numSamples; ++i) {
75118
double prediction = predict(features[i]);
76119
double error = prediction - target[i];
77120

78-
for (size_t j = 0; j < weights.size(); ++j) {
79-
gradients[j] += error * features[i][j];
121+
for (size_t j = 0; j < numFeatures; ++j) {
122+
gradients[j] += (error * features[i][j]) + (lambda_ * weights_[j]);
80123
}
124+
125+
biasGradient += error;
81126
}
82127

83-
for (size_t j = 0; j < weights.size(); ++j) {
84-
weights[j] -= (learningRate / features.size()) * gradients[j];
128+
// Update weights and bias
129+
for (size_t j = 0; j < numFeatures; ++j) {
130+
weights_[j] -= (learningRate_ / numSamples) * gradients[j];
85131
}
132+
133+
bias_ -= (learningRate_ / numSamples) * biasGradient;
86134
}
87135
};
88136

0 commit comments

Comments
 (0)