1
1
#ifndef ECLAT_HPP
2
2
#define ECLAT_HPP
3
3
4
- #include < unordered_map>
5
- #include < unordered_set>
4
+ #include < map>
6
5
#include < vector>
7
- #include < set>
8
6
#include < algorithm>
9
- #include < functional>
10
7
#include < iostream>
11
8
#include < string>
12
9
#include < cmath>
10
+ #include < stdexcept>
13
11
14
12
/**
15
13
* @file Eclat.hpp
16
- * @brief Implementation of the Eclat algorithm for frequent itemset mining.
14
+ * @brief Optimized implementation of the Eclat algorithm for frequent itemset mining.
17
15
*/
18
16
19
17
/**
@@ -31,38 +29,31 @@ class Eclat {
31
29
/**
32
30
* @brief Runs the Eclat algorithm on the provided dataset.
33
31
* @param transactions A vector of transactions, each transaction is a vector of items.
34
- * @return A vector of frequent itemsets, where each itemset is represented as a set of items.
32
+ * @return A vector of frequent itemsets, where each itemset is represented as a vector of items.
35
33
*/
36
- std::vector<std::set <int >> run (const std::vector<std::vector<int >>& transactions);
34
+ std::vector<std::vector <int >> run (const std::vector<std::vector<int >>& transactions);
37
35
38
36
/**
39
37
* @brief Gets the support counts for all frequent itemsets found.
40
- * @return An unordered_map where keys are itemsets (as strings ) and values are support counts.
38
+ * @return A map where keys are itemsets (as vectors of items) and values are support counts.
41
39
*/
42
- std::unordered_map<std::string, int > get_support_counts () const ;
43
- /* *
44
- * @brief Converts an itemset to a string representation for use as a key.
45
- * @param itemset The itemset to convert.
46
- * @return A string representation of the itemset.
47
- */
48
- std::string itemset_to_string (const std::set<int >& itemset) const ;
40
+ std::map<std::vector<int >, int > get_support_counts () const ;
49
41
50
42
private:
51
43
/**
52
44
* @brief Recursively mines frequent itemsets using the Eclat algorithm.
53
45
* @param prefix The current itemset prefix.
54
46
* @param items A vector of items to consider.
55
- * @param tid_sets A map from items to their transaction ID sets .
47
+ * @param tid_sets A map from items to their transaction ID vectors.
56
48
*/
57
- void eclat_recursive (const std::set <int >& prefix,
49
+ void eclat_recursive (const std::vector <int >& prefix,
58
50
const std::vector<int >& items,
59
- const std::unordered_map<int , std::unordered_set<int >>& tid_sets);
60
-
51
+ const std::map<int , std::vector<int >>& tid_sets);
61
52
62
53
double min_support; // /< Minimum support threshold.
63
54
int min_support_count; // /< Minimum support count (absolute number of transactions).
64
55
int total_transactions; // /< Total number of transactions.
65
- std::unordered_map <std::string , int > support_counts; // /< Support counts for itemsets.
56
+ std::map <std::vector< int > , int > support_counts; // /< Support counts for itemsets.
66
57
};
67
58
68
59
Eclat::Eclat (double min_support)
@@ -72,18 +63,23 @@ Eclat::Eclat(double min_support)
72
63
}
73
64
}
74
65
75
- std::vector<std::set <int >> Eclat::run (const std::vector<std::vector<int >>& transactions) {
66
+ std::vector<std::vector <int >> Eclat::run (const std::vector<std::vector<int >>& transactions) {
76
67
total_transactions = static_cast <int >(transactions.size ());
77
68
min_support_count = static_cast <int >(std::ceil (min_support * total_transactions));
78
69
79
- // Map each item to its TID set
80
- std::unordered_map <int , std::unordered_set <int >> item_tidsets;
70
+ // Map each item to its TID vector
71
+ std::map <int , std::vector <int >> item_tidsets;
81
72
for (int tid = 0 ; tid < total_transactions; ++tid) {
82
73
for (int item : transactions[tid]) {
83
- item_tidsets[item].insert (tid);
74
+ item_tidsets[item].push_back (tid);
84
75
}
85
76
}
86
77
78
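+ // Sort TID vectors: std::set_intersection (used in eclat_recursive) requires sorted input ranges.
+ // NOTE(review): an item repeated within one transaction has its TID pushed twice by the loop above; confirm duplicates are removed or cannot occur.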
79
+ for (auto & [item, tids] : item_tidsets) {
80
+ std::sort (tids.begin (), tids.end ());
81
+ }
82
+
87
83
// Filter items that meet the minimum support
88
84
std::vector<int > frequent_items;
89
85
for (const auto & [item, tidset] : item_tidsets) {
@@ -97,64 +93,51 @@ std::vector<std::set<int>> Eclat::run(const std::vector<std::vector<int>>& trans
97
93
98
94
// Initialize support counts for single items
99
95
for (int item : frequent_items) {
100
- std::set<int > itemset = {item};
101
- std::string itemset_str = itemset_to_string (itemset);
102
- support_counts[itemset_str] = static_cast <int >(item_tidsets[item].size ());
96
+ std::vector<int > itemset = {item};
97
+ support_counts[itemset] = static_cast <int >(item_tidsets[item].size ());
103
98
}
104
99
105
100
// Start recursive mining
106
101
eclat_recursive ({}, frequent_items, item_tidsets);
107
102
108
103
// Collect frequent itemsets from support counts
109
- std::vector<std::set <int >> frequent_itemsets;
110
- for (const auto & [itemset_str , count] : support_counts) {
104
+ std::vector<std::vector <int >> frequent_itemsets;
105
+ for (const auto & [itemset , count] : support_counts) {
111
106
if (count >= min_support_count) {
112
- // Convert string back to itemset
113
- std::set<int > itemset;
114
- size_t pos = 0 ;
115
- std::string token;
116
- std::string s = itemset_str;
117
- while ((pos = s.find (' ,' )) != std::string::npos) {
118
- token = s.substr (0 , pos);
119
- itemset.insert (std::stoi (token));
120
- s.erase (0 , pos + 1 );
121
- }
122
- itemset.insert (std::stoi (s));
123
107
frequent_itemsets.push_back (itemset);
124
108
}
125
109
}
126
110
127
111
return frequent_itemsets;
128
112
}
129
113
130
- void Eclat::eclat_recursive (const std::set <int >& prefix,
114
+ void Eclat::eclat_recursive (const std::vector <int >& prefix,
131
115
const std::vector<int >& items,
132
- const std::unordered_map <int , std::unordered_set <int >>& tid_sets) {
116
+ const std::map <int , std::vector <int >>& tid_sets) {
133
117
size_t n = items.size ();
134
118
for (size_t i = 0 ; i < n; ++i) {
135
119
int item = items[i];
136
- std::set<int > new_prefix = prefix;
137
- new_prefix.insert (item);
138
- std::string itemset_str = itemset_to_string (new_prefix);
120
+ std::vector<int > new_prefix = prefix;
121
+ new_prefix.push_back (item);
139
122
140
123
// Update support counts
141
124
int support = static_cast <int >(tid_sets.at (item).size ());
142
- support_counts[itemset_str ] = support;
125
+ support_counts[new_prefix ] = support;
143
126
144
127
// Generate new combinations
145
128
std::vector<int > remaining_items;
146
- std::unordered_map <int , std::unordered_set <int >> new_tid_sets;
129
+ std::map <int , std::vector <int >> new_tid_sets;
147
130
148
131
for (size_t j = i + 1 ; j < n; ++j) {
149
132
int next_item = items[j];
150
133
151
134
// Intersect TID sets
152
- std::unordered_set <int > intersect_tid_set;
135
+ std::vector <int > intersect_tid_set;
153
136
const auto & tid_set1 = tid_sets.at (item);
154
137
const auto & tid_set2 = tid_sets.at (next_item);
155
138
std::set_intersection (tid_set1.begin (), tid_set1.end (),
156
139
tid_set2.begin (), tid_set2.end (),
157
- std::inserter (intersect_tid_set, intersect_tid_set. begin () ));
140
+ std::back_inserter (intersect_tid_set));
158
141
159
142
if (static_cast <int >(intersect_tid_set.size ()) >= min_support_count) {
160
143
remaining_items.push_back (next_item);
@@ -169,19 +152,8 @@ void Eclat::eclat_recursive(const std::set<int>& prefix,
169
152
}
170
153
}
171
154
172
- std::unordered_map <std::string , int > Eclat::get_support_counts () const {
155
+ std::map <std::vector< int > , int > Eclat::get_support_counts () const {
173
156
return support_counts;
174
157
}
175
158
176
- std::string Eclat::itemset_to_string (const std::set<int >& itemset) const {
177
- std::string s;
178
- for (auto it = itemset.begin (); it != itemset.end (); ++it) {
179
- s += std::to_string (*it);
180
- if (std::next (it) != itemset.end ()) {
181
- s += " ," ;
182
- }
183
- }
184
- return s;
185
- }
186
-
187
159
#endif // ECLAT_HPP
0 commit comments