44
55#include " server/engine_shard.h"
66
7+ #include < absl/strings/escaping.h>
78#include < absl/strings/match.h>
89#include < absl/strings/str_cat.h>
910
1011#include < memory>
1112
1213#include " base/flags.h"
14+ #include " core/huff_coder.h"
1315#include " core/page_usage_stats.h"
1416#include " io/proc_reader.h"
1517
@@ -116,6 +118,75 @@ size_t CalculateHowManyBytesToEvictOnShard(size_t global_memory_limit, size_t gl
116118 return shard_budget < shard_memory_threshold ? (shard_memory_threshold - shard_budget) : 0 ;
117119}
118120
121+ class HuffmanCheckTask {
122+ public:
123+ int32_t Run (DbSlice* db_slice);
124+
125+ private:
126+ PrimeTable::Cursor cursor_;
127+
128+ static constexpr unsigned kMaxSymbol = 255 ;
129+ array<unsigned , kMaxSymbol + 1 > hist_; // histogram of symbols.
130+ string scratch_;
131+ };
132+
133+ int32_t HuffmanCheckTask::Run (DbSlice* db_slice) {
134+ DbTable* db_table = db_slice->GetDBTable (0 ); // we currently support only default db.
135+ if (!db_table)
136+ return -1 ;
137+
138+ // incrementally aggregate frequency histogram.
139+ auto & prime = db_table->prime ;
140+
141+ constexpr uint32_t kMaxTraverses = 512 ;
142+ uint32_t traverses_count = 0 ;
143+ do {
144+ cursor_ = prime.Traverse (cursor_, [&](PrimeIterator it) {
145+ if (!it->first .IsInline ()) {
146+ string_view val = it->first .GetSlice (&scratch_);
147+ for (unsigned char c : val) {
148+ hist_[c]++;
149+ }
150+ }
151+ });
152+ traverses_count++;
153+ } while (traverses_count < kMaxTraverses && cursor_);
154+
155+ if (cursor_)
156+ return 4 ; // priority to continue later.
157+
158+ // Finished scanning the table, now normalize the table.
159+ constexpr unsigned kMaxFreqTotal = static_cast <unsigned >((1U << 31 ) * 0.9 );
160+ size_t total_freq = std::accumulate (hist_.begin (), hist_.end (), 0UL );
161+
162+ // to avoid overflow.
163+ double scale = total_freq > kMaxFreqTotal ? static_cast <double >(total_freq) / kMaxFreqTotal : 1.0 ;
164+ for (unsigned i = 0 ; i <= kMaxSymbol ; i++) {
165+ hist_[i] = static_cast <unsigned >(hist_[i] / scale);
166+ if (hist_[i] == 0 ) {
167+ hist_[i] = 1 ; // Avoid zero frequency symbols.
168+ }
169+ }
170+
171+ // Build the huffman table. We currently output the table to logs and just increase
172+ // the metric counter to signal that we built a table.
173+
174+ HuffmanEncoder huff_enc;
175+ string error_msg;
176+ if (huff_enc.Build (hist_.data (), kMaxSymbol , &error_msg)) {
177+ size_t compressed_size = huff_enc.EstimateCompressedSize (hist_.data (), kMaxSymbol );
178+ LOG (INFO) << " Huffman table built, reducing memory usage from " << total_freq << " to "
179+ << compressed_size << " bytes, ratio " << double (compressed_size) / total_freq;
180+ string bintable = huff_enc.Export ();
181+ LOG (INFO) << " Huffman binary table: " << absl::Base64Escape (bintable);
182+ db_slice->shard_owner ()->stats ().huffman_tables_built ++;
183+ } else {
184+ LOG (WARNING) << " Huffman build failed: " << error_msg;
185+ }
186+
187+ return -1 ; // task completed.
188+ }
189+
119190} // namespace
120191
121192__thread EngineShard* EngineShard::shard_ = nullptr ;
@@ -141,7 +212,7 @@ string EngineShard::TxQueueInfo::Format() const {
141212}
142213
143214EngineShard::Stats& EngineShard::Stats::operator +=(const EngineShard::Stats& o) {
144- static_assert (sizeof (Stats) == 96 );
215+ static_assert (sizeof (Stats) == 104 );
145216
146217#define ADD (x ) x += o.x
147218
@@ -157,6 +228,7 @@ EngineShard::Stats& EngineShard::Stats::operator+=(const EngineShard::Stats& o)
157228 ADD (total_heartbeat_expired_bytes);
158229 ADD (total_heartbeat_expired_calls);
159230 ADD (total_migrated_keys);
231+ ADD (huffman_tables_built);
160232
161233#undef ADD
162234 return *this ;
@@ -347,6 +419,8 @@ void EngineShard::Shutdown() {
347419
348420void EngineShard::StopPeriodicFiber () {
349421 ProactorBase::me ()->RemoveOnIdleTask (defrag_task_id_);
422+ ProactorBase::me ()->RemoveOnIdleTask (huffman_check_task_id_);
423+
350424 fiber_heartbeat_periodic_done_.Notify ();
351425 if (fiber_heartbeat_periodic_.IsJoinable ()) {
352426 fiber_heartbeat_periodic_.Join ();
@@ -620,7 +694,43 @@ void EngineShard::Heartbeat() {
620694 }
621695 return ;
622696 }
623- start = std::chrono::system_clock::now ();
697+
698+ thread_local bool check_huffman = (shard_id_ == 0 ); // run it only on shard 0.
699+ if (check_huffman) {
700+ auto * ptr = db_slice.GetDBTable (0 );
701+ if (ptr) {
702+ size_t key_usage = ptr->stats .memory_usage_by_type [OBJ_KEY];
703+ size_t obj_usage = ptr->stats .obj_memory_usage ;
704+
705+ #ifdef NDEBUG
706+ #define MB_THRESHOLD (50 * 1024 * 1024 )
707+ #else
708+ #define MB_THRESHOLD (5 * 1024 * 1024 )
709+ #endif
710+
711+ if (key_usage > MB_THRESHOLD && key_usage > obj_usage / 8 ) {
712+ VLOG (1 ) << " Scheduling huffman check task, key usage: " << key_usage
713+ << " , obj usage: " << obj_usage;
714+
715+ check_huffman = false ; // trigger only once.
716+
717+ // launch the task
718+ HuffmanCheckTask* task = new HuffmanCheckTask ();
719+ huffman_check_task_id_ = ProactorBase::me ()->AddOnIdleTask ([task] {
720+ if (!shard_ || !namespaces) {
721+ delete task;
722+ return -1 ;
723+ }
724+
725+ DbSlice& db_slice = namespaces->GetDefaultNamespace ().GetDbSlice (shard_->shard_id ());
726+ int32_t res = task->Run (&db_slice);
727+ if (res == -1 )
728+ delete task;
729+ return res;
730+ });
731+ }
732+ }
733+ }
624734
625735 if (!IsReplica ()) { // Never run expiry/evictions on replica.
626736 RetireExpiredAndEvict ();
@@ -655,7 +765,7 @@ void EngineShard::RetireExpiredAndEvict() {
655765 // hence we map our delete/traversed ratio into a range [0, kTtlDeleteLimit).
656766 // The higher ttl_delete_target the more likely we have lots of expired items that need
657767 // to be deleted.
658- ttl_delete_target = kTtlDeleteLimit * double (deleted) / (double (traversed) + 10 );
768+ ttl_delete_target = unsigned ( kTtlDeleteLimit * double (deleted) / (double (traversed) + 10 ) );
659769 }
660770
661771 DbContext db_cntx;
0 commit comments