|
| 1 | +package operation |
| 2 | + |
| 3 | +import ( |
| 4 | + "context" |
| 5 | + "encoding/json" |
| 6 | + "fmt" |
| 7 | + "math" |
| 8 | + "sort" |
| 9 | + "sync" |
| 10 | + |
| 11 | + "github.com/rs/zerolog" |
| 12 | + |
| 13 | + "github.com/onflow/flow-go/module/util" |
| 14 | + "github.com/onflow/flow-go/storage" |
| 15 | +) |
| 16 | + |
// Stats holds statistics for a single prefix group.
type Stats struct {
	Count       int     `json:"count"`      // number of key/value pairs seen under the prefix
	MinSize     int     `json:"min_size"`   // smallest value size in bytes (0 when Count == 0)
	MaxSize     int     `json:"max_size"`   // largest value size in bytes
	TotalSize   int     `json:"total_size"` // sum of all value sizes in bytes
	AverageSize float64 `json:"avg_size"`   // TotalSize / Count (0 when Count == 0)
}
| 25 | + |
// SummarizeKeysByFirstByteConcurrent iterates over all prefixes [0x00..0xFF] in parallel
// using nWorker goroutines. Each worker handles one prefix at a time until all are processed.
//
// It returns a map from first byte to the aggregated Stats of all keys starting with
// that byte. On the first worker error the context is cancelled and that error is
// returned; partial results collected so far are discarded.
//
// The storage.Reader must be able to create multiple iterators concurrently.
func SummarizeKeysByFirstByteConcurrent(log zerolog.Logger, r storage.Reader, nWorker int) (map[byte]Stats, error) {
	// We'll have at most 256 possible prefixes (0x00..0xFF).
	// Create tasks (one per prefix), a results channel, and a wait group.
	// Both channels are buffered to 256 — the total number of tasks/results —
	// so sends below never block, even if the consumer stops reading early on error.
	taskChan := make(chan byte, 256)
	resultChan := make(chan struct {
		prefix byte
		stats  Stats
		err    error
	}, 256)

	var wg sync.WaitGroup
	// cancel signals workers to stop early when the consumer hits an error;
	// the deferred call guarantees the context is released on every return path.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Start nWorker goroutines.
	for i := 0; i < nWorker; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for {
				select {
				case <-ctx.Done():
					return // Stop immediately on cancellation
				case prefix, ok := <-taskChan:
					if !ok {
						return // Stop if taskChan is closed
					}

					st, err := processPrefix(r, prefix)
					// This send never blocks: resultChan's buffer (256) can hold
					// every possible result, one per prefix.
					resultChan <- struct {
						prefix byte
						stats  Stats
						err    error
					}{
						prefix: prefix,
						stats:  st,
						err:    err,
					}
				}
			}
		}()
	}

	progress := util.LogProgress(log,
		util.DefaultLogProgressConfig(
			"Summarizing keys by first byte",
			256,
		))

	// Send all prefixes [0..255] to taskChan. The buffer holds all 256 tasks,
	// so this loop never blocks regardless of nWorker.
	for p := 0; p < 256; p++ {
		taskChan <- byte(p)
	}
	close(taskChan)

	// Once all workers finish, close the result channel so the range loop below terminates.
	go func() {
		wg.Wait()
		close(resultChan)
	}()

	// Gather results. We'll accumulate them in a map[prefix]Stats.
	finalStats := make(map[byte]Stats, 256)

	var err error
	// On the first error we cancel the remaining workers and stop collecting;
	// the error is returned after the loop.
	for res := range resultChan {
		if res.err != nil {
			cancel() // Cancel running goroutines
			err = res.err
			break
		}
		finalStats[res.prefix] = res.stats
		log.Info().
			Int("prefix", int(res.prefix)).
			Int("total", res.stats.TotalSize).
			Int("count", res.stats.Count).
			Int("min", res.stats.MinSize).
			Int("max", res.stats.MaxSize).
			Msg("Processed prefix")
		progress(1) // log the progress
	}

	if err != nil {
		return nil, err
	}
	return finalStats, nil
}
| 118 | + |
| 119 | +// processPrefix does the actual iteration and statistic calculation for a single prefix. |
| 120 | +// It returns the Stats for that prefix, or an error if iteration fails. |
| 121 | +func processPrefix(r storage.Reader, prefix byte) (Stats, error) { |
| 122 | + var s Stats |
| 123 | + // We use MinSize = math.MaxInt as a sentinel so the first real size will become the new minimum. |
| 124 | + s.MinSize = math.MaxInt |
| 125 | + |
| 126 | + // Iterator range is [prefix, prefix] (inclusive). |
| 127 | + start, end := []byte{prefix}, []byte{prefix} |
| 128 | + it, err := r.NewIter(start, end, storage.IteratorOption{BadgerIterateKeyOnly: true}) |
| 129 | + if err != nil { |
| 130 | + return s, fmt.Errorf("failed to create iterator for prefix 0x%X: %w", prefix, err) |
| 131 | + } |
| 132 | + defer it.Close() |
| 133 | + |
| 134 | + for it.First(); it.Valid(); it.Next() { |
| 135 | + item := it.IterItem() |
| 136 | + |
| 137 | + // item.Value(...) is a function call that gives us the value, on which we measure size. |
| 138 | + err := item.Value(func(val []byte) error { |
| 139 | + size := len(val) |
| 140 | + s.Count++ |
| 141 | + s.TotalSize += size |
| 142 | + if size < s.MinSize { |
| 143 | + s.MinSize = size |
| 144 | + } |
| 145 | + if size > s.MaxSize { |
| 146 | + s.MaxSize = size |
| 147 | + } |
| 148 | + return nil |
| 149 | + }) |
| 150 | + |
| 151 | + if err != nil { |
| 152 | + return s, fmt.Errorf("failed to process value for prefix %v: %w", int(prefix), err) |
| 153 | + } |
| 154 | + } |
| 155 | + |
| 156 | + // If we found no keys for this prefix, reset MinSize to 0 to avoid confusion. |
| 157 | + if s.Count == 0 { |
| 158 | + s.MinSize = 0 |
| 159 | + } else { |
| 160 | + // Compute average size. |
| 161 | + s.AverageSize = float64(s.TotalSize) / float64(s.Count) |
| 162 | + } |
| 163 | + |
| 164 | + return s, nil |
| 165 | +} |
| 166 | + |
| 167 | +// PrintStats logs the statistics for each prefix in ascending order. |
| 168 | +// Each prefix is shown in hex, along with count, min, max, total, and average sizes. |
| 169 | +func PrintStats(log zerolog.Logger, stats map[byte]Stats) { |
| 170 | + if len(stats) == 0 { |
| 171 | + log.Info().Msg("No stats to print (map is empty).") |
| 172 | + return |
| 173 | + } |
| 174 | + |
| 175 | + // Convert map to a slice of key-value pairs |
| 176 | + statList := make([]struct { |
| 177 | + Prefix int `json:"prefix"` |
| 178 | + Stats Stats `json:"stats"` |
| 179 | + }, 0, len(stats)) |
| 180 | + |
| 181 | + for p, s := range stats { |
| 182 | + statList = append(statList, struct { |
| 183 | + Prefix int `json:"prefix"` |
| 184 | + Stats Stats `json:"stats"` |
| 185 | + }{Prefix: int(p), Stats: s}) |
| 186 | + } |
| 187 | + |
| 188 | + // Sort by TotalSize in ascending order |
| 189 | + sort.Slice(statList, func(i, j int) bool { |
| 190 | + return statList[i].Stats.TotalSize < statList[j].Stats.TotalSize |
| 191 | + }) |
| 192 | + |
| 193 | + // Convert sorted stats to JSON |
| 194 | + jsonData, err := json.MarshalIndent(statList, "", " ") |
| 195 | + if err != nil { |
| 196 | + log.Error().Err(err).Msg("Failed to marshal stats to JSON") |
| 197 | + return |
| 198 | + } |
| 199 | + |
| 200 | + // Log the JSON |
| 201 | + log.Info().RawJSON("stats", jsonData).Msg("Sorted prefix stats") |
| 202 | +} |
0 commit comments