Don’t parse binlog events of unrelated tables#36
Open
coding-chimp wants to merge 1 commit into
Open
Conversation
Author
|
I had a benchmark for a bit that compared filtering events to not filtering them, but it didn't feel worth checking that in: package binlog
import (
"reflect"
"testing"
"unsafe"
gomysql "github.com/go-mysql-org/go-mysql/mysql"
"github.com/go-mysql-org/go-mysql/replication"
)
func benchmarkSetUnexportedField(target interface{}, fieldName string, value interface{}) {
field := reflect.ValueOf(target).Elem().FieldByName(fieldName)
reflect.NewAt(field.Type(), unsafe.Pointer(field.UnsafeAddr())).Elem().Set(reflect.ValueOf(value))
}
func newBenchmarkRowsEvent(schemaName, tableName string) *replication.RowsEvent {
const tableID uint64 = 7
rowsEvent := &replication.RowsEvent{
Version: 1,
}
benchmarkSetUnexportedField(rowsEvent, "tableIDSize", 6)
benchmarkSetUnexportedField(rowsEvent, "needBitmap2", false)
benchmarkSetUnexportedField(rowsEvent, "eventType", replication.WRITE_ROWS_EVENTv1)
benchmarkSetUnexportedField(rowsEvent, "tables", map[uint64]*replication.TableMapEvent{
tableID: {
TableID: tableID,
Schema: []byte(schemaName),
Table: []byte(tableName),
ColumnCount: 1,
ColumnType: []byte{gomysql.MYSQL_TYPE_LONG},
ColumnMeta: []uint16{0},
NullBitmap: []byte{0x00},
},
})
return rowsEvent
}
func newBenchmarkRowsEventHeaderData() []byte {
// RowsEvent v1 header:
// - 6 byte little-endian table id
// - 2 byte flags
// - length-encoded column count = 1
// - 1 byte column bitmap
return []byte{7, 0, 0, 0, 0, 0, 0, 0, 1, 1}
}
func newBenchmarkRowsEventDataWithRows(rowCount int) []byte {
data := newBenchmarkRowsEventHeaderData()
for i := 0; i < rowCount; i++ {
// One MYSQL_TYPE_LONG column:
// - 1 byte null bitmap
// - 4 byte little-endian int32 value
data = append(data, 0, byte(i), byte(i>>8), byte(i>>16), byte(i>>24))
}
return data
}
func BenchmarkRowsEventDecodeFiltering(b *testing.B) {
const rowCount = 1000
data := newBenchmarkRowsEventDataWithRows(rowCount)
b.Run("filtered_table_skips_row_decode", func(b *testing.B) {
rowsEvent := newBenchmarkRowsEvent("testdb", "ignored_table")
b.ReportAllocs()
b.SetBytes(int64(len(data)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
rowsEvent.Rows = nil
rowsEvent.SkippedColumns = nil
_, err := rowsEvent.DecodeHeader(data)
if err != nil {
b.Fatal(err)
}
if string(rowsEvent.Table.Schema) != "testdb" || string(rowsEvent.Table.Table) != "ignored_table" {
b.Fatalf("unexpected table: %s.%s", rowsEvent.Table.Schema, rowsEvent.Table.Table)
}
}
})
b.Run("full_rows_event_decode", func(b *testing.B) {
rowsEvent := newBenchmarkRowsEvent("testdb", "wanted_table")
b.ReportAllocs()
b.SetBytes(int64(len(data)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
rowsEvent.Rows = nil
rowsEvent.SkippedColumns = nil
if err := rowsEvent.Decode(data); err != nil {
b.Fatal(err)
}
}
})
}go test ./go/binlog/ -run='^$' -bench=BenchmarkRowsEventDecodeFunc -benchmem -benchtime=5s -v
goos: darwin
goarch: arm64
pkg: github.com/github/gh-ost/go/binlog
cpu: Apple M3 Pro
BenchmarkRowsEventDecodeFiltering
BenchmarkRowsEventDecodeFiltering/filtered_table_skips_row_decode
BenchmarkRowsEventDecodeFiltering/filtered_table_skips_row_decode-11 769241822 7.730 ns/op 648137.47 MB/s 0 B/op 0 allocs/op
BenchmarkRowsEventDecodeFiltering/full_rows_event_decode
BenchmarkRowsEventDecodeFiltering/full_rows_event_decode-11 153952 39340 ns/op 127.35 MB/s 137713 B/op 1766 allocs/op
PASS
ok github.com/github/gh-ost/go/binlog 13.471s |
forge33
reviewed
May 28, 2026
| } | ||
|
|
||
| func NewGoMySQLReader(migrationContext *base.MigrationContext) *GoMySQLReader { | ||
| func NewGoMySQLReader(migrationContext *base.MigrationContext, rowsEventFilters ...RowsEventFilterFunc) *GoMySQLReader { |
There was a problem hiding this comment.
I find it strange we're doing variadic here but were always taking 1 argument, never 0, never 1+
Author
There was a problem hiding this comment.
We don't need it ourselves but it does provide backwards compatibility for external callers. I'm not sure whether we (or the gh-ost maintainers) should care about this, but PRs like this one do give the impression there are people using gh-ost as a library.
grodowski
approved these changes
May 28, 2026
Member
There was a problem hiding this comment.
There's a somewhat related binlog optimization pending upstream: github#1687, which should stack nicely with this one 👍
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Description
This change avoids decoding row payloads for DML events on tables that gh-ost is not listening to.
Previously, gh-ost decoded every row event from the binlog before filtering by database/table name in the event streamer. This meant DML activity on unrelated tables still paid the full row decoding cost, even though those events were discarded later.
With this change, gh-ost now uses go-mysql’s
RowsEventDecodeFunchook to inspect the row event header/table-map metadata first. If no registered listener is interested in the event’s database/table, gh-ost skips decoding the row data entirely.script/cibuildreturns with no formatting errors, build errors or unit test errors.