Skip to content

Commit 5de2049

Browse files
committed
base metrics objects
1 parent 4ddfe1b commit 5de2049

File tree

7 files changed

+674
-2
lines changed

7 files changed

+674
-2
lines changed

common/metrics/structured/base.go

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
package structured
2+
3+
import (
4+
"maps"
5+
"strconv"
6+
"strings"
7+
"testing"
8+
"time"
9+
10+
"github.com/uber-go/tally"
11+
"go.uber.org/fx"
12+
)
13+
14+
var Module = fx.Options(
15+
fx.Provide(func(s tally.Scope) Emitter {
16+
return Emitter{scope: s}
17+
}),
18+
)
19+
20+
// Metadata is the interface shared by every "...Tags" struct.
//
// Hand-written implementations are generally NOT expected: define the struct
// and let the code generator produce these methods (`make metrics`).
//
// See the generated code for the intended usage and implementation.
type Metadata interface {
	// NumTags reports the number of tags, for efficient pre-allocation.
	NumTags() int
	// PutTags populates the given map with the tags.
	PutTags(into map[string]string)
	// GetTags returns a pre-allocated and pre-populated map.
	GetTags() map[string]string
}
31+
32+
// DynamicTags is a very simple helper for treating an arbitrary map as a Metadata.
33+
//
34+
// This can be used externally (for completely manual metrics) or in metrics-emitting
35+
// methods to simplify adding custom tags (e.g. it is returned from GetTags).
36+
type DynamicTags map[string]string
37+
38+
var _ Metadata = DynamicTags{}
39+
40+
func (o DynamicTags) NumTags() int { return len(o) }
41+
func (o DynamicTags) PutTags(into map[string]string) { maps.Copy(into, o) }
42+
func (o DynamicTags) GetTags() map[string]string { return maps.Clone(o) }
43+
44+
// Emitter is the base helper for emitting metrics, and it contains only low-level
45+
// metrics-emitting funcs to keep it as simple as possible.
46+
//
47+
// It is intended to be used with the `make metrics` code generator and structs-of-tags,
48+
// but it's intentionally possible to (ab)use it by hand because ad-hoc metrics
49+
// should be easy and encouraged.
50+
//
51+
// Metadata can be constructed from any map via DynamicTags, but this API intentionally hides
52+
// [tally.Scope.Tagged] because it's (somewhat) memory-wasteful, self-referential interfaces are
53+
// difficult to mock, and it's very hard to figure out what tags may be present at runtime.
54+
//
55+
// TODO: this can / likely should be turned into an interface to allow disconnecting from tally,
56+
// to allow providing a specific version or to drop it entirely if desired.
57+
type Emitter struct {
58+
// intentionally NOT no-op by default.
59+
//
60+
// use a test emitter in tests, it should be quite easy to construct,
61+
// and this way it will panic if forgotten for some reason, rather than
62+
// causing a misleading lack-of-metrics.
63+
//
64+
// currently, because this is constructed by common/config/metrics.go,
65+
// this scope already contains the `cadence_service:cadence-{whatever}` tag,
66+
// but essentially no others (aside from platform-level stuff).
67+
// you can get the instance from go.uber.org/fx, as just `tally.Scope`.
68+
scope tally.Scope
69+
}
70+
71+
// Histogram records a duration-based histogram with the provided data.
72+
// It adds a "histogram_scale" tag, so histograms can be accurately subset in queries or via middleware.
73+
func (b Emitter) Histogram(name string, buckets SubsettableHistogram, dur time.Duration, meta Metadata) {
74+
tags := make(DynamicTags, meta.NumTags()+1)
75+
meta.PutTags(tags)
76+
77+
// all subsettable histograms need to emit scale values so scale changes
78+
// can be correctly merged at query time.
79+
if _, ok := tags["histogram_scale"]; ok {
80+
// rewrite the existing tag so it can be noticed
81+
tags["error_rename_this_tag_histogram_scale"] = tags["histogram_scale"]
82+
}
83+
tags["histogram_scale"] = strconv.Itoa(buckets.scale)
84+
85+
if !strings.HasSuffix(name, "_ns") {
86+
// duration-based histograms are always in nanoseconds,
87+
// and the name MUST be different from timers while we migrate,
88+
// so this ensures we always have a unique _ns suffix.
89+
//
90+
// hopefully this is never used, but it'll at least make it clear if it is.
91+
name = name + "_error_missing_suffix_ns"
92+
}
93+
b.scope.Tagged(tags).Histogram(name, buckets).RecordDuration(dur)
94+
}
95+
96+
// IntHistogram records a count-based histogram with the provided data.
97+
// It adds a "histogram_scale" tag, so histograms can be accurately subset in queries or via middleware.
98+
func (b Emitter) IntHistogram(name string, buckets IntSubsettableHistogram, num int, meta Metadata) {
99+
tags := make(DynamicTags, meta.NumTags()+1)
100+
meta.PutTags(tags)
101+
102+
// all subsettable histograms need to emit scale values so scale changes
103+
// can be correctly merged at query time.
104+
if _, ok := tags["histogram_scale"]; ok {
105+
// rewrite the existing tag so it can be noticed
106+
tags["error_rename_this_tag_histogram_scale"] = tags["histogram_scale"]
107+
}
108+
tags["histogram_scale"] = strconv.Itoa(buckets.scale)
109+
110+
if !strings.HasSuffix(name, "_counts") {
111+
// int-based histograms are always in "_counts" (currently anyway),
112+
// and the name MUST be different from timers while we migrate.
113+
// so this ensures we always have a unique _counts suffix.
114+
//
115+
// hopefully this is never used, but it'll at least make it clear if it is.
116+
name = name + "_error_missing_suffix_counts"
117+
}
118+
b.scope.Tagged(tags).Histogram(name, buckets).RecordDuration(time.Duration(num))
119+
}
120+
121+
// TODO: make a MinMaxHistogram helper which maintains a precise, rolling
122+
// min/max gauge, over a window larger than the metrics granularity (e.g. ~20s)
123+
// to work around gauges' last-data-only behavior.
124+
//
125+
// This will likely require some additional state though, and might benefit from
126+
// keeping that state further up the Tags-stack to keep contention and
127+
// series-deduplication-costs low.
128+
//
129+
// Maybe OTEL / Prometheus will natively support this one day. It'd be simple.
130+
131+
// Count records a counter with the provided data.
132+
func (b Emitter) Count(name string, num int, meta Metadata) {
133+
b.scope.Tagged(meta.GetTags()).Counter(name).Inc(int64(num))
134+
}
135+
136+
// Gauge emits a gauge with the provided data.
137+
func (b Emitter) Gauge(name string, val float64, meta Metadata) {
138+
b.scope.Tagged(meta.GetTags()).Gauge(name).Update(val)
139+
}
140+
141+
// NewTestEmitter creates an emitter for tests, optionally using the provided scope.
142+
// If scope is nil, a no-op scope will be used.
143+
func NewTestEmitter(t *testing.T, scope tally.Scope) Emitter {
144+
t.Name() // require non-nil
145+
if scope == nil {
146+
scope = tally.NoopScope
147+
}
148+
return Emitter{scope}
149+
}

common/metrics/structured/doc.go

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
Package structured contains the base objects for a struct-based metrics system.
3+
4+
This is intended to be used with internal/tools/metricsgen, but the Emitter is
5+
public on many StructTags to ensure ad-hoc metrics are still simple to emit (and
6+
to make codegen reasonably easy).
7+
8+
For concrete details, check the generated code of any ...Tags structs, or the
9+
generator in [github.com/uber/cadence/internal/tools/metricsgen].
10+
11+
# To make a new metrics-tag-containing struct
12+
13+
- Define a `type ...Tags struct` anywhere. These can be public or private.
14+
- Embed any parent ...Tags structs desired, and add any fields to store tag values
15+
(or declare that they will be emitted, if they are not static)
16+
- Add a `//go:generate metricsgen` comment to the file (if not already present)
17+
- Run `make metrics` to generate the supporting code
18+
19+
In many cases, that's likely enough. Construct your new thing and use it:
20+
21+
thing := NewYourTags(parents, and, tags) // get it from somewhere
22+
thing.Count("name", 1) // "name" must be unique within Cadence
23+
// or inside a method on YourTags:
24+
func (y YourTags) ItHappened() {
25+
y.Count("it_happened", 1)
26+
}
27+
28+
to emit a metric with all the associated tags.
29+
30+
# To add new tags to existing metrics / structs
31+
32+
Add the field and run `make metrics`.
33+
34+
This will re-generate the constructor(s), which will lead to a broken build.
35+
Just chase build failures until you've ensured that every code path has access
36+
to the new data you wanted to add.
37+
38+
# To see what tags an existing metric has
39+
40+
Find the name string (e.g. grep for it), open it in an IDE, and just ask the
41+
IDE to auto-complete a field access:
42+
43+
yourTagsInstance.<ctrl-space to request autocomplete>
44+
45+
In Goland, VSCode, and likely elsewhere, this will give you a drop-down of all
46+
fields inherited from all parents, for easy reading.
47+
48+
# Best practices
49+
50+
Use constant, in-line strings for metric names. Prometheus requires that each
51+
"name" must have a stable set of tags, so there is no safety benefit to using a
52+
const - generally speaking it must NOT be shared.
53+
54+
Ad-hoc metrics are encouraged to use the convenience methods for simplicity.
55+
When curious about something, just emit a metric and find out later (but watch
56+
out for cardinality).
57+
58+
Avoid pointers, both for the ...Tags struct and its values, to prevent mutation.
59+
This also implies you should generally use "simple" and minimal field types, as
60+
they will be copied repeatedly - avoid e.g. complex thrift objects. Hopefully
61+
this will end up being nicer to the garbage collector than pointers everywhere.
62+
63+
For any metrics (or "events" which have multiple metrics) you consider "stable"
64+
or have alerts or dashboards based on, strongly consider declaring a method on
65+
your ...Tags struct and emitting in there. This helps inform reviewers that
66+
changing the metrics might cause problems elsewhere, and documents intent for
67+
Cadence operators if they get an alert or see strange numbers.
68+
69+
# Code generation customization
70+
71+
Fields have two major options available: they can declare a custom to-string
72+
conversion, and they can "reserve" a tag without defining a value:
73+
74+
type YourTags struct {
75+
Fancy protobuf.Thing `tag:"fancy" convert:"{{.}}.String()"`
76+
Reserved struct{} `tag:"reserved"`
77+
}
78+
79+
Custom conversion is just a text/template string, where `.` will be filled in
80+
with the field access (i.e. `y.Fancy`). Strings work automatically, and
81+
integers (int, int32, and int64) will be automatically `strconv.Itoa`-converted,
82+
but all other types will require custom conversion. As you cannot declare new
83+
imports in this string, make sure you've imported any packages you need to
84+
stringify a value in the same file as the ...Tags is declared.
85+
86+
Reserved tags serve two purposes:
87+
- They document that a tag will be emitted, so it can be discovered
88+
- They reserve space in the map returned by `GetTags()`, so you can
89+
efficiently add it at runtime
90+
91+
Because reserved tags will not be filled in by convenience methods like `Count`,
92+
they are almost exclusively useful for methods that emit specific metrics.
93+
94+
For the simplest use cases, use a method on the ...Tags struct and add the tags
95+
by hand:
96+
97+
func (s SomethingTags) ItHappened(times int) {
98+
tags := s.GetTags() // get all static tags
99+
tags["reserved"] = fmt.Sprint(rand.Intn(10)) // add the reserved one(s)
100+
s.Emitter.Count("it_happened", times, structured.DynamicTags(tags)) // wrap the map as Metadata and use the lower-level Emitter
101+
}
102+
*/
103+
package structured

0 commit comments

Comments
 (0)