cadence-workflow
diff --git a/‎common/metrics/structured/base.go‎
Lines changed: 149 additions & 0 deletions b/‎common/metrics/structured/base.go‎
Lines changed: 149 additions & 0 deletions
diff --git a/‎common/metrics/structured/doc.go‎
Lines changed: 103 additions & 0 deletions b/‎common/metrics/structured/doc.go‎
Lines changed: 103 additions & 0 deletions
@@ -0,0 +1,149 @@
+package structured
+
+import (
+	"maps"
+	"strconv"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/uber-go/tally"
+	"go.uber.org/fx"
+)
+
+// Module is the fx wiring for this package: it provides an Emitter that wraps
+// the tally.Scope supplied by the fx graph.
+var Module = fx.Options(
+	fx.Provide(
+		func(root tally.Scope) Emitter {
+			return Emitter{scope: root}
+		},
+	),
+)
+
+// Metadata is the interface shared by every "...Tags" struct.
+//
+// Hand-written implementations are generally NOT expected: just define your
+// struct, and let the code generator (`make metrics`) produce these methods.
+//
+// See the generated code for the intended usage and implementation.
+type Metadata interface {
+	// NumTags reports the number of tags, for efficient pre-allocation.
+	NumTags() int
+	// PutTags populates the provided map with the tags.
+	PutTags(into map[string]string)
+	// GetTags returns a pre-allocated and pre-populated map.
+	GetTags() map[string]string
+}
+
+// DynamicTags is a very simple helper for treating an arbitrary map as a Metadata.
+//
+// This can be used externally (for completely manual metrics) or in metrics-emitting
+// methods to simplify adding custom tags (e.g. it is returned from GetTags).
+type DynamicTags map[string]string
+
+var _ Metadata = DynamicTags{}
+
+// NumTags returns the number of tags currently held in the map.
+func (o DynamicTags) NumTags() int { return len(o) }
+
+// PutTags copies every tag into the provided map, overwriting any keys that
+// already exist there.
+func (o DynamicTags) PutTags(into map[string]string) { maps.Copy(into, o) }
+
+// GetTags returns an independent copy of the tags.
+//
+// The result is always non-nil and safe to write to, even when o is nil or empty.
+// maps.Clone is deliberately not used here: it returns nil for a nil map, and
+// adding a tag to that result would panic.
+func (o DynamicTags) GetTags() map[string]string {
+	out := make(map[string]string, len(o))
+	maps.Copy(out, o)
+	return out
+}
+
+// Emitter is the base helper for emitting metrics.  It deliberately contains only
+// low-level metrics-emitting funcs, to keep it as simple as possible.
+//
+// The intended use is via the `make metrics` code generator and structs-of-tags,
+// but (ab)using it by hand is intentionally possible, because ad-hoc metrics
+// should be easy and encouraged.
+//
+// Any map can be turned into a Metadata via DynamicTags.  [tally.Scope.Tagged] is
+// intentionally hidden by this API though: it is (somewhat) memory-wasteful,
+// self-referential interfaces are difficult to mock, and it makes it very hard to
+// figure out what tags may be present at runtime.
+//
+// TODO: this can / likely should be turned into an interface, so it can be
+// disconnected from tally: either to provide a specific version, or to drop it
+// entirely if desired.
+type Emitter struct {
+	// scope is intentionally NOT a no-op by default.
+	//
+	// Tests should use a test emitter, which should be quite easy to construct.
+	// This way a forgotten scope will panic, rather than causing a misleading
+	// lack-of-metrics.
+	//
+	// Currently this is constructed by common/config/metrics.go, so the scope
+	// already contains the `cadence_service:cadence-{whatever}` tag, but
+	// essentially no others (aside from platform-level stuff).
+	// The instance can be retrieved from go.uber.org/fx, as just `tally.Scope`.
+	scope tally.Scope
+}
+
+// Histogram records a duration-based histogram with the provided data.
+// It adds a "histogram_scale" tag, so histograms can be accurately subset in queries or via middleware.
+func (b Emitter) Histogram(name string, buckets SubsettableHistogram, dur time.Duration, meta Metadata) {
+	tags := make(DynamicTags, meta.NumTags()+1)
+	meta.PutTags(tags)
+
+	// all subsettable histograms need to emit scale values so scale changes
+	// can be correctly merged at query time.
+	if _, ok := tags["histogram_scale"]; ok {
+		// rewrite the existing tag so it can be noticed
+		tags["error_rename_this_tag_histogram_scale"] = tags["histogram_scale"]
+	}
+	tags["histogram_scale"] = strconv.Itoa(buckets.scale)
+
+	if !strings.HasSuffix(name, "_ns") {
+		// duration-based histograms are always in nanoseconds,
+		// and the name MUST be different from timers while we migrate,
+		// so this ensures we always have a unique _ns suffix.
+		//
+		// hopefully this is never used, but it'll at least make it clear if it is.
+		name = name + "_error_missing_suffix_ns"
+	}
+	b.scope.Tagged(tags).Histogram(name, buckets).RecordDuration(dur)
+}
+
+// IntHistogram records a count-based histogram with the provided data.
+// It adds a "histogram_scale" tag, so histograms can be accurately subset in queries or via middleware.
+func (b Emitter) IntHistogram(name string, buckets IntSubsettableHistogram, num int, meta Metadata) {
+	tags := make(DynamicTags, meta.NumTags()+1)
+	meta.PutTags(tags)
+
+	// all subsettable histograms need to emit scale values so scale changes
+	// can be correctly merged at query time.
+	if _, ok := tags["histogram_scale"]; ok {
+		// rewrite the existing tag so it can be noticed
+		tags["error_rename_this_tag_histogram_scale"] = tags["histogram_scale"]
+	}
+	tags["histogram_scale"] = strconv.Itoa(buckets.scale)
+
+	if !strings.HasSuffix(name, "_counts") {
+		// int-based histograms are always in "_counts" (currently anyway),
+		// and the name MUST be different from timers while we migrate.
+		// so this ensures we always have a unique _counts suffix.
+		//
+		// hopefully this is never used, but it'll at least make it clear if it is.
+		name = name + "_error_missing_suffix_counts"
+	}
+	b.scope.Tagged(tags).Histogram(name, buckets).RecordDuration(time.Duration(num))
+}
+
+// TODO: make a MinMaxHistogram helper which maintains a precise, rolling
+// min/max gauge, over a window larger than the metrics granularity (e.g. ~20s)
+// to work around gauges' last-data-only behavior.
+//
+// This will likely require some additional state though, and might benefit from
+// keeping that state further up the Tags-stack to keep contention and
+// series-deduplication-costs low.
+//
+// Maybe OTEL / Prometheus will natively support this one day.  It'd be simple.
+
+// Count increments the counter called name by num, tagged with the tags
+// returned by meta.GetTags.
+func (b Emitter) Count(name string, num int, meta Metadata) {
+	tagged := b.scope.Tagged(meta.GetTags())
+	tagged.Counter(name).Inc(int64(num))
+}
+
+// Gauge updates the gauge called name to val, tagged with the tags returned by
+// meta.GetTags.
+func (b Emitter) Gauge(name string, val float64, meta Metadata) {
+	tagged := b.scope.Tagged(meta.GetTags())
+	tagged.Gauge(name).Update(val)
+}
+
+// NewTestEmitter builds an Emitter for use in tests.
+// When scope is nil, tally.NoopScope is used; otherwise the provided scope is
+// used as-is.
+func NewTestEmitter(t *testing.T, scope tally.Scope) Emitter {
+	// t is only touched to require that it is non-nil
+	t.Name()
+
+	if scope != nil {
+		return Emitter{scope: scope}
+	}
+	return Emitter{scope: tally.NoopScope}
+}
@@ -0,0 +1,103 @@
+/*
+Package structured contains the base objects for a struct-based metrics system.
+
+This is intended to be used with internal/tools/metricsgen, but the Emitter is
+public on many ...Tags structs to ensure ad-hoc metrics are still simple to emit
+(and to make codegen reasonably easy).
+
+For concrete details, check the generated code of any ...Tags structs, or the
+generator in [github.com/uber/cadence/internal/tools/metricsgen].
+
+# To make a new metrics-tag-containing struct
+
+  - Define a `type ...Tags struct` anywhere.  These can be public or private.
+  - Embed any parent ...Tags structs desired, and add any fields to store tag values
+    (or declare that they will be emitted, if they are not static)
+  - Add a `//go:generate metricsgen` comment to the file (if not already present)
+  - Run `make metrics` to generate the supporting code
+
+In many cases, that's likely enough.  Construct your new thing and use it:
+
+	thing := NewYourTags(parents, and, tags) // get it from somewhere
+	thing.Count("name", 1)                   // "name" must be unique within Cadence
+	// or inside a method on YourTags:
+	func (y YourTags) ItHappened() {
+		y.Count("it_happened", 1)
+	}
+
+to emit a metric with all the associated tags.
+
+# To add new tags to existing metrics / structs
+
+Add the field and run `make metrics`.
+
+This will re-generate the constructor(s), which will lead to a broken build.
+Just chase build failures until you've ensured that every code path has access
+to the new data you wanted to add.
+
+# To see what tags an existing metric has
+
+Find the name string (e.g. grep for it), open it in an IDE, and just ask the
+IDE to auto-complete a field access:
+
+	yourTagsInstance.<ctrl-space to request autocomplete>
+
+In Goland, VSCode, and likely elsewhere, this will give you a drop-down of all
+fields inherited from all parents, for easy reading.
+
+# Best practices
+
+Use constant, in-line strings for metric names.  Prometheus requires each
+"name" to have a stable set of tags, so there is no safety benefit to using a
+const - generally speaking a metric name must NOT be shared.
+
+Ad-hoc metrics are encouraged to use the convenience methods for simplicity.
+When curious about something, just emit a metric and find out later (but watch
+out for cardinality).
+
+Avoid pointers, both for the ...Tags struct and its values, to prevent mutation.
+This also implies you should generally use "simple" and minimal field types, as
+they will be copied repeatedly - avoid e.g. complex thrift objects.  Hopefully
+this will end up being nicer to the garbage collector than pointers everywhere.
+
+For any metrics (or "events" which have multiple metrics) you consider "stable"
+or have alerts or dashboards based on, strongly consider declaring a method on
+your ...Tags struct and emitting in there.  This helps inform reviewers that
+changing the metrics might cause problems elsewhere, and documents intent for
+Cadence operators if they get an alert or see strange numbers.
+
+# Code generation customization
+
+Fields have two major options available: they can declare a custom to-string
+conversion, and they can "reserve" a tag without defining a value:
+
+	type YourTags struct {
+		Fancy protobuf.Thing `tag:"fancy" convert:"{{.}}.String()"`
+		Reserved struct{} `tag:"reserved"`
+	}
+
+Custom conversion is just a text/template string, where `.` will be filled in
+with the field access (i.e. `y.Fancy`).  Strings work automatically, and
+integers (int, int32, and int64) will be automatically `strconv.Itoa`-converted,
+but all other types will require custom conversion.  As you cannot declare new
+imports in this string, make sure you've imported any packages you need to
+stringify a value in the same file as the ...Tags is declared.
+
+Reserved tags serve two purposes:
+  - They document that a tag will be emitted, so it can be discovered
+  - They reserve space in the map returned by `GetTags()`, so you can
+    efficiently add it at runtime
+
+Because reserved tags will not be filled in by convenience methods like `Count`,
+they are almost exclusively useful for methods that emit specific metrics.
+
+For the simplest use cases, use a method on the ...Tags struct and add the tags
+by hand:
+
+	func (s SomethingTags) ItHappened(times int) {
+		// get all static tags
+		tags := s.GetTags()
+		// add the reserved one(s)
+		tags["reserved"] = fmt.Sprint(rand.Intn(10))
+		// use the lower-level Emitter, which needs the map wrapped as a Metadata
+		s.Emitter.Count("it_happened", times, structured.DynamicTags(tags))
+	}
+*/
+package structured