Skip to content

Commit de3ecfb

Browse files
Fix a bucket mapping issue with the origin inspector latency metrics
The exporter maps bucketed counters from the real-time stats API to a Prometheus histogram. Since it doesn't have the actual values that were used to build the bucketed counters, it has to pick a value for each bucket to call Observe() with. The current code uses values that map to the wrong buckets. Here's a subset of the buckets from the origin latency histogram definition: 0.001, 0.005, 0.010. Calling Observe() with the value of 0.005 places the value in the 1-5ms bucket, not the 5-10ms bucket. The result of the current behavior is that the counts for each bucket are shifted to the next smaller bucket. This fixes the issue by using the values at the end of each bucket interval as the parameters to Observe(). This will result in increased origin latency values when users upgrade to a release with this commit, but the increased numbers are more accurate.
1 parent 11aba6d commit de3ecfb

File tree

2 files changed

+58
-61
lines changed

2 files changed

+58
-61
lines changed

pkg/origin/process.go

+38-41
Original file line numberDiff line numberDiff line change
@@ -110,62 +110,59 @@ func process(serviceID, serviceName, datacenter, origin string, stats Stats, m *
110110
// Latency stats are clearly from xxx_bucket{le="v"} metrics,
111111
// but I don't see a good way to re-populate a histogram from
112112
// those numbers. (If I'm missing something, file an issue!)
113-
//
114-
// Our clue is the final bucket, which says it's observations
115-
// "of 60s and above". Based on that we use the lower bound of
116-
// each stat as the observed value, except for the first bucket
117-
// which we yolo as 500us because 0 doesn't really make sense??
113+
// We use the upper bound of each bucket because the interval
114+
// is (start, end]
118115
for v, n := range map[float64]uint64{
119-
60.00: stats.Latency60000plus,
120-
10.00: stats.Latency10000to60000,
121-
5.000: stats.Latency5000to10000,
122-
1.000: stats.Latency1000to5000,
123-
0.500: stats.Latency500to1000,
124-
0.250: stats.Latency250to500,
125-
0.100: stats.Latency100to250,
126-
0.050: stats.Latency50to100,
127-
0.010: stats.Latency10to50,
128-
0.005: stats.Latency5to10,
129-
0.001: stats.Latency1to5,
130-
0.0005: stats.Latency0to1, // yolo
116+
61.00: stats.Latency60000plus,
117+
60.00: stats.Latency10000to60000,
118+
10.00: stats.Latency5000to10000,
119+
5.000: stats.Latency1000to5000,
120+
1.000: stats.Latency500to1000,
121+
0.500: stats.Latency250to500,
122+
0.250: stats.Latency100to250,
123+
0.100: stats.Latency50to100,
124+
0.050: stats.Latency10to50,
125+
0.010: stats.Latency5to10,
126+
0.005: stats.Latency1to5,
127+
0.001: stats.Latency0to1,
131128
} {
132129
for i := uint64(0); i < n; i++ {
133130
m.LatencySeconds.WithLabelValues(serviceID, serviceName, datacenter, origin, srcDelivery).Observe(v)
134131
}
135132
}
136133

137134
for v, n := range map[float64]uint64{
138-
60.00: stats.WafLatency60000plus,
139-
10.00: stats.WafLatency10000to60000,
140-
5.000: stats.WafLatency5000to10000,
141-
1.000: stats.WafLatency1000to5000,
142-
0.500: stats.WafLatency500to1000,
143-
0.250: stats.WafLatency250to500,
144-
0.100: stats.WafLatency100to250,
145-
0.050: stats.WafLatency50to100,
146-
0.010: stats.WafLatency10to50,
147-
0.005: stats.WafLatency5to10,
148-
0.001: stats.WafLatency1to5,
149-
0.0005: stats.WafLatency0to1, // yolo
135+
61.00: stats.WafLatency60000plus,
136+
60.00: stats.WafLatency10000to60000,
137+
10.00: stats.WafLatency5000to10000,
138+
5.000: stats.WafLatency1000to5000,
139+
1.000: stats.WafLatency500to1000,
140+
0.500: stats.WafLatency250to500,
141+
0.250: stats.WafLatency100to250,
142+
0.100: stats.WafLatency50to100,
143+
0.050: stats.WafLatency10to50,
144+
0.010: stats.WafLatency5to10,
145+
0.005: stats.WafLatency1to5,
146+
0.001: stats.WafLatency0to1,
150147
} {
151148
for i := uint64(0); i < n; i++ {
152149
m.LatencySeconds.WithLabelValues(serviceID, serviceName, datacenter, origin, srcWaf).Observe(v)
153150
}
154151
}
155152

156153
for v, n := range map[float64]uint64{
157-
60.00: stats.ComputeLatency60000plus,
158-
10.00: stats.ComputeLatency10000to60000,
159-
5.000: stats.ComputeLatency5000to10000,
160-
1.000: stats.ComputeLatency1000to5000,
161-
0.500: stats.ComputeLatency500to1000,
162-
0.250: stats.ComputeLatency250to500,
163-
0.100: stats.ComputeLatency100to250,
164-
0.050: stats.ComputeLatency50to100,
165-
0.010: stats.ComputeLatency10to50,
166-
0.005: stats.ComputeLatency5to10,
167-
0.001: stats.ComputeLatency1to5,
168-
0.0005: stats.ComputeLatency0to1, // yolo
154+
61.00: stats.ComputeLatency60000plus,
155+
60.00: stats.ComputeLatency10000to60000,
156+
10.00: stats.ComputeLatency5000to10000,
157+
5.000: stats.ComputeLatency1000to5000,
158+
1.000: stats.ComputeLatency500to1000,
159+
0.500: stats.ComputeLatency250to500,
160+
0.250: stats.ComputeLatency100to250,
161+
0.100: stats.ComputeLatency50to100,
162+
0.050: stats.ComputeLatency10to50,
163+
0.010: stats.ComputeLatency5to10,
164+
0.005: stats.ComputeLatency1to5,
165+
0.001: stats.ComputeLatency0to1,
169166
} {
170167
for i := uint64(0); i < n; i++ {
171168
m.LatencySeconds.WithLabelValues(serviceID, serviceName, datacenter, origin, srcCompute).Observe(v)

pkg/rt/common_test.go

+20-20
Original file line numberDiff line numberDiff line change
@@ -4881,7 +4881,7 @@ const originsResponseFixture = `{
48814881

48824882
var expectedOriginsMetricsOutputMap = map[string]float64{
48834883
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="+Inf"}`: 10,
4884-
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.001"}`: 10,
4884+
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.001"}`: 5,
48854885
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.005"}`: 10,
48864886
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.01"}`: 10,
48874887
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.05"}`: 10,
@@ -4893,10 +4893,10 @@ var expectedOriginsMetricsOutputMap = map[string]float64{
48934893
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="5"}`: 10,
48944894
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="60"}`: 10,
48954895
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="+Inf"}`: 10,
4896-
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.001"}`: 2,
4897-
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.005"}`: 3,
4898-
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.01"}`: 4,
4899-
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.05"}`: 10,
4896+
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.001"}`: 1,
4897+
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.005"}`: 2,
4898+
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.01"}`: 3,
4899+
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.05"}`: 4,
49004900
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.1"}`: 10,
49014901
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.25"}`: 10,
49024902
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.5"}`: 10,
@@ -4906,8 +4906,8 @@ var expectedOriginsMetricsOutputMap = map[string]float64{
49064906
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="60"}`: 10,
49074907
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="+Inf"}`: 10,
49084908
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.001"}`: 0,
4909-
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.005"}`: 5,
4910-
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.01"}`: 10,
4909+
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.005"}`: 0,
4910+
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.01"}`: 5,
49114911
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.05"}`: 10,
49124912
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.1"}`: 10,
49134913
`testspace_origin_latency_seconds_bucket{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.25"}`: 10,
@@ -4919,9 +4919,9 @@ var expectedOriginsMetricsOutputMap = map[string]float64{
49194919
`testspace_origin_latency_seconds_count{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute"}`: 10,
49204920
`testspace_origin_latency_seconds_count{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery"}`: 10,
49214921
`testspace_origin_latency_seconds_count{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf"}`: 10,
4922-
`testspace_origin_latency_seconds_sum{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute"}`: 0.008,
4923-
`testspace_origin_latency_seconds_sum{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery"}`: 0.317,
4924-
`testspace_origin_latency_seconds_sum{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf"}`: 0.075,
4922+
`testspace_origin_latency_seconds_sum{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute"}`: 0.03,
4923+
`testspace_origin_latency_seconds_sum{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery"}`: 0.666,
4924+
`testspace_origin_latency_seconds_sum{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf"}`: 0.3,
49254925
`testspace_origin_resp_body_bytes_total{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute"}`: 1152,
49264926
`testspace_origin_resp_body_bytes_total{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery"}`: 1024,
49274927
`testspace_origin_resp_body_bytes_total{datacenter="TYO",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf"}`: 1088,
@@ -5000,7 +5000,7 @@ var expectedOriginsMetricsOutputMap = map[string]float64{
50005000

50015001
var expectedOriginsMetricsAggOutputMap = map[string]float64{
50025002
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="+Inf"}`: 10,
5003-
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.001"}`: 10,
5003+
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.001"}`: 5,
50045004
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.005"}`: 10,
50055005
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.01"}`: 10,
50065006
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="0.05"}`: 10,
@@ -5012,10 +5012,10 @@ var expectedOriginsMetricsAggOutputMap = map[string]float64{
50125012
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="5"}`: 10,
50135013
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute",le="60"}`: 10,
50145014
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="+Inf"}`: 10,
5015-
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.001"}`: 2,
5016-
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.005"}`: 3,
5017-
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.01"}`: 4,
5018-
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.05"}`: 10,
5015+
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.001"}`: 1,
5016+
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.005"}`: 2,
5017+
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.01"}`: 3,
5018+
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.05"}`: 4,
50195019
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.1"}`: 10,
50205020
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.25"}`: 10,
50215021
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="0.5"}`: 10,
@@ -5025,8 +5025,8 @@ var expectedOriginsMetricsAggOutputMap = map[string]float64{
50255025
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery",le="60"}`: 10,
50265026
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="+Inf"}`: 10,
50275027
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.001"}`: 0,
5028-
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.005"}`: 5,
5029-
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.01"}`: 10,
5028+
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.005"}`: 0,
5029+
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.01"}`: 5,
50305030
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.05"}`: 10,
50315031
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.1"}`: 10,
50325032
`testspace_origin_latency_seconds_bucket{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf",le="0.25"}`: 10,
@@ -5038,9 +5038,9 @@ var expectedOriginsMetricsAggOutputMap = map[string]float64{
50385038
`testspace_origin_latency_seconds_count{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute"}`: 10,
50395039
`testspace_origin_latency_seconds_count{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery"}`: 10,
50405040
`testspace_origin_latency_seconds_count{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf"}`: 10,
5041-
`testspace_origin_latency_seconds_sum{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute"}`: 0.008,
5042-
`testspace_origin_latency_seconds_sum{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery"}`: 0.317,
5043-
`testspace_origin_latency_seconds_sum{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf"}`: 0.075,
5041+
`testspace_origin_latency_seconds_sum{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute"}`: 0.03,
5042+
`testspace_origin_latency_seconds_sum{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery"}`: 0.666,
5043+
`testspace_origin_latency_seconds_sum{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf"}`: 0.3,
50445044
`testspace_origin_resp_body_bytes_total{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="compute"}`: 1152,
50455045
`testspace_origin_resp_body_bytes_total{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="delivery"}`: 1024,
50465046
`testspace_origin_resp_body_bytes_total{datacenter="aggregate",origin="testorigin",service_id="my-service-id",service_name="my-service-name",source="waf"}`: 1088,

0 commit comments

Comments
 (0)