Skip to content

Commit

Permalink
[tmpnet] Add check for metrics collection to monitoring action
Browse files Browse the repository at this point in the history
  • Loading branch information
maru-ava committed Feb 24, 2025
1 parent 91b64a7 commit 3ec85e1
Show file tree
Hide file tree
Showing 5 changed files with 184 additions and 10 deletions.
14 changes: 13 additions & 1 deletion .github/actions/run-monitored-tmpnet-cmd/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,4 +80,16 @@ runs:
if: always()
with:
name: ${{ inputs.artifact_prefix }}-tmpnet-data
# TODO(marun) Check that collection is working by querying prometheus and loki with the GH_* labels above
- name: Check that metrics were collected
shell: bash
run: go run ./tests/fixture/tmpnet/cmd check-metrics
env:
PROMETHEUS_USERNAME: ${{ inputs.prometheus_username }}
PROMETHEUS_PASSWORD: ${{ inputs.prometheus_password }}
GH_REPO: ${{ inputs.repository_owner }}/${{ inputs.repository_name }}
GH_WORKFLOW: ${{ inputs.workflow }}
GH_RUN_ID: ${{ inputs.run_id }}
GH_RUN_NUMBER: ${{ inputs.run_number }}
GH_RUN_ATTEMPT: ${{ inputs.run_attempt }}
GH_JOB_ID: ${{ inputs.job }}
# TODO(marun) Check that log collection was successful
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,8 @@ github.com/jessevdk/go-flags v0.0.0-20141203071132-1679536dcc89/go.mod h1:4FA24M
github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA=
github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=
github.com/jrick/logrotate v1.0.0/go.mod h1:LNinyqDIJnpAur+b8yyulnQw/wDuN1+BYKlTRt3OuAQ=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
Expand Down Expand Up @@ -478,6 +480,8 @@ github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o=
github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus=
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
github.com/nats-io/jwt v0.3.0/go.mod h1:fRYCDE99xlTsqUzISS1Bi75UBJ6ljOJQOAAu5VglpSg=
Expand Down
132 changes: 132 additions & 0 deletions tests/fixture/tmpnet/check_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package tmpnet

import (
	"context"
	"errors"
	"fmt"
	"net/http"
	"sort"
	"strings"
	"time"

	"github.com/prometheus/client_golang/api"
	"github.com/prometheus/client_golang/api/prometheus/v1"
	"github.com/prometheus/common/model"
	"go.uber.org/zap"

	"github.com/ava-labs/avalanchego/utils/logging"
)

// CheckMetricsExist reports whether the prometheus server has at least
// one metric matching the given network UUID. Labels derived from the
// GH_* env vars are added to the query by getCheckMetricsQuery.
//
// It returns nil when the query yields one or more results and an
// error when credentials cannot be obtained, the query cannot be built
// or executed, or no matching metrics are found.
func CheckMetricsExist(
	log logging.Logger,
	networkUUID string,
) error {
	user, pass, err := getCollectorCredentials(prometheusCmd)
	if err != nil {
		return fmt.Errorf("failed to get collector credentials: %w", err)
	}

	promQuery, err := getCheckMetricsQuery(networkUUID)
	if err != nil {
		return err
	}

	promURL := getPrometheusURL()
	log.Info("checking if metrics exist",
		zap.String("url", promURL),
		zap.String("query", promQuery),
	)

	samples, err := queryPrometheus(log, promURL, user, pass, promQuery)
	if err != nil {
		return err
	}

	// An empty result means nothing was collected for the selectors.
	if len(samples) == 0 {
		return errors.New("metrics not found")
	}

	log.Info("metrics exist",
		zap.String("query", promQuery),
		zap.Int("count", len(samples)),
	)
	return nil
}

// getCheckMetricsQuery returns the PromQL selector used to check if
// metrics exist.
//
// The selector matches on network_uuid (when networkUUID is non-empty)
// and on every non-empty label returned by githubLabelsFromEnv. An
// error is returned if a label value cannot be read as a string or if
// no selector could be built at all, since prometheus rejects a
// selector without any non-empty matcher.
func getCheckMetricsQuery(networkUUID string) (string, error) {
	var selectors []string
	if len(networkUUID) > 0 {
		// %q escapes quotes and backslashes so that the value cannot
		// break out of the PromQL string literal.
		selectors = append(selectors, fmt.Sprintf("network_uuid=%q", networkUUID))
	}

	githubLabels := githubLabelsFromEnv()

	// Map iteration order is random; sort the label names so that the
	// resulting (and logged) query is deterministic.
	labelNames := make([]string, 0, len(githubLabels))
	for label := range githubLabels {
		labelNames = append(labelNames, label)
	}
	sort.Strings(labelNames)

	for _, label := range labelNames {
		value, err := githubLabels.GetStringVal(label)
		if err != nil {
			return "", err
		}
		if len(value) == 0 {
			// Unset env vars must not constrain the query.
			continue
		}
		selectors = append(selectors, fmt.Sprintf("%s=%q", label, value))
	}

	if len(selectors) == 0 {
		return "", errors.New("unable to build metrics query: no network UUID provided and no GH_* env vars set")
	}
	return fmt.Sprintf("{%s}", strings.Join(selectors, ",")), nil
}

// queryPrometheus executes an instant query against the prometheus
// server at url, authenticating with basic auth, and returns the
// resulting vector.
//
// Any warnings returned with the query result are logged. An error is
// returned if the client cannot be created, the query fails or does
// not complete within the timeout, or the result is not a vector.
func queryPrometheus(
	log logging.Logger,
	url string,
	username string,
	password string,
	query string,
) (model.Vector, error) {
	// Upper bound on the whole query so that an unresponsive server
	// cannot hang the caller (e.g. a CI step) indefinitely.
	const queryTimeout = 30 * time.Second

	// Create client with basic auth
	client, err := api.NewClient(api.Config{
		Address: url,
		RoundTripper: &basicAuthRoundTripper{
			username: username,
			password: password,
			rt:       api.DefaultRoundTripper,
		},
	})
	if err != nil {
		return nil, fmt.Errorf("failed to create client: %w", err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), queryTimeout)
	defer cancel()

	// Query Prometheus
	result, warnings, err := v1.NewAPI(client).Query(
		ctx,
		query,
		time.Now(),
	)
	if err != nil {
		return nil, fmt.Errorf("query failed: %w", err)
	}
	if len(warnings) > 0 {
		log.Warn("prometheus query warnings",
			zap.Strings("warnings", warnings),
		)
	}

	// Check results
	vector, ok := result.(model.Vector)
	if !ok {
		return nil, fmt.Errorf("unexpected result type: %s", result.Type())
	}
	return vector, nil
}

// basicAuthRoundTripper is an http.RoundTripper that adds HTTP basic
// auth credentials to every request before delegating to rt.
type basicAuthRoundTripper struct {
	username, password string
	// rt performs the actual request.
	rt http.RoundTripper
}

// RoundTrip sends a copy of req carrying the configured basic auth
// credentials via the wrapped round tripper.
//
// The http.RoundTripper contract forbids modifying the caller's
// request, so the credentials are set on a clone rather than on req.
func (b *basicAuthRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) {
	authedReq := req.Clone(req.Context())
	authedReq.SetBasicAuth(b.username, b.password)
	return b.rt.RoundTrip(authedReq)
}
20 changes: 20 additions & 0 deletions tests/fixture/tmpnet/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,26 @@ func main() {
}
rootCmd.AddCommand(stopCollectorsCmd)

var networkUUID string
checkMetricsCmd := &cobra.Command{
Use: "check-metrics",
Short: "Checks whether the default prometheus server has the expected metrics",
RunE: func(*cobra.Command, []string) error {
log, err := tests.LoggerForFormat("", rawLogFormat)
if err != nil {
return err
}
return tmpnet.CheckMetricsExist(log, networkUUID)
},
}
checkMetricsCmd.PersistentFlags().StringVar(
&networkUUID,
"network-uuid",
"",
"[optional] The network UUID to check metrics for. Labels read from GH_* env vars will always be used.",
)
rootCmd.AddCommand(checkMetricsCmd)

if err := rootCmd.Execute(); err != nil {
fmt.Fprintf(os.Stderr, "tmpnetctl failed: %v\n", err)
os.Exit(1)
Expand Down
24 changes: 15 additions & 9 deletions tests/fixture/tmpnet/node_process.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,16 +262,8 @@ func (p *NodeProcess) writeMonitoringConfig() error {
"node_id": p.node.NodeID,
"is_ephemeral_node": strconv.FormatBool(p.node.IsEphemeral),
"network_owner": p.node.NetworkOwner,
// prometheus/promtail ignore empty values so including these
// labels with empty values outside of a github worker (where
// the env vars will not be set) should not be a problem.
"gh_repo": os.Getenv("GH_REPO"),
"gh_workflow": os.Getenv("GH_WORKFLOW"),
"gh_run_id": os.Getenv("GH_RUN_ID"),
"gh_run_number": os.Getenv("GH_RUN_NUMBER"),
"gh_run_attempt": os.Getenv("GH_RUN_ATTEMPT"),
"gh_job_id": os.Getenv("GH_JOB_ID"),
}
commonLabels.SetDefaults(githubLabelsFromEnv())

prometheusConfig := []FlagsMap{
{
Expand Down Expand Up @@ -419,3 +411,17 @@ func watchLogFileForFatal(ctx context.Context, cancelWithCause context.CancelCau
}
}
}

// githubLabelsFromEnv returns the gh_* labels populated from the
// corresponding GH_* env vars. A label whose env var is unset maps to
// the empty string.
func githubLabelsFromEnv() FlagsMap {
	// Pairs of label name and the env var that supplies its value.
	sources := [...]struct{ label, envVar string }{
		{"gh_repo", "GH_REPO"},
		{"gh_workflow", "GH_WORKFLOW"},
		{"gh_run_id", "GH_RUN_ID"},
		{"gh_run_number", "GH_RUN_NUMBER"},
		{"gh_run_attempt", "GH_RUN_ATTEMPT"},
		{"gh_job_id", "GH_JOB_ID"},
	}

	// prometheus/promtail ignore empty values so including these
	// labels with empty values outside of a github worker (where
	// the env vars will not be set) should not be a problem.
	labels := make(FlagsMap, len(sources))
	for _, src := range sources {
		labels[src.label] = os.Getenv(src.envVar)
	}
	return labels
}

0 comments on commit 3ec85e1

Please sign in to comment.