Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new Argo Workflow metrics #19447

Merged
merged 19 commits into from
Jan 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions argo_workflows/changelog.d/19447.added
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add new Argo Workflow metrics for v3.6+
17 changes: 16 additions & 1 deletion argo_workflows/datadog_checks/argo_workflows/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,23 @@

# Metrics available in Argo Workflow v3.6+
#
# Maps the raw metric name found in the controller's metrics payload (key) to
# the shorter dotted name used by this integration (value). Keys are kept in
# alphabetical order and must be unique: a repeated key in a dict literal is
# silently collapsed by Python, with the last occurrence winning.
V3_6_PLUS_METRICS = {
    'argo_workflows_cronworkflows_concurrencypolicy_triggered': 'cronworkflows.concurrencypolicy_triggered',
    'argo_workflows_cronworkflows_triggered': 'cronworkflows.triggered',
    'argo_workflows_deprecated_feature': 'deprecated.feature',
    'argo_workflows_gauge': 'current_workflows',
    'argo_workflows_is_leader': 'is_leader',
    'argo_workflows_k8s_request_duration': 'k8s_request.duration',
    'argo_workflows_log_messages': 'log_messages',
    'argo_workflows_pod_pending_count': 'pod.pending',
    'argo_workflows_pods_gauge': 'pods',
    'argo_workflows_pods_total_count': 'pods_total',
    'argo_workflows_queue_depth_gauge': 'queue_depth',
    'argo_workflows_queue_duration': 'queue.duration',
    'argo_workflows_queue_longest_running': 'queue.longest_running',
    'argo_workflows_queue_retries': 'queue.retries',
    'argo_workflows_queue_unfinished_work': 'queue.unfinished_work',
    'argo_workflows_total_count': 'total',
    'argo_workflows_version': 'version',
    'argo_workflows_workflowtemplate_runtime': 'workflowtemplate.runtime',
    'argo_workflows_workflowtemplate_triggered': 'workflowtemplate.triggered',
}
111 changes: 65 additions & 46 deletions argo_workflows/metadata.csv
Original file line number Diff line number Diff line change
@@ -1,46 +1,65 @@
metric_name,metric_type,interval,unit_name,per_unit_name,description,orientation,integration,short_name,curated_metric
argo_workflows.current_workflows,gauge,,,,Number of Workflows currently accessible by the controller by status (refreshed every 15s),0,argo_workflows,,
argo_workflows.error.count,count,,error,,Number of errors encountered by the controller by cause,0,argo_workflows,,
argo_workflows.go.gc.duration.seconds.count,count,,,,The summary count of garbage collection cycles in the Argo Workflows instance,0,argo_workflows,,
argo_workflows.go.gc.duration.seconds.quantile,gauge,,,,The pause duration of garbage collection cycles in the Argo Workflows instance by `quantile`,0,argo_workflows,,
argo_workflows.go.gc.duration.seconds.sum,count,,second,,The sum of the pause duration of garbage collection cycles in the Argo Workflows instance,0,argo_workflows,,
argo_workflows.go.goroutines,gauge,,,,Number of goroutines that currently exist.,0,argo_workflows,,
argo_workflows.go.info,gauge,,,,Information about the Go environment.,0,argo_workflows,,
argo_workflows.go.memstats.alloc_bytes,gauge,,byte,,Number of bytes allocated and still in use.,0,argo_workflows,,
argo_workflows.go.memstats.alloc_bytes.count,count,,byte,,"Total number of bytes allocated, even if freed.",0,argo_workflows,,
argo_workflows.go.memstats.buck_hash.sys_bytes,gauge,,byte,,Number of bytes used by the profiling bucket hash table.,0,argo_workflows,,
argo_workflows.go.memstats.frees.count,count,,,,Total number of frees.,0,argo_workflows,,
argo_workflows.go.memstats.gc.sys_bytes,gauge,,byte,,Number of bytes used for garbage collection system metadata.,0,argo_workflows,,
argo_workflows.go.memstats.heap.alloc_bytes,gauge,,byte,,Number of heap bytes allocated and still in use.,0,argo_workflows,,
argo_workflows.go.memstats.heap.idle_bytes,gauge,,byte,,Number of heap bytes waiting to be used.,0,argo_workflows,,
argo_workflows.go.memstats.heap.inuse_bytes,gauge,,byte,,Number of heap bytes that are in use.,0,argo_workflows,,
argo_workflows.go.memstats.heap.objects,gauge,,,,Number of allocated objects.,0,argo_workflows,,
argo_workflows.go.memstats.heap.released_bytes,gauge,,byte,,Number of heap bytes released to OS.,0,argo_workflows,,
argo_workflows.go.memstats.heap.sys_bytes,gauge,,byte,,Number of heap bytes obtained from system.,0,argo_workflows,,
argo_workflows.go.memstats.last_gc_time_seconds,gauge,,second,,Number of seconds since 1970 of last garbage collection.,0,argo_workflows,,
argo_workflows.go.memstats.lookups.count,count,,,,Total number of pointer lookups.,0,argo_workflows,,
argo_workflows.go.memstats.mallocs.count,count,,,,Total number of mallocs.,0,argo_workflows,,
argo_workflows.go.memstats.mcache.inuse_bytes,gauge,,byte,,Number of bytes in use by mcache structures.,0,argo_workflows,,
argo_workflows.go.memstats.mcache.sys_bytes,gauge,,byte,,Number of bytes used for mcache structures obtained from system.,0,argo_workflows,,
argo_workflows.go.memstats.mspan.inuse_bytes,gauge,,byte,,Number of bytes in use by mspan structures.,0,argo_workflows,,
argo_workflows.go.memstats.mspan.sys_bytes,gauge,,byte,,Number of bytes used for mspan structures obtained from system.,0,argo_workflows,,
argo_workflows.go.memstats.next.gc_bytes,gauge,,byte,,Number of heap bytes when next garbage collection will take place.,0,argo_workflows,,
argo_workflows.go.memstats.other.sys_bytes,gauge,,byte,,Number of bytes used for other system allocations.,0,argo_workflows,,
argo_workflows.go.memstats.stack.inuse_bytes,gauge,,byte,,Number of bytes in use by the stack allocator.,0,argo_workflows,,
argo_workflows.go.memstats.stack.sys_bytes,gauge,,byte,,Number of bytes obtained from system for stack allocator.,0,argo_workflows,,
argo_workflows.go.memstats.sys_bytes,gauge,,byte,,Number of bytes obtained from system.,0,argo_workflows,,
argo_workflows.go.threads,gauge,,,,Number of OS threads created.,0,argo_workflows,,
argo_workflows.k8s_request.count,count,,request,,"Number of kubernetes requests executed. https://argo-workflows.readthedocs.io/en/release-3.5/metrics/#argo_workflows_k8s_request_total",0,argo_workflows,,
argo_workflows.log_messages.count,count,,message,,Total number of log messages.,0,argo_workflows,,
argo_workflows.operation_duration_seconds.bucket,count,,,,The count of observations in the histogram of durations of operations split into buckets by upper bound.,0,argo_workflows,,
argo_workflows.operation_duration_seconds.count,count,,,,The total count of observations in the histogram of durations of operations,0,argo_workflows,,
argo_workflows.operation_duration_seconds.sum,count,,second,,Total time in seconds spent on operations,0,argo_workflows,,
argo_workflows.pods,gauge,,,,Number of Pods from Workflows currently accessible by the controller by status (refreshed every 15s),0,argo_workflows,,
argo_workflows.queue_adds.count,count,,,,Adds to the queue,0,argo_workflows,,
argo_workflows.queue_depth,gauge,,,,Depth of the queue,0,argo_workflows,,
argo_workflows.queue_latency.bucket,count,,,,"The count of observations for the time that objects spend waiting in the queue. Split into buckets by upper bounds",0,argo_workflows,,
argo_workflows.queue_latency.count,count,,,,"The total count of observations for the time that objects spend waiting in the queue.",0,argo_workflows,,
argo_workflows.queue_latency.sum,count,,second,,"The total time that objects spend waiting in the queue.",0,argo_workflows,,
argo_workflows.workers_busy,gauge,,worker,,Number of workers currently busy,0,argo_workflows,,
argo_workflows.workflow_condition,gauge,,,,"Workflow condition. https://argo-workflows.readthedocs.io/en/release-3.5/metrics/#argo_workflows_workflow_condition",0,argo_workflows,,
argo_workflows.workflows_processed.count,count,,,,Number of workflow updates processed,0,argo_workflows,,
metric_name,metric_type,interval,unit_name,per_unit_name,description,orientation,integration,short_name,curated_metric,sample_tags
argo_workflows.cronworkflows.concurrencypolicy_triggered.count,count,,,,Number of times concurrency policy triggered in cron workflows,0,argo_workflows,,,
argo_workflows.cronworkflows.triggered.count,count,,,,Number of cron workflows triggered,0,argo_workflows,,,
argo_workflows.current_workflows,gauge,,,,Number of Workflows currently accessible by the controller by status (refreshed every 15s),0,argo_workflows,,,
argo_workflows.deprecated.feature,gauge,,,,Indicates usage of deprecated features,0,argo_workflows,,,
argo_workflows.error.count,count,,error,,Number of errors encountered by the controller by cause,0,argo_workflows,,,
argo_workflows.go.gc.duration.seconds.count,count,,,,The summary count of garbage collection cycles in the Argo Workflows instance,0,argo_workflows,,,
argo_workflows.go.gc.duration.seconds.quantile,gauge,,,,The pause duration of garbage collection cycles in the Argo Workflows instance by `quantile`,0,argo_workflows,,,
argo_workflows.go.gc.duration.seconds.sum,count,,second,,The sum of the pause duration of garbage collection cycles in the Argo Workflows instance,0,argo_workflows,,,
argo_workflows.go.goroutines,gauge,,,,Number of goroutines that currently exist.,0,argo_workflows,,,
argo_workflows.go.info,gauge,,,,Information about the Go environment.,0,argo_workflows,,,
argo_workflows.go.memstats.alloc_bytes,gauge,,byte,,Number of bytes allocated and still in use.,0,argo_workflows,,,
argo_workflows.go.memstats.alloc_bytes.count,count,,byte,,"Total number of bytes allocated, even if freed.",0,argo_workflows,,,
argo_workflows.go.memstats.buck_hash.sys_bytes,gauge,,byte,,Number of bytes used by the profiling bucket hash table.,0,argo_workflows,,,
argo_workflows.go.memstats.frees.count,count,,,,Total number of frees.,0,argo_workflows,,,
argo_workflows.go.memstats.gc.sys_bytes,gauge,,byte,,Number of bytes used for garbage collection system metadata.,0,argo_workflows,,,
argo_workflows.go.memstats.heap.alloc_bytes,gauge,,byte,,Number of heap bytes allocated and still in use.,0,argo_workflows,,,
argo_workflows.go.memstats.heap.idle_bytes,gauge,,byte,,Number of heap bytes waiting to be used.,0,argo_workflows,,,
argo_workflows.go.memstats.heap.inuse_bytes,gauge,,byte,,Number of heap bytes that are in use.,0,argo_workflows,,,
argo_workflows.go.memstats.heap.objects,gauge,,,,Number of allocated objects.,0,argo_workflows,,,
argo_workflows.go.memstats.heap.released_bytes,gauge,,byte,,Number of heap bytes released to OS.,0,argo_workflows,,,
argo_workflows.go.memstats.heap.sys_bytes,gauge,,byte,,Number of heap bytes obtained from system.,0,argo_workflows,,,
argo_workflows.go.memstats.last_gc_time_seconds,gauge,,second,,Number of seconds since 1970 of last garbage collection.,0,argo_workflows,,,
argo_workflows.go.memstats.lookups.count,count,,,,Total number of pointer lookups.,0,argo_workflows,,,
argo_workflows.go.memstats.mallocs.count,count,,,,Total number of mallocs.,0,argo_workflows,,,
argo_workflows.go.memstats.mcache.inuse_bytes,gauge,,byte,,Number of bytes in use by mcache structures.,0,argo_workflows,,,
argo_workflows.go.memstats.mcache.sys_bytes,gauge,,byte,,Number of bytes used for mcache structures obtained from system.,0,argo_workflows,,,
argo_workflows.go.memstats.mspan.inuse_bytes,gauge,,byte,,Number of bytes in use by mspan structures.,0,argo_workflows,,,
argo_workflows.go.memstats.mspan.sys_bytes,gauge,,byte,,Number of bytes used for mspan structures obtained from system.,0,argo_workflows,,,
argo_workflows.go.memstats.next.gc_bytes,gauge,,byte,,Number of heap bytes when next garbage collection will take place.,0,argo_workflows,,,
argo_workflows.go.memstats.other.sys_bytes,gauge,,byte,,Number of bytes used for other system allocations.,0,argo_workflows,,,
argo_workflows.go.memstats.stack.inuse_bytes,gauge,,byte,,Number of bytes in use by the stack allocator.,0,argo_workflows,,,
argo_workflows.go.memstats.stack.sys_bytes,gauge,,byte,,Number of bytes obtained from system for stack allocator.,0,argo_workflows,,,
argo_workflows.go.memstats.sys_bytes,gauge,,byte,,Number of bytes obtained from system.,0,argo_workflows,,,
argo_workflows.go.threads,gauge,,,,Number of OS threads created.,0,argo_workflows,,,
argo_workflows.is_leader,gauge,,,,Indicates if the current instance is the leader,0,argo_workflows,,,
argo_workflows.k8s_request.count,count,,request,,Number of kubernetes requests executed. https://argo-workflows.readthedocs.io/en/release-3.5/metrics/#argo_workflows_k8s_request_total,0,argo_workflows,,,
argo_workflows.k8s_request.duration.bucket,count,,,,Count of Kubernetes request durations split into buckets by upper bounds,0,argo_workflows,,,
argo_workflows.k8s_request.duration.count,count,,,,Total count of Kubernetes request durations,0,argo_workflows,,,
argo_workflows.k8s_request.duration.sum,count,,second,,Sum of Kubernetes request durations,0,argo_workflows,,,
argo_workflows.log_messages.count,count,,message,,Total number of log messages.,0,argo_workflows,,,
argo_workflows.operation_duration_seconds.bucket,count,,,,The count of observations in the histogram of durations of operations split into buckets by upper bound.,0,argo_workflows,,,
argo_workflows.operation_duration_seconds.count,count,,,,The total count of observations in the histogram of durations of operations,0,argo_workflows,,,
argo_workflows.operation_duration_seconds.sum,count,,second,,Total time in seconds spent on operations,0,argo_workflows,,,
argo_workflows.pod.pending.count,count,,,,Number of pending pods,0,argo_workflows,,,
argo_workflows.pods,gauge,,,,Number of Pods from Workflows currently accessible by the controller by status (refreshed every 15s),0,argo_workflows,,,
argo_workflows.pods_total.count,count,,,,Total count of pods,0,argo_workflows,,,
argo_workflows.queue.duration.bucket,count,,,,Count of queue durations split into buckets by upper bounds,0,argo_workflows,,,
argo_workflows.queue.duration.count,count,,,,Total count of queue durations,0,argo_workflows,,,
argo_workflows.queue.duration.sum,count,,second,,Sum of queue durations,0,argo_workflows,,,
argo_workflows.queue.longest_running,gauge,,,,How long the longest-running task in the queue has been running,0,argo_workflows,,,
argo_workflows.queue.retries.count,count,,,,Number of queue retries,0,argo_workflows,,,
argo_workflows.queue.unfinished_work,gauge,,,,Unfinished work in the queue,0,argo_workflows,,,
argo_workflows.queue_adds.count,count,,,,Adds to the queue,0,argo_workflows,,,
argo_workflows.queue_depth,gauge,,,,Depth of the queue,0,argo_workflows,,,
argo_workflows.queue_latency.bucket,count,,,,The count of observations for the time that objects spend waiting in the queue. Split into buckets by upper bounds,0,argo_workflows,,,
argo_workflows.queue_latency.count,count,,,,The total count of observations for the time that objects spend waiting in the queue.,0,argo_workflows,,,
argo_workflows.queue_latency.sum,count,,second,,The total time that objects spend waiting in the queue.,0,argo_workflows,,,
argo_workflows.total.count,count,,,,Total count of workflows,0,argo_workflows,,,
argo_workflows.version,gauge,,,,Argo Workflows version,0,argo_workflows,,,
argo_workflows.workers_busy,gauge,,worker,,Number of workers currently busy,0,argo_workflows,,,
argo_workflows.workflow_condition,gauge,,,,Workflow condition. https://argo-workflows.readthedocs.io/en/release-3.5/metrics/#argo_workflows_workflow_condition,0,argo_workflows,,,
argo_workflows.workflows_processed.count,count,,,,Number of workflow updates processed,0,argo_workflows,,,
argo_workflows.workflowtemplate.runtime,gauge,,,,Runtime of the workflow template,0,argo_workflows,,,
argo_workflows.workflowtemplate.triggered.count,count,,,,Number of times workflow templates triggered,0,argo_workflows,,,
66 changes: 66 additions & 0 deletions argo_workflows/tests/fixtures/metricsv3-6+.txt
Original file line number Diff line number Diff line change
Expand Up @@ -203,3 +203,69 @@ go_threads 10
argo_workflows_log_messages{level="error"} 0
argo_workflows_log_messages{level="info"} 136
argo_workflows_log_messages{level="warning"} 0
# HELP argo_workflows_cronworkflows_concurrencypolicy_triggered Number of cron workflows triggered with concurrency policy.
# TYPE argo_workflows_cronworkflows_concurrencypolicy_triggered counter
argo_workflows_cronworkflows_concurrencypolicy_triggered{namespace="default",workflow="example-cron"} 42
# HELP argo_workflows_cronworkflows_triggered_total Total number of cron workflows triggered.
# TYPE argo_workflows_cronworkflows_triggered_total counter
argo_workflows_cronworkflows_triggered_total{namespace="default",workflow="example-cron"} 100
# HELP argo_workflows_log_messages Count of log messages by level.
# TYPE argo_workflows_log_messages counter
argo_workflows_log_messages{level="info"} 5000
argo_workflows_log_messages{level="error"} 200
# HELP argo_workflows_queue_retries Total number of retries in queues.
# TYPE argo_workflows_queue_retries counter
argo_workflows_queue_retries{queue="workflow_queue"} 25
# HELP argo_workflows_total_count Total count of workflows processed.
# TYPE argo_workflows_total_count counter
argo_workflows_total_count{namespace="default"} 1500
# HELP argo_workflows_workflowtemplate_triggered_total Total number of workflow templates triggered.
# TYPE argo_workflows_workflowtemplate_triggered_total counter
argo_workflows_workflowtemplate_triggered_total{template="example-template"} 75
# HELP argo_workflows_deprecated_feature Count of deprecated features used.
# TYPE argo_workflows_deprecated_feature gauge
argo_workflows_deprecated_feature{feature="legacy-feature"} 1
# HELP argo_workflows_current_workflows Current number of workflows running.
# TYPE argo_workflows_current_workflows gauge
argo_workflows_current_workflows{namespace="default"} 10
# HELP argo_workflows_is_leader Indicates if this controller is the leader.
# TYPE argo_workflows_is_leader gauge
argo_workflows_is_leader 1
# HELP argo_workflows_pod_pending_count Number of pending pods.
# TYPE argo_workflows_pod_pending_count counter
argo_workflows_pod_pending_count{namespace="default",reason="unschedulable"} 3
# HELP argo_workflows_pods Current number of pods.
# TYPE argo_workflows_pods gauge
argo_workflows_pods{namespace="default"} 50
# HELP argo_workflows_pods_total_count Total number of pods processed.
# TYPE argo_workflows_pods_total_count counter
argo_workflows_pods_total_count{namespace="default"} 2000
# HELP argo_workflows_queue_depth Current depth of workflow queues.
# TYPE argo_workflows_queue_depth gauge
argo_workflows_queue_depth{queue="workflow_queue"} 15
# HELP argo_workflows_queue_longest_running Longest-running task in the queue.
# TYPE argo_workflows_queue_longest_running gauge
argo_workflows_queue_longest_running{queue="workflow_queue"} 120
# HELP argo_workflows_queue_unfinished_work Amount of unfinished work in the queue.
# TYPE argo_workflows_queue_unfinished_work gauge
argo_workflows_queue_unfinished_work{queue="workflow_queue"} 500
# HELP argo_workflows_version Controller version information.
# TYPE argo_workflows_version gauge
argo_workflows_version{version="3.6.0",platform="linux/amd64"} 1
# HELP argo_workflows_workflowtemplate_runtime Runtime of workflow templates.
# TYPE argo_workflows_workflowtemplate_runtime gauge
argo_workflows_workflowtemplate_runtime{template="example-template"} 45.2
# HELP argo_workflows_k8s_request_duration Duration of Kubernetes API requests.
# TYPE argo_workflows_k8s_request_duration histogram
argo_workflows_k8s_request_duration_bucket{kind="pod",verb="GET",le="1.0"} 100
argo_workflows_k8s_request_duration_bucket{kind="pod",verb="GET",le="2.0"} 200
argo_workflows_k8s_request_duration_bucket{kind="pod",verb="GET",le="5.0"} 300
argo_workflows_k8s_request_duration_sum{kind="pod",verb="GET"} 1500.0
argo_workflows_k8s_request_duration_count{kind="pod",verb="GET"} 300
# HELP argo_workflows_queue_duration Time spent processing items in queues.
# TYPE argo_workflows_queue_duration histogram
argo_workflows_queue_duration_bucket{queue="workflow_queue",le="1.0"} 50
argo_workflows_queue_duration_bucket{queue="workflow_queue",le="2.0"} 150
argo_workflows_queue_duration_bucket{queue="workflow_queue",le="5.0"} 250
argo_workflows_queue_duration_sum{queue="workflow_queue"} 1200.0
argo_workflows_queue_duration_count{queue="workflow_queue"} 250
Loading
Loading