Skip to content

Commit c0262aa

Browse files
maltesanderdervoeti
authored and committed
chore: ensure metrics are correctly exposed (#767)
* add prometheus annotations to metrics service
* adapted changelog
1 parent 54651b6 commit c0262aa

File tree

6 files changed

+257
-204
lines changed

6 files changed

+257
-204
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file.
1010
- Expose more Prometheus metrics, such as successful or failed bundle loads and information about the OPA environment ([#748]).
1111
- Helm: Allow Pod `priorityClassName` to be configured ([#762]).
1212
- Add support for OPA `1.8.0` ([#765]).
13+
- Add `prometheus.io/path|port|scheme` annotations to metrics service ([#767]).
1314

1415
### Changed
1516

@@ -32,6 +33,7 @@ All notable changes to this project will be documented in this file.
3233
[#754]: https://github.com/stackabletech/opa-operator/pull/754
3334
[#762]: https://github.com/stackabletech/opa-operator/pull/762
3435
[#765]: https://github.com/stackabletech/opa-operator/pull/765
36+
[#767]: https://github.com/stackabletech/opa-operator/pull/767
3537

3638
## [25.7.0] - 2025-07-23
3739

rust/operator-binary/src/controller.rs

Lines changed: 16 additions & 199 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@ use stackable_operator::{
3939
apps::v1::{DaemonSet, DaemonSetSpec},
4040
core::v1::{
4141
ConfigMap, EmptyDirVolumeSource, EnvVar, EnvVarSource, HTTPGetAction,
42-
ObjectFieldSelector, Probe, SecretVolumeSource, Service, ServiceAccount,
43-
ServicePort, ServiceSpec,
42+
ObjectFieldSelector, Probe, SecretVolumeSource, ServiceAccount,
4443
},
4544
},
4645
apimachinery::pkg::{apis::meta::v1::LabelSelector, util::intstr::IntOrString},
@@ -78,17 +77,18 @@ use crate::{
7877
discovery::{self, build_discovery_configmaps},
7978
operations::graceful_shutdown::add_graceful_shutdown_config,
8079
product_logging::{BundleBuilderLogLevel, extend_role_group_config_map},
80+
service::{
81+
self, APP_PORT, APP_PORT_NAME, APP_TLS_PORT, APP_TLS_PORT_NAME,
82+
build_rolegroup_headless_service, build_rolegroup_metrics_service,
83+
build_server_role_service,
84+
},
8185
};
8286

8387
pub const OPA_CONTROLLER_NAME: &str = "opacluster";
8488
pub const OPA_FULL_CONTROLLER_NAME: &str = concatcp!(OPA_CONTROLLER_NAME, '.', OPERATOR_NAME);
8589

8690
pub const CONFIG_FILE: &str = "config.json";
87-
pub const APP_PORT: u16 = 8081;
88-
pub const APP_TLS_PORT: u16 = 8443;
89-
pub const APP_PORT_NAME: &str = "http";
90-
pub const APP_TLS_PORT_NAME: &str = "https";
91-
pub const METRICS_PORT_NAME: &str = "metrics";
91+
9292
pub const BUNDLES_ACTIVE_DIR: &str = "/bundles/active";
9393
pub const BUNDLES_INCOMING_DIR: &str = "/bundles/incoming";
9494
pub const BUNDLES_TMP_DIR: &str = "/bundles/tmp";
@@ -176,9 +176,6 @@ pub enum Error {
176176
source: stackable_opa_operator::crd::Error,
177177
},
178178

179-
#[snafu(display("failed to calculate role service name"))]
180-
RoleServiceNameNotFound,
181-
182179
#[snafu(display("failed to apply role Service"))]
183180
ApplyRoleService {
184181
source: stackable_operator::cluster_resources::Error,
@@ -190,12 +187,6 @@ pub enum Error {
190187
rolegroup: RoleGroupRef<v1alpha1::OpaCluster>,
191188
},
192189

193-
#[snafu(display("failed to apply metrics Service for [{rolegroup}]"))]
194-
ApplyRoleGroupMetricsService {
195-
source: stackable_operator::cluster_resources::Error,
196-
rolegroup: RoleGroupRef<v1alpha1::OpaCluster>,
197-
},
198-
199190
#[snafu(display("failed to build ConfigMap for [{rolegroup}]"))]
200191
BuildRoleGroupConfig {
201192
source: stackable_operator::builder::configmap::Error,
@@ -343,6 +334,9 @@ pub enum Error {
343334
ResolveProductImage {
344335
source: product_image_selection::Error,
345336
},
337+
338+
#[snafu(display("failed to build service"))]
339+
BuildService { source: service::Error },
346340
}
347341
type Result<T, E = Error> = std::result::Result<T, E>;
348342

@@ -483,7 +477,8 @@ pub async fn reconcile_opa(
483477
.map(Cow::Borrowed)
484478
.unwrap_or_default();
485479

486-
let server_role_service = build_server_role_service(opa, &resolved_product_image)?;
480+
let server_role_service =
481+
build_server_role_service(opa, &resolved_product_image).context(BuildServiceSnafu)?;
487482
// required for discovery config map later
488483
let server_role_service = cluster_resources
489484
.add(client, server_role_service)
@@ -525,10 +520,11 @@ pub async fn reconcile_opa(
525520
&rolegroup,
526521
&merged_config,
527522
)?;
528-
let rg_service =
529-
build_rolegroup_headless_service(opa, &resolved_product_image, &rolegroup)?;
523+
let rg_service = build_rolegroup_headless_service(opa, &resolved_product_image, &rolegroup)
524+
.context(BuildServiceSnafu)?;
530525
let rg_metrics_service =
531-
build_rolegroup_metrics_service(opa, &resolved_product_image, &rolegroup)?;
526+
build_rolegroup_metrics_service(opa, &resolved_product_image, &rolegroup)
527+
.context(BuildServiceSnafu)?;
532528
let rg_daemonset = build_server_rolegroup_daemonset(
533529
opa,
534530
&resolved_product_image,
@@ -626,154 +622,6 @@ pub async fn reconcile_opa(
626622
Ok(Action::await_change())
627623
}
628624

629-
/// The server-role service is the primary endpoint that should be used by clients that do not perform internal load balancing,
630-
/// including targets outside of the cluster.
631-
pub fn build_server_role_service(
632-
opa: &v1alpha1::OpaCluster,
633-
resolved_product_image: &ResolvedProductImage,
634-
) -> Result<Service> {
635-
let role_name = v1alpha1::OpaRole::Server.to_string();
636-
let role_svc_name = opa
637-
.server_role_service_name()
638-
.context(RoleServiceNameNotFoundSnafu)?;
639-
640-
let metadata = ObjectMetaBuilder::new()
641-
.name_and_namespace(opa)
642-
.name(&role_svc_name)
643-
.ownerreference_from_resource(opa, None, Some(true))
644-
.context(ObjectMissingMetadataForOwnerRefSnafu)?
645-
.with_recommended_labels(build_recommended_labels(
646-
opa,
647-
&resolved_product_image.app_version_label_value,
648-
&role_name,
649-
"global",
650-
))
651-
.context(ObjectMetaSnafu)?
652-
.build();
653-
654-
let service_selector_labels =
655-
Labels::role_selector(opa, APP_NAME, &role_name).context(BuildLabelSnafu)?;
656-
657-
let (port_name, port) = if opa.spec.cluster_config.tls.is_some() {
658-
(APP_TLS_PORT_NAME, APP_TLS_PORT)
659-
} else {
660-
(APP_PORT_NAME, APP_PORT)
661-
};
662-
663-
let service_spec = ServiceSpec {
664-
type_: Some(opa.spec.cluster_config.listener_class.k8s_service_type()),
665-
ports: Some(vec![ServicePort {
666-
name: Some(port_name.to_string()),
667-
port: port.into(),
668-
protocol: Some("TCP".to_string()),
669-
..ServicePort::default()
670-
}]),
671-
selector: Some(service_selector_labels.into()),
672-
internal_traffic_policy: Some("Local".to_string()),
673-
..ServiceSpec::default()
674-
};
675-
676-
Ok(Service {
677-
metadata,
678-
spec: Some(service_spec),
679-
status: None,
680-
})
681-
}
682-
683-
/// The rolegroup [`Service`] is a headless service that allows direct access to the instances of a certain rolegroup
684-
///
685-
/// This is mostly useful for internal communication between peers, or for clients that perform client-side load balancing.
686-
fn build_rolegroup_headless_service(
687-
opa: &v1alpha1::OpaCluster,
688-
resolved_product_image: &ResolvedProductImage,
689-
rolegroup: &RoleGroupRef<v1alpha1::OpaCluster>,
690-
) -> Result<Service> {
691-
let metadata = ObjectMetaBuilder::new()
692-
.name_and_namespace(opa)
693-
.name(rolegroup.rolegroup_headless_service_name())
694-
.ownerreference_from_resource(opa, None, Some(true))
695-
.context(ObjectMissingMetadataForOwnerRefSnafu)?
696-
.with_recommended_labels(build_recommended_labels(
697-
opa,
698-
&resolved_product_image.app_version_label_value,
699-
&rolegroup.role,
700-
&rolegroup.role_group,
701-
))
702-
.context(ObjectMetaSnafu)?
703-
.build();
704-
705-
let service_spec = ServiceSpec {
706-
// Currently we don't offer listener-exposition of OPA mostly due to security concerns.
707-
// OPA is currently public within the Kubernetes (without authentication).
708-
// Opening it up to outside of Kubernetes might worsen things.
709-
// We are open to implement listener-integration, but this needs to be thought through before
710-
// implementing it.
711-
// Note: We have kind of similar situations for HMS and Zookeeper, as the authentication
712-
// options there are non-existent (mTLS still opens plain port) or suck (Kerberos).
713-
type_: Some("ClusterIP".to_string()),
714-
cluster_ip: Some("None".to_string()),
715-
ports: Some(data_service_ports_with_tls(opa.spec.cluster_config.tls.is_some())),
716-
selector: Some(role_group_selector_labels(opa, rolegroup)?.into()),
717-
publish_not_ready_addresses: Some(true),
718-
..ServiceSpec::default()
719-
};
720-
721-
Ok(Service {
722-
metadata,
723-
spec: Some(service_spec),
724-
status: None,
725-
})
726-
}
727-
728-
/// The rolegroup metrics [`Service`] is a service that exposes metrics and has the
729-
/// prometheus.io/scrape label.
730-
fn build_rolegroup_metrics_service(
731-
opa: &v1alpha1::OpaCluster,
732-
resolved_product_image: &ResolvedProductImage,
733-
rolegroup: &RoleGroupRef<v1alpha1::OpaCluster>,
734-
) -> Result<Service> {
735-
let labels = Labels::try_from([("prometheus.io/scrape", "true")])
736-
.expect("static Prometheus labels must be valid");
737-
738-
let metadata = ObjectMetaBuilder::new()
739-
.name_and_namespace(opa)
740-
.name(rolegroup.rolegroup_metrics_service_name())
741-
.ownerreference_from_resource(opa, None, Some(true))
742-
.context(ObjectMissingMetadataForOwnerRefSnafu)?
743-
.with_recommended_labels(build_recommended_labels(
744-
opa,
745-
&resolved_product_image.app_version_label_value,
746-
&rolegroup.role,
747-
&rolegroup.role_group,
748-
))
749-
.context(ObjectMetaSnafu)?
750-
.with_labels(labels)
751-
.build();
752-
753-
let service_spec = ServiceSpec {
754-
type_: Some("ClusterIP".to_string()),
755-
cluster_ip: Some("None".to_string()),
756-
ports: Some(vec![metrics_service_port_with_tls(opa.spec.cluster_config.tls.is_some())]),
757-
selector: Some(role_group_selector_labels(opa, rolegroup)?.into()),
758-
..ServiceSpec::default()
759-
};
760-
761-
Ok(Service {
762-
metadata,
763-
spec: Some(service_spec),
764-
status: None,
765-
})
766-
}
767-
768-
/// Returns the [`Labels`] that can be used to select all Pods that are part of the roleGroup.
769-
fn role_group_selector_labels(
770-
opa: &v1alpha1::OpaCluster,
771-
rolegroup: &RoleGroupRef<v1alpha1::OpaCluster>,
772-
) -> Result<Labels> {
773-
Labels::role_group_selector(opa, APP_NAME, &rolegroup.role, &rolegroup.role_group)
774-
.context(BuildLabelSnafu)
775-
}
776-
777625
/// The rolegroup [`ConfigMap`] configures the rolegroup based on the configuration given by the administrator
778626
fn build_server_rolegroup_config_map(
779627
opa: &v1alpha1::OpaCluster,
@@ -1543,37 +1391,6 @@ fn build_prepare_start_command(
15431391
prepare_container_args
15441392
}
15451393

1546-
fn data_service_ports_with_tls(tls_enabled: bool) -> Vec<ServicePort> {
1547-
let (port_name, port) = if tls_enabled {
1548-
(APP_TLS_PORT_NAME, APP_TLS_PORT)
1549-
} else {
1550-
(APP_PORT_NAME, APP_PORT)
1551-
};
1552-
1553-
vec![ServicePort {
1554-
name: Some(port_name.to_string()),
1555-
port: port.into(),
1556-
protocol: Some("TCP".to_string()),
1557-
..ServicePort::default()
1558-
}]
1559-
}
1560-
1561-
fn metrics_service_port_with_tls(tls_enabled: bool) -> ServicePort {
1562-
let port = if tls_enabled {
1563-
APP_TLS_PORT
1564-
} else {
1565-
APP_PORT
1566-
};
1567-
1568-
ServicePort {
1569-
name: Some(METRICS_PORT_NAME.to_string()),
1570-
// The metrics are served on the same port as the HTTP traffic
1571-
port: port.into(),
1572-
protocol: Some("TCP".to_string()),
1573-
..ServicePort::default()
1574-
}
1575-
}
1576-
15771394
/// Creates recommended `ObjectLabels` to be used in deployed resources
15781395
pub fn build_recommended_labels<'a, T>(
15791396
owner: &'a T,

rust/operator-binary/src/crd/mod.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -337,19 +337,19 @@ impl v1alpha1::OpaCluster {
337337
}
338338

339339
/// The name of the role-level load-balanced Kubernetes `Service`
340-
pub fn server_role_service_name(&self) -> Option<String> {
341-
Some(format!(
340+
pub fn server_role_service_name(&self) -> String {
341+
format!(
342342
"{cluster_name}-{role}",
343343
cluster_name = self.name_any(),
344344
role = v1alpha1::OpaRole::Server
345-
))
345+
)
346346
}
347347

348348
/// The fully-qualified domain name of the role-level load-balanced Kubernetes `Service`
349349
pub fn server_role_service_fqdn(&self, cluster_info: &KubernetesClusterInfo) -> Option<String> {
350350
Some(format!(
351351
"{role_service_name}.{namespace}.svc.{cluster_domain}",
352-
role_service_name = self.server_role_service_name()?,
352+
role_service_name = self.server_role_service_name(),
353353
namespace = self.metadata.namespace.as_ref()?,
354354
cluster_domain = cluster_info.cluster_domain
355355
))

rust/operator-binary/src/discovery.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use stackable_operator::{
88
utils::cluster_info::KubernetesClusterInfo,
99
};
1010

11-
use crate::controller::{APP_PORT, APP_TLS_PORT, build_recommended_labels};
11+
use crate::{controller::build_recommended_labels, service::{APP_PORT, APP_TLS_PORT}};
1212

1313
#[derive(Snafu, Debug)]
1414
pub enum Error {

rust/operator-binary/src/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ mod controller;
3737
mod discovery;
3838
mod operations;
3939
mod product_logging;
40+
mod service;
4041

4142
pub mod built_info {
4243
include!(concat!(env!("OUT_DIR"), "/built.rs"));

0 commit comments

Comments
 (0)