Skip to content

Commit 48ea07e

Browse files
authored
chore: ensure metrics are correctly exposed (#767)
* add prometheus annotations to metrics service * adapted changelog
1 parent 6b4f0f9 commit 48ea07e

File tree

6 files changed

+226
-180
lines changed

6 files changed

+226
-180
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file.
1010
- Expose more Prometheus metrics, such as successful or failed bundle loads and information about the OPA environment ([#748]).
1111
- Helm: Allow Pod `priorityClassName` to be configured ([#762]).
1212
- Add support for OPA `1.8.0` ([#765]).
13+
- Add `prometheus.io/path|port|scheme` annotations to metrics service ([#767]).
1314

1415
### Changed
1516

@@ -32,6 +33,7 @@ All notable changes to this project will be documented in this file.
3233
[#754]: https://github.com/stackabletech/opa-operator/pull/754
3334
[#762]: https://github.com/stackabletech/opa-operator/pull/762
3435
[#765]: https://github.com/stackabletech/opa-operator/pull/765
36+
[#767]: https://github.com/stackabletech/opa-operator/pull/767
3537

3638
## [25.7.0] - 2025-07-23
3739

rust/operator-binary/src/controller.rs

Lines changed: 15 additions & 175 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@ use stackable_operator::{
3939
apps::v1::{DaemonSet, DaemonSetSpec},
4040
core::v1::{
4141
ConfigMap, EmptyDirVolumeSource, EnvVar, EnvVarSource, HTTPGetAction,
42-
ObjectFieldSelector, Probe, SecretVolumeSource, Service, ServiceAccount,
43-
ServicePort, ServiceSpec,
42+
ObjectFieldSelector, Probe, SecretVolumeSource, ServiceAccount,
4443
},
4544
},
4645
apimachinery::pkg::{apis::meta::v1::LabelSelector, util::intstr::IntOrString},
@@ -78,15 +77,17 @@ use crate::{
7877
discovery::{self, build_discovery_configmaps},
7978
operations::graceful_shutdown::add_graceful_shutdown_config,
8079
product_logging::{BundleBuilderLogLevel, extend_role_group_config_map},
80+
service::{
81+
self, APP_PORT, APP_PORT_NAME, build_rolegroup_headless_service,
82+
build_rolegroup_metrics_service, build_server_role_service,
83+
},
8184
};
8285

8386
pub const OPA_CONTROLLER_NAME: &str = "opacluster";
8487
pub const OPA_FULL_CONTROLLER_NAME: &str = concatcp!(OPA_CONTROLLER_NAME, '.', OPERATOR_NAME);
8588

8689
pub const CONFIG_FILE: &str = "config.json";
87-
pub const APP_PORT: u16 = 8081;
88-
pub const APP_PORT_NAME: &str = "http";
89-
pub const METRICS_PORT_NAME: &str = "metrics";
90+
9091
pub const BUNDLES_ACTIVE_DIR: &str = "/bundles/active";
9192
pub const BUNDLES_INCOMING_DIR: &str = "/bundles/incoming";
9293
pub const BUNDLES_TMP_DIR: &str = "/bundles/tmp";
@@ -172,9 +173,6 @@ pub enum Error {
172173
source: stackable_opa_operator::crd::Error,
173174
},
174175

175-
#[snafu(display("failed to calculate role service name"))]
176-
RoleServiceNameNotFound,
177-
178176
#[snafu(display("failed to apply role Service"))]
179177
ApplyRoleService {
180178
source: stackable_operator::cluster_resources::Error,
@@ -186,12 +184,6 @@ pub enum Error {
186184
rolegroup: RoleGroupRef<v1alpha1::OpaCluster>,
187185
},
188186

189-
#[snafu(display("failed to apply metrics Service for [{rolegroup}]"))]
190-
ApplyRoleGroupMetricsService {
191-
source: stackable_operator::cluster_resources::Error,
192-
rolegroup: RoleGroupRef<v1alpha1::OpaCluster>,
193-
},
194-
195187
#[snafu(display("failed to build ConfigMap for [{rolegroup}]"))]
196188
BuildRoleGroupConfig {
197189
source: stackable_operator::builder::configmap::Error,
@@ -334,6 +326,9 @@ pub enum Error {
334326
ResolveProductImage {
335327
source: product_image_selection::Error,
336328
},
329+
330+
#[snafu(display("failed to build service"))]
331+
BuildService { source: service::Error },
337332
}
338333
type Result<T, E = Error> = std::result::Result<T, E>;
339334

@@ -474,7 +469,8 @@ pub async fn reconcile_opa(
474469
.map(Cow::Borrowed)
475470
.unwrap_or_default();
476471

477-
let server_role_service = build_server_role_service(opa, &resolved_product_image)?;
472+
let server_role_service =
473+
build_server_role_service(opa, &resolved_product_image).context(BuildServiceSnafu)?;
478474
// required for discovery config map later
479475
let server_role_service = cluster_resources
480476
.add(client, server_role_service)
@@ -516,10 +512,11 @@ pub async fn reconcile_opa(
516512
&rolegroup,
517513
&merged_config,
518514
)?;
519-
let rg_service =
520-
build_rolegroup_headless_service(opa, &resolved_product_image, &rolegroup)?;
515+
let rg_service = build_rolegroup_headless_service(opa, &resolved_product_image, &rolegroup)
516+
.context(BuildServiceSnafu)?;
521517
let rg_metrics_service =
522-
build_rolegroup_metrics_service(opa, &resolved_product_image, &rolegroup)?;
518+
build_rolegroup_metrics_service(opa, &resolved_product_image, &rolegroup)
519+
.context(BuildServiceSnafu)?;
523520
let rg_daemonset = build_server_rolegroup_daemonset(
524521
opa,
525522
&resolved_product_image,
@@ -617,143 +614,6 @@ pub async fn reconcile_opa(
617614
Ok(Action::await_change())
618615
}
619616

620-
/// The server-role service is the primary endpoint that should be used by clients that do not perform internal load balancing,
621-
/// including targets outside of the cluster.
622-
pub fn build_server_role_service(
623-
opa: &v1alpha1::OpaCluster,
624-
resolved_product_image: &ResolvedProductImage,
625-
) -> Result<Service> {
626-
let role_name = v1alpha1::OpaRole::Server.to_string();
627-
let role_svc_name = opa
628-
.server_role_service_name()
629-
.context(RoleServiceNameNotFoundSnafu)?;
630-
631-
let metadata = ObjectMetaBuilder::new()
632-
.name_and_namespace(opa)
633-
.name(&role_svc_name)
634-
.ownerreference_from_resource(opa, None, Some(true))
635-
.context(ObjectMissingMetadataForOwnerRefSnafu)?
636-
.with_recommended_labels(build_recommended_labels(
637-
opa,
638-
&resolved_product_image.app_version_label_value,
639-
&role_name,
640-
"global",
641-
))
642-
.context(ObjectMetaSnafu)?
643-
.build();
644-
645-
let service_selector_labels =
646-
Labels::role_selector(opa, APP_NAME, &role_name).context(BuildLabelSnafu)?;
647-
648-
let service_spec = ServiceSpec {
649-
type_: Some(opa.spec.cluster_config.listener_class.k8s_service_type()),
650-
ports: Some(data_service_ports()),
651-
selector: Some(service_selector_labels.into()),
652-
internal_traffic_policy: Some("Local".to_string()),
653-
..ServiceSpec::default()
654-
};
655-
656-
Ok(Service {
657-
metadata,
658-
spec: Some(service_spec),
659-
status: None,
660-
})
661-
}
662-
663-
/// The rolegroup [`Service`] is a headless service that allows direct access to the instances of a certain rolegroup
664-
///
665-
/// This is mostly useful for internal communication between peers, or for clients that perform client-side load balancing.
666-
fn build_rolegroup_headless_service(
667-
opa: &v1alpha1::OpaCluster,
668-
resolved_product_image: &ResolvedProductImage,
669-
rolegroup: &RoleGroupRef<v1alpha1::OpaCluster>,
670-
) -> Result<Service> {
671-
let metadata = ObjectMetaBuilder::new()
672-
.name_and_namespace(opa)
673-
.name(rolegroup.rolegroup_headless_service_name())
674-
.ownerreference_from_resource(opa, None, Some(true))
675-
.context(ObjectMissingMetadataForOwnerRefSnafu)?
676-
.with_recommended_labels(build_recommended_labels(
677-
opa,
678-
&resolved_product_image.app_version_label_value,
679-
&rolegroup.role,
680-
&rolegroup.role_group,
681-
))
682-
.context(ObjectMetaSnafu)?
683-
.build();
684-
685-
let service_spec = ServiceSpec {
686-
// Currently we don't offer listener-exposition of OPA mostly due to security concerns.
687-
// OPA is currently public within the Kubernetes cluster (without authentication).
688-
// Opening it up to outside of Kubernetes might worsen things.
689-
// We are open to implementing listener-integration, but this needs to be thought through before
690-
// implementing it.
691-
// Note: We have kind of similar situations for HMS and Zookeeper, as the authentication
692-
// options there are non-existent (mTLS still opens plain port) or suck (Kerberos).
693-
type_: Some("ClusterIP".to_string()),
694-
cluster_ip: Some("None".to_string()),
695-
ports: Some(data_service_ports()),
696-
selector: Some(role_group_selector_labels(opa, rolegroup)?.into()),
697-
publish_not_ready_addresses: Some(true),
698-
..ServiceSpec::default()
699-
};
700-
701-
Ok(Service {
702-
metadata,
703-
spec: Some(service_spec),
704-
status: None,
705-
})
706-
}
707-
708-
/// The rolegroup metrics [`Service`] is a service that exposes metrics and has the
709-
/// prometheus.io/scrape label.
710-
fn build_rolegroup_metrics_service(
711-
opa: &v1alpha1::OpaCluster,
712-
resolved_product_image: &ResolvedProductImage,
713-
rolegroup: &RoleGroupRef<v1alpha1::OpaCluster>,
714-
) -> Result<Service> {
715-
let labels = Labels::try_from([("prometheus.io/scrape", "true")])
716-
.expect("static Prometheus labels must be valid");
717-
718-
let metadata = ObjectMetaBuilder::new()
719-
.name_and_namespace(opa)
720-
.name(rolegroup.rolegroup_metrics_service_name())
721-
.ownerreference_from_resource(opa, None, Some(true))
722-
.context(ObjectMissingMetadataForOwnerRefSnafu)?
723-
.with_recommended_labels(build_recommended_labels(
724-
opa,
725-
&resolved_product_image.app_version_label_value,
726-
&rolegroup.role,
727-
&rolegroup.role_group,
728-
))
729-
.context(ObjectMetaSnafu)?
730-
.with_labels(labels)
731-
.build();
732-
733-
let service_spec = ServiceSpec {
734-
type_: Some("ClusterIP".to_string()),
735-
cluster_ip: Some("None".to_string()),
736-
ports: Some(vec![metrics_service_port()]),
737-
selector: Some(role_group_selector_labels(opa, rolegroup)?.into()),
738-
..ServiceSpec::default()
739-
};
740-
741-
Ok(Service {
742-
metadata,
743-
spec: Some(service_spec),
744-
status: None,
745-
})
746-
}
747-
748-
/// Returns the [`Labels`] that can be used to select all Pods that are part of the roleGroup.
749-
fn role_group_selector_labels(
750-
opa: &v1alpha1::OpaCluster,
751-
rolegroup: &RoleGroupRef<v1alpha1::OpaCluster>,
752-
) -> Result<Labels> {
753-
Labels::role_group_selector(opa, APP_NAME, &rolegroup.role, &rolegroup.role_group)
754-
.context(BuildLabelSnafu)
755-
}
756-
757617
/// The rolegroup [`ConfigMap`] configures the rolegroup based on the configuration given by the administrator
758618
fn build_server_rolegroup_config_map(
759619
opa: &v1alpha1::OpaCluster,
@@ -1470,26 +1330,6 @@ fn build_prepare_start_command(
14701330
prepare_container_args
14711331
}
14721332

1473-
fn data_service_ports() -> Vec<ServicePort> {
1474-
// Currently only HTTP is exposed
1475-
vec![ServicePort {
1476-
name: Some(APP_PORT_NAME.to_string()),
1477-
port: APP_PORT.into(),
1478-
protocol: Some("TCP".to_string()),
1479-
..ServicePort::default()
1480-
}]
1481-
}
1482-
1483-
fn metrics_service_port() -> ServicePort {
1484-
ServicePort {
1485-
name: Some(METRICS_PORT_NAME.to_string()),
1486-
// The metrics are served on the same port as the HTTP traffic
1487-
port: APP_PORT.into(),
1488-
protocol: Some("TCP".to_string()),
1489-
..ServicePort::default()
1490-
}
1491-
}
1492-
14931333
/// Creates recommended `ObjectLabels` to be used in deployed resources
14941334
pub fn build_recommended_labels<'a, T>(
14951335
owner: &'a T,

rust/operator-binary/src/crd/mod.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -325,19 +325,19 @@ impl v1alpha1::OpaCluster {
325325
}
326326

327327
/// The name of the role-level load-balanced Kubernetes `Service`
328-
pub fn server_role_service_name(&self) -> Option<String> {
329-
Some(format!(
328+
pub fn server_role_service_name(&self) -> String {
329+
format!(
330330
"{cluster_name}-{role}",
331331
cluster_name = self.name_any(),
332332
role = v1alpha1::OpaRole::Server
333-
))
333+
)
334334
}
335335

336336
/// The fully-qualified domain name of the role-level load-balanced Kubernetes `Service`
337337
pub fn server_role_service_fqdn(&self, cluster_info: &KubernetesClusterInfo) -> Option<String> {
338338
Some(format!(
339339
"{role_service_name}.{namespace}.svc.{cluster_domain}",
340-
role_service_name = self.server_role_service_name()?,
340+
role_service_name = self.server_role_service_name(),
341341
namespace = self.metadata.namespace.as_ref()?,
342342
cluster_domain = cluster_info.cluster_domain
343343
))

rust/operator-binary/src/discovery.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use stackable_operator::{
88
utils::cluster_info::KubernetesClusterInfo,
99
};
1010

11-
use crate::controller::{APP_PORT, build_recommended_labels};
11+
use crate::{controller::build_recommended_labels, service::APP_PORT};
1212

1313
#[derive(Snafu, Debug)]
1414
pub enum Error {

rust/operator-binary/src/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ mod controller;
3737
mod discovery;
3838
mod operations;
3939
mod product_logging;
40+
mod service;
4041

4142
pub mod built_info {
4243
include!(concat!(env!("OUT_DIR"), "/built.rs"));

0 commit comments

Comments
 (0)