Skip to content

Commit 9e16206

Browse files
committed
feat: alarms
1 parent 4c2fd17 commit 9e16206

6 files changed

Lines changed: 312 additions & 0 deletions

File tree

packages/cdk/bin/PfPApiApp.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ function main() {
3030
tc007NhsNumberValue: getConfigFromEnvVar("tc007NhsNumberValue", CDK_ENV_PREFIX, "not_in_use"),
3131
tc008NhsNumberValue: getConfigFromEnvVar("tc008NhsNumberValue", CDK_ENV_PREFIX, "not_in_use"),
3232
tc009NhsNumberValue: getConfigFromEnvVar("tc009NhsNumberValue", CDK_ENV_PREFIX, "not_in_use"),
33+
enableAlerts: getBooleanConfigFromEnvVar("enableAlerts", CDK_ENV_PREFIX, "true"),
3334
mutualTlsTrustStoreKey: props.isPullRequest ? undefined : getConfigFromEnvVar("trustStoreFile"),
3435
// CSOC API GW log destination - do not change
3536
csocApiGatewayDestination: "arn:aws:logs:eu-west-2:693466633220:destination:api_gateway_log_destination",
packages/cdk/constructs/MetricAlarm.ts

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import {Duration} from "aws-cdk-lib"
2+
import {Construct} from "constructs"
3+
import {
4+
Alarm,
5+
ComparisonOperator,
6+
Metric,
7+
TreatMissingData,
8+
Unit
9+
} from "aws-cdk-lib/aws-cloudwatch"
10+
import {ITopic} from "aws-cdk-lib/aws-sns"
11+
12+
/**
 * Describes one CloudWatch alarm for the MetricAlarm construct to create.
 * Only `name`, `metric` and `description` are required.
 */
type AlarmDefinition = {
  /** Alarm name; combined with the stack name to form the deployed alarm name. */
  name: string
  /** Name of the CloudWatch metric the alarm watches. */
  metric: string
  /** Text used as the alarm description. */
  description: string
  /** Metric dimensions, keyed by dimension name. */
  dimensions?: Record<string, string>
  /** Value the metric is compared against. */
  threshold?: number
  /** How the metric is compared with the threshold. */
  comparisonOperator?: ComparisonOperator
  /** Unit of the watched metric. */
  unit?: Unit
}
21+
22+
/** Properties accepted by the MetricAlarm construct. */
export interface MetricAlarmProps {
  /** Name of the owning stack. */
  readonly stackName: string

  /** Whether alerting is enabled. */
  readonly enableAlerts: boolean

  /** CloudWatch namespace of the metric. */
  readonly namespace: string

  /** Definition of the alarm. */
  readonly alarmDefinition: AlarmDefinition

  /** SNS topic used for Slack alerts. */
  readonly slackAlertTopic: ITopic
}
29+
30+
/**
 * Creates a single CloudWatch alarm on one metric and points its ALARM, OK
 * and INSUFFICIENT_DATA actions at the supplied SNS topic.
 *
 * The metric is summed over one-minute periods and evaluated over a single
 * period; missing data is treated as not breaching. Threshold, comparison
 * operator and unit default to 1, ">= threshold" and Count when the alarm
 * definition does not set them.
 */
export class MetricAlarm extends Construct {
  /** The created alarm, keyed by the alarm definition's name. */
  alarms: {[key: string]: Alarm}

  public constructor(scope: Construct, id: string, props: MetricAlarmProps){
    super(scope, id)

    const {alarmDefinition, enableAlerts, namespace, slackAlertTopic, stackName} = props

    // The metric being watched: a per-minute sum in the requested namespace.
    const watchedMetric = new Metric({
      namespace,
      metricName: alarmDefinition.metric,
      dimensionsMap: alarmDefinition.dimensions,
      unit: alarmDefinition.unit ?? Unit.COUNT,
      statistic: "Sum",
      period: Duration.minutes(1)
    })

    const createdAlarm = new Alarm(this, `${alarmDefinition.name}Alarm`, {
      alarmName: `${stackName}-${alarmDefinition.name}`,
      metric: watchedMetric,
      threshold: alarmDefinition.threshold ?? 1,
      evaluationPeriods: 1,
      comparisonOperator:
        alarmDefinition.comparisonOperator ?? ComparisonOperator.GREATER_THAN_OR_EQUAL_TO_THRESHOLD,
      treatMissingData: TreatMissingData.NOT_BREACHING,
      alarmDescription: alarmDefinition.description,
      // Actions stay attached but are only enabled when alerting is switched on.
      actionsEnabled: enableAlerts
    })

    // Minimal alarm action that resolves to the topic's ARN; a fresh object
    // is handed to each state so none of them share an instance.
    const notifyTopic = () => ({
      bind: () => ({alarmActionArn: slackAlertTopic.topicArn})
    })
    createdAlarm.addAlarmAction(notifyTopic())
    createdAlarm.addOkAction(notifyTopic())
    createdAlarm.addInsufficientDataAction(notifyTopic())

    this.alarms = {[alarmDefinition.name]: createdAlarm}
  }
}

packages/cdk/resources/Alarms.ts

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
import {Fn} from "aws-cdk-lib"
2+
import {Unit} from "aws-cdk-lib/aws-cloudwatch"
3+
import {
4+
MetricFilter,
5+
FilterPattern,
6+
IFilterPattern,
7+
ILogGroup
8+
} from "aws-cdk-lib/aws-logs"
9+
import {Topic} from "aws-cdk-lib/aws-sns"
10+
import {TypescriptLambdaFunction} from "@nhsdigital/eps-cdk-constructs"
11+
import {Construct} from "constructs"
12+
import {MetricAlarm} from "../constructs/MetricAlarm"
13+
14+
/** Properties accepted by the Alarms construct. */
export interface AlarmsProps {
  /** Name of the owning stack. */
  readonly stackName: string

  /** Whether alerting is enabled. */
  readonly enableAlerts: boolean

  /** Lambda function constructs, keyed by name. */
  readonly functions: Record<string, TypescriptLambdaFunction>
}
19+
20+
/**
 * Defines the log metric filters and CloudWatch alarms for the
 * getMyPrescriptions and enrichPrescriptions lambda functions.
 *
 * Every alarm is created through MetricAlarm and notifies the SNS topic whose
 * ARN is imported from the "lambda-resources:SlackAlertsSnsTopicArn" export.
 */
export class Alarms extends Construct {
  public constructor(scope: Construct, id: string, props: AlarmsProps) {
    super(scope, id)

    // Creates a metric filter in this construct. Unless overridden, each
    // matching log event adds 1 (unit: Count) to a metric named "ErrorCount".
    const createMetricFilter = (
      metricFilterId: string,
      metricFilterProps: {
        filterName: string
        filterPattern: IFilterPattern
        logGroup: ILogGroup
        metricNamespace: string
        metricName?: string
        metricValue?: string
        unit?: Unit
        dimensions?: {[key: string]: string}
      }
    ) => new MetricFilter(this, metricFilterId, {
      ...metricFilterProps,
      metricName: metricFilterProps.metricName ?? "ErrorCount",
      metricValue: metricFilterProps.metricValue ?? "1",
      unit: metricFilterProps.unit ?? Unit.COUNT
    })

    const slackAlertTopic = Topic.fromTopicArn(
      this,
      "SlackAlertsTopic",
      Fn.importValue("lambda-resources:SlackAlertsSnsTopicArn")
    )

    const getMyPrescriptionsFunction = props.functions.getMyPrescriptions.function
    const enrichPrescriptionsFunction = props.functions.enrichPrescriptions.function

    // --- getMyPrescriptions: ERROR logs mentioning the serviceSearch request ---
    createMetricFilter("ServiceSearchErrorsLogsMetricFilter", {
      filterName: "ServiceSearchErrors",
      filterPattern: FilterPattern.literal(
        `{ ($.level = "ERROR") && ($.function_name = "${getMyPrescriptionsFunction.functionName}") ` +
          "&& $.message = %error in request to serviceSearch% }"
      ),
      logGroup: getMyPrescriptionsFunction.logGroup,
      metricNamespace: "LambdaLogFilterMetrics",
      metricName: "ServiceSearchErrorCount",
      dimensions: {
        FunctionName: "$.function_name"
      }
    })

    new MetricAlarm(this, "ServiceSearchErrors", {
      stackName: props.stackName,
      enableAlerts: props.enableAlerts,
      namespace: "LambdaLogFilterMetrics",
      alarmDefinition: {
        name: "ServiceSearch_Errors",
        metric: "ServiceSearchErrorCount",
        description: "Count of Service Search errors",
        dimensions: {
          FunctionName: getMyPrescriptionsFunction.functionName
        }
      },
      slackAlertTopic
    })

    // --- getMyPrescriptions: Lambda's own Errors metric ---
    // Lambda publishes its built-in metrics under the "AWS/Lambda" namespace.
    // Any other namespace would leave this alarm watching a metric that never
    // receives data, and missing data is treated as not breaching.
    new MetricAlarm(this, "ServiceSearchUnhandledErrors", {
      stackName: props.stackName,
      enableAlerts: props.enableAlerts,
      namespace: "AWS/Lambda",
      alarmDefinition: {
        name: "ServiceSearch_UnhandledErrors",
        metric: "Errors",
        description: "Count of Service Search unhandled errors",
        dimensions: {
          FunctionName: getMyPrescriptionsFunction.functionName
        }
      },
      slackAlertTopic
    })

    // --- getMyPrescriptions: every other ERROR log (default "ErrorCount" metric) ---
    createMetricFilter("GetMyPrescriptionsErrorsLogsMetricFilter", {
      filterName: `${props.stackName}_GetMyPrescriptionsErrors`,
      filterPattern: FilterPattern.literal(
        `{ ($.level = "ERROR") && ($.function_name = "${getMyPrescriptionsFunction.functionName}") ` +
          "&& ($.message != %error in request to serviceSearch%) }"
      ),
      logGroup: getMyPrescriptionsFunction.logGroup,
      metricNamespace: "LambdaLogFilterMetrics",
      dimensions: {
        FunctionName: "$.function_name"
      }
    })

    new MetricAlarm(this, "GetMyPrescriptionsErrors", {
      stackName: props.stackName,
      enableAlerts: props.enableAlerts,
      namespace: "LambdaLogFilterMetrics",
      alarmDefinition: {
        name: "GetMyPrescriptions_Errors",
        metric: "ErrorCount",
        description: "Count of GetMyPrescriptions errors",
        dimensions: {
          FunctionName: getMyPrescriptionsFunction.functionName
        }
      },
      slackAlertTopic
    })

    // --- enrichPrescriptions: "ERROR" filter pattern on its log group ---
    // This filter has no dimensions, so the stack name is embedded in the
    // metric name and the alarm below references that same name.
    createMetricFilter("EnrichPrescriptionsErrorsLogsMetricFilter", {
      filterName: `${props.stackName}_EnrichPrescriptionsErrors`,
      filterPattern: FilterPattern.literal("ERROR"),
      logGroup: enrichPrescriptionsFunction.logGroup,
      metricNamespace: "LambdaLogFilterMetrics",
      metricName: `${props.stackName}EnrichPrescriptionsErrorCount`
    })

    new MetricAlarm(this, "EnrichPrescriptionsErrors", {
      stackName: props.stackName,
      enableAlerts: props.enableAlerts,
      namespace: "LambdaLogFilterMetrics",
      alarmDefinition: {
        name: "EnrichPrescriptions_Errors",
        metric: `${props.stackName}EnrichPrescriptionsErrorCount`,
        description: "Count of EnrichPrescriptions errors"
      },
      slackAlertTopic
    })
  }
}

packages/cdk/stacks/PfPApiStack.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import {nagSuppressions} from "../nagSuppressions"
33
import {Functions} from "../resources/Functions"
44
import {StateMachines} from "../resources/StateMachines"
55
import {Apis} from "../resources/Apis"
6+
import {Alarms} from "../resources/Alarms"
67
import {StandardStackProps} from "@nhsdigital/eps-cdk-constructs"
78
import Parameters from "../resources/Parameters"
89

@@ -19,6 +20,7 @@ export interface PfPApiStackProps extends StandardStackProps {
1920
readonly tc008NhsNumberValue: string
2021
readonly tc009NhsNumberValue: string
2122
readonly mutualTlsTrustStoreKey: string | undefined
23+
readonly enableAlerts: boolean
2224
readonly csocApiGatewayDestination: string
2325
readonly forwardCsocLogs: boolean
2426
}
@@ -56,6 +58,12 @@ export class PfPApiStack extends Stack {
5658
functions: functions.functions
5759
})
5860

61+
new Alarms(this, "Alarms", {
62+
stackName: props.stackName,
63+
enableAlerts: props.enableAlerts,
64+
functions: functions.functions
65+
})
66+
5967
new Apis(this, "Apis", {
6068
stackName: props.stackName,
6169
logRetentionInDays: props.logRetentionInDays,
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import {App, Stack} from "aws-cdk-lib"
2+
import {Template} from "aws-cdk-lib/assertions"
3+
import {ComparisonOperator, Unit} from "aws-cdk-lib/aws-cloudwatch"
4+
import {Topic} from "aws-cdk-lib/aws-sns"
5+
import {describe, expect, it} from "vitest"
6+
import {MetricAlarm} from "../constructs/MetricAlarm"
7+
8+
describe("MetricAlarm construct", () => {
  // Builds a fresh app/stack pair together with the SNS topic the alarm notifies.
  const createStackWithTopic = (stackId: string) => {
    const stack = new Stack(new App(), stackId)
    const slackAlertTopic = new Topic(stack, "SlackAlertsTopic")
    return {stack, slackAlertTopic}
  }

  it("applies sane defaults for simple alarm definitions", () => {
    const {stack, slackAlertTopic} = createStackWithTopic("TestStack")

    // Only the required alarm definition fields are given, so every
    // optional setting must come from the construct's defaults.
    new MetricAlarm(stack, "SimpleMetricAlarm", {
      stackName: "pfp-test-stack",
      enableAlerts: true,
      namespace: "LambdaLogFilterMetrics",
      alarmDefinition: {
        name: "MySimpleAlarm",
        metric: "ErrorCount",
        description: "Simple alarm"
      },
      slackAlertTopic
    })

    const expectedProperties = {
      AlarmName: "pfp-test-stack-MySimpleAlarm",
      Namespace: "LambdaLogFilterMetrics",
      MetricName: "ErrorCount",
      Threshold: 1,
      ComparisonOperator: "GreaterThanOrEqualToThreshold",
      Unit: "Count",
      Statistic: "Sum",
      Period: 60,
      EvaluationPeriods: 1,
      TreatMissingData: "notBreaching",
      AlarmDescription: "Simple alarm",
      ActionsEnabled: true
    }

    Template.fromStack(stack).hasResourceProperties("AWS::CloudWatch::Alarm", expectedProperties)
  })

  it("allows overriding threshold, comparison operator, unit and dimensions", () => {
    const {stack, slackAlertTopic} = createStackWithTopic("OverrideStack")

    const metricAlarm = new MetricAlarm(stack, "OverrideMetricAlarm", {
      stackName: "pfp-test-stack",
      enableAlerts: false,
      namespace: "CustomNamespace",
      alarmDefinition: {
        name: "MyOverrideAlarm",
        metric: "Latency",
        description: "Override alarm",
        dimensions: {
          FunctionName: "my-function"
        },
        threshold: 250,
        comparisonOperator: ComparisonOperator.GREATER_THAN_THRESHOLD,
        unit: Unit.MILLISECONDS
      },
      slackAlertTopic
    })

    // The alarm is exposed on the construct under its definition name.
    expect(metricAlarm.alarms.MyOverrideAlarm).toBeDefined()

    const expectedProperties = {
      AlarmName: "pfp-test-stack-MyOverrideAlarm",
      Namespace: "CustomNamespace",
      MetricName: "Latency",
      Threshold: 250,
      ComparisonOperator: "GreaterThanThreshold",
      Unit: "Milliseconds",
      Dimensions: [
        {
          Name: "FunctionName",
          Value: "my-function"
        }
      ],
      AlarmDescription: "Override alarm",
      ActionsEnabled: false
    }

    Template.fromStack(stack).hasResourceProperties("AWS::CloudWatch::Alarm", expectedProperties)
  })
})

packages/cdk/tests/synth.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ describe("CDK synth smoke tests", () => {
5353
CDK_CONFIG_tc007NhsNumberValue: "9000000009",
5454
CDK_CONFIG_tc008NhsNumberValue: "9000000017",
5555
CDK_CONFIG_tc009NhsNumberValue: "9000000025",
56+
CDK_CONFIG_enableAlerts: "true",
5657
CDK_CONFIG_forwardCsocLogs: "false"
5758
}
5859
})

0 commit comments

Comments
 (0)