Skip to content

Commit 943c8ed

Browse files
authored
[reefd] add windows instance reaper (#261)
Terminates Windows instances that run for too long. --------- Signed-off-by: Lonnie Liu <[email protected]>
1 parent 0381f1b commit 943c8ed

File tree

5 files changed

+478
-0
lines changed

5 files changed

+478
-0
lines changed

go.mod

+14
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,27 @@ module github.com/ray-project/rayci
33
go 1.24.1
44

55
require (
6+
github.com/aws/aws-sdk-go-v2 v1.36.3
7+
github.com/aws/aws-sdk-go-v2/config v1.29.9
8+
github.com/aws/aws-sdk-go-v2/service/ec2 v1.210.1
69
github.com/google/go-containerregistry v0.20.3
710
gopkg.in/yaml.v3 v3.0.1
811
)
912

1013
require (
1114
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect
1215
github.com/Microsoft/go-winio v0.6.2 // indirect
16+
github.com/aws/aws-sdk-go-v2/credentials v1.17.62 // indirect
17+
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30 // indirect
18+
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34 // indirect
19+
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34 // indirect
20+
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect
21+
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 // indirect
22+
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15 // indirect
23+
github.com/aws/aws-sdk-go-v2/service/sso v1.25.1 // indirect
24+
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.29.1 // indirect
25+
github.com/aws/aws-sdk-go-v2/service/sts v1.33.17 // indirect
26+
github.com/aws/smithy-go v1.22.2 // indirect
1327
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
1428
github.com/containerd/stargz-snapshotter/estargz v0.16.3 // indirect
1529
github.com/distribution/reference v0.6.0 // indirect

go.sum

+28
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,34 @@ github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25
22
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
33
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
44
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
5+
github.com/aws/aws-sdk-go-v2 v1.36.3 h1:mJoei2CxPutQVxaATCzDUjcZEjVRdpsiiXi2o38yqWM=
6+
github.com/aws/aws-sdk-go-v2 v1.36.3/go.mod h1:LLXuLpgzEbD766Z5ECcRmi8AzSwfZItDtmABVkRLGzg=
7+
github.com/aws/aws-sdk-go-v2/config v1.29.9 h1:Kg+fAYNaJeGXp1vmjtidss8O2uXIsXwaRqsQJKXVr+0=
8+
github.com/aws/aws-sdk-go-v2/config v1.29.9/go.mod h1:oU3jj2O53kgOU4TXq/yipt6ryiooYjlkqqVaZk7gY/U=
9+
github.com/aws/aws-sdk-go-v2/credentials v1.17.62 h1:fvtQY3zFzYJ9CfixuAQ96IxDrBajbBWGqjNTCa79ocU=
10+
github.com/aws/aws-sdk-go-v2/credentials v1.17.62/go.mod h1:ElETBxIQqcxej++Cs8GyPBbgMys5DgQPTwo7cUPDKt8=
11+
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30 h1:x793wxmUWVDhshP8WW2mlnXuFrO4cOd3HLBroh1paFw=
12+
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30/go.mod h1:Jpne2tDnYiFascUEs2AWHJL9Yp7A5ZVy3TNyxaAjD6M=
13+
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34 h1:ZK5jHhnrioRkUNOc+hOgQKlUL5JeC3S6JgLxtQ+Rm0Q=
14+
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34/go.mod h1:p4VfIceZokChbA9FzMbRGz5OV+lekcVtHlPKEO0gSZY=
15+
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34 h1:SZwFm17ZUNNg5Np0ioo/gq8Mn6u9w19Mri8DnJ15Jf0=
16+
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34/go.mod h1:dFZsC0BLo346mvKQLWmoJxT+Sjp+qcVR1tRVHQGOH9Q=
17+
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 h1:bIqFDwgGXXN1Kpp99pDOdKMTTb5d2KyU5X/BZxjOkRo=
18+
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3/go.mod h1:H5O/EsxDWyU+LP/V8i5sm8cxoZgc2fdNR9bxlOFrQTo=
19+
github.com/aws/aws-sdk-go-v2/service/ec2 v1.210.1 h1:+4A9SDduLZFlDeXWRmfQ6r8kyEJZQfK6lcg+KwdvWrI=
20+
github.com/aws/aws-sdk-go-v2/service/ec2 v1.210.1/go.mod h1:ouvGEfHbLaIlWwpDpOVWPWR+YwO0HDv3vm5tYLq8ImY=
21+
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 h1:eAh2A4b5IzM/lum78bZ590jy36+d/aFLgKF/4Vd1xPE=
22+
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3/go.mod h1:0yKJC/kb8sAnmlYa6Zs3QVYqaC8ug2AbnNChv5Ox3uA=
23+
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15 h1:dM9/92u2F1JbDaGooxTq18wmmFzbJRfXfVfy96/1CXM=
24+
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15/go.mod h1:SwFBy2vjtA0vZbjjaFtfN045boopadnoVPhu4Fv66vY=
25+
github.com/aws/aws-sdk-go-v2/service/sso v1.25.1 h1:8JdC7Gr9NROg1Rusk25IcZeTO59zLxsKgE0gkh5O6h0=
26+
github.com/aws/aws-sdk-go-v2/service/sso v1.25.1/go.mod h1:qs4a9T5EMLl/Cajiw2TcbNt2UNo/Hqlyp+GiuG4CFDI=
27+
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.29.1 h1:KwuLovgQPcdjNMfFt9OhUd9a2OwcOKhxfvF4glTzLuA=
28+
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.29.1/go.mod h1:MlYRNmYu/fGPoxBQVvBYr9nyr948aY/WLUvwBMBJubs=
29+
github.com/aws/aws-sdk-go-v2/service/sts v1.33.17 h1:PZV5W8yk4OtH1JAuhV2PXwwO9v5G5Aoj+eMCn4T+1Kc=
30+
github.com/aws/aws-sdk-go-v2/service/sts v1.33.17/go.mod h1:cQnB8CUnxbMU82JvlqjKR2HBOm3fe9pWorWBza6MBJ4=
31+
github.com/aws/smithy-go v1.22.2 h1:6D9hW43xKFrRx/tXXfAlIZc4JI+yQe6snnWcQyxSyLQ=
32+
github.com/aws/smithy-go v1.22.2/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg=
533
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
634
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
735
github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=

reefd/aws_clients.go

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package reefd
2+
3+
import (
4+
"context"
5+
6+
"github.com/aws/aws-sdk-go-v2/aws"
7+
awsconfig "github.com/aws/aws-sdk-go-v2/config"
8+
"github.com/aws/aws-sdk-go-v2/service/ec2"
9+
)
10+
11+
type ec2Client interface {
12+
DescribeInstances(
13+
ctx context.Context,
14+
params *ec2.DescribeInstancesInput,
15+
optFns ...func(*ec2.Options),
16+
) (*ec2.DescribeInstancesOutput, error)
17+
18+
TerminateInstances(
19+
ctx context.Context,
20+
params *ec2.TerminateInstancesInput,
21+
optFns ...func(*ec2.Options),
22+
) (*ec2.TerminateInstancesOutput, error)
23+
}
24+
25+
type awsClients struct {
26+
ec2 func() ec2Client
27+
}
28+
29+
func newAWSClientsFromConfig(cfg *aws.Config) *awsClients {
30+
return &awsClients{
31+
ec2: func() ec2Client { return ec2.NewFromConfig(*cfg) },
32+
}
33+
}
34+
35+
// awsRegion is the AWS region that newAWSClients passes to the default
// config loader.
const awsRegion = "us-west-2"
36+
37+
func newAWSClients(ctx context.Context) (*awsClients, error) {
38+
cfg, err := awsconfig.LoadDefaultConfig(
39+
ctx, awsconfig.WithRegion(awsRegion),
40+
)
41+
if err != nil {
42+
return nil, err
43+
}
44+
return newAWSClientsFromConfig(&cfg), nil
45+
}

reefd/reaper.go

+98
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
package reefd
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"log"
7+
"sort"
8+
"time"
9+
10+
"github.com/aws/aws-sdk-go-v2/aws"
11+
"github.com/aws/aws-sdk-go-v2/service/ec2"
12+
"github.com/aws/aws-sdk-go-v2/service/ec2/types"
13+
)
14+
15+
type reaper struct {
16+
ec2 ec2Client
17+
nowFunc func() time.Time
18+
}
19+
20+
func newReaper(ec2 ec2Client) *reaper {
21+
return &reaper{ec2: ec2}
22+
}
23+
24+
func (r *reaper) now() time.Time {
25+
if r.nowFunc != nil {
26+
return r.nowFunc()
27+
}
28+
return time.Now()
29+
}
30+
31+
func (r *reaper) setNowFunc(f func() time.Time) {
32+
r.nowFunc = f
33+
}
34+
35+
func (r *reaper) listDeadWindowsInstances(ctx context.Context) ([]string, error) {
36+
filters := []types.Filter{{
37+
Name: aws.String("tag:BuildkiteQueue"),
38+
Values: []string{"*windows*"},
39+
}, {
40+
Name: aws.String("instance-state-code"),
41+
Values: []string{
42+
"0", // pending
43+
"16", // running
44+
},
45+
}}
46+
const maxResults = 500
47+
input := &ec2.DescribeInstancesInput{
48+
Filters: filters,
49+
MaxResults: aws.Int32(maxResults),
50+
}
51+
result, err := r.ec2.DescribeInstances(ctx, input)
52+
if err != nil {
53+
return nil, fmt.Errorf("describe instances: %w", err)
54+
}
55+
56+
const instanceAgeLimit = -4 * time.Hour
57+
58+
cut := r.now().Add(instanceAgeLimit)
59+
60+
var instances []string
61+
for _, r := range result.Reservations {
62+
for _, i := range r.Instances {
63+
if i.LaunchTime.Before(cut) {
64+
instances = append(instances, *i.InstanceId)
65+
}
66+
}
67+
}
68+
69+
sort.Strings(instances)
70+
71+
return instances, nil
72+
}
73+
74+
func (r *reaper) terminateInstances(ctx context.Context, ids []string) error {
75+
if len(ids) == 0 {
76+
return nil
77+
}
78+
input := &ec2.TerminateInstancesInput{InstanceIds: ids}
79+
_, err := r.ec2.TerminateInstances(ctx, input)
80+
return err
81+
}
82+
83+
func (r *reaper) listAndReapDeadWindowsInstances(ctx context.Context) (int, error) {
84+
ids, err := r.listDeadWindowsInstances(ctx)
85+
if err != nil {
86+
return 0, err
87+
}
88+
if len(ids) == 0 {
89+
return 0, nil
90+
}
91+
92+
log.Printf("terminating %d instances: %v", len(ids), ids)
93+
if err := r.terminateInstances(ctx, ids); err != nil {
94+
return 0, err
95+
}
96+
97+
return len(ids), nil
98+
}

0 commit comments

Comments
 (0)