@@ -6,18 +6,23 @@ import (
6
6
"errors"
7
7
"fmt"
8
8
"regexp"
9
+ "sync"
9
10
"time"
10
11
11
12
"github.com/aws/aws-sdk-go/aws"
12
13
"github.com/golang/glog"
13
14
mq "github.com/remind101/mq-go"
14
15
)
15
16
17
+ const MAX_RETRIES = 3
18
+
16
19
type SQSHandler struct {
17
- QueueURL string
18
- Start bool
19
- JobConfigs []JobConfig
20
- Server * mq.Server
20
+ QueueURL string
21
+ Start bool
22
+ JobConfigs []JobConfig
23
+ Server * mq.Server
24
+ MonitoredJobs []* JobInfo
25
+ Mu sync.Mutex
21
26
}
22
27
23
28
type JobConfig struct {
@@ -44,21 +49,51 @@ func (handler *SQSHandler) StartServer() error {
44
49
return nil
45
50
}
46
51
47
- fmt .Println ("Start a new server" )
48
52
newClient , err := NewSQSClient ()
49
53
if err != nil {
50
54
return err
51
55
}
52
56
57
+ glog .Info ("Starting a new server..." )
53
58
handler .Server = mq .NewServer (handler .QueueURL , mq .HandlerFunc (func (m * mq.Message ) error {
54
59
return handler .HandleSQSMessage (m )
55
60
}), mq .WithClient (newClient ))
56
61
handler .Server .Start ()
62
+ glog .Info ("The server is started" )
63
+
64
+ go handler .StartMonitoringProcess ()
57
65
58
66
return nil
59
67
60
68
}
61
69
70
+ // StartMonitoringProcess starts the process to monitor the created job
71
+ func (handler * SQSHandler ) StartMonitoringProcess () {
72
+ for {
73
+ var nextMonitoredJobs []* JobInfo
74
+
75
+ for _ , jobInfo := range handler .MonitoredJobs {
76
+ k8sJob , err := GetJobStatusByID (jobInfo .UID )
77
+ if err != nil {
78
+ glog .Errorf ("Can not get k8s job %s. Detail %s" , jobInfo .Name , err )
79
+ } else {
80
+ glog .Infof ("%s: %s" , k8sJob .Name , k8sJob .Status )
81
+ if k8sJob .Status == "Unknown" || k8sJob .Status == "Running" {
82
+ nextMonitoredJobs = append (nextMonitoredJobs , jobInfo )
83
+ }
84
+ }
85
+
86
+ }
87
+ handler .Mu .Lock ()
88
+ handler .MonitoredJobs = nextMonitoredJobs
89
+ handler .Mu .Unlock ()
90
+
91
+ RemoveCompletedJobs ()
92
+
93
+ time .Sleep (30 * time .Second )
94
+ }
95
+ }
96
+
62
97
// ShutdownServer shutdowns a server
63
98
func (handler * SQSHandler ) ShutdownServer () error {
64
99
fmt .Println ("Shutdown the server" )
@@ -162,9 +197,6 @@ func (handler *SQSHandler) HandleSQSMessage(m *mq.Message) error {
162
197
jobNameList = append (jobNameList , jobConfig .Name )
163
198
}
164
199
165
- // remove completed jobs
166
- RemoveCompletedJobs (jobNameList )
167
-
168
200
jobMap := make (map [string ]JobConfig )
169
201
for _ , objectPath := range objectPaths {
170
202
for _ , jobConfig := range handler .JobConfigs {
@@ -178,21 +210,24 @@ func (handler *SQSHandler) HandleSQSMessage(m *mq.Message) error {
178
210
glog .Infof ("Start to run %d jobs" , len (jobMap ))
179
211
180
212
for objectPath , jobConfig := range jobMap {
181
- for GetNumberRunningJobs (jobNameList ) > GetMaxJobConfig () {
213
+ for GetNumberRunningJobs () > GetMaxJobConfig () {
182
214
time .Sleep (5 * time .Second )
183
215
}
184
216
glog .Info ("Processing: " , objectPath )
185
- result , err := CreateK8sJob (objectPath , jobConfig )
217
+ jobInfo , err := CreateK8sJob (objectPath , jobConfig )
186
218
if err != nil {
187
219
glog .Errorln (err )
188
220
return err
189
221
}
190
- out , err := json .Marshal (result )
222
+ out , err := json .Marshal (jobInfo )
191
223
if err != nil {
192
224
glog .Errorln (err )
193
225
return err
194
226
}
195
227
glog .Info (string (out ))
228
+ handler .Mu .Lock ()
229
+ handler .MonitoredJobs = append (handler .MonitoredJobs , jobInfo )
230
+ handler .Mu .Unlock ()
196
231
}
197
232
198
233
return nil
0 commit comments