Skip to content

Commit c97704f

Browse files
committed
[NodeKiller] Add summary
1 parent c601e3d commit c97704f

File tree

3 files changed

+66
-1
lines changed

3 files changed

+66
-1
lines changed

clusterloader2/pkg/chaos/monkey.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ limitations under the License.
1717
package chaos
1818

1919
import (
20+
"fmt"
21+
"strings"
22+
2023
clientset "k8s.io/client-go/kubernetes"
2124
"k8s.io/perf-tests/clusterloader2/api"
2225
)
@@ -47,3 +50,13 @@ func (m *Monkey) Init(config api.ChaosMonkeyConfig, stopCh <-chan struct{}) erro
4750

4851
return nil
4952
}
53+
54+
// Summary logs Monkey execution
55+
func (m *Monkey) Summary() string {
56+
var sb strings.Builder
57+
if m.nodeKiller != nil {
58+
sb.WriteString(fmt.Sprintf("Summary of Chaos Monkey execution\n"))
59+
sb.WriteString(m.nodeKiller.Summary())
60+
}
61+
return sb.String()
62+
}

clusterloader2/pkg/chaos/nodes.go

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"fmt"
2121
"math"
2222
"math/rand"
23+
"strings"
2324
"sync"
2425
"time"
2526

@@ -47,14 +48,44 @@ type NodeKiller struct {
4748
provider string
4849
// killedNodes stores names of the nodes that have been killed by NodeKiller.
4950
killedNodes sets.String
51+
recorder *eventRecorder
52+
}
53+
54+
// nodeAction names a kind of disruptive action performed on a node.
type nodeAction string

// Both constants carry the nodeAction type explicitly. A constant declared
// with its own expression does not inherit the type of the previous line, so
// omitting the type would leave it as an untyped string constant.
const (
	// stopServices is recorded when services on a node are stopped.
	// Note that the string value is singular ("stopService").
	stopServices nodeAction = "stopService"
	// rebootNode is recorded when a node reboot is triggered.
	rebootNode nodeAction = "rebootNode"
)
60+
61+
type event struct {
62+
time time.Time
63+
action nodeAction
64+
nodeName string
65+
}
66+
67+
type eventRecorder struct {
68+
events []event
69+
mux sync.Mutex
70+
}
71+
72+
func newEventRecorder() *eventRecorder {
73+
return &eventRecorder{[]event{}, sync.Mutex{}}
74+
}
75+
76+
// record appends an event for action a on nodeName, stamped with the current
// time. The append is performed while holding the recorder's mutex.
func (r *eventRecorder) record(a nodeAction, nodeName string) {
	// Take the timestamp before acquiring the lock so that waiting for the
	// lock does not shift the recorded time.
	entry := event{
		time:     time.Now(),
		action:   a,
		nodeName: nodeName,
	}

	r.mux.Lock()
	defer r.mux.Unlock()
	r.events = append(r.events, entry)
}
5182

5283
// NewNodeKiller creates new NodeKiller.
5384
func NewNodeKiller(config api.NodeFailureConfig, client clientset.Interface, provider string) (*NodeKiller, error) {
5485
if provider != "gce" && provider != "gke" {
5586
return nil, fmt.Errorf("provider %q is not supported by NodeKiller", provider)
5687
}
57-
return &NodeKiller{config, client, provider, sets.NewString()}, nil
88+
return &NodeKiller{config, client, provider, sets.NewString(), newEventRecorder()}, nil
5889
}
5990

6091
// Run starts NodeKiller until stopCh is closed.
@@ -121,6 +152,7 @@ func (k *NodeKiller) kill(nodes []v1.Node) {
121152
defer wg.Done()
122153

123154
klog.Infof("%s: Stopping docker and kubelet on %q to simulate failure", k, node.Name)
155+
k.addStopServicesEvent(node.Name)
124156
err := util.SSH("sudo systemctl stop docker kubelet", &node, nil)
125157
if err != nil {
126158
klog.Errorf("%s: ERROR while stopping node %q: %v", k, node.Name, err)
@@ -141,6 +173,7 @@ func (k *NodeKiller) kill(nodes []v1.Node) {
141173
// '> /dev/null 2> /dev/null < /dev/null' - File descriptor redirect, all three I/O to avoid ssh hanging,
142174
// see https://web.archive.org/web/20090429074212/http://www.openssh.com/faq.html#3.10
143175
// '&' - Execute command in background, end without waiting for result
176+
k.addRebootEvent(node.Name)
144177
err = util.SSH("nohup sudo shutdown -r +1s > /dev/null 2> /dev/null < /dev/null &", &node, nil)
145178
if err != nil {
146179
klog.Errorf("%s: Error while rebooting node %q: %v", k, node.Name, err)
@@ -151,6 +184,24 @@ func (k *NodeKiller) kill(nodes []v1.Node) {
151184
wg.Wait()
152185
}
153186

187+
func (k *NodeKiller) addStopServicesEvent(nodeName string) {
188+
k.recorder.record(stopServices, nodeName)
189+
}
190+
191+
func (k *NodeKiller) addRebootEvent(nodeName string) {
192+
k.recorder.record(rebootNode, nodeName)
193+
}
194+
195+
// Summary logs NodeKiller execution
196+
func (k *NodeKiller) Summary() string {
197+
var sb strings.Builder
198+
sb.WriteString(fmt.Sprintf("%s: Recorded following events\n", k))
199+
for _, e := range k.recorder.events {
200+
sb.WriteString(fmt.Sprintf("%s: At %v %v happend for node %s\n", k, e.time.Format(time.UnixDate), e.action, e.nodeName))
201+
}
202+
return sb.String()
203+
}
204+
154205
// String returns the fixed name "NodeKiller". The log and summary messages
// in this file pass k to %s verbs, which use this method for the prefix.
func (k *NodeKiller) String() string {
	const name = "NodeKiller"
	return name
}

clusterloader2/pkg/test/simple_test_executor.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ func (ste *simpleTestExecutor) ExecuteTest(ctx Context, conf *api.Config) *error
102102
}
103103
}
104104
}
105+
klog.Infof(ctx.GetChaosMonkey().Summary())
105106
return errList
106107
}
107108

0 commit comments

Comments
 (0)