Skip to content

Commit 2456482

Browse files
qat,e2e: add heartbeat and auto-reset validations
Signed-off-by: Hyeongju Johannes Lee <[email protected]>
1 parent 4ca9eb9 commit 2456482

File tree

3 files changed

+119
-0
lines changed

3 files changed

+119
-0
lines changed

demo/qat-auto-reset/Dockerfile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Image for the QAT auto-reset test helper, built on the busybox base image.
FROM busybox
2+
# Place the helper script in /usr/bin/ inside the image.
COPY qat-auto-reset-test.sh /usr/bin/
3+
4+
# Default command: run the helper script with no arguments.
CMD ["/usr/bin/qat-auto-reset-test.sh"]
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/bin/sh
2+
# Space-separated list of PCI IDs to look for; the default list is used
# only when the variable is unset or empty in the environment.
ENABLED_QAT_PF_PCIIDS=${ENABLED_QAT_PF_PCIIDS:-37c8 4940 4942 4944 4946}
3+
# For every ID, keep the leading non-whitespace field of each `lspci -n` line
# that contains the ID. The resulting values are later appended to
# "/sys/bus/pci/devices/0000:" by set_auto_reset.
DEVS=$(for pf in $ENABLED_QAT_PF_PCIIDS; do lspci -n | grep -e "$pf" | grep -o -e "^\\S*"; done)
4+
5+
# set_auto_reset <value>
#
# Writes <value> (expected to be "on" or "off") to the qat/auto_reset sysfs
# attribute of every device listed in $DEVS.
set_auto_reset() {
  # $DEVS is intentionally unquoted: it is a whitespace-separated list.
  for dev in $DEVS; do
    auto_reset_path="/sys/bus/pci/devices/0000:$dev/qat/auto_reset"
    # No backslash before the apostrophe: inside double quotes "\'" would be
    # printed literally as \'.
    echo "Setting $dev's auto-reset $1" # on/off
    echo "$1" > "$auto_reset_path"
  done
}
12+
13+
# inject_error
#
# Writes 1 to every "inject_error" file found below
# /sys/kernel/debug/qat_*/heartbeat/.
#
# Returns 1 when no such file exists, so the caller does not report success
# without having injected anything.
inject_error() {
  inject_error_paths=$(find /sys/kernel/debug/qat_*/heartbeat/ -name inject_error)

  if [ -z "$inject_error_paths" ]; then
    echo "No inject_error file found under /sys/kernel/debug/qat_*/heartbeat/" >&2
    return 1
  fi

  # $inject_error_paths is intentionally unquoted: one path per word.
  for path in $inject_error_paths; do
    echo "Injecting error into: $path"
    echo 1 > "$path"
  done
}
21+
22+
# Entry point: the first argument selects the action, the second (for
# set_auto_reset) is the value to write.
if [ "$1" = "inject_error" ]; then
  inject_error
elif [ "$1" = "set_auto_reset" ]; then
  # A value is mandatory for set_auto_reset.
  if [ -z "$2" ]; then
    echo "Usage: $0 set_auto_reset <on|off>"
    exit 1
  fi
  set_auto_reset "$2"
else
  # Unknown or missing action.
  echo "Usage: $0 {inject_error|set_auto_reset <on|off>}"
  exit 1
fi

test/e2e/qat/qatplugin_dpdk.go

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"context"
1919
"path/filepath"
2020
"strconv"
21+
"strings"
2122
"time"
2223

2324
"github.com/intel/intel-device-plugins-for-kubernetes/test/e2e/utils"
@@ -137,6 +138,10 @@ func describeQatDpdkPlugin() {
137138
ginkgo.When("there is no app to run [App:noapp]", func() {
138139
ginkgo.It("does nothing", func() {})
139140
})
141+
142+
ginkgo.It("checks heartbeat detection and auto-reset functionalities [Functionality:auto-reset]", func(ctx context.Context) {
143+
checkAutoResetFunctionality(ctx, f, symmetric, resourceName)
144+
})
140145
})
141146

142147
ginkgo.Context("When QAT resources are available with compress (dc) services enabled [Resource:dc]", func() {
@@ -164,6 +169,10 @@ func describeQatDpdkPlugin() {
164169
ginkgo.When("there is no app to run [App:noapp]", func() {
165170
ginkgo.It("does nothing", func() {})
166171
})
172+
173+
ginkgo.It("checks heartbeat detection and auto-reset functionalities [Functionality:auto-reset]", func(ctx context.Context) {
174+
checkAutoResetFunctionality(ctx, f, compression, resourceName)
175+
})
167176
})
168177
}
169178

@@ -199,3 +208,72 @@ func runCpaSampleCode(ctx context.Context, f *framework.Framework, runTests int,
199208
err = e2epod.WaitForPodSuccessInNamespaceTimeout(ctx, f.ClientSet, pod.ObjectMeta.Name, f.Namespace.Name, 300*time.Second)
200209
gomega.Expect(err).To(gomega.BeNil(), utils.GetPodLogs(ctx, f, pod.ObjectMeta.Name, pod.Spec.Containers[0].Name))
201210
}
211+
212+
func checkAutoResetFunctionality(ctx context.Context, f *framework.Framework, runTests int, resourceName v1.ResourceName) {
213+
ginkgo.By("checking if heartbeat status is read correctly")
214+
setQatAutoReset("off", ctx, f)
215+
injectError(ctx, f)
216+
if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, resourceName, 100*time.Second, utils.WaitForZeroResource); err != nil {
217+
framework.Failf("unable to wait for nodes to have zero resource: %v", err)
218+
}
219+
220+
ginkgo.By("checking if auto-reset works to solve injected errors")
221+
setQatAutoReset("on", ctx, f)
222+
if err := utils.WaitForNodesWithResource(ctx, f.ClientSet, resourceName, 300*time.Second, utils.WaitForPositiveResource); err != nil {
223+
framework.Failf("unable to wait for nodes to have positive allocatable resource: %v", err)
224+
}
225+
226+
ginkgo.By("checking if deployed pod runs successfully")
227+
runCpaSampleCode(ctx, f, runTests, resourceName)
228+
}
229+
230+
func setQatAutoReset(status string, ctx context.Context, f *framework.Framework) {
231+
runQatAutoResetPod([]string{"set_auto_reset", status}, ctx, f)
232+
}
233+
234+
func injectError(ctx context.Context, f *framework.Framework) {
235+
runQatAutoResetPod([]string{"inject_error"}, ctx, f)
236+
}
237+
238+
func runQatAutoResetPod(command []string, ctx context.Context, f *framework.Framework) {
239+
ginkgo.By("submitting a pod that sets auto-reset function of QAT devices")
240+
yes := true
241+
podSpec := &v1.Pod{
242+
ObjectMeta: metav1.ObjectMeta{Name: "qat-auto-reset-" + strings.ReplaceAll(strings.Join(command, "-"), "_", "-")},
243+
Spec: v1.PodSpec{
244+
Containers: []v1.Container{
245+
{
246+
Name: "qat-auto-reset",
247+
Image: "intel/qat-auto-reset:devel",
248+
ImagePullPolicy: "IfNotPresent",
249+
Command: append([]string{"qat-auto-reset-test.sh"}, command...),
250+
SecurityContext: &v1.SecurityContext{
251+
Privileged: &yes,
252+
},
253+
VolumeMounts: []v1.VolumeMount{
254+
{
255+
Name: "debugfs",
256+
MountPath: "/sys/kernel/debug/",
257+
},
258+
},
259+
},
260+
},
261+
Volumes: []v1.Volume{
262+
{
263+
Name: "debugfs",
264+
VolumeSource: v1.VolumeSource{
265+
HostPath: &v1.HostPathVolumeSource{
266+
Path: "/sys/kernel/debug/",
267+
},
268+
},
269+
},
270+
},
271+
RestartPolicy: v1.RestartPolicyNever,
272+
},
273+
}
274+
pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Create(ctx, podSpec, metav1.CreateOptions{})
275+
framework.ExpectNoError(err, "pod Create API error")
276+
277+
err = e2epod.WaitForPodSuccessInNamespaceTimeout(ctx, f.ClientSet, pod.ObjectMeta.Name, f.Namespace.Name, 300*time.Second)
278+
gomega.Expect(err).To(gomega.BeNil(), utils.GetPodLogs(ctx, f, pod.ObjectMeta.Name, pod.Spec.Containers[0].Name))
279+
}

0 commit comments

Comments
 (0)