Skip to content

Commit 678438b

Browse files
author
Michal Tichák
committed
better error handling
1 parent c1e3401 commit 678438b

File tree

2 files changed

+103
-77
lines changed

2 files changed

+103
-77
lines changed

occ/peanut/README.md

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,13 +98,19 @@ peanut -addr localhost:47100 -mode fmq-step
9898

9999
### Connection monitoring
100100

101-
While connected, peanut passively monitors the gRPC connection in a background goroutine and detects process termination without any button press. The strategy depends on what the controlled process supports:
101+
While connected, peanut polls `GetState` every 2 seconds in a background goroutine. If the process stops responding, the state display shows `UNREACHABLE` and an error modal appears. After restarting the controlled process, press `n` to reconnect.
102102

103-
1. **StateStream** (OCClib processes, `direct` mode) — subscribes to the state stream; any disconnect immediately triggers `UNREACHABLE` and an error modal. State updates from the stream are also reflected in the display in real time.
104-
2. **EventStream** (FairMQ processes, `fmq`/`fmq-step` modes) — subscribes to the event stream; disconnect is detected immediately when the stream breaks.
105-
3. **Polling** (fallback) — if neither stream is available, `GetState` is polled every 2 seconds.
103+
Transition buttons are dimmed and disabled until a connection is successfully established, and they are disabled again whenever the connection is lost or a reconnect is in progress.
106104

107-
When the process dies, the state display shows `UNREACHABLE` and an error modal appears. After restarting the controlled process, press `n` to reconnect.
105+
#### Connection error states
106+
107+
| State | Meaning |
108+
|-------|---------|
109+
| `CONNECTING` | Connection attempt in progress |
110+
| `UNREACHABLE` | No process is listening on the given address, or the connection was lost after a successful connection |
111+
| `WRONG MODE` | A process is running but speaks a different protocol — check the `-mode` flag |
112+
113+
If `WRONG MODE` is shown, peanut will suggest the correct mode in the error modal.
108114

109115
### Runtime configuration files
110116

occ/peanut/peanut.go

Lines changed: 92 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ import (
4444
"github.com/gdamore/tcell/v2"
4545
"github.com/rivo/tview"
4646
"google.golang.org/grpc"
47+
"google.golang.org/grpc/codes"
4748
"google.golang.org/grpc/credentials/insecure"
49+
"google.golang.org/grpc/status"
4850
)
4951

5052
// Options configures the peanut TUI.
@@ -59,8 +61,10 @@ var (
5961
tuiMode string
6062
tuiAddr string
6163
tuiConn *grpc.ClientConn
62-
streamCancel context.CancelFunc
63-
transitioning bool
64+
monitorCancel context.CancelFunc
65+
transitioning bool
66+
connected bool
67+
setCommandsEnabled func(bool)
6468
configMap map[string]string
6569
controlList *tview.List
6670
configTextView *tview.TextView
@@ -69,50 +73,7 @@ var (
6973
)
7074

7175
func monitorConnection(ctx context.Context) {
72-
// Try StateStream first — gives state updates and disconnect detection.
73-
stateStream, e := occClient.StateStream(ctx, &pb.StateStreamRequest{})
74-
if e == nil && stateStream != nil {
75-
for {
76-
msg, e := stateStream.Recv()
77-
if e != nil {
78-
if ctx.Err() != nil {
79-
return
80-
}
81-
app.QueueUpdateDraw(func() {
82-
state = "UNREACHABLE"
83-
errorMessage(configPages, "Connection lost", e.Error())
84-
})
85-
return
86-
}
87-
app.QueueUpdateDraw(func() {
88-
switch tuiMode {
89-
case "fmq":
90-
state = cliFMQToOCCState(msg.GetState())
91-
default:
92-
state = msg.GetState()
93-
}
94-
})
95-
}
96-
}
97-
98-
// Try EventStream — disconnect detection only (no state in payload).
99-
eventStream, e := occClient.EventStream(ctx, &pb.EventStreamRequest{})
100-
if e == nil && eventStream != nil {
101-
for {
102-
if _, e := eventStream.Recv(); e != nil {
103-
if ctx.Err() != nil {
104-
return
105-
}
106-
app.QueueUpdateDraw(func() {
107-
state = "UNREACHABLE"
108-
errorMessage(configPages, "Connection lost", e.Error())
109-
})
110-
return
111-
}
112-
}
113-
}
114-
115-
// Neither stream available — poll GetState every 2s.
76+
// Poll GetState every 2s for disconnect detection.
11677
ticker := time.NewTicker(2 * time.Second)
11778
defer ticker.Stop()
11879
for {
@@ -125,6 +86,10 @@ func monitorConnection(ctx context.Context) {
12586
return
12687
}
12788
app.QueueUpdateDraw(func() {
89+
connected = false
90+
if setCommandsEnabled != nil {
91+
setCommandsEnabled(false)
92+
}
12893
state = "UNREACHABLE"
12994
errorMessage(configPages, "Connection lost", e.Error())
13095
})
@@ -135,8 +100,12 @@ func monitorConnection(ctx context.Context) {
135100
}
136101

137102
func connectRPC() {
138-
if streamCancel != nil {
139-
streamCancel() // stop any existing stream monitor
103+
if monitorCancel != nil {
104+
monitorCancel() // stop any existing connection monitor
105+
}
106+
connected = false
107+
if setCommandsEnabled != nil {
108+
setCommandsEnabled(false)
140109
}
141110
state = "CONNECTING"
142111
go func() {
@@ -159,17 +128,47 @@ func connectRPC() {
159128
}
160129
response, e := occClient.GetState(context.TODO(), &pb.GetStateRequest{})
161130
if e != nil {
131+
if st, ok := status.FromError(e); ok && st.Code() == codes.Unavailable {
132+
app.QueueUpdateDraw(func() {
133+
state = "UNREACHABLE"
134+
errorMessage(configPages, "Nothing running", "No process is listening on "+tuiAddr+".")
135+
})
136+
return
137+
}
138+
// Probe with the opposite client to detect mode mismatch.
139+
var altClient pb.OccClient
140+
if tuiMode == "fmq" || tuiMode == "fmq-step" {
141+
altClient = pb.NewOccClient(conn)
142+
} else {
143+
altClient = nopb.NewOccClient(conn)
144+
}
145+
_, altErr := altClient.GetState(context.TODO(), &pb.GetStateRequest{})
162146
app.QueueUpdateDraw(func() {
163-
state = "UNREACHABLE"
164-
errorMessage(configPages, "Connection failed", e.Error())
147+
if altErr == nil {
148+
state = "WRONG MODE"
149+
if tuiMode == "fmq" || tuiMode == "fmq-step" {
150+
errorMessage(configPages, "Wrong mode",
151+
"The process at "+tuiAddr+" is a direct OCC process.\nRestart peanut without -mode fmq.")
152+
} else {
153+
errorMessage(configPages, "Wrong mode",
154+
"The process at "+tuiAddr+" is a FairMQ process.\nRestart peanut with -mode fmq.")
155+
}
156+
} else {
157+
state = "UNREACHABLE"
158+
errorMessage(configPages, "Connection failed", e.Error())
159+
}
165160
})
166161
return
167162
}
168163
tuiConn = conn
164+
connected = true
169165
ctx, cancel := context.WithCancel(context.Background())
170-
streamCancel = cancel
166+
monitorCancel = cancel
171167
go monitorConnection(ctx)
172168
app.QueueUpdateDraw(func() {
169+
if setCommandsEnabled != nil {
170+
setCommandsEnabled(true)
171+
}
173172
switch tuiMode {
174173
case "fmq":
175174
state = cliFMQToOCCState(response.GetState())
@@ -461,7 +460,7 @@ func Run(opts Options) (err error) {
461460
AddPage("configBox", configTextView, true, true)
462461

463462
doTransition := func(evt string) {
464-
if transitioning {
463+
if !connected || transitioning {
465464
return
466465
}
467466
transitioning = true
@@ -478,7 +477,7 @@ func Run(opts Options) (err error) {
478477
}
479478

480479
doFMQStep := func(event string) {
481-
if transitioning {
480+
if !connected || transitioning {
482481
return
483482
}
484483
transitioning = true
@@ -501,34 +500,55 @@ func Run(opts Options) (err error) {
501500
}
502501

503502
controlList = tview.NewList()
503+
504+
var cmdIndices []int
505+
addCmd := func(main, secondary string, shortcut rune, handler func()) {
506+
cmdIndices = append(cmdIndices, controlList.GetItemCount())
507+
controlList.AddItem(main, secondary, shortcut, handler)
508+
}
509+
504510
switch tuiMode {
505511
case "fmq-step":
506-
controlList.
507-
AddItem("INIT DEVICE", "IDLE → INITIALIZING DEVICE", '1', func() { doFMQStep(fairmq.EvtINIT_DEVICE) }).
508-
AddItem("COMPLETE INIT", "INITIALIZING DEVICE → INITIALIZED", '2', func() { doFMQStep(fairmq.EvtCOMPLETE_INIT) }).
509-
AddItem("BIND", "INITIALIZED → BOUND", '3', func() { doFMQStep(fairmq.EvtBIND) }).
510-
AddItem("CONNECT", "BOUND → DEVICE READY", '4', func() { doFMQStep(fairmq.EvtCONNECT) }).
511-
AddItem("INIT TASK", "DEVICE READY → READY", '5', func() { doFMQStep(fairmq.EvtINIT_TASK) }).
512-
AddItem("RUN", "READY → RUNNING", '6', func() { doFMQStep(fairmq.EvtRUN) }).
513-
AddItem("STOP", "RUNNING → READY", '7', func() { doFMQStep(fairmq.EvtSTOP) }).
514-
AddItem("RESET TASK", "READY → DEVICE READY", '8', func() { doFMQStep(fairmq.EvtRESET_TASK) }).
515-
AddItem("RESET DEVICE", "→ IDLE", '9', func() { doFMQStep(fairmq.EvtRESET_DEVICE) }).
516-
AddItem("END", "IDLE → EXITING", '0', func() { doFMQStep(fairmq.EvtEND) })
512+
addCmd("INIT DEVICE", "IDLE → INITIALIZING DEVICE", '1', func() { doFMQStep(fairmq.EvtINIT_DEVICE) })
513+
addCmd("COMPLETE INIT", "INITIALIZING DEVICE → INITIALIZED", '2', func() { doFMQStep(fairmq.EvtCOMPLETE_INIT) })
514+
addCmd("BIND", "INITIALIZED → BOUND", '3', func() { doFMQStep(fairmq.EvtBIND) })
515+
addCmd("CONNECT", "BOUND → DEVICE READY", '4', func() { doFMQStep(fairmq.EvtCONNECT) })
516+
addCmd("INIT TASK", "DEVICE READY → READY", '5', func() { doFMQStep(fairmq.EvtINIT_TASK) })
517+
addCmd("RUN", "READY → RUNNING", '6', func() { doFMQStep(fairmq.EvtRUN) })
518+
addCmd("STOP", "RUNNING → READY", '7', func() { doFMQStep(fairmq.EvtSTOP) })
519+
addCmd("RESET TASK", "READY → DEVICE READY", '8', func() { doFMQStep(fairmq.EvtRESET_TASK) })
520+
addCmd("RESET DEVICE", "→ IDLE", '9', func() { doFMQStep(fairmq.EvtRESET_DEVICE) })
521+
addCmd("END", "IDLE → EXITING", '0', func() { doFMQStep(fairmq.EvtEND) })
517522
default: // direct, fmq
518-
controlList.
519-
AddItem("Transition CONFIGURE", "perform CONFIGURE transition", 'c', func() { doTransition("CONFIGURE") }).
520-
AddItem("Transition RESET", "perform RESET transition", 'r', func() { doTransition("RESET") }).
521-
AddItem("Transition START", "perform START transition", 's', func() { doTransition("START") }).
522-
AddItem("Transition STOP", "perform STOP transition", 't', func() { doTransition("STOP") }).
523-
AddItem("Transition RECOVER", "perform RECOVER transition", 'v', func() { doTransition("RECOVER") }).
524-
AddItem("Transition EXIT", "perform EXIT transition", 'x', func() { doTransition("EXIT") })
525-
}
523+
addCmd("Transition CONFIGURE", "perform CONFIGURE transition", 'c', func() { doTransition("CONFIGURE") })
524+
addCmd("Transition RESET", "perform RESET transition", 'r', func() { doTransition("RESET") })
525+
addCmd("Transition START", "perform START transition", 's', func() { doTransition("START") })
526+
addCmd("Transition STOP", "perform STOP transition", 't', func() { doTransition("STOP") })
527+
addCmd("Transition RECOVER", "perform RECOVER transition", 'v', func() { doTransition("RECOVER") })
528+
addCmd("Transition EXIT", "perform EXIT transition", 'x', func() { doTransition("EXIT") })
529+
}
530+
// Always-on items — not registered in cmdIndices.
526531
controlList.
527532
AddItem("Reconnect", "re-establish gRPC connection to the controlled process", 'n', func() { connectRPC() }).
528533
AddItem("Load configuration", "read runtime configuration from file", 'l', func() { err = acquireConfigFile(configPages) }).
529534
AddItem("Quit", "disconnect from the process and quit peanut", 'q', func() { app.Stop() })
530535
controlList.SetBorder(true).SetTitle("control")
531536

537+
origTexts := make([][2]string, len(cmdIndices))
538+
for i, idx := range cmdIndices {
539+
origTexts[i][0], origTexts[i][1] = controlList.GetItemText(idx)
540+
}
541+
setCommandsEnabled = func(enabled bool) {
542+
for i, idx := range cmdIndices {
543+
if enabled {
544+
controlList.SetItemText(idx, origTexts[i][0], origTexts[i][1])
545+
} else {
546+
controlList.SetItemText(idx, "[::d]"+origTexts[i][0], "[::d]"+origTexts[i][1])
547+
}
548+
}
549+
}
550+
setCommandsEnabled(false) // start grayed out until connected
551+
532552
flex := tview.NewFlex().AddItem(tview.NewFlex().SetDirection(tview.FlexRow).
533553
AddItem(statusBox, 3, 1, false).
534554
AddItem(controlList, 0, 1, true), 0, 1, false).

0 commit comments

Comments
 (0)