Skip to content

add progress tracking flag to monitor VM startup state using cloud-init #3846

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/limactl/clone.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ func cloneAction(cmd *cobra.Command, args []string) error {
if err != nil {
return err
}
return instance.Start(ctx, newInst, "", false)
return instance.Start(ctx, newInst, "", false, false)
}

func cloneBashComplete(cmd *cobra.Command, _ []string, _ string) ([]string, cobra.ShellCompDirective) {
Expand Down
2 changes: 1 addition & 1 deletion cmd/limactl/edit.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ func editAction(cmd *cobra.Command, args []string) error {
if err != nil {
return err
}
return instance.Start(ctx, inst, "", false)
return instance.Start(ctx, inst, "", false, false)
}

func askWhetherToStart() (bool, error) {
Expand Down
8 changes: 8 additions & 0 deletions cmd/limactl/hostagent.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ func newHostagentCommand() *cobra.Command {
hostagentCommand.Flags().Bool("run-gui", false, "Run GUI synchronously within hostagent")
hostagentCommand.Flags().String("guestagent", "", "Local file path (not URL) of lima-guestagent.OS-ARCH[.gz]")
hostagentCommand.Flags().String("nerdctl-archive", "", "Local file path (not URL) of nerdctl-full-VERSION-GOOS-GOARCH.tar.gz")
hostagentCommand.Flags().Bool("progress", false, "Show provision script progress by monitoring cloud-init logs")
return hostagentCommand
}

Expand Down Expand Up @@ -94,6 +95,13 @@ func hostagentAction(cmd *cobra.Command, args []string) error {
if nerdctlArchive != "" {
opts = append(opts, hostagent.WithNerdctlArchive(nerdctlArchive))
}
showProgress, err := cmd.Flags().GetBool("progress")
if err != nil {
return err
}
if showProgress {
opts = append(opts, hostagent.WithCloudInitProgress(showProgress))
}
ha, err := hostagent.New(instName, stdout, signalCh, opts...)
if err != nil {
return err
Expand Down
2 changes: 1 addition & 1 deletion cmd/limactl/shell.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func shellAction(cmd *cobra.Command, args []string) error {
return err
}

err = instance.Start(ctx, inst, "", false)
err = instance.Start(ctx, inst, "", false, false)
if err != nil {
return err
}
Expand Down
8 changes: 7 additions & 1 deletion cmd/limactl/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ See the examples in 'limactl create --help'.
startCommand.Flags().Bool("foreground", false, "Run the hostagent in the foreground")
}
startCommand.Flags().Duration("timeout", instance.DefaultWatchHostAgentEventsTimeout, "Duration to wait for the instance to be running before timing out")
startCommand.Flags().Bool("progress", false, "Show provision script progress by tailing cloud-init logs")
return startCommand
}

Expand Down Expand Up @@ -493,7 +494,12 @@ func startAction(cmd *cobra.Command, args []string) error {
ctx = instance.WithWatchHostAgentTimeout(ctx, timeout)
}

return instance.Start(ctx, inst, "", launchHostAgentForeground)
progress, err := cmd.Flags().GetBool("progress")
if err != nil {
return err
}

return instance.Start(ctx, inst, "", launchHostAgentForeground, progress)
}

func createBashComplete(cmd *cobra.Command, _ []string, toComplete string) ([]string, cobra.ShellCompDirective) {
Expand Down
12 changes: 12 additions & 0 deletions pkg/hostagent/events/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@ type Status struct {
Errors []string `json:"errors,omitempty"`

SSHLocalPort int `json:"sshLocalPort,omitempty"`

// Cloud-init progress information
CloudInitProgress *CloudInitProgress `json:"cloudInitProgress,omitempty"`
}

// CloudInitProgress is the cloud-init portion of a Status event. It is
// populated by HostAgent.watchCloudInitProgress while it follows
// /var/log/cloud-init-output.log in the guest over SSH.
//
// Every field is tagged omitempty, so an empty LogLine and false flags are
// left out of the encoded JSON entirely.
type CloudInitProgress struct {
// LogLine is a single non-blank line read from the cloud-init output log.
// It is empty on the initial "monitoring started" event and on the final
// "monitoring ended" event.
LogLine string `json:"logLine,omitempty"`
// Completed is set once a log line containing both "Cloud-init" and
// "finished" has been seen, and unconditionally on the final event that is
// emitted when monitoring ends.
Completed bool `json:"completed,omitempty"`
// Active is true while monitoring is still in progress; it is the inverse
// of Completed on per-line events and false on the final event.
Active bool `json:"active,omitempty"`
}

type Event struct {
Expand Down
159 changes: 159 additions & 0 deletions pkg/hostagent/hostagent.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package hostagent

import (
"bufio"
"bytes"
"context"
"encoding/json"
Expand Down Expand Up @@ -72,11 +73,14 @@ type HostAgent struct {

guestAgentAliveCh chan struct{} // closed on establishing the connection
guestAgentAliveChOnce sync.Once

showProgress bool // whether to show cloud-init progress
}

// options collects the optional settings that Opt functions fill in; New
// copies showProgress from it into the HostAgent it constructs.
type options struct {
// NOTE(review): presumably the local guest agent binary path supplied via
// the hostagent --guestagent flag — confirm against its setter.
guestAgentBinary string
nerdctlArchive string // local path, not URL
// showProgress turns on cloud-init progress monitoring; it is set by
// WithCloudInitProgress.
showProgress bool
}

// Opt is a functional option: it mutates the options it is given and may
// return an error. Values of this type are passed variadically to New.
type Opt func(*options) error
Expand All @@ -95,6 +99,13 @@ func WithNerdctlArchive(s string) Opt {
}
}

// WithCloudInitProgress returns an Opt that records whether cloud-init
// progress should be shown. The returned option never fails.
func WithCloudInitProgress(enabled bool) Opt {
	var apply Opt = func(cfg *options) error {
		cfg.showProgress = enabled
		return nil
	}
	return apply
}

// New creates the HostAgent.
//
// stdout is for emitting JSON lines of Events.
Expand Down Expand Up @@ -214,6 +225,7 @@ func New(instName string, stdout io.Writer, signalCh chan os.Signal, opts ...Opt
vSockPort: vSockPort,
virtioPort: virtioPort,
guestAgentAliveCh: make(chan struct{}),
showProgress: o.showProgress,
}
return a, nil
}
Expand Down Expand Up @@ -480,6 +492,18 @@ sudo chown -R "${USER}" /run/host-services`
}
if !*a.instConfig.Plain {
go a.watchGuestAgentEvents(ctx)
if a.showProgress {
cloudInitDone := make(chan struct{})
go func() {
a.watchCloudInitProgress(ctx)
close(cloudInitDone)
}()

go func() {
<-cloudInitDone
logrus.Debug("Cloud-init monitoring completed, VM is fully ready")
}()
}
}
if err := a.waitForRequirements("optional", a.optionalRequirements()); err != nil {
errs = append(errs, err)
Expand Down Expand Up @@ -777,6 +801,141 @@ func forwardSSH(ctx context.Context, sshConfig *ssh.SSHConfig, port int, local,
return nil
}

func (a *HostAgent) watchCloudInitProgress(ctx context.Context) {
logrus.Debug("Starting cloud-init progress monitoring")

a.emitEvent(ctx, events.Event{
Status: events.Status{
SSHLocalPort: a.sshLocalPort,
CloudInitProgress: &events.CloudInitProgress{
Active: true,
},
},
})

maxRetries := 30
retryDelay := time.Second
var sshReady bool

for i := 0; i < maxRetries && !sshReady; i++ {
if i > 0 {
time.Sleep(retryDelay)
}

// Test SSH connectivity
args := a.sshConfig.Args()
args = append(args,
"-p", strconv.Itoa(a.sshLocalPort),
"127.0.0.1",
"echo 'SSH Ready'",
)

cmd := exec.CommandContext(ctx, a.sshConfig.Binary(), args...)
if err := cmd.Run(); err == nil {
sshReady = true
logrus.Debug("SSH ready for cloud-init monitoring")
}
}

if !sshReady {
logrus.Warn("SSH not ready for cloud-init monitoring, proceeding anyway")
}

args := a.sshConfig.Args()
args = append(args,
"-p", strconv.Itoa(a.sshLocalPort),
"127.0.0.1",
"sudo", "tail", "-n", "+1", "-f", "/var/log/cloud-init-output.log",
)

cmd := exec.CommandContext(ctx, a.sshConfig.Binary(), args...)
stdout, err := cmd.StdoutPipe()
if err != nil {
logrus.WithError(err).Warn("Failed to create stdout pipe for cloud-init monitoring")
return
}

if err := cmd.Start(); err != nil {
logrus.WithError(err).Warn("Failed to start cloud-init monitoring command")
return
}

scanner := bufio.NewScanner(stdout)
cloudInitFinished := false

for scanner.Scan() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This loop appears to run indefinitely.
The ssh -F … sudo tail -n +1 -f /var/log/cloud-init-output.log process continues after limactl start --progress finishes.
Also, sometimes limactl start --progress never stops, even after the "Cloud-init v. 25.1.2-0ubuntu0~24.04.1 finished" line has been received.

Copy link
Contributor Author

@olamilekan000 olamilekan000 Aug 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to reproduce this but couldn't; everything works fine on my end. That said, I think a timeout context could be added as a safety measure. Can you share how you were able to find the bug?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have opened a ticket for the timeout and also implemented the fix. You can check here for it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you share how you were able to find the bug?

It can be reproduced on my Mac (Apple M2 Pro) with the following:

limactl start template://ubuntu-lts --name=default --progress --set .containerd.user=false

built on current HEAD

$ limactl --version
limactl version 1.2.0-193-gd4f4911e

line := scanner.Text()
if strings.TrimSpace(line) == "" {
continue
}

if strings.Contains(line, "Cloud-init") && strings.Contains(line, "finished") {
cloudInitFinished = true
}

a.emitEvent(ctx, events.Event{
Status: events.Status{
SSHLocalPort: a.sshLocalPort,
CloudInitProgress: &events.CloudInitProgress{
Active: !cloudInitFinished,
LogLine: line,
Completed: cloudInitFinished,
},
},
})
}

if err := cmd.Wait(); err != nil {
logrus.WithError(err).Debug("SSH command finished (expected when cloud-init completes)")
}

if !cloudInitFinished {
logrus.Debug("Connection dropped, checking for any remaining cloud-init logs")

finalArgs := a.sshConfig.Args()
finalArgs = append(finalArgs,
"-p", strconv.Itoa(a.sshLocalPort),
"127.0.0.1",
"sudo", "tail", "-n", "20", "/var/log/cloud-init-output.log",
)

finalCmd := exec.CommandContext(ctx, a.sshConfig.Binary(), finalArgs...)
if finalOutput, err := finalCmd.Output(); err == nil {
lines := strings.Split(string(finalOutput), "\n")
for _, line := range lines {
if strings.TrimSpace(line) != "" {
if strings.Contains(line, "Cloud-init") && strings.Contains(line, "finished") {
cloudInitFinished = true
}

a.emitEvent(ctx, events.Event{
Status: events.Status{
SSHLocalPort: a.sshLocalPort,
CloudInitProgress: &events.CloudInitProgress{
Active: !cloudInitFinished,
LogLine: line,
Completed: cloudInitFinished,
},
},
})
}
}
}
}

a.emitEvent(ctx, events.Event{
Status: events.Status{
SSHLocalPort: a.sshLocalPort,
CloudInitProgress: &events.CloudInitProgress{
Active: false,
Completed: true,
},
},
})

logrus.Debug("Cloud-init progress monitoring completed")
}

func copyToHost(ctx context.Context, sshConfig *ssh.SSHConfig, port int, local, remote string) error {
args := sshConfig.Args()
args = append(args,
Expand Down
9 changes: 6 additions & 3 deletions pkg/instance/restart.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@ import (
"github.com/lima-vm/lima/v2/pkg/store"
)

const launchHostAgentForeground = false
const (
launchHostAgentForeground = false
showProgress = false
)

func Restart(ctx context.Context, inst *store.Instance) error {
if err := StopGracefully(ctx, inst, true); err != nil {
Expand All @@ -23,7 +26,7 @@ func Restart(ctx context.Context, inst *store.Instance) error {
return err
}

if err := Start(ctx, inst, "", launchHostAgentForeground); err != nil {
if err := Start(ctx, inst, "", launchHostAgentForeground, showProgress); err != nil {
return err
}

Expand All @@ -38,7 +41,7 @@ func RestartForcibly(ctx context.Context, inst *store.Instance) error {
return err
}

if err := Start(ctx, inst, "", launchHostAgentForeground); err != nil {
if err := Start(ctx, inst, "", launchHostAgentForeground, showProgress); err != nil {
return err
}

Expand Down
Loading