diff --git a/packages/orchestrator/cmd/resume-build/main.go b/packages/orchestrator/cmd/resume-build/main.go index fe66689035..4f679be713 100644 --- a/packages/orchestrator/cmd/resume-build/main.go +++ b/packages/orchestrator/cmd/resume-build/main.go @@ -72,8 +72,19 @@ func main() { cmdSignalPause := flag.String("cmd-signal-pause", "", "execute command in sandbox, then wait for SIGUSR1 before pausing") optimize := flag.Bool("optimize", false, "collect fresh prefetch mapping after pause (resumes snapshot to record page faults)") + // Pause-time reclaim/FPH overrides — both off by default. Pass >0 to enable. + fphTimeoutMs := flag.Int("fph-timeout-ms", 0, "override free-page-hinting-timeout-ms LD flag (0 = use LD default)") + reclaimTimeoutMs := flag.Int("reclaim-timeout-ms", 0, "override reclaim-on-pause-timeout-ms LD flag (0 = use LD default)") + flag.Parse() + if *fphTimeoutMs > 0 { + featureflags.NewIntFlag("free-page-hinting-timeout-ms", *fphTimeoutMs) + } + if *reclaimTimeoutMs > 0 { + featureflags.NewIntFlag("reclaim-on-pause-timeout-ms", *reclaimTimeoutMs) + } + if *fromBuild == "" { log.Fatal("-from-build required") } diff --git a/packages/orchestrator/pkg/sandbox/fc/client.go b/packages/orchestrator/pkg/sandbox/fc/client.go index d35d28040c..d4f8d09d4e 100644 --- a/packages/orchestrator/pkg/sandbox/fc/client.go +++ b/packages/orchestrator/pkg/sandbox/fc/client.go @@ -427,8 +427,11 @@ func (c *apiClient) startVM(ctx context.Context) error { return nil } -func (c *apiClient) enableFreePageReporting(ctx context.Context) error { - ctx, span := tracer.Start(ctx, "enable-free-page-reporting") +// installBalloon installs the virtio-balloon pre-boot with target size 0. +// FreePageHinting is always armed (pure runtime toggle, used by DrainBalloon); +// FreePageReporting is set per template-build gate. +func (c *apiClient) installBalloon(ctx context.Context, freePageReporting bool) error { + ctx, span := tracer.Start(ctx, "install-balloon") defer span.End() amountMib := int64(0) @@ -439,7 +442,8 @@ func (c *apiClient) enableFreePageReporting(ctx context.Context) error { Body: &models.Balloon{ AmountMib: &amountMib, DeflateOnOom: &deflateOnOom, - FreePageReporting: true, + FreePageReporting: freePageReporting, + FreePageHinting: true, }, } @@ -451,6 +455,33 @@ func (c *apiClient) enableFreePageReporting(ctx context.Context) error { return nil } +func (c *apiClient) startBalloonHinting(ctx context.Context, acknowledgeOnStop bool) error { + params := operations.StartBalloonHintingParams{ + Context: ctx, + Body: &models.BalloonStartCmd{AcknowledgeOnStop: acknowledgeOnStop}, + } + _, err := c.client.Operations.StartBalloonHinting(¶ms) + if err != nil { + return fmt.Errorf("error starting balloon hinting: %w", err) + } + + return nil +} + +func (c *apiClient) describeBalloonHinting(ctx context.Context) (hostCmd, guestCmd int64, err error) { + params := operations.DescribeBalloonHintingParams{Context: ctx} + res, err := c.client.Operations.DescribeBalloonHinting(¶ms) + if err != nil { + return 0, 0, err + } + if res.Payload.HostCmd != nil { + hostCmd = *res.Payload.HostCmd + } + guestCmd = res.Payload.GuestCmd + + return hostCmd, guestCmd, nil +} + func (c *apiClient) memoryMapping(ctx context.Context) (*memory.Mapping, error) { params := operations.GetMemoryMappingsParams{ Context: ctx, diff --git a/packages/orchestrator/pkg/sandbox/fc/process.go b/packages/orchestrator/pkg/sandbox/fc/process.go index 977091304d..ee26ffe899 100644 --- a/packages/orchestrator/pkg/sandbox/fc/process.go +++ b/packages/orchestrator/pkg/sandbox/fc/process.go @@ -133,6 +133,8 @@ type Process struct { Exit *utils.ErrorOnce client *apiClient + + balloonInstalled bool } func NewProcess( @@ -440,13 +442,13 @@ func (p *Process) Create( telemetry.ReportEvent(ctx, "set fc entropy config") if freePageReporting { - err = p.client.enableFreePageReporting(ctx) - if err != nil { + if err := p.client.installBalloon(ctx, freePageReporting); err != nil { fcStopErr := p.Stop(ctx) - return errors.Join(fmt.Errorf("error enabling free page reporting: %w", err), fcStopErr) + return errors.Join(fmt.Errorf("error installing balloon device: %w", err), fcStopErr) } - telemetry.ReportEvent(ctx, "enabled free page reporting") + p.balloonInstalled = true + telemetry.ReportEvent(ctx, "installed balloon device") } err = p.client.startVM(ctx) @@ -710,6 +712,44 @@ func (p *Process) Pause(ctx context.Context) error { return p.client.pauseVM(ctx) } +// DrainBalloon triggers a free-page-hinting run and blocks until the guest +// acknowledges or ctx fires. No-op when the balloon wasn't installed. +func (p *Process) DrainBalloon(ctx context.Context) error { + if !p.balloonInstalled { + return nil + } + + ctx, span := tracer.Start(ctx, "drain-balloon") + defer span.End() + + if err := p.client.startBalloonHinting(ctx, true /* ackOnStop */); err != nil { + return fmt.Errorf("start balloon hinting: %w", err) + } + + backoff := 5 * time.Millisecond + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(backoff): + } + + host, guest, err := p.client.describeBalloonHinting(ctx) + if err != nil { + return fmt.Errorf("balloon hinting status: %w", err) + } + // host_cmd is monotonic and we just called start, so host > 0 + // after FC accepts it. Require it to guard against transient + // nil/zero responses returning a false-positive completion. + if host > 0 && guest >= host { + return nil + } + if backoff < 50*time.Millisecond { + backoff *= 2 + } + } +} + // CreateSnapshot VM needs to be paused before creating a snapshot. func (p *Process) CreateSnapshot(ctx context.Context, snapfilePath string) error { ctx, childSpan := tracer.Start(ctx, "create-snapshot-fc") diff --git a/packages/orchestrator/pkg/sandbox/reclaim.go b/packages/orchestrator/pkg/sandbox/reclaim.go new file mode 100644 index 0000000000..bab324ff1e --- /dev/null +++ b/packages/orchestrator/pkg/sandbox/reclaim.go @@ -0,0 +1,53 @@ +package sandbox + +import ( + "context" + "fmt" + "net/http" + "strconv" + "time" + + "connectrpc.com/connect" + "go.uber.org/zap" + + "github.com/e2b-dev/infra/packages/shared/pkg/consts" + "github.com/e2b-dev/infra/packages/shared/pkg/grpc" + "github.com/e2b-dev/infra/packages/shared/pkg/grpc/envd/process" + "github.com/e2b-dev/infra/packages/shared/pkg/grpc/envd/process/processconnect" + "github.com/e2b-dev/infra/packages/shared/pkg/logger" +) + +// Steps separated by ';' so each runs even if a previous one fails. On +// timeout envd kills bash; the in-flight syscall finishes and remaining +// steps are skipped. +const reclaimScript = `sync; echo 3 > /proc/sys/vm/drop_caches 2>/dev/null; echo 1 > /proc/sys/vm/compact_memory 2>/dev/null; fstrim -av 2>/dev/null` + +// bestEffortReclaim asks envd to reclaim guest memory + disk before pause. +// All failures are swallowed. +func (s *Sandbox) bestEffortReclaim(ctx context.Context, timeout time.Duration) { + ctx, span := tracer.Start(ctx, "envd-reclaim") + defer span.End() + + rcCtx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + addr := fmt.Sprintf("http://%s:%d", s.Slot.HostIPString(), consts.DefaultEnvdServerPort) + pc := processconnect.NewProcessClient(&http.Client{Transport: sandboxHttpClient.Transport}, addr) + + req := connect.NewRequest(&process.StartRequest{ + Process: &process.ProcessConfig{Cmd: "/bin/bash", Args: []string{"-c", reclaimScript}}, + }) + req.Header().Set("Connect-Timeout-Ms", strconv.FormatInt(int64(timeout/time.Millisecond), 10)) + grpc.SetUserHeader(req.Header(), "root") + + stream, err := pc.Start(rcCtx, req) + if err != nil { + logger.L().Warn(ctx, "envd reclaim failed", logger.WithSandboxID(s.Runtime.SandboxID), zap.Error(err)) + + return + } + defer stream.Close() + + for stream.Receive() { + } +} diff --git a/packages/orchestrator/pkg/sandbox/sandbox.go b/packages/orchestrator/pkg/sandbox/sandbox.go index 23ca0bda82..6555b6ec08 100644 --- a/packages/orchestrator/pkg/sandbox/sandbox.go +++ b/packages/orchestrator/pkg/sandbox/sandbox.go @@ -218,6 +218,8 @@ type Sandbox struct { files *storage.SandboxFiles cleanup *Cleanup + featureFlags *featureflags.Client + process *fc.Process cgroupHandle *cgroup.CgroupHandle @@ -458,7 +460,8 @@ func (f *Factory) CreateSandbox( files: sandboxFiles, process: fcHandle, - cleanup: cleanup, + cleanup: cleanup, + featureFlags: f.featureFlags, APIStoredConfig: apiConfigToStore, @@ -799,7 +802,8 @@ func (f *Factory) ResumeSandbox( files: sandboxFiles, process: fcHandle, - cleanup: cleanup, + cleanup: cleanup, + featureFlags: f.featureFlags, APIStoredConfig: apiConfigToStore, CABundle: f.egressProxy.CABundle(), @@ -1053,6 +1057,19 @@ func (s *Sandbox) Pause( // Stop the health check before pausing the VM s.Checks.Stop() + // Best-effort pre-pause guest reclaim, then FPH drain. Both run on the + // live VM and are non-fatal. Timeout=0 disables the step. + if t := time.Duration(s.featureFlags.IntFlag(ctx, featureflags.ReclaimOnPauseTimeoutMs)) * time.Millisecond; t > 0 { + s.bestEffortReclaim(ctx, t) + } + if t := time.Duration(s.featureFlags.IntFlag(ctx, featureflags.FreePageHintingTimeoutMs)) * time.Millisecond; t > 0 { + drainCtx, cancel := context.WithTimeout(ctx, t) + if err := s.process.DrainBalloon(drainCtx); err != nil { + telemetry.ReportError(ctx, "balloon hinting drain failed (continuing pause)", err) + } + cancel() + } + if err := s.process.Pause(ctx); err != nil { return nil, fmt.Errorf("failed to pause VM: %w", err) } diff --git a/packages/shared/pkg/featureflags/flags.go b/packages/shared/pkg/featureflags/flags.go index 27cf349da8..72f02b8d6b 100644 --- a/packages/shared/pkg/featureflags/flags.go +++ b/packages/shared/pkg/featureflags/flags.go @@ -158,7 +158,11 @@ var ( BestOfKMaxOvercommit = NewIntFlag("best-of-k-max-overcommit", 400) // Default R=4 (stored as percentage, max over-commit ratio) BestOfKAlpha = NewIntFlag("best-of-k-alpha", 50) // Default Alpha=0.5 (stored as percentage for int flag, current usage weight) EnvdInitTimeoutMilliseconds = NewIntFlag("envd-init-request-timeout-milliseconds", 50) // Timeout for envd init request in milliseconds - HostStatsSamplingInterval = NewIntFlag("host-stats-sampling-interval", 5000) // Host stats sampling interval in milliseconds (default 5s) + // 0 disables the step. Both default off; operator opts in once the host + // kernel has the FPH race fix and the fleet is ready. + FreePageHintingTimeoutMs = NewIntFlag("free-page-hinting-timeout-ms", 0) + ReclaimOnPauseTimeoutMs = NewIntFlag("reclaim-on-pause-timeout-ms", 0) + HostStatsSamplingInterval = NewIntFlag("host-stats-sampling-interval", 5000) // Host stats sampling interval in milliseconds (default 5s) MaxCacheWriterConcurrencyFlag = NewIntFlag("max-cache-writer-concurrency", 10) // BuildCacheMaxUsagePercentage the maximum percentage of the cache disk storage