diff --git a/packages/orchestrator/pkg/sandbox/fc/client.go b/packages/orchestrator/pkg/sandbox/fc/client.go
index f5c9eb30d0..b6cf4ff34e 100644
--- a/packages/orchestrator/pkg/sandbox/fc/client.go
+++ b/packages/orchestrator/pkg/sandbox/fc/client.go
@@ -427,6 +427,35 @@ func (c *apiClient) startVM(ctx context.Context) error {
 	return nil
 }
 
+// installBalloon attaches a balloon device with a target size of zero MiB
+// and deflate-on-OOM disabled by issuing a PutBalloon request. Individual
+// balloon features (free-page-reporting today, free-page-hinting next) are
+// toggled via parameters so callers can opt in to any subset independently.
+// Any error from the API call is returned wrapped.
+func (c *apiClient) installBalloon(ctx context.Context, freePageReporting bool) error {
+	ctx, span := tracer.Start(ctx, "install-balloon")
+	defer span.End()
+
+	amountMib := int64(0)
+	deflateOnOom := false
+
+	balloonConfig := operations.PutBalloonParams{
+		Context: ctx,
+		Body: &models.Balloon{
+			AmountMib:         &amountMib,
+			DeflateOnOom:      &deflateOnOom,
+			FreePageReporting: freePageReporting,
+		},
+	}
+
+	_, err := c.client.Operations.PutBalloon(&balloonConfig)
+	if err != nil {
+		return fmt.Errorf("error putting balloon config: %w", err)
+	}
+
+	return nil
+}
+
 func (c *apiClient) memoryMapping(ctx context.Context) (*memory.Mapping, error) {
 	params := operations.GetMemoryMappingsParams{
 		Context: ctx,
diff --git a/packages/orchestrator/pkg/sandbox/fc/process.go b/packages/orchestrator/pkg/sandbox/fc/process.go
index 0a91b411b2..c2bbb7caf7 100644
--- a/packages/orchestrator/pkg/sandbox/fc/process.go
+++ b/packages/orchestrator/pkg/sandbox/fc/process.go
@@ -299,6 +299,7 @@ func (p *Process) Create(
 	vCPUCount int64,
 	memoryMB int64,
 	hugePages bool,
+	freePageReporting bool,
 	options ProcessOptions,
 	txRateLimit RateLimiterConfig,
 	driveRateLimit RateLimiterConfig,
@@ -441,6 +442,16 @@ func (p *Process) Create(
 	}
 	telemetry.ReportEvent(ctx, "set fc entropy config")
 
+	if freePageReporting {
+		err = p.client.installBalloon(ctx, freePageReporting)
+		if err != nil {
+			fcStopErr := p.Stop(ctx)
+
+			return errors.Join(fmt.Errorf("error installing balloon device: %w", err), fcStopErr)
+		}
+		telemetry.ReportEvent(ctx, "installed balloon device")
+	}
+
 	err = p.client.startVM(ctx)
 	if err != nil {
 		fcStopErr := p.Stop(ctx)
diff --git a/packages/orchestrator/pkg/sandbox/sandbox.go b/packages/orchestrator/pkg/sandbox/sandbox.go
index 94f2ed7467..da0269b60f 100644
--- a/packages/orchestrator/pkg/sandbox/sandbox.go
+++ b/packages/orchestrator/pkg/sandbox/sandbox.go
@@ -68,8 +68,9 @@ type Config struct {
 	RamMB int64
 
 	// TotalDiskSizeMB optional, now used only for metrics.
-	TotalDiskSizeMB int64
-	HugePages       bool
+	TotalDiskSizeMB   int64
+	HugePages         bool
+	FreePageReporting bool
 
 	Envd EnvdMetadata
 
@@ -495,6 +496,7 @@ func (f *Factory) CreateSandbox(
 		config.Vcpu,
 		config.RamMB,
 		config.HugePages,
+		config.FreePageReporting,
 		processOptions,
 		fc.RateLimiterConfig{
 			Ops: fc.TokenBucketConfig(throttleConfig.Ops),
diff --git a/packages/orchestrator/pkg/sandbox/uffd/uffd.go b/packages/orchestrator/pkg/sandbox/uffd/uffd.go
index 5fe3e5370b..2c0ee0fc63 100644
--- a/packages/orchestrator/pkg/sandbox/uffd/uffd.go
+++ b/packages/orchestrator/pkg/sandbox/uffd/uffd.go
@@ -217,7 +217,32 @@ func (u *Uffd) Exit() *utils.ErrorOnce {
 //
 // It *MUST* be only called after the sandbox was successfully paused via API and after the snapshot endpoint was called.
 func (u *Uffd) DiffMetadata(ctx context.Context, f *fc.Process) (*header.DiffMetadata, error) {
-	return f.DirtyMemory(ctx, u.memfile.BlockSize())
+	handler, err := u.handler.WaitWithContext(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get uffd: %w", err)
+	}
+
+	// Settle in-flight UFFD workers (and the REMOVE batch) before sampling
+	// FC's WP-async pagemap, so a Zero→Write install can't slip in between
+	// and escape both bitmaps. Only the second bitmap returned by
+	// ExportPageStates (declared there as "removed") is used here: it seeds
+	// the Empty set of the returned diff.
+	_, empty := handler.ExportPageStates()
+
+	diff, err := f.DirtyMemory(ctx, u.memfile.BlockSize())
+	if err != nil {
+		return nil, fmt.Errorf("failed to get dirty memory: %w", err)
+	}
+
+	// Pages that were zero-installed and later written show up in diff.Dirty
+	// via WP-async, so dirty wins over empty for those.
+	empty.AndNot(diff.Dirty)
+
+	return &header.DiffMetadata{
+		BlockSize: diff.BlockSize,
+		Dirty:     diff.Dirty,
+		Empty:     empty,
+	}, nil
 }
 
 // PrefetchData returns page fault data for prefetch mapping.
diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go
index 2dcb7534a1..1fbc0182fe 100644
--- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go
+++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go
@@ -11,6 +11,7 @@ import (
 	"time"
 	"unsafe"
 
+	"github.com/RoaringBitmap/roaring/v2"
 	"go.opentelemetry.io/otel"
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/trace"
@@ -136,6 +137,21 @@ func NewUserfaultfdFromFd(fd uintptr, src block.Slicer, m *memory.Mapping, logge
 	return u, nil
 }
 
+// ExportPageStates returns snapshots of the faulted and removed page-index
+// bitmaps after draining in-flight serve-loop iterations and workers.
+// It acquires readSerial first and settleRequests second, and holds both
+// until pageTracker.Export has returned. Lock order matches the serve loop
+// to avoid AB-BA inversion.
+func (u *Userfaultfd) ExportPageStates() (faulted, removed *roaring.Bitmap) {
+	u.readSerial.Lock()
+	defer u.readSerial.Unlock()
+
+	u.settleRequests.Lock()
+	defer u.settleRequests.Unlock()
+
+	return u.pageTracker.Export()
+}
+
 func (u *Userfaultfd) readEvents(ctx context.Context) ([]*UffdRemove, []*UffdPagefault, error) {
 	buf := make([]byte, unsafe.Sizeof(UffdMsg{}))
 
@@ -406,7 +422,13 @@ func (u *Userfaultfd) Serve(
 
 			switch outcome {
 			case faultInstalled:
-				u.pageTracker.SetRange(idx, idx+1, block.Dirty)
+				// Zero-fill on a read fault installs zero+WP; the page still
+				// reads as zero, so keep the tracker entry as Zero so the
+				// snapshot diff marks it Empty. WP-async will catch any
+				// later write and surface it via DirtyMemory.
+				if source != nil || accessType == block.Write {
+					u.pageTracker.SetRange(idx, idx+1, block.Dirty)
+				}
 				u.prefetchTracker.Add(offset, accessType)
 			case faultDeferred:
 				deferred.push(pf)
diff --git a/packages/orchestrator/pkg/template/build/config/config.go b/packages/orchestrator/pkg/template/build/config/config.go
index 713de95ec0..825f9021bf 100644
--- a/packages/orchestrator/pkg/template/build/config/config.go
+++ b/packages/orchestrator/pkg/template/build/config/config.go
@@ -41,6 +41,9 @@ type TemplateConfig struct {
 	// HugePages sets whether the VM use huge pages.
 	HugePages bool
 
+	// FreePageReporting enables Firecracker's balloon free-page-reporting.
+	FreePageReporting bool
+
 	// Command to run to check if the template is ready.
 	ReadyCmd string
 
diff --git a/packages/orchestrator/pkg/template/build/phases/base/builder.go b/packages/orchestrator/pkg/template/build/phases/base/builder.go
index b986d4de1c..7638d9b1cf 100644
--- a/packages/orchestrator/pkg/template/build/phases/base/builder.go
+++ b/packages/orchestrator/pkg/template/build/phases/base/builder.go
@@ -200,9 +200,10 @@ func (bb *BaseBuilder) buildLayerFromOCI(
 
 	// Allow sandbox internet access during provisioning (nil network = no restrictions).
 	baseSbxConfig := sandbox.NewConfig(sandbox.Config{
-		Vcpu:      bb.Config.VCpuCount,
-		RamMB:     bb.Config.MemoryMB,
-		HugePages: bb.Config.HugePages,
+		Vcpu:              bb.Config.VCpuCount,
+		RamMB:             bb.Config.MemoryMB,
+		HugePages:         bb.Config.HugePages,
+		FreePageReporting: bb.Config.FreePageReporting,
 
 		Envd: sandbox.EnvdMetadata{
 			Version: bb.EnvdVersion,
diff --git a/packages/orchestrator/pkg/template/build/phases/finalize/builder.go b/packages/orchestrator/pkg/template/build/phases/finalize/builder.go
index 998aca413f..1cc0cb19cd 100644
--- a/packages/orchestrator/pkg/template/build/phases/finalize/builder.go
+++ b/packages/orchestrator/pkg/template/build/phases/finalize/builder.go
@@ -149,9 +149,10 @@ func (ppb *PostProcessingBuilder) Build(
 
 	// Configure sandbox for final layer
 	sbxConfig := sandbox.NewConfig(sandbox.Config{
-		Vcpu:      ppb.Config.VCpuCount,
-		RamMB:     ppb.Config.MemoryMB,
-		HugePages: ppb.Config.HugePages,
+		Vcpu:              ppb.Config.VCpuCount,
+		RamMB:             ppb.Config.MemoryMB,
+		HugePages:         ppb.Config.HugePages,
+		FreePageReporting: ppb.Config.FreePageReporting,
 
 		Envd: sandbox.EnvdMetadata{
 			Version: ppb.EnvdVersion,
diff --git a/packages/orchestrator/pkg/template/build/phases/steps/builder.go b/packages/orchestrator/pkg/template/build/phases/steps/builder.go
index 0ff423e0b8..507312c5ac 100644
--- a/packages/orchestrator/pkg/template/build/phases/steps/builder.go
+++ b/packages/orchestrator/pkg/template/build/phases/steps/builder.go
@@ -163,9 +163,10 @@ func (sb *StepBuilder) Build(
 	step := sb.step
 
 	sbxConfig := sandbox.NewConfig(sandbox.Config{
-		Vcpu:      sb.Config.VCpuCount,
-		RamMB:     sb.Config.MemoryMB,
-		HugePages: sb.Config.HugePages,
+		Vcpu:              sb.Config.VCpuCount,
+		RamMB:             sb.Config.MemoryMB,
+		HugePages:         sb.Config.HugePages,
+		FreePageReporting: sb.Config.FreePageReporting,
 
 		Envd: sandbox.EnvdMetadata{
 			Version: sb.EnvdVersion,
diff --git a/packages/orchestrator/pkg/template/server/create_template.go b/packages/orchestrator/pkg/template/server/create_template.go
index 0e179709a0..a049548001 100644
--- a/packages/orchestrator/pkg/template/server/create_template.go
+++ b/packages/orchestrator/pkg/template/server/create_template.go
@@ -65,6 +65,7 @@ func (s *ServerStore) TemplateCreate(ctx context.Context, templateRequest *templ
 		return nil, fmt.Errorf("invalid resolved firecracker version %q: %w", firecrackerVersion, err)
 	}
 	hugePages := fcInfo.HasHugePages()
+	freePageReporting := fcInfo.HasFreePageReporting() && s.featureFlags.BoolFlag(ctx, featureflags.FreePageReportingFlag)
 
 	childSpan.SetAttributes(
 		telemetry.WithTemplateID(cfg.GetTemplateID()),
@@ -75,6 +76,7 @@ func (s *ServerStore) TemplateCreate(ctx context.Context, templateRequest *templ
 		attribute.Int64("env.memory_mb", int64(cfg.GetMemoryMB())),
 		attribute.Int64("env.vcpu_count", int64(cfg.GetVCpuCount())),
 		attribute.Bool("env.huge_pages", hugePages),
+		attribute.Bool("env.free_page_reporting", freePageReporting),
 	)
 
 	template := config.TemplateConfig{
@@ -88,6 +90,7 @@ func (s *ServerStore) TemplateCreate(ctx context.Context, templateRequest *templ
 		ReadyCmd:             cfg.GetReadyCommand(),
 		DiskSizeMB:           int64(cfg.GetDiskSizeMB()),
 		HugePages:            hugePages,
+		FreePageReporting:    freePageReporting,
 		FromImage:            cfg.GetFromImage(),
 		FromTemplate:         cfg.GetFromTemplate(),
 		RegistryAuthProvider: authProvider,
diff --git a/packages/shared/pkg/fcversion/sandbox_features.go b/packages/shared/pkg/fcversion/sandbox_features.go
index f6fc9e19a5..b768f199d4 100644
--- a/packages/shared/pkg/fcversion/sandbox_features.go
+++ b/packages/shared/pkg/fcversion/sandbox_features.go
@@ -7,3 +7,9 @@ func (v *Info) HasHugePages() bool {
 
 	return false
 }
+
+// HasFreePageReporting reports whether the last release version is at least
+// 1.14: any major version above 1, or major 1 with a minor of 14 or higher.
+func (v *Info) HasFreePageReporting() bool {
+	return v.lastReleaseVersion.Major() > 1 || (v.lastReleaseVersion.Major() == 1 && v.lastReleaseVersion.Minor() >= 14)
+}
diff --git a/packages/shared/pkg/featureflags/flags.go b/packages/shared/pkg/featureflags/flags.go
index f390e4fe90..ec6b8b6883 100644
--- a/packages/shared/pkg/featureflags/flags.go
+++ b/packages/shared/pkg/featureflags/flags.go
@@ -122,6 +122,7 @@ var (
 	ExecutionMetricsOnWebhooksFlag   = NewBoolFlag("execution-metrics-on-webhooks", false) // TODO: Remove NLT 20250315
 	SandboxLabelBasedSchedulingFlag  = NewBoolFlag("sandbox-label-based-scheduling", false)
 	OptimisticResourceAccountingFlag = NewBoolFlag("sandbox-placement-optimistic-resource-accounting", false)
+	FreePageReportingFlag            = NewBoolFlag("free-page-reporting", false)
 	NetworkTransformRulesFlag        = NewBoolFlag("network-transform-rules", env.IsDevelopment())
 )
 