Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions packages/orchestrator/pkg/sandbox/fc/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,33 @@ func (c *apiClient) startVM(ctx context.Context) error {
return nil
}

// installBalloon attaches a zero-MiB balloon device. Individual balloon
// features (free-page-reporting today, free-page-hinting next) are toggled
// via parameters so callers can opt in to any subset independently.
func (c *apiClient) installBalloon(ctx context.Context, freePageReporting bool) error {
ctx, span := tracer.Start(ctx, "install-balloon")
defer span.End()

amountMib := int64(0)
deflateOnOom := false

balloonConfig := operations.PutBalloonParams{
Context: ctx,
Body: &models.Balloon{
AmountMib: &amountMib,
DeflateOnOom: &deflateOnOom,
FreePageReporting: freePageReporting,
},
}

_, err := c.client.Operations.PutBalloon(&balloonConfig)
if err != nil {
return fmt.Errorf("error installing balloon device: %w", err)
}

return nil
}

func (c *apiClient) memoryMapping(ctx context.Context) (*memory.Mapping, error) {
params := operations.GetMemoryMappingsParams{
Context: ctx,
Expand Down
11 changes: 11 additions & 0 deletions packages/orchestrator/pkg/sandbox/fc/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ func (p *Process) Create(
vCPUCount int64,
memoryMB int64,
hugePages bool,
freePageReporting bool,
options ProcessOptions,
txRateLimit RateLimiterConfig,
driveRateLimit RateLimiterConfig,
Expand Down Expand Up @@ -441,6 +442,16 @@ func (p *Process) Create(
}
telemetry.ReportEvent(ctx, "set fc entropy config")

if freePageReporting {
err = p.client.installBalloon(ctx, freePageReporting)
if err != nil {
fcStopErr := p.Stop(ctx)

return errors.Join(fmt.Errorf("error installing balloon device: %w", err), fcStopErr)
}
telemetry.ReportEvent(ctx, "installed balloon device")
}

err = p.client.startVM(ctx)
if err != nil {
fcStopErr := p.Stop(ctx)
Expand Down
6 changes: 4 additions & 2 deletions packages/orchestrator/pkg/sandbox/sandbox.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,9 @@ type Config struct {
RamMB int64

// TotalDiskSizeMB optional, now used only for metrics.
TotalDiskSizeMB int64
HugePages bool
TotalDiskSizeMB int64
HugePages bool
FreePageReporting bool

Envd EnvdMetadata

Expand Down Expand Up @@ -495,6 +496,7 @@ func (f *Factory) CreateSandbox(
config.Vcpu,
config.RamMB,
config.HugePages,
config.FreePageReporting,
processOptions,
fc.RateLimiterConfig{
Ops: fc.TokenBucketConfig(throttleConfig.Ops),
Expand Down
25 changes: 24 additions & 1 deletion packages/orchestrator/pkg/sandbox/uffd/uffd.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,30 @@ func (u *Uffd) Exit() *utils.ErrorOnce {
//
// It *MUST* be only called after the sandbox was successfully paused via API and after the snapshot endpoint was called.
func (u *Uffd) DiffMetadata(ctx context.Context, f *fc.Process) (*header.DiffMetadata, error) {
return f.DirtyMemory(ctx, u.memfile.BlockSize())
handler, err := u.handler.WaitWithContext(ctx)
if err != nil {
return nil, fmt.Errorf("failed to get uffd: %w", err)
}

// Settle in-flight UFFD workers (and the REMOVE batch) before sampling
// FC's WP-async pagemap, so a Zero→Write install can't slip in between
// and escape both bitmaps.
_, empty := handler.ExportPageStates()

diff, err := f.DirtyMemory(ctx, u.memfile.BlockSize())
if err != nil {
return nil, fmt.Errorf("failed to get dirty memory: %w", err)
}

// Pages that were zero-installed and later written show up in diff.Dirty
// via WP-async, so dirty wins over empty for those.
empty.AndNot(diff.Dirty)
Comment thread
claude[bot] marked this conversation as resolved.

return &header.DiffMetadata{
BlockSize: diff.BlockSize,
Dirty: diff.Dirty,
Empty: empty,
}, nil
}

// PrefetchData returns page fault data for prefetch mapping.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"time"
"unsafe"

"github.com/RoaringBitmap/roaring/v2"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
Expand Down Expand Up @@ -136,6 +137,19 @@ func NewUserfaultfdFromFd(fd uintptr, src block.Slicer, m *memory.Mapping, logge
return u, nil
}

// ExportPageStates returns snapshots of the faulted and removed page-index
// bitmaps after draining in-flight serve-loop iterations and workers.
// Lock order matches the serve loop to avoid AB-BA inversion.
func (u *Userfaultfd) ExportPageStates() (faulted, removed *roaring.Bitmap) {
Comment thread
ValentaTomas marked this conversation as resolved.
u.readSerial.Lock()
defer u.readSerial.Unlock()

u.settleRequests.Lock()
defer u.settleRequests.Unlock()

return u.pageTracker.Export()
}
Comment thread
ValentaTomas marked this conversation as resolved.

func (u *Userfaultfd) readEvents(ctx context.Context) ([]*UffdRemove, []*UffdPagefault, error) {
buf := make([]byte, unsafe.Sizeof(UffdMsg{}))

Expand Down Expand Up @@ -406,7 +420,13 @@ func (u *Userfaultfd) Serve(

switch outcome {
case faultInstalled:
u.pageTracker.SetRange(idx, idx+1, block.Dirty)
// Zero-fill on a read fault installs zero+WP; the page still
// reads as zero, so keep the tracker entry as Zero so the
// snapshot diff marks it Empty. WP-async will catch any
// later write and surface it via DirtyMemory.
if source != nil || accessType == block.Write {
u.pageTracker.SetRange(idx, idx+1, block.Dirty)
}
u.prefetchTracker.Add(offset, accessType)
case faultDeferred:
deferred.push(pf)
Expand Down
3 changes: 3 additions & 0 deletions packages/orchestrator/pkg/template/build/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ type TemplateConfig struct {
// HugePages sets whether the VM use huge pages.
HugePages bool

// FreePageReporting enables Firecracker's balloon free-page-reporting.
FreePageReporting bool

// Command to run to check if the template is ready.
ReadyCmd string

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,9 +200,10 @@ func (bb *BaseBuilder) buildLayerFromOCI(

// Allow sandbox internet access during provisioning (nil network = no restrictions).
baseSbxConfig := sandbox.NewConfig(sandbox.Config{
Vcpu: bb.Config.VCpuCount,
RamMB: bb.Config.MemoryMB,
HugePages: bb.Config.HugePages,
Vcpu: bb.Config.VCpuCount,
RamMB: bb.Config.MemoryMB,
HugePages: bb.Config.HugePages,
FreePageReporting: bb.Config.FreePageReporting,

Envd: sandbox.EnvdMetadata{
Version: bb.EnvdVersion,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,10 @@ func (ppb *PostProcessingBuilder) Build(

// Configure sandbox for final layer
sbxConfig := sandbox.NewConfig(sandbox.Config{
Vcpu: ppb.Config.VCpuCount,
RamMB: ppb.Config.MemoryMB,
HugePages: ppb.Config.HugePages,
Vcpu: ppb.Config.VCpuCount,
RamMB: ppb.Config.MemoryMB,
HugePages: ppb.Config.HugePages,
FreePageReporting: ppb.Config.FreePageReporting,

Envd: sandbox.EnvdMetadata{
Version: ppb.EnvdVersion,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,10 @@ func (sb *StepBuilder) Build(
step := sb.step

sbxConfig := sandbox.NewConfig(sandbox.Config{
Vcpu: sb.Config.VCpuCount,
RamMB: sb.Config.MemoryMB,
HugePages: sb.Config.HugePages,
Vcpu: sb.Config.VCpuCount,
RamMB: sb.Config.MemoryMB,
HugePages: sb.Config.HugePages,
FreePageReporting: sb.Config.FreePageReporting,

Envd: sandbox.EnvdMetadata{
Version: sb.EnvdVersion,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ func (s *ServerStore) TemplateCreate(ctx context.Context, templateRequest *templ
return nil, fmt.Errorf("invalid resolved firecracker version %q: %w", firecrackerVersion, err)
}
hugePages := fcInfo.HasHugePages()
freePageReporting := fcInfo.HasFreePageReporting() && s.featureFlags.BoolFlag(ctx, featureflags.FreePageReportingFlag)
Comment thread
cursor[bot] marked this conversation as resolved.

childSpan.SetAttributes(
telemetry.WithTemplateID(cfg.GetTemplateID()),
Comment thread
ValentaTomas marked this conversation as resolved.
Expand All @@ -75,6 +76,7 @@ func (s *ServerStore) TemplateCreate(ctx context.Context, templateRequest *templ
attribute.Int64("env.memory_mb", int64(cfg.GetMemoryMB())),
attribute.Int64("env.vcpu_count", int64(cfg.GetVCpuCount())),
attribute.Bool("env.huge_pages", hugePages),
attribute.Bool("env.free_page_reporting", freePageReporting),
)

template := config.TemplateConfig{
Expand All @@ -88,6 +90,7 @@ func (s *ServerStore) TemplateCreate(ctx context.Context, templateRequest *templ
ReadyCmd: cfg.GetReadyCommand(),
DiskSizeMB: int64(cfg.GetDiskSizeMB()),
HugePages: hugePages,
FreePageReporting: freePageReporting,
FromImage: cfg.GetFromImage(),
FromTemplate: cfg.GetFromTemplate(),
RegistryAuthProvider: authProvider,
Expand Down
4 changes: 4 additions & 0 deletions packages/shared/pkg/fcversion/sandbox_features.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,7 @@ func (v *Info) HasHugePages() bool {

return false
}

func (v *Info) HasFreePageReporting() bool {
return v.lastReleaseVersion.Major() > 1 || (v.lastReleaseVersion.Major() == 1 && v.lastReleaseVersion.Minor() >= 14)
}
Comment thread
cursor[bot] marked this conversation as resolved.
1 change: 1 addition & 0 deletions packages/shared/pkg/featureflags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ var (
ExecutionMetricsOnWebhooksFlag = NewBoolFlag("execution-metrics-on-webhooks", false) // TODO: Remove NLT 20250315
SandboxLabelBasedSchedulingFlag = NewBoolFlag("sandbox-label-based-scheduling", false)
OptimisticResourceAccountingFlag = NewBoolFlag("sandbox-placement-optimistic-resource-accounting", false)
FreePageReportingFlag = NewBoolFlag("free-page-reporting", false)

NetworkTransformRulesFlag = NewBoolFlag("network-transform-rules", env.IsDevelopment())
)
Expand Down
Loading