diff --git a/.github/workflows/pr-tests-arm64.yml b/.github/workflows/pr-tests-arm64.yml index c7aeef787f..793872ef6b 100644 --- a/.github/workflows/pr-tests-arm64.yml +++ b/.github/workflows/pr-tests-arm64.yml @@ -117,7 +117,13 @@ jobs: - name: Run tests that require sudo working-directory: ${{ matrix.package }} - run: sudo -E `which go` test -v -timeout 20m ${{ matrix.test_path }} + # -parallel=4 caps concurrent t.Parallel() tests as a runner-resource + # cap: heavyweight cross-process UFFD tests in packages/orchestrator + # each fork a child handler process and we don't want N-CPU copies + # racing for the same kernel resources. This is NOT a workaround for + # the stale-source race in pkg/sandbox/uffd/userfaultfd — that is + # fixed and covered by the deterministic race tests in race_test.go. + run: sudo -E `which go` test -v -timeout 10m -parallel 4 ${{ matrix.test_path }} if: matrix.sudo == true - name: Compile test binaries @@ -127,5 +133,5 @@ jobs: - name: Run tests working-directory: ${{ matrix.package }} - run: go test -v -timeout 20m ${{ matrix.test_path }} + run: go test -v -timeout 10m -parallel 4 ${{ matrix.test_path }} if: matrix.sudo == false diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index eefa3cee12..bc959093a4 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -81,12 +81,18 @@ jobs: working-directory: ${{ matrix.package }} run: | # Run tests. The '-E' flag is required to allow root to use the correct cache path. - sudo -E `which go` test -v -timeout 20m ${{ matrix.test_path }} + # -parallel=4 caps concurrent t.Parallel() tests as a runner-resource + # cap: heavyweight cross-process UFFD tests in packages/orchestrator + # each fork a child handler process and we don't want N-CPU copies + # racing for the same kernel resources. 
This is NOT a workaround for + # the stale-source race in pkg/sandbox/uffd/userfaultfd — that is + # fixed and covered by the deterministic race tests in race_test.go. + sudo -E `which go` test -v -timeout 10m -parallel 4 ${{ matrix.test_path }} if: matrix.sudo == true - name: Run tests working-directory: ${{ matrix.package }} - run: go test -v -timeout 20m ${{ matrix.test_path }} + run: go test -v -timeout 10m -parallel 4 ${{ matrix.test_path }} if: matrix.sudo == false validate-iac: diff --git a/packages/api/internal/template-manager/create_template.go b/packages/api/internal/template-manager/create_template.go index c09ed47e0c..c35a3217ce 100644 --- a/packages/api/internal/template-manager/create_template.go +++ b/packages/api/internal/template-manager/create_template.go @@ -15,6 +15,7 @@ import ( "github.com/e2b-dev/infra/packages/api/internal/utils" "github.com/e2b-dev/infra/packages/db/pkg/types" "github.com/e2b-dev/infra/packages/shared/pkg/fcversion" + "github.com/e2b-dev/infra/packages/shared/pkg/featureflags" templatemanagergrpc "github.com/e2b-dev/infra/packages/shared/pkg/grpc/template-manager" "github.com/e2b-dev/infra/packages/shared/pkg/id" "github.com/e2b-dev/infra/packages/shared/pkg/logger" @@ -108,6 +109,8 @@ func (tm *TemplateManager) CreateTemplate( return fmt.Errorf("failed to convert image registry: %w", err) } + freePageReporting := features.HasFreePageReporting() && tm.featureFlags.BoolFlag(ctx, featureflags.FreePageReportingFlag, featureflags.TeamContext(teamID.String())) + // TODO(ENG-3852): Remove later. KernelVersion and FirecrackerVersion are deprecated on // template-manager selects its own versions and reports the ones it actually // used via TemplateBuildMetadata. 
They are still populated here for @@ -122,6 +125,7 @@ func (tm *TemplateManager) CreateTemplate( KernelVersion: kernelVersion, FirecrackerVersion: firecrackerVersion, HugePages: features.HasHugePages(), + FreePageReporting: &freePageReporting, StartCommand: startCmd, ReadyCommand: readyCmd, Force: force, diff --git a/packages/orchestrator/cmd/create-build/main.go b/packages/orchestrator/cmd/create-build/main.go index dfbcca6779..f04c505d54 100644 --- a/packages/orchestrator/cmd/create-build/main.go +++ b/packages/orchestrator/cmd/create-build/main.go @@ -63,6 +63,7 @@ func main() { memory := flag.Int("memory", 1024, "memory MB") disk := flag.Int("disk", 1024, "disk MB") hugePages := flag.Bool("hugepages", true, "use 2MB huge pages for memory (false = 4KB pages)") + freePageReporting := flag.Bool("free-page-reporting", false, "enable free page reporting via balloon device (requires Firecracker v1.14+)") startCmd := flag.String("start-cmd", "", "start command") setupCmd := flag.String("setup-cmd", "", "setup command to run during build (e.g., install deps)") readyCmd := flag.String("ready-cmd", "", "ready check command") @@ -100,7 +101,16 @@ func main() { log.Fatalf("network config: %v", err) } - err = doBuild(ctx, *templateID, *toBuild, *fromBuild, *kernel, *fc, *vcpu, *memory, *disk, *hugePages, *startCmd, *setupCmd, *readyCmd, localMode, *verbose, *timeout, builderConfig, networkConfig) + // Detect if --free-page-reporting was explicitly set; if not, pass nil so + // doBuild can default based on the Firecracker version. 
+ var fprOverride *bool + flag.Visit(func(f *flag.Flag) { + if f.Name == "free-page-reporting" { + fprOverride = freePageReporting + } + }) + + err = doBuild(ctx, *templateID, *toBuild, *fromBuild, *kernel, *fc, *vcpu, *memory, *disk, *hugePages, fprOverride, *startCmd, *setupCmd, *readyCmd, localMode, *verbose, *timeout, builderConfig, networkConfig) if err != nil { log.Fatal(err) } @@ -199,9 +209,10 @@ func setupEnv(ctx context.Context, storagePath, sandboxDir, kernel, fc string, l func doBuild( parentCtx context.Context, - templateID, buildID, fromBuild, kernel, fc string, + templateID, buildID, fromBuild, kernel, fcVersion string, vcpu, memory, disk int, hugePages bool, + freePageReporting *bool, startCmd, setupCmd, readyCmd string, localMode, verbose bool, timeout int, @@ -341,6 +352,18 @@ func doBuild( }) } + // Default FPR to enabled when the FC version supports it (v1.14+); explicit flag overrides. + var fprEnabled bool + if freePageReporting != nil { + fprEnabled = *freePageReporting + } else { + versionOnly, _, _ := strings.Cut(fcVersion, "_") + supported, err := utils.IsGTEVersion(versionOnly, "v1.14.0") + if err == nil { + fprEnabled = supported + } + } + tmpl := config.TemplateConfig{ Version: templates.TemplateV2LatestVersion, TemplateID: templateID, @@ -349,10 +372,11 @@ func doBuild( MemoryMB: int64(memory), DiskSizeMB: int64(disk), HugePages: hugePages, + FreePageReporting: fprEnabled, StartCmd: startCmd, ReadyCmd: readyCmd, KernelVersion: kernel, - FirecrackerVersion: fc, + FirecrackerVersion: fcVersion, Steps: steps, } diff --git a/packages/orchestrator/cmd/smoketest/smoke_test.go b/packages/orchestrator/cmd/smoketest/smoke_test.go index 5134c6379b..7e90e7271a 100644 --- a/packages/orchestrator/cmd/smoketest/smoke_test.go +++ b/packages/orchestrator/cmd/smoketest/smoke_test.go @@ -40,6 +40,7 @@ import ( sbxlogger "github.com/e2b-dev/infra/packages/shared/pkg/logger/sandbox" "github.com/e2b-dev/infra/packages/shared/pkg/storage" 
"github.com/e2b-dev/infra/packages/shared/pkg/templates" + "github.com/e2b-dev/infra/packages/shared/pkg/utils" ) const ( @@ -71,11 +72,14 @@ func TestSmokeAllFCVersions(t *testing.T) { //nolint:paralleltest // subtests sh defer infra.close(ctx) for fcMajor, fcVersion := range featureflags.FirecrackerVersionMap { //nolint:paralleltest // sequential by design + versionOnly, _, _ := strings.Cut(fcVersion, "_") + fpr, _ := utils.IsGTEVersion(versionOnly, "v1.14.0") + t.Run("fc-"+fcMajor, func(t *testing.T) { buildID := uuid.New().String() // Phase 1: create build - t.Logf("creating build %s with FC %s", buildID, fcVersion) + t.Logf("creating build %s with FC %s (freePageReporting=%v)", buildID, fcVersion, fpr) force := true _, err := infra.builder.Build( ctx, @@ -88,6 +92,7 @@ func TestSmokeAllFCVersions(t *testing.T) { //nolint:paralleltest // subtests sh MemoryMB: 512, DiskSizeMB: 512, HugePages: true, + FreePageReporting: fpr, KernelVersion: featureflags.DefaultKernelVersion, FirecrackerVersion: fcVersion, FromImage: baseImage, @@ -111,10 +116,11 @@ func TestSmokeAllFCVersions(t *testing.T) { //nolint:paralleltest // subtests sh ctx, tmpl, sandbox.NewConfig(sandbox.Config{ - BaseTemplateID: "smoke-" + fcMajor, - Vcpu: 2, - RamMB: 512, - HugePages: true, + BaseTemplateID: "smoke-" + fcMajor, + Vcpu: 2, + RamMB: 512, + HugePages: true, + FreePageReporting: fpr, Envd: sandbox.EnvdMetadata{ Vars: map[string]string{}, AccessToken: &token, diff --git a/packages/orchestrator/pkg/sandbox/fc/client.go b/packages/orchestrator/pkg/sandbox/fc/client.go index f5c9eb30d0..d35d28040c 100644 --- a/packages/orchestrator/pkg/sandbox/fc/client.go +++ b/packages/orchestrator/pkg/sandbox/fc/client.go @@ -427,6 +427,30 @@ func (c *apiClient) startVM(ctx context.Context) error { return nil } +func (c *apiClient) enableFreePageReporting(ctx context.Context) error { + ctx, span := tracer.Start(ctx, "enable-free-page-reporting") + defer span.End() + + amountMib := int64(0) + deflateOnOom := 
false + + balloonConfig := operations.PutBalloonParams{ + Context: ctx, + Body: &models.Balloon{ + AmountMib: &amountMib, + DeflateOnOom: &deflateOnOom, + FreePageReporting: true, + }, + } + + _, err := c.client.Operations.PutBalloon(&balloonConfig) + if err != nil { + return fmt.Errorf("error setting up balloon device: %w", err) + } + + return nil +} + func (c *apiClient) memoryMapping(ctx context.Context) (*memory.Mapping, error) { params := operations.GetMemoryMappingsParams{ Context: ctx, diff --git a/packages/orchestrator/pkg/sandbox/fc/process.go b/packages/orchestrator/pkg/sandbox/fc/process.go index a29f56072b..977091304d 100644 --- a/packages/orchestrator/pkg/sandbox/fc/process.go +++ b/packages/orchestrator/pkg/sandbox/fc/process.go @@ -299,6 +299,7 @@ func (p *Process) Create( vCPUCount int64, memoryMB int64, hugePages bool, + freePageReporting bool, options ProcessOptions, txRateLimit RateLimiterConfig, driveRateLimit RateLimiterConfig, @@ -438,6 +439,16 @@ func (p *Process) Create( } telemetry.ReportEvent(ctx, "set fc entropy config") + if freePageReporting { + err = p.client.enableFreePageReporting(ctx) + if err != nil { + fcStopErr := p.Stop(ctx) + + return errors.Join(fmt.Errorf("error enabling free page reporting: %w", err), fcStopErr) + } + telemetry.ReportEvent(ctx, "enabled free page reporting") + } + err = p.client.startVM(ctx) if err != nil { fcStopErr := p.Stop(ctx) diff --git a/packages/orchestrator/pkg/sandbox/sandbox.go b/packages/orchestrator/pkg/sandbox/sandbox.go index f69100015d..87c83c9f2e 100644 --- a/packages/orchestrator/pkg/sandbox/sandbox.go +++ b/packages/orchestrator/pkg/sandbox/sandbox.go @@ -71,6 +71,9 @@ type Config struct { TotalDiskSizeMB int64 HugePages bool + // Enable free page reporting + FreePageReporting bool + Envd EnvdMetadata FirecrackerConfig fc.Config @@ -495,6 +498,7 @@ func (f *Factory) CreateSandbox( config.Vcpu, config.RamMB, config.HugePages, + config.FreePageReporting, processOptions, fc.RateLimiterConfig{ 
Ops: fc.TokenBucketConfig(throttleConfig.Ops), diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/cross_process_helpers_test.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/cross_process_helpers_test.go index 768e67f7cf..4148c357b1 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/cross_process_helpers_test.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/cross_process_helpers_test.go @@ -1,28 +1,39 @@ package userfaultfd -// This tests is creating uffd in the main process and handling the page faults in another process. -// It prevents problems with Go mmap during testing (https://pojntfx.github.io/networked-linux-memsync/main.html#limitations) and also more accurately simulates what we do with Firecracker. -// These problems are not affecting Firecracker, because: -// 1. It is a different process that handles the page faults -// 2. Does not use garbage collection +// This test creates the userfaultfd in the parent test process and +// drives it from a child helper process. We do this so the actual +// page-fault handling runs in a process where we can fully control +// memory layout (no Go GC scanning / touching the registered region) +// — which mirrors how Firecracker uses UFFD in production. +// +// All parent ↔ child coordination — readiness, page-state queries, +// pause/resume, fault barriers, shutdown — flows over a single Unix +// domain socket using the standard-library net/rpc + jsonrpc codec. +// Each in-flight RPC runs in its own server-side goroutine, so a +// blocking handler (e.g. WaitFaultHeld) does not stall other RPCs. +// The only fd we still hand off out-of-band is the userfaultfd +// itself (kernel object, has to go through ExtraFiles); the initial +// source data is written to a temp file under t.TempDir() because +// base64-stuffing megabytes through the JSON envelope would be silly. 
import ( "context" "crypto/rand" - "encoding/binary" "errors" "fmt" - "io" + "net" + "net/rpc" + "net/rpc/jsonrpc" "os" "os/exec" - "os/signal" + "path/filepath" "slices" "strconv" - "strings" + "sync" "syscall" "testing" + "time" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "golang.org/x/sys/unix" @@ -33,8 +44,8 @@ import ( "github.com/e2b-dev/infra/packages/shared/pkg/logger" ) -// MemorySlicer exposes byte slice via the Slicer interface. -// This is used for testing purposes. +// MemorySlicer exposes a byte slice via the Slicer interface. +// Test-only. type MemorySlicer struct { content []byte pagesize int64 @@ -43,10 +54,7 @@ type MemorySlicer struct { var _ block.Slicer = (*MemorySlicer)(nil) func NewMemorySlicer(content []byte, pagesize int64) *MemorySlicer { - return &MemorySlicer{ - content: content, - pagesize: pagesize, - } + return &MemorySlicer{content: content, pagesize: pagesize} } func (s *MemorySlicer) Slice(_ context.Context, offset, size int64) ([]byte, error) { @@ -67,9 +75,7 @@ func (s *MemorySlicer) BlockSize() int64 { func RandomPages(pagesize, numberOfPages uint64) *MemorySlicer { size := pagesize * numberOfPages - - n := int(size) - buf := make([]byte, n) + buf := make([]byte, int(size)) if _, err := rand.Read(buf); err != nil { panic(err) } @@ -77,357 +83,522 @@ func RandomPages(pagesize, numberOfPages uint64) *MemorySlicer { return NewMemorySlicer(buf, int64(pagesize)) } -// Main process, FC in our case +// Env vars used by the child helper process. +const ( + envHelperFlag = "GO_TEST_HELPER_PROCESS" + envSocketPath = "GO_UFFD_SOCKET" + envContentPath = "GO_UFFD_CONTENT" + envMmapStart = "GO_UFFD_MMAP_START" + envMmapPagesize = "GO_UFFD_MMAP_PAGESIZE" + envMmapTotalSize = "GO_UFFD_MMAP_SIZE" + envAlwaysWP = "GO_UFFD_ALWAYS_WP" + envGated = "GO_UFFD_GATED" + // envBarriers gates the test-only worker hooks. 
Only race tests + // need them; for everyone else we leave the hook fields nil so + // the hot path stays a single nil-pointer load + branch. + envBarriers = "GO_UFFD_BARRIERS" +) + +// ---- RPC method types --------------------------------------------------- +// +// net/rpc requires methods of the form: +// +// func (s *Service) Method(args *ArgsT, reply *ReplyT) error +// +// where both args and reply are exported pointer types. For methods +// that take or return nothing meaningful we still need a type — Empty +// fills that role. + +type Empty struct{} + +type PageStatesReply struct { + Entries []pageStateEntry +} + +type FaultBarrierArgs struct { + Addr uint64 + Point uint8 +} + +type FaultBarrierReply struct { + Token uint64 +} + +type TokenArgs struct { + Token uint64 +} + +// pageStateEntry is the wire format for PageStates RPC results. +type pageStateEntry struct { + State uint8 + Offset uint64 +} + +// ---- Parent side -------------------------------------------------------- + +// childForkMu serialises the dup → cmd.Start() window across all +// parallel cross-process tests in this binary. Without it, the dup'd +// uffd fd we hand to one child via ExtraFiles is briefly visible in +// the parent's fd table while ANOTHER concurrent test calls fork() +// — so that other test's child inherits a uffd fd it does not own. +// The leaked fd keeps the original test's uffd kernel object alive +// after its owner closes its end, prevents madvise from completing +// once the owning child exits, and produces hard-to-diagnose +// -parallel-only deadlocks. +// +// Holding the mutex from the dup until cmd.Start returns (Start itself +// holds the process lock for the underlying syscall.ForkExec) is enough: +// by then the dup'd fd is already mapped into fd 3 in the new child, +// and the parent closes its copy before releasing the mutex. +var childForkMu sync.Mutex + +// Main process, FC in our case.
func configureCrossProcessTest(t *testing.T, tt testConfig) (*testHandler, error) { t.Helper() data := RandomPages(tt.pagesize, tt.numberOfPages) + if tt.sourcePatcher != nil { + tt.sourcePatcher(data.Content()) + } + size, err := data.Size() require.NoError(t, err) memoryArea, memoryStart, err := testutils.NewPageMmap(t, uint64(size), tt.pagesize) require.NoError(t, err) - // We can pass mapping nil as the serve is used only in the helper process. uffdFd, err := newFd(syscall.O_CLOEXEC | syscall.O_NONBLOCK) require.NoError(t, err) - t.Cleanup(func() { uffdFd.close() }) - err = configureApi(uffdFd, tt.pagesize) - require.NoError(t, err) + require.NoError(t, configureApi(uffdFd, tt.pagesize)) + require.NoError(t, register(uffdFd, memoryStart, uint64(size), UFFDIO_REGISTER_MODE_MISSING|UFFDIO_REGISTER_MODE_WP)) - err = register(uffdFd, memoryStart, uint64(size), UFFDIO_REGISTER_MODE_MISSING|UFFDIO_REGISTER_MODE_WP) + t.Cleanup(func() { + // Tear the registration down before the late close. With + // UFFD_FEATURE_EVENT_REMOVE enabled (see configureApi), + // munmap can otherwise block on un-acked REMOVE events. 
+ _ = unregister(uffdFd, memoryStart, uint64(size)) + }) + + tmpDir := t.TempDir() + + contentPath := filepath.Join(tmpDir, "content.bin") + require.NoError(t, os.WriteFile(contentPath, data.Content(), 0o600)) + + socketPath := filepath.Join(tmpDir, "rpc.sock") + listener, err := net.Listen("unix", socketPath) require.NoError(t, err) cmd := exec.CommandContext(t.Context(), os.Args[0], "-test.run=TestHelperServingProcess", "-test.timeout=0") - cmd.Env = append(os.Environ(), "GO_TEST_HELPER_PROCESS=1") - cmd.Env = append(cmd.Env, fmt.Sprintf("GO_MMAP_START=%d", memoryStart)) - cmd.Env = append(cmd.Env, fmt.Sprintf("GO_MMAP_PAGE_SIZE=%d", tt.pagesize)) + cmd.Env = append(os.Environ(), + envHelperFlag+"=1", + envSocketPath+"="+socketPath, + envContentPath+"="+contentPath, + fmt.Sprintf("%s=%d", envMmapStart, memoryStart), + fmt.Sprintf("%s=%d", envMmapPagesize, tt.pagesize), + fmt.Sprintf("%s=%d", envMmapTotalSize, size), + ) if tt.alwaysWP { - cmd.Env = append(cmd.Env, "GO_ALWAYS_WP=1") + cmd.Env = append(cmd.Env, envAlwaysWP+"=1") + } + if tt.gated { + cmd.Env = append(cmd.Env, envGated+"=1") + } + if tt.barriers { + cmd.Env = append(cmd.Env, envBarriers+"=1") } - dup, err := syscall.Dup(int(uffdFd)) - require.NoError(t, err) + // We hand the uffd fd to the child via ExtraFiles. The child- + // side dup3 inside fork+exec clears CLOEXEC on the destination + // fd (i.e. fd 3 in the child) automatically, so the SOURCE fd + // in our parent should remain CLOEXEC — otherwise every other + // test fork()'d concurrently from this binary inherits a uffd + // it does not own, the kernel keeps the original test's uffd + // alive after its real owner exits, and madvise stops draining. + // At higher -parallel this surfaces as long, hard-to-diagnose + // hangs. + // + // syscall.Dup creates the new fd WITHOUT CLOEXEC, so we + // re-arm it explicitly. Holding childForkMu across the + // dup → cmd.Start window further guarantees no concurrent + // fork can race the F_SETFD. 
+ childForkMu.Lock() - // clear FD_CLOEXEC on the dup we pass across exec - _, err = unix.FcntlInt(uintptr(dup), unix.F_SETFD, 0) + dup, err := syscall.Dup(int(uffdFd)) require.NoError(t, err) + if _, err := unix.FcntlInt(uintptr(dup), unix.F_SETFD, unix.FD_CLOEXEC); err != nil { + childForkMu.Unlock() + require.NoError(t, err) + } uffdFile := os.NewFile(uintptr(dup), "uffd") + cmd.ExtraFiles = []*os.File{uffdFile} + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr - contentReader, contentWriter, err := os.Pipe() - require.NoError(t, err) - - go func() { - _, writeErr := contentWriter.Write(data.Content()) - assert.NoError(t, writeErr) - - closeErr := contentWriter.Close() - assert.NoError(t, closeErr) - }() - - offsetsReader, offsetsWriter, err := os.Pipe() - require.NoError(t, err) - - t.Cleanup(func() { - offsetsReader.Close() - }) + startErr := cmd.Start() + uffdFile.Close() + childForkMu.Unlock() - readyReader, readyWriter, err := os.Pipe() - require.NoError(t, err) + require.NoError(t, startErr) - t.Cleanup(func() { - readyReader.Close() - }) - - readySignal := make(chan struct{}, 1) + // Accept the child's connection. Tight deadline so a wedged + // child surfaces fast instead of hanging the test. 
+ type acceptResult struct { + conn net.Conn + err error + } + acceptCh := make(chan acceptResult, 1) go func() { - _, err := io.ReadAll(readyReader) - assert.NoError(t, err) - - readySignal <- struct{}{} + c, err := listener.Accept() + acceptCh <- acceptResult{conn: c, err: err} }() - cmd.ExtraFiles = []*os.File{ - uffdFile, - contentReader, - offsetsWriter, - readyWriter, + var conn net.Conn + select { + case res := <-acceptCh: + require.NoError(t, res.err) + conn = res.conn + case <-time.After(10 * time.Second): + listener.Close() + _ = cmd.Process.Kill() + _, _ = cmd.Process.Wait() + t.Fatalf("child did not connect within 10s") } - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr + listener.Close() - err = cmd.Start() - require.NoError(t, err) + client := jsonrpc.NewClient(conn) - contentReader.Close() - offsetsWriter.Close() - readyWriter.Close() - uffdFile.Close() + h := &testHandler{ + memoryArea: &memoryArea, + pagesize: tt.pagesize, + data: data, + client: client, + conn: conn, + cmd: cmd, + } + + // WaitReady blocks on the child until its initial setup is done + // (uffd serve goroutine running, hooks installed). The RPC reply + // IS the readiness signal — no separate ready pipe / signal + // needed. + require.NoError(t, h.client.Call("Service.WaitReady", &Empty{}, &Empty{})) t.Cleanup(func() { - signalErr := cmd.Process.Signal(syscall.SIGUSR1) - assert.NoError(t, signalErr) + // Best-effort graceful shutdown via RPC. If the child has + // already crashed the RPC will error and we fall back to + // killing the process below. 
+ _ = h.client.Call("Service.Shutdown", &Empty{}, &Empty{}) + _ = client.Close() waitErr := cmd.Wait() - // It can be either nil, an ExitError, a context.Canceled error, or "signal: killed" - assert.True(t, - (waitErr != nil && func(err error) bool { - var exitErr *exec.ExitError - - return errors.As(err, &exitErr) - }(waitErr)) || - errors.Is(waitErr, context.Canceled) || - (waitErr != nil && strings.Contains(waitErr.Error(), "signal: killed")) || - waitErr == nil, - "unexpected error: %v", waitErr, - ) - - // Tear down the UFFD registration before the early uffdFd.close() - // cleanup runs. Today this is a no-op (no test enables - // UFFD_FEATURE_EVENT_REMOVE) but a follow-up that does will - // otherwise see munmap block on un-acked REMOVE events queued - // against the still-registered range. Cleanups run LIFO, so - // this fires before the close registered earlier. - assert.NoError(t, unregister(uffdFd, memoryStart, uint64(size))) + if waitErr != nil { + var exitErr *exec.ExitError + if !errors.As(waitErr, &exitErr) { + t.Logf("helper process Wait: %v", waitErr) + } + } }) - // pageStatesOnce asks the serving process for a snapshot of its pageTracker - // and decodes it into a per-state view. It can only be called once. - pageStatesOnce := func() (handlerPageStates, error) { - err := cmd.Process.Signal(syscall.SIGUSR2) - if err != nil { - return handlerPageStates{}, err + if tt.gated { + h.servePause = func() error { + return h.client.Call("Service.ServePause", &Empty{}, &Empty{}) } + h.serveResume = func() error { + return h.client.Call("Service.ServeResume", &Empty{}, &Empty{}) + } + } - var result handlerPageStates - - for { - var entry pageStateEntry - - // binary.Read uses the same field layout as binary.Write on - // the producer side (sum of fixed-size fields, no struct - // padding), so we never have to hard-code the wire size. 
- err := binary.Read(offsetsReader, binary.LittleEndian, &entry) - if errors.Is(err, io.EOF) { - break - } - - if err != nil { - return handlerPageStates{}, fmt.Errorf("decoding page state entry: %w", err) - } + h.pageStatesOnce = func() (handlerPageStates, error) { + var reply PageStatesReply + if err := h.client.Call("Service.PageStates", &Empty{}, &reply); err != nil { + return handlerPageStates{}, err + } - if pageState(entry.State) == faulted { - result.faulted = append(result.faulted, uint(entry.Offset)) + var states handlerPageStates + for _, e := range reply.Entries { + switch pageState(e.State) { + case faulted: + states.faulted = append(states.faulted, uint(e.Offset)) + case removed: + states.removed = append(states.removed, uint(e.Offset)) } } + slices.Sort(states.faulted) + slices.Sort(states.removed) - slices.Sort(result.faulted) - - return result, nil - } - - select { - case <-t.Context().Done(): - return nil, t.Context().Err() - case <-readySignal: + return states, nil } - return &testHandler{ - memoryArea: &memoryArea, - pagesize: tt.pagesize, - data: data, - pageStatesOnce: pageStatesOnce, - }, nil + return h, nil } -// Secondary process, orchestrator in our case +// ---- Child side --------------------------------------------------------- + +// Secondary process, orchestrator in our case. func TestHelperServingProcess(t *testing.T) { t.Parallel() - if os.Getenv("GO_TEST_HELPER_PROCESS") != "1" { + if os.Getenv(envHelperFlag) != "1" { t.Skip("this is a helper process, skipping direct execution") } - err := crossProcessServe() - if err != nil { - fmt.Println("exit serving process", err) + if err := crossProcessServe(); err != nil { + fmt.Fprintln(os.Stderr, "exit serving process:", err) os.Exit(1) } os.Exit(0) } +// crossProcessServe wires up the child side: connects back to the +// parent socket, registers the RPC service, and runs uffd.Serve in a +// background goroutine that pause/resume RPCs can stop and restart. 
func crossProcessServe() error { - ctx, cancel := context.WithCancelCause(context.Background()) - defer cancel(nil) + socketPath := os.Getenv(envSocketPath) + if socketPath == "" { + return fmt.Errorf("missing %s", envSocketPath) + } - startRaw, err := strconv.Atoi(os.Getenv("GO_MMAP_START")) + conn, err := net.Dial("unix", socketPath) if err != nil { - return fmt.Errorf("exit parsing mmap start: %w", err) + return fmt.Errorf("dial parent socket: %w", err) } + defer conn.Close() + startRaw, err := strconv.ParseUint(os.Getenv(envMmapStart), 10, 64) + if err != nil { + return fmt.Errorf("parse %s: %w", envMmapStart, err) + } memoryStart := uintptr(startRaw) - uffdFile := os.NewFile(uintptr(3), os.Getenv("GO_UFFD_FILE")) - defer uffdFile.Close() - - uffdFd := uffdFile.Fd() - - contentFile := os.NewFile(uintptr(4), "content") - defer contentFile.Close() + pagesize, err := strconv.ParseInt(os.Getenv(envMmapPagesize), 10, 64) + if err != nil { + return fmt.Errorf("parse %s: %w", envMmapPagesize, err) + } - content, err := io.ReadAll(contentFile) + totalSize, err := strconv.ParseInt(os.Getenv(envMmapTotalSize), 10, 64) if err != nil { - return fmt.Errorf("exit reading content: %w", err) + return fmt.Errorf("parse %s: %w", envMmapTotalSize, err) } - pageSize, err := strconv.ParseInt(os.Getenv("GO_MMAP_PAGE_SIZE"), 10, 64) + content, err := os.ReadFile(os.Getenv(envContentPath)) if err != nil { - return fmt.Errorf("exit parsing page size: %w", err) + return fmt.Errorf("read content: %w", err) } + if int64(len(content)) != totalSize { + return fmt.Errorf("content size %d != expected %d", len(content), totalSize) + } + + data := NewMemorySlicer(content, pagesize) - data := NewMemorySlicer(content, pageSize) + uffdFile := os.NewFile(uintptr(3), "uffd") + defer uffdFile.Close() + uffdFd := uffdFile.Fd() - m := memory.NewMapping([]memory.Region{ + mapping := memory.NewMapping([]memory.Region{ { BaseHostVirtAddr: memoryStart, - Size: uintptr(len(content)), + Size: 
uintptr(totalSize), Offset: 0, - PageSize: uintptr(pageSize), + PageSize: uintptr(pagesize), }, }) - exitUffd := make(chan struct{}, 1) - defer close(exitUffd) - l, err := logger.NewDevelopmentLogger() if err != nil { - return fmt.Errorf("exit creating logger: %w", err) + return fmt.Errorf("logger: %w", err) } - uffd, err := NewUserfaultfdFromFd(uffdFd, data, m, l) + uffd, err := NewUserfaultfdFromFd(uffdFd, data, mapping, l) if err != nil { - return fmt.Errorf("exit creating uffd: %w", err) + return fmt.Errorf("NewUserfaultfdFromFd: %w", err) } - if os.Getenv("GO_ALWAYS_WP") == "1" { + if os.Getenv(envAlwaysWP) == "1" { uffd.defaultCopyMode = UFFDIO_COPY_MODE_WP } - offsetsFile := os.NewFile(uintptr(5), "offsets") + br := newBarrierRegistry() + + // Hooks are only wired up when the test asked for them (race + // tests). For everyone else we leave the fields nil so the hot + // path is a single nil-pointer load + branch — keeps the high- + // throughput tests (TestParallelMissingWriteWithPrefault, etc.) + // from paying for a Mutex per fault. + if os.Getenv(envBarriers) == "1" { + uffd.beforeWorkerRLockHook = br.hookFor(barrierBeforeRLock) + uffd.beforeFaultPageHook = br.hookFor(barrierBeforeFaultPage) + } + + gated := os.Getenv(envGated) == "1" - offsetsSignal := make(chan os.Signal, 1) - signal.Notify(offsetsSignal, syscall.SIGUSR2) - defer signal.Stop(offsetsSignal) + svc := &Service{ + uffd: uffd, + br: br, + gated: gated, + shutdown: make(chan struct{}), + } + svc.startServe() + server := rpc.NewServer() + if err := server.Register(svc); err != nil { + return fmt.Errorf("rpc Register: %w", err) + } + + // Run the codec in a goroutine so we can react to Shutdown + // without depending on the codec returning. 
+ codecDone := make(chan struct{}) go func() { - defer offsetsFile.Close() - - for { - select { - case <-ctx.Done(): - return - case <-offsetsSignal: - entries, entriesErr := uffd.pageStateEntries() - if entriesErr != nil { - cancel(fmt.Errorf("error getting page state entries: %w", entriesErr)) - - return - } - - for _, entry := range entries { - writeErr := binary.Write(offsetsFile, binary.LittleEndian, entry) - if writeErr != nil { - cancel(fmt.Errorf("error writing page state entry: %w", writeErr)) - - return - } - } - - return - } - } + defer close(codecDone) + server.ServeCodec(jsonrpc.NewServerCodec(conn)) }() - fdExit, err := fdexit.New() - if err != nil { - return fmt.Errorf("exit creating fd exit: %w", err) + select { + case <-svc.shutdown: + fmt.Fprintln(os.Stderr, "child: shutdown received") + case <-codecDone: + fmt.Fprintln(os.Stderr, "child: codec done") } - defer fdExit.Close() - go func() { - defer func() { - exitUffd <- struct{}{} - }() + // Release any still-parked barriers so the serve goroutine can + // finish, then stop the serve goroutine. + br.releaseAll() + fmt.Fprintln(os.Stderr, "child: barriers released") + svc.stopServe() + fmt.Fprintln(os.Stderr, "child: serve stopped") - serverErr := uffd.Serve(ctx, fdExit) - if serverErr != nil { - msg := fmt.Errorf("error serving: %w", serverErr) + // Closing the conn is sufficient to unblock ServeCodec if it + // hasn't already returned. + _ = conn.Close() + <-codecDone + fmt.Fprintln(os.Stderr, "child: codec exited") - fmt.Fprint(os.Stderr, msg.Error()) + return nil +} - cancel(msg) +// Service is the RPC surface exposed to the parent. Methods follow +// net/rpc's required signature. 
+type Service struct { + uffd *Userfaultfd + br *barrierRegistry - return - } - }() + gated bool - cleanup := func() { - err := fdExit.SignalExit() - if err != nil { - msg := fmt.Errorf("error signaling exit: %w", err) + mu sync.Mutex + stop func() // currently active serve-stop function, nil if paused + shutdown chan struct{} + closed bool +} - fmt.Fprint(os.Stderr, msg.Error()) +func (s *Service) startServe() { + exit, err := fdexit.New() + if err != nil { + fmt.Fprintln(os.Stderr, "fdexit.New:", err) - cancel(msg) + return + } - return + done := make(chan struct{}) + go func() { + defer close(done) + if err := s.uffd.Serve(context.Background(), exit); err != nil { + fmt.Fprintln(os.Stderr, "uffd.Serve:", err) } + }() - <-exitUffd + s.stop = func() { + _ = exit.SignalExit() + <-done + exit.Close() } +} + +func (s *Service) stopServe() { + s.mu.Lock() + defer s.mu.Unlock() + if s.stop != nil { + s.stop() + s.stop = nil + } +} - defer cleanup() +// WaitReady is a no-op handler whose successful reply is the +// readiness signal for the parent. 
+func (s *Service) WaitReady(_ *Empty, _ *Empty) error {
+	return nil
+}
-	exitSignal := make(chan os.Signal, 1)
-	signal.Notify(exitSignal, syscall.SIGUSR1)
-	defer signal.Stop(exitSignal)
+// PageStates replies with the snapshot returned by pageStateEntries,
+// or forwards its error unchanged.
+func (s *Service) PageStates(_ *Empty, reply *PageStatesReply) error {
+	entries, err := s.uffd.pageStateEntries()
+	if err != nil {
+		return err
+	}
+	reply.Entries = entries
-	readyFile := os.NewFile(uintptr(6), "ready")
+	return nil
+}
-	closeErr := readyFile.Close()
-	if closeErr != nil {
-		return fmt.Errorf("error closing ready file: %w", closeErr)
+// ServePause stops the currently running serve loop (if any) via
+// stopServe. It is rejected on handlers that were not started gated.
+func (s *Service) ServePause(_ *Empty, _ *Empty) error {
+	if !s.gated {
+		return errors.New("ServePause called on a non-gated handler")
 	}
+	s.stopServe()
-	select {
-	case <-ctx.Done():
-		return fmt.Errorf("context done: %w: %w", ctx.Err(), context.Cause(ctx))
-	case <-exitSignal:
-		return nil
+	return nil
+}
+
+// ServeResume starts a new serve loop on a gated handler.
+//
+// It is a no-op when a loop is already running: startServe overwrites
+// s.stop unconditionally, so starting a second loop would orphan the
+// first one (its goroutine and fdexit could no longer be stopped).
+// It returns an error when the loop could not be started, so the
+// parent does not go on to trigger faults that nothing will serve.
+func (s *Service) ServeResume(_ *Empty, _ *Empty) error {
+	if !s.gated {
+		return errors.New("ServeResume called on a non-gated handler")
 	}
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if s.stop != nil {
+		// Already serving; nothing to resume.
+		return nil
+	}
+	s.startServe()
+	if s.stop == nil {
+		// startServe only logs an fdexit.New failure and leaves
+		// s.stop unset; surface that to the caller.
+		return errors.New("ServeResume: serve loop failed to start")
+	}
+
+	return nil
 }
-// pageStateEntry is the wire format used between the main test process
-// and the serving helper process. State is emitted as a single byte so it
-// can be written directly with binary.Write and decoded on the other side.
-type pageStateEntry struct {
-	State uint8
-	Offset uint64
+// InstallFaultBarrier registers a barrier for (args.Addr, args.Point)
+// in the barrier registry and replies with the token identifying it.
+func (s *Service) InstallFaultBarrier(args *FaultBarrierArgs, reply *FaultBarrierReply) error {
+	reply.Token = s.br.install(uintptr(args.Addr), barrierPoint(args.Point))
+
+	return nil
 }
-// pageStateEntries returns a snapshot of every tracked page and its state.
-// It holds the settleRequests write lock so no in-flight faultPage worker
-// can mutate the pageTracker while we iterate.
+func (s *Service) WaitFaultHeld(args *TokenArgs, _ *Empty) error { + return s.br.waitArrived(context.Background(), args.Token) +} + +func (s *Service) ReleaseFault(args *TokenArgs, _ *Empty) error { + s.br.release(args.Token) + + return nil +} + +func (s *Service) Shutdown(_ *Empty, _ *Empty) error { + s.mu.Lock() + defer s.mu.Unlock() + if !s.closed { + s.closed = true + close(s.shutdown) + } + + return nil +} + +// pageStateEntries returns a snapshot of every tracked page and its +// state. It briefly takes settleRequests.Lock so no in-flight worker +// can mutate the pageTracker while we read it. func (u *Userfaultfd) pageStateEntries() ([]pageStateEntry, error) { u.settleRequests.Lock() - defer u.settleRequests.Unlock() + u.settleRequests.Unlock() //nolint:staticcheck // SA2001: intentional — settle the read locks. + + u.pageTracker.mu.RLock() + defer u.pageTracker.mu.RUnlock() entries := make([]pageStateEntry, 0, len(u.pageTracker.m)) for addr, state := range u.pageTracker.m { @@ -435,9 +606,157 @@ func (u *Userfaultfd) pageStateEntries() ([]pageStateEntry, error) { if err != nil { return nil, fmt.Errorf("address %#x not in mapping: %w", addr, err) } - - entries = append(entries, pageStateEntry{uint8(state), uint64(offset)}) + entries = append(entries, pageStateEntry{State: uint8(state), Offset: uint64(offset)}) } return entries, nil } + +// ---- Barrier registry --------------------------------------------------- + +// barrierPoint identifies WHICH hook a barrier should park on. +type barrierPoint uint8 + +const ( + // barrierBeforeRLock parks the worker BEFORE settleRequests.RLock(), + // i.e. before it can read the page state. Use this for the + // stale-source race: a parallel REMOVE batch on the parent loop + // can take the write lock immediately because no worker holds + // the read lock. 
+ barrierBeforeRLock barrierPoint = 1 + // barrierBeforeFaultPage parks the worker AFTER it has taken + // settleRequests.RLock and decided on `source`, but BEFORE the + // actual UFFDIO_COPY syscall. Use this for the in-flight COPY + // deadlock test: the parent's madvise must still return even + // though a worker holds RLock. + barrierBeforeFaultPage barrierPoint = 2 +) + +// barrierRegistry is the child-process side of the barrier. The +// hooks installed on Userfaultfd consult this registry by addr+point +// to decide whether to park, and the RPC handlers manipulate it from +// the parent over the socket. +type barrierRegistry struct { + mu sync.Mutex + next uint64 + tokens map[uint64]*barrierSlot + byKey map[barrierKey]uint64 +} + +type barrierKey struct { + addr uintptr + point barrierPoint +} + +type barrierSlot struct { + addr uintptr + point barrierPoint + arrived chan struct{} + release chan struct{} + arrivedOnce sync.Once +} + +func newBarrierRegistry() *barrierRegistry { + return &barrierRegistry{ + tokens: make(map[uint64]*barrierSlot), + byKey: make(map[barrierKey]uint64), + } +} + +func (b *barrierRegistry) install(addr uintptr, point barrierPoint) uint64 { + b.mu.Lock() + defer b.mu.Unlock() + + b.next++ + token := b.next + slot := &barrierSlot{ + addr: addr, + point: point, + arrived: make(chan struct{}), + release: make(chan struct{}), + } + b.tokens[token] = slot + b.byKey[barrierKey{addr, point}] = token + + return token +} + +func (b *barrierRegistry) lookupByAddr(addr uintptr, point barrierPoint) *barrierSlot { + b.mu.Lock() + defer b.mu.Unlock() + + token, ok := b.byKey[barrierKey{addr, point}] + if !ok { + return nil + } + + return b.tokens[token] +} + +func (b *barrierRegistry) waitArrived(ctx context.Context, token uint64) error { + b.mu.Lock() + slot, ok := b.tokens[token] + b.mu.Unlock() + if !ok { + return fmt.Errorf("unknown barrier token %d", token) + } + + select { + case <-slot.arrived: + return nil + case <-ctx.Done(): + return 
ctx.Err() + } +} + +func (b *barrierRegistry) release(token uint64) { + b.mu.Lock() + slot, ok := b.tokens[token] + delete(b.tokens, token) + if ok { + delete(b.byKey, barrierKey{slot.addr, slot.point}) + } + b.mu.Unlock() + + if !ok { + return + } + + select { + case <-slot.release: + default: + close(slot.release) + } +} + +func (b *barrierRegistry) releaseAll() { + b.mu.Lock() + tokens := make([]uint64, 0, len(b.tokens)) + for t := range b.tokens { + tokens = append(tokens, t) + } + b.mu.Unlock() + + for _, t := range tokens { + b.release(t) + } +} + +// hookFor returns the function to assign to a Userfaultfd +// beforeXxxHook field. The returned function is a no-op for any +// (addr, point) pair that hasn't been Install'd, so non-targeted +// faults see no scheduling distortion. +func (b *barrierRegistry) hookFor(point barrierPoint) func(addr uintptr) { + return func(addr uintptr) { + slot := b.lookupByAddr(addr, point) + if slot == nil { + return + } + + slot.arrivedOnce.Do(func() { + close(slot.arrived) + }) + + <-slot.release + } +} diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go new file mode 100644 index 0000000000..6089ad7660 --- /dev/null +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/deferred.go @@ -0,0 +1,26 @@ +package userfaultfd + +import "sync" + +// deferredFaults collects pagefaults that couldn't be handled (EAGAIN) +// and need to be retried on the next poll iteration. Safe for concurrent push. +type deferredFaults struct { + mu sync.Mutex + pf []*UffdPagefault +} + +func (d *deferredFaults) push(pf *UffdPagefault) { + d.mu.Lock() + d.pf = append(d.pf, pf) + d.mu.Unlock() +} + +// drain returns all accumulated pagefaults and resets the internal list. 
+func (d *deferredFaults) drain() []*UffdPagefault { + d.mu.Lock() + out := d.pf + d.pf = nil + d.mu.Unlock() + + return out +} diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/fd_helpers_test.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/fd_helpers_test.go index c85d8b1233..c30d1823f5 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/fd_helpers_test.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/fd_helpers_test.go @@ -31,6 +31,7 @@ func configureApi(f Fd, pagesize uint64) error { } features |= UFFD_FEATURE_WP_ASYNC + features |= UFFD_FEATURE_EVENT_REMOVE api := newUffdioAPI(UFFD_API, features) ret, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(f), UFFDIO_API, uintptr(unsafe.Pointer(&api))) @@ -42,9 +43,9 @@ func configureApi(f Fd, pagesize uint64) error { } // unregister tears down the UFFD registration over [addr, addr+size). -// Used in test cleanup so that any in-flight REMOVE events the kernel -// may have queued (once UFFD_FEATURE_EVENT_REMOVE is enabled in a -// follow-up) don't keep munmap blocked on un-acked events. +// Used in test cleanup so in-flight REMOVE events queued by the kernel +// (configureApi enables UFFD_FEATURE_EVENT_REMOVE on this branch) don't +// keep munmap blocked on un-acked events. 
func unregister(f Fd, addr uintptr, size uint64) error { r := newUffdioRange(CULong(addr), CULong(size)) diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/helpers_test.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/helpers_test.go index 054aa9e333..5cd7752a58 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/helpers_test.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/helpers_test.go @@ -4,14 +4,18 @@ import ( "bytes" "context" "fmt" - "slices" + "io" + "net/rpc" + "os/exec" "sync" "testing" + "time" "unsafe" "github.com/RoaringBitmap/roaring/v2" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "golang.org/x/sys/unix" "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/uffd/testutils" ) @@ -26,6 +30,20 @@ type testConfig struct { operations []operation // alwaysWP makes the handler copy with UFFDIO_COPY_MODE_WP for all faults. alwaysWP bool + // gated enables pause/resume control over the handler's serve loop. + gated bool + // barriers wires up the per-worker fault hooks in the child + // (used by race tests). Off by default so the worker hot path + // stays a single nil-pointer load + branch in non-race tests. + barriers bool + // sourcePatcher, if non-nil, is invoked on the random source data + // AFTER it's generated but BEFORE it's written to the on-disk + // content file the child reads. Tests can use this to plant + // deterministic sentinel bytes in the source so the post-test + // assertion can distinguish "post-fix zero-fault" from "pre-fix + // UFFDIO_COPY of stale src bytes" without depending on the + // happenstance value of randomly-generated bytes. 
+ sourcePatcher func([]byte) } type operationMode uint32 @@ -33,35 +51,54 @@ type operationMode uint32 const ( operationModeRead operationMode = 1 << iota operationModeWrite + operationModeRemove + operationModeServePause + operationModeServeResume + // operationModeSleep pauses for a short duration to let async goroutines + // enter their blocking syscalls before proceeding. + operationModeSleep ) type operation struct { // Offset in bytes. Must be smaller than the (numberOfPages-1) * pagesize as it reads a page and it must be aligned to the pagesize from the testConfig. offset int64 mode operationMode + // async runs the operation in a background goroutine. + async bool } // handlerPageStates is a snapshot of the pageTracker grouped by state. It // lets tests assert on the set of pages that the handler observed in each -// state, rather than a flat list of "accessed" offsets. Follow-up PRs can -// add more state-specific fields (e.g. removed) without touching the -// existing call sites. +// state, rather than a flat list of "accessed" offsets. type handlerPageStates struct { faulted []uint + removed []uint } // allAccessed returns the sorted union of offsets that the handler touched -// in any non-missing state. Tests that only care about "which pages did the -// handler see" can compare directly against this. +// in any non-missing state. // -// pageStatesOnce already returns each per-state slice sorted, and a page +// pageStatesOnce returns each per-state slice already sorted, and a page // has exactly one state at a time in pageTracker, so the per-state slices -// are disjoint. Follow-up PRs that add more state-specific fields should -// sorted-merge them here instead of reaching for a bitset — byte offsets -// make poor bit indices (a single hugepage offset would force ~1.8 MB of -// backing storage). +// are disjoint. 
We merge them with a simple sorted merge instead of a +// bitset — byte offsets make poor bit indices (a single hugepage offset +// would force ~1.8 MB of backing storage). func (s handlerPageStates) allAccessed() []uint { - return slices.Clone(s.faulted) + out := make([]uint, 0, len(s.faulted)+len(s.removed)) + i, j := 0, 0 + for i < len(s.faulted) && j < len(s.removed) { + if s.faulted[i] <= s.removed[j] { + out = append(out, s.faulted[i]) + i++ + } else { + out = append(out, s.removed[j]) + j++ + } + } + out = append(out, s.faulted[i:]...) + out = append(out, s.removed[j:]...) + + return out } type testHandler struct { @@ -69,25 +106,91 @@ type testHandler struct { pagesize uint64 data *MemorySlicer // pageStatesOnce returns a per-state snapshot of the handler's pageTracker. - // It can only be called once. + // Backed by the PageStates RPC; callable any number of times. + // The "Once" suffix is kept for source-stability with the existing + // test sites. pageStatesOnce func() (handlerPageStates, error) - mutex sync.Mutex + // servePause and serveResume gate the UFFD event loop in the child process. + // Tests use them to deterministically drain a batch of REMOVE events + // before more faults are processed. + servePause func() error + serveResume func() error + + // client is the RPC channel to the child helper process. + client *rpc.Client + conn io.Closer + cmd *exec.Cmd + + mutex sync.Mutex +} + +// installFaultBarrier asks the child to park the next worker that +// hits `point` for `addr`. Returns a token that must be passed to +// waitFaultHeld and releaseFault. 
+func (h *testHandler) installFaultBarrier(_ context.Context, addr uintptr, point barrierPoint) (uint64, error) { + var reply FaultBarrierReply + err := h.client.Call("Service.InstallFaultBarrier", &FaultBarrierArgs{Addr: uint64(addr), Point: uint8(point)}, &reply) + + return reply.Token, err +} + +// waitFaultHeld blocks until the child reports that a worker has +// reached the barrier identified by token. The wait is bounded via +// context by issuing the call on a goroutine and racing it against +// ctx; net/rpc's Call doesn't take a context directly. +func (h *testHandler) waitFaultHeld(ctx context.Context, token uint64) error { + call := h.client.Go("Service.WaitFaultHeld", &TokenArgs{Token: token}, &Empty{}, nil) + select { + case <-call.Done: + return call.Error + case <-ctx.Done(): + return ctx.Err() + } +} + +// releaseFault releases a parked worker so it proceeds past the +// barrier. +func (h *testHandler) releaseFault(_ context.Context, token uint64) error { + return h.client.Call("Service.ReleaseFault", &TokenArgs{Token: token}, &Empty{}) } func (h *testHandler) executeAll(t *testing.T, operations []operation) { t.Helper() + var asyncErrors []chan error + for i, op := range operations { + if op.async { + errCh := make(chan error, 1) + asyncErrors = append(asyncErrors, errCh) + + go func() { + errCh <- h.executeOperation(t.Context(), op) + }() + + continue + } + err := h.executeOperation(t.Context(), op) require.NoError(t, err, "step %d: %v at offset %d", i, op.mode, op.offset) } + + for _, errCh := range asyncErrors { + select { + case err := <-errCh: + require.NoError(t, err, "async operation") + case <-t.Context().Done(): + t.Fatal("timed out waiting for async operation") + } + } } type pageExpectation uint8 const ( - expectClean pageExpectation = iota // read-only: present + WP set - expectDirty // written: present + WP cleared + expectClean pageExpectation = iota // read-only: present + WP set + expectDirty // written: present + WP cleared + 
expectRemoved // removed: not present ) func (h *testHandler) checkDirtiness(t *testing.T, operations []operation) { @@ -100,17 +203,25 @@ func (h *testHandler) checkDirtiness(t *testing.T, operations []operation) { memStart := uintptr(unsafe.Pointer(&(*h.memoryArea)[0])) // Track the final expected state per offset by replaying operations in order. + // A remove after a read/write makes the page not present. + // A read/write after a remove makes it present again. expected := make(map[uint]pageExpectation) for _, op := range operations { off := uint(op.offset) switch op.mode { case operationModeRead: - if _, seen := expected[off]; !seen { + curr, seen := expected[off] + // If we haven't seen this page before or the page + // has previously been removed then the page should be clean + // after this read operation. + if !seen || curr == expectRemoved { expected[off] = expectClean } case operationModeWrite: expected[off] = expectDirty + case operationModeRemove: + expected[off] = expectRemoved } } @@ -119,6 +230,8 @@ func (h *testHandler) checkDirtiness(t *testing.T, operations []operation) { require.NoError(t, err, "pagemap read at offset %d", off) switch expect { + case expectRemoved: + assert.False(t, entry.IsPresent(), "removed page at offset %d should not be present", off) case expectDirty: assert.True(t, entry.IsPresent(), "written page at offset %d should be present", off) assert.False(t, entry.IsWriteProtected(), "written page at offset %d should be dirty", off) @@ -135,11 +248,27 @@ func (h *testHandler) executeOperation(ctx context.Context, op operation) error return h.executeRead(ctx, op) case operationModeWrite: return h.executeWrite(ctx, op) + case operationModeRemove: + return h.executeRemove(op) + case operationModeServePause: + return h.servePause() + case operationModeServeResume: + return h.serveResume() + case operationModeSleep: + time.Sleep(50 * time.Millisecond) + + return nil default: return fmt.Errorf("invalid operation mode: %d", op.mode) } } 
+func (h *testHandler) executeRemove(op operation) error { + page := (*h.memoryArea)[op.offset : op.offset+int64(h.pagesize)] + + return unix.Madvise(page, unix.MADV_DONTNEED) +} + func (h *testHandler) executeRead(ctx context.Context, op operation) error { readBytes := (*h.memoryArea)[op.offset : op.offset+int64(h.pagesize)] diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/page_tracker.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/page_tracker.go index da76d310a8..2f57ab9966 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/page_tracker.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/page_tracker.go @@ -7,6 +7,7 @@ type pageState uint8 const ( missing pageState = iota faulted + removed ) type pageTracker struct { @@ -23,6 +24,18 @@ func newPageTracker(pageSize uintptr) *pageTracker { } } +func (pt *pageTracker) get(addr uintptr) pageState { + pt.mu.RLock() + defer pt.mu.RUnlock() + + state, ok := pt.m[addr] + if !ok { + return missing + } + + return state +} + func (pt *pageTracker) setState(start, end uintptr, state pageState) { pt.mu.Lock() defer pt.mu.Unlock() diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/prefault.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/prefault.go index 89bae9fedc..c1c2bff97a 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/prefault.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/prefault.go @@ -2,6 +2,7 @@ package userfaultfd import ( "context" + "errors" "fmt" "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/block" @@ -11,6 +12,9 @@ import ( // This is used to speed up sandbox starts by prefetching pages that are known to be needed. // Returns nil on success, or if the page is already mapped (EEXIST is handled gracefully). 
func (u *Userfaultfd) Prefault(ctx context.Context, offset int64, data []byte) error { + u.settleRequests.RLock() + defer u.settleRequests.RUnlock() + ctx, span := tracer.Start(ctx, "prefault page") defer span.End() @@ -23,7 +27,45 @@ func (u *Userfaultfd) Prefault(ctx context.Context, offset int64, data []byte) e return fmt.Errorf("data length (%d) does not match pagesize (%d)", len(data), u.pageSize) } - return u.faultPage(ctx, addr, offset, directDataSource{data, int64(u.pageSize)}, nil, block.Prefetch) + // If page has already been faulted in due to on-demand page fault handling or removed because + // Firecracker called madvise() on it, skip it. + state := u.pageTracker.get(addr) + if state == faulted || state == removed { + return nil + } + + // We're treating prefault handling as if it was caused by a read access. + // This way, we will fault the page with UFFD_COPY_MODE_WP which will set + // the WP bit for the page. This works even in the case of a race with a + // concurrent on-demand write access. + // + // If the on-demand fault handler beats us, we will get an EEXIST here. + // If we beat the on-demand handler, it will get the EEXIST. + // + // In both cases, the WP bit will be cleared because it is handled asynchronously + // by the kernel. + handled, err := u.faultPage( + ctx, + addr, + offset, + block.Read, + directDataSource{data, int64(u.pageSize)}, + nil, + ) + if err != nil { + span.RecordError(errors.New("could not prefault page")) + + return fmt.Errorf("failed to fault page: %w", err) + } + + if !handled { + span.AddEvent("prefault: page already faulted or write returned EAGAIN") + } else { + u.pageTracker.setState(addr, addr+u.pageSize, faulted) + u.prefetchTracker.Add(offset, block.Prefetch) + } + + return nil } // directDataSource wraps a byte slice to implement block.Slicer for prefaulting. 
diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/race_test.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/race_test.go new file mode 100644 index 0000000000..b742ef25e2 --- /dev/null +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/race_test.go @@ -0,0 +1,407 @@ +package userfaultfd + +import ( + "context" + "fmt" + "testing" + "time" + "unsafe" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "golang.org/x/sys/unix" + + "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/uffd/testutils" + "github.com/e2b-dev/infra/packages/shared/pkg/storage/header" +) + +// raceHappyPathBudget bounds every race test in this file. The whole +// point of these tests is that they detect a regression as a fast, +// targeted assertion rather than as a CI `go test -timeout` hang. None of +// these tests should approach this budget on a healthy build. +const raceHappyPathBudget = 5 * time.Second + +// barrierArrivalDeadline is how long the test will wait for a worker +// to reach an installed barrier. The hook fires the first thing in +// the worker goroutine, so on a healthy build it's a sub-millisecond +// rendezvous over the unix-socket RPC. Anything approaching this +// deadline means the handler dispatch is wedged. +const barrierArrivalDeadline = 2 * time.Second + +// madviseBudget is how long we allow MADV_DONTNEED to spend in the +// kernel after we've parked a worker mid-handler. The fix guarantees +// madvise unblocks as soon as the handler drains the REMOVE event +// from the uffd fd, regardless of any worker holding RLock — +// readEvents requires no lock. +const madviseBudget = 2 * time.Second + +// withRaceContext bounds a single race test to raceHappyPathBudget, +// failing with a clear "handler is wedged" message if the budget is exceeded.
+func withRaceContext(t *testing.T, body func(ctx context.Context)) {
+	t.Helper()
+
+	// The budget is layered on top of t.Context(), so ctx also ends
+	// when the test's own context is cancelled.
+	ctx, cancel := context.WithTimeout(t.Context(), raceHappyPathBudget)
+	defer cancel()
+
+	// body runs on its own goroutine so that this goroutine stays free
+	// to report the timeout even when body is blocked.
+	//
+	// NOTE(review): callers in this file use require.* / t.Fatalf inside
+	// body, i.e. off the goroutine running the test, which the testing
+	// package documents as unsupported for FailNow. The deferred
+	// close(done) still runs in that case, so the select below returns.
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		body(ctx)
+	}()
+
+	select {
+	case <-done:
+	case <-ctx.Done():
+		// body has not returned; it is left running on its goroutine.
+		t.Fatalf("race test exceeded happy-path budget of %s — handler is wedged", raceHappyPathBudget)
+	}
+}
+
+// TestStaleSourceRaceMissingAndRemove is the deterministic regression
+// test for the production fix in Serve():
+//
+//   - Pre-fix the parent serve loop captured `state == missing` and
+//     `source = u.src` BEFORE handing the work to a worker goroutine.
+//     A REMOVE event for the same page that arrived between then and
+//     the worker actually running would silently leave the worker
+//     with a stale `source = u.src` snapshot, which it would then
+//     UFFDIO_COPY into the page that the kernel had just unmapped.
+//
+//   - Post-fix the worker reads pageTracker state INSIDE the
+//     goroutine, under settleRequests.RLock, atomically with the
+//     decision of which `source` to use.
+//
+// The test installs a barrierBeforeRLock on page X (so the worker
+// for X parks before it can read state), triggers a MISSING read
+// fault on X from the parent (a read, not a write, so the byte under
+// inspection is not overwritten), waits for the worker to park, fires
+// MADV_DONTNEED on X (which can take settleRequests.Lock immediately
+// — no worker holds RLock), and then releases the worker. After
+// release the worker, post-fix, observes state=removed under RLock
+// and zero-faults; pre-fix it would have UFFDIO_COPY'd the planted
+// sentinel byte from u.src. A direct read of the page contents
+// distinguishes the two outcomes deterministically.
+func TestStaleSourceRaceMissingAndRemove(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name     string
+		pagesize uint64
+	}{
+		{name: "4k", pagesize: header.PageSize},
+		{name: "hugepage", pagesize: header.HugepageSize},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			withRaceContext(t, func(ctx context.Context) {
+				// Plant a deterministic, non-zero sentinel as the
+				// first byte of the source data for the page we'll
+				// race on. Pre-fix, the worker would UFFDIO_COPY this
+				// sentinel into the page after the REMOVE has already
+				// unmapped it. Post-fix the worker reads
+				// state == removed under RLock and zero-fills.
+				// Planting goes through testConfig.sourcePatcher so
+				// it lands BOTH in the parent's MemorySlicer and in
+				// the on-disk content file the child reads.
+				const sentinel = byte(0xC3)
+				const pageIdx = 1
+				pageOffset := int64(pageIdx) * int64(tt.pagesize)
+
+				cfg := testConfig{
+					pagesize:      tt.pagesize,
+					numberOfPages: 4,
+					barriers:      true,
+					sourcePatcher: func(content []byte) {
+						content[pageOffset] = sentinel
+					},
+				}
+
+				h, err := configureCrossProcessTest(t, cfg)
+				require.NoError(t, err)
+
+				memStart := uintptr(unsafe.Pointer(&(*h.memoryArea)[0]))
+				addr := memStart + uintptr(pageIdx)*uintptr(tt.pagesize)
+
+				token, err := h.installFaultBarrier(ctx, addr, barrierBeforeRLock)
+				require.NoError(t, err)
+
+				// Trigger a READ fault (NOT a write — a write would
+				// overwrite the very byte we want to inspect to
+				// distinguish the two outcomes). h.executeRead does
+				// the touch + content check; we run it in a goroutine
+				// because it blocks on the fault until we release the
+				// barrier.
+				readErrCh := make(chan error, 1)
+				go func() {
+					readErrCh <- h.executeRead(ctx, operation{offset: pageOffset, mode: operationModeRead})
+				}()
+
+				// Wait for the worker for `addr` to park at the
+				// pre-RLock barrier.
+				waitCtx, waitCancel := context.WithTimeout(ctx, barrierArrivalDeadline)
+				err = h.waitFaultHeld(waitCtx, token)
+				waitCancel()
+				require.NoError(t, err, "worker for page %d (addr %#x) did not park at barrier", pageIdx, addr)
+
+				// Fire MADV_DONTNEED on the same page from the
+				// parent. The serve loop can take Lock immediately
+				// because the parked worker has not yet acquired
+				// RLock.
+				//
+				// madvise is a blocking syscall that cannot observe a
+				// context, so it runs on its own goroutine and is
+				// raced against madviseCtx; that is what actually
+				// enforces madviseBudget.
+				madviseCtx, madviseCancel := context.WithTimeout(ctx, madviseBudget)
+				madviseDone := make(chan error, 1)
+				go func() {
+					madviseDone <- h.executeRemove(operation{offset: pageOffset, mode: operationModeRemove})
+				}()
+
+				select {
+				case err = <-madviseDone:
+				case <-madviseCtx.Done():
+					// Unpark the worker so the child is not left
+					// wedged behind the barrier while the test fails.
+					_ = h.releaseFault(ctx, token)
+					err = fmt.Errorf("madvise still blocked after %s: %w", madviseBudget, madviseCtx.Err())
+				}
+				madviseCancel()
+				require.NoError(t, err, "MADV_DONTNEED on page %d did not return — handler dispatch wedged", pageIdx)
+
+				// Wait for the handler to commit setState(removed).
+				// A tight poll loop with a hard deadline is used
+				// rather than a sleep — the transition is
+				// microseconds in the happy path.
+				require.NoError(t, waitForState(ctx, h, uint64(pageOffset), removed, barrierArrivalDeadline),
+					"handler did not transition page %d to `removed` after MADV_DONTNEED", pageIdx)
+
+				// Release the parked worker. Post-fix it will
+				// observe state == removed and zero-fault; pre-fix
+				// it would proceed with the captured stale source.
+				require.NoError(t, h.releaseFault(ctx, token))
+
+				select {
+				case err := <-readErrCh:
+					// Pre-fix: executeRead's bytes.Equal succeeds
+					// (page contains src bytes), so err == nil but
+					// the page is observably wrong. Post-fix:
+					// bytes.Equal fails (page is zero-filled), so
+					// err != nil. We use the page-content assertion
+					// below instead of relying on this side-channel.
+					_ = err
+				case <-ctx.Done():
+					t.Fatalf("read of page %d did not unblock after barrier release", pageIdx)
+				}
+
+				// THE bug-detection assertion: post-fix the page
+				// MUST be zero-filled. Pre-fix the worker
+				// UFFDIO_COPY'd the planted sentinel.
+				page := (*h.memoryArea)[pageOffset : pageOffset+int64(tt.pagesize)]
+				assert.Equalf(t, byte(0), page[0],
+					"page %d first byte: want 0 (post-fix zero-fault for `removed` state), got %#x — "+
+						"if this equals the sentinel %#x, the worker used a stale `source = u.src` snapshot (regression)",
+					pageIdx, page[0], sentinel,
+				)
+
+				// Sanity: verify with /proc/self/pagemap that the
+				// page is in fact present after the racing read was
+				// served (worker re-mapped it as zero).
+				pagemap, err := testutils.NewPagemapReader()
+				require.NoError(t, err)
+				defer pagemap.Close()
+				entry, err := pagemap.ReadEntry(addr)
+				require.NoError(t, err)
+				assert.True(t, entry.IsPresent(), "page %d should be present after the racing read", pageIdx)
+			})
+		})
+	}
+}
+
+// TestNoMadviseDeadlockWithInflightCopy is a liveness regression test
+// for the user-visible symptom that originally surfaced the stale-
+// source race: the orchestrator's parent madvise(MADV_DONTNEED)
+// blocking forever because the UFFD handler loop was wedged behind a
+// worker.
+//
+// The harness parks the worker AFTER it has taken settleRequests.RLock
+// AND captured `source` (i.e. as if its UFFDIO_COPY was in flight).
+// From the parent we then issue MADV_DONTNEED on the same page and
+// require that madvise returns within `madviseBudget`. madvise
+// unblocks as soon as the handler's readEvents drains the REMOVE
+// event, and readEvents requires no lock — so any future change that
+// accidentally couples readEvents to settleRequests fails this test
+// at the `madviseBudget` boundary instead of as a CI
+// `go test -timeout` hang.
+func TestNoMadviseDeadlockWithInflightCopy(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + pagesize uint64 + }{ + {name: "4k", pagesize: header.PageSize}, + {name: "hugepage", pagesize: header.HugepageSize}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + withRaceContext(t, func(ctx context.Context) { + cfg := testConfig{ + pagesize: tt.pagesize, + numberOfPages: 4, + barriers: true, + } + + h, err := configureCrossProcessTest(t, cfg) + require.NoError(t, err) + + const pageIdx = 2 + pageOffset := int64(pageIdx) * int64(tt.pagesize) + + memStart := uintptr(unsafe.Pointer(&(*h.memoryArea)[0])) + addr := memStart + uintptr(pageIdx)*uintptr(tt.pagesize) + + token, err := h.installFaultBarrier(ctx, addr, barrierBeforeFaultPage) + require.NoError(t, err) + + writeErrCh := make(chan error, 1) + go func() { + writeErrCh <- h.executeWrite(ctx, operation{offset: pageOffset, mode: operationModeWrite}) + }() + + waitCtx, waitCancel := context.WithTimeout(ctx, barrierArrivalDeadline) + err = h.waitFaultHeld(waitCtx, token) + waitCancel() + require.NoError(t, err, "worker for page %d (addr %#x) did not park at pre-COPY barrier", pageIdx, addr) + + // Worker is parked AFTER RLock. Issue MADV_DONTNEED + // on the same page from the parent. The handler's + // readEvents must drain the REMOVE event (so madvise + // returns) even while the worker holds RLock. 
+ madviseDone := make(chan error, 1) + go func() { + madviseDone <- unix.Madvise((*h.memoryArea)[pageOffset:pageOffset+int64(tt.pagesize)], unix.MADV_DONTNEED) + }() + + select { + case err := <-madviseDone: + require.NoError(t, err) + case <-time.After(madviseBudget): + _ = h.releaseFault(ctx, token) + <-writeErrCh + t.Fatalf("DEADLOCK: madvise(MADV_DONTNEED) on page %d did not return within %s "+ + "while a worker was parked holding settleRequests.RLock — readEvents must not require any lock", + pageIdx, madviseBudget) + } + + require.NoError(t, h.releaseFault(ctx, token)) + + select { + case err := <-writeErrCh: + require.NoError(t, err) + case <-ctx.Done(): + t.Fatalf("user-side write of page %d did not unblock after barrier release", pageIdx) + } + }) + }) + } +} + +// TestFaultedShortCircuitOrdering uses the gated harness to +// deterministically queue a WRITE pagefault for a fresh page AND a +// REMOVE for an already-faulted page in the SAME serve-loop +// iteration. After resume, the post-batch state is asserted: the +// REMOVE'd page is `removed` and the racing-write page is `faulted`. +// +// Both pre-fix and post-fix code reach the same end state for this +// scenario (REMOVE batch runs before the pagefault dispatch loop in +// every Serve iteration). This test guards the batch-processing +// invariant itself: any future change that, for example, dispatched +// pagefaults before draining REMOVEs would fail this test as a +// concrete state-mismatch assertion rather than a 30-minute hang. +// +// NOTE: this test deliberately does NOT call t.Parallel(). While the +// handler is in the gated `paused` state, the user thread that +// triggered the queued WRITE fault is suspended in the kernel's +// pagefault path. From the Go runtime's perspective that goroutine +// is "running" (not in syscall, since it's a plain memory store) but +// can't be preempted. 
If a CONCURRENT cross-process test in the same +// binary triggers a stop-the-world GC pause during this window, STW +// will wait forever for the suspended goroutine to reach a safe +// point — the kernel can't deliver the SIGURG preempt signal until +// the pagefault is served, and the handler is paused. Running this +// test sequentially avoids that interleaving. +func TestFaultedShortCircuitOrdering(t *testing.T) { + tests := []struct { + name string + pagesize uint64 + }{ + {name: "4k", pagesize: header.PageSize}, + {name: "hugepage", pagesize: header.HugepageSize}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + withRaceContext(t, func(_ context.Context) { + cfg := testConfig{ + pagesize: tt.pagesize, + numberOfPages: 2, + gated: true, + operations: []operation{ + {offset: 0, mode: operationModeRead}, + {mode: operationModeServePause}, + {offset: 0, mode: operationModeRemove, async: true}, + {mode: operationModeSleep}, + {offset: int64(tt.pagesize), mode: operationModeWrite, async: true}, + {mode: operationModeSleep}, + {mode: operationModeServeResume}, + }, + } + + h, err := configureCrossProcessTest(t, cfg) + require.NoError(t, err) + + h.executeAll(t, cfg.operations) + + states, err := h.pageStatesOnce() + require.NoError(t, err) + + assert.Contains(t, states.removed, uint(0), + "page 0 should be `removed` after REMOVE batch (got removed=%v faulted=%v)", + states.removed, states.faulted, + ) + assert.Contains(t, states.faulted, uint(tt.pagesize), + "page 1 (offset %d) should be `faulted` after the racing write was served (got removed=%v faulted=%v)", + tt.pagesize, states.removed, states.faulted, + ) + }) + }) + } +} + +// waitForState polls the child's PageStates RPC until the page at +// the given offset reaches `want` or `deadline` elapses. 
Each RPC +// round-trip is microseconds-to-low-milliseconds; we yield with a +// small sleep between polls so the harness doesn't burn an entire +// CPU on tight-loop encoding while the rest of the suite is also +// running cross-process tests. +func waitForState(ctx context.Context, h *testHandler, offset uint64, want pageState, deadline time.Duration) error { + const pollInterval = 1 * time.Millisecond + + end := time.Now().Add(deadline) + for { + states, err := h.pageStatesOnce() + if err != nil { + return err + } + + var bucket []uint + switch want { + case removed: + bucket = states.removed + case faulted: + bucket = states.faulted + } + + for _, off := range bucket { + if uint64(off) == offset { + return nil + } + } + + if time.Now().After(end) { + return fmt.Errorf("page state at offset %d: want %d after %s — last seen removed=%v faulted=%v", + offset, want, deadline, states.removed, states.faulted) + } + + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(pollInterval): + } + } +} diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/remove_test.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/remove_test.go new file mode 100644 index 0000000000..6ec229f78f --- /dev/null +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/remove_test.go @@ -0,0 +1,297 @@ +package userfaultfd + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/e2b-dev/infra/packages/shared/pkg/storage/header" +) + +func TestRemove(t *testing.T) { + t.Parallel() + + tests := []testConfig{ + { + name: "4k read then remove", + pagesize: header.PageSize, + numberOfPages: 2, + operations: []operation{ + {offset: 0, mode: operationModeRead}, + {offset: 0, mode: operationModeRemove}, + {mode: operationModeSleep}, + }, + }, + { + name: "hugepage read then remove", + pagesize: header.HugepageSize, + numberOfPages: 2, + operations: []operation{ + {offset: 0, mode: operationModeRead}, + 
{offset: 0, mode: operationModeRemove}, + {mode: operationModeSleep}, + }, + }, + { + name: "4k write then remove", + pagesize: header.PageSize, + numberOfPages: 2, + operations: []operation{ + {offset: 0, mode: operationModeWrite}, + {offset: 0, mode: operationModeRemove}, + {mode: operationModeSleep}, + }, + }, + { + name: "hugepage write then remove", + pagesize: header.HugepageSize, + numberOfPages: 2, + operations: []operation{ + {offset: 0, mode: operationModeWrite}, + {offset: 0, mode: operationModeRemove}, + {mode: operationModeSleep}, + }, + }, + { + name: "4k selective remove", + pagesize: header.PageSize, + numberOfPages: 2, + operations: []operation{ + {offset: 0, mode: operationModeRead}, + {offset: int64(header.PageSize), mode: operationModeWrite}, + {offset: 0, mode: operationModeRemove}, + {mode: operationModeSleep}, + }, + }, + { + name: "hugepage selective remove", + pagesize: header.HugepageSize, + numberOfPages: 2, + operations: []operation{ + {offset: 0, mode: operationModeRead}, + {offset: int64(header.HugepageSize), mode: operationModeWrite}, + {offset: 0, mode: operationModeRemove}, + {mode: operationModeSleep}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + h, err := configureCrossProcessTest(t, tt) + require.NoError(t, err) + + h.executeAll(t, tt.operations) + + states, err := h.pageStatesOnce() + require.NoError(t, err) + + removedOffsets := getOperationsOffsets(tt.operations, operationModeRemove) + assert.ElementsMatch(t, removedOffsets, states.removed) + + faultedOffsets := getOperationsOffsets(tt.operations, operationModeRead|operationModeWrite) + for _, r := range removedOffsets { + faultedOffsets = removeOffset(faultedOffsets, r) + } + assert.ElementsMatch(t, faultedOffsets, states.faulted) + + h.checkDirtiness(t, tt.operations) + }) + } +} + +// TestRemoveThenFault asserts that after MADV_DONTNEED + a subsequent write, +// the handler re-faults the page (state transitions: 
faulted → removed → faulted). +func TestRemoveThenFault(t *testing.T) { + t.Parallel() + + tests := []testConfig{ + { + name: "4k read, remove, write", + pagesize: header.PageSize, + numberOfPages: 2, + operations: []operation{ + {offset: 0, mode: operationModeRead}, + {offset: 0, mode: operationModeRemove}, + {offset: 0, mode: operationModeWrite}, + }, + }, + { + name: "hugepage read, remove, write", + pagesize: header.HugepageSize, + numberOfPages: 2, + operations: []operation{ + {offset: 0, mode: operationModeRead}, + {offset: 0, mode: operationModeRemove}, + {offset: 0, mode: operationModeWrite}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + h, err := configureCrossProcessTest(t, tt) + require.NoError(t, err) + + h.executeAll(t, tt.operations) + + states, err := h.pageStatesOnce() + require.NoError(t, err) + + assert.Empty(t, states.removed, "page should not be in removed state after re-fault") + assert.Contains(t, states.faulted, uint(0), "page should be back in faulted state") + + h.checkDirtiness(t, tt.operations) + }) + } +} + +// TestRemoveThenWriteGated verifies that when the handler is stopped, the +// kernel keeps the page mapped until REMOVE is acked. A concurrent write +// succeeds without faulting because MADV_DONTNEED blocks (waiting for ack) +// and doesn't unmap the page until the handler processes the event. +// When the handler resumes, it only sees the REMOVE — no MISSING fault. +// +// NOTE: this test (and the other gated tests below) deliberately does +// NOT call t.Parallel(). While the handler is paused, any user thread +// that triggers a queued pagefault on the registered region is +// suspended in the kernel's pagefault path. From the Go runtime's +// perspective that goroutine is "running" (not in syscall, since it's +// a plain memory store) and cannot be preempted until the fault is +// served. 
If a CONCURRENT cross-process test in the same binary +// triggers a stop-the-world GC pause during this window, STW will +// wait forever for the suspended goroutine to reach a safe point — +// the kernel cannot deliver the SIGURG preempt signal until the +// pagefault is served, and the handler is paused. Running the gated +// tests sequentially avoids that interleaving. +func TestRemoveThenWriteGated(t *testing.T) { + tests := []testConfig{ + { + name: "4k gated remove with concurrent write", + pagesize: header.PageSize, + numberOfPages: 2, + gated: true, + operations: []operation{ + {offset: 0, mode: operationModeRead}, + {mode: operationModeServePause}, + {offset: 0, mode: operationModeRemove, async: true}, + {mode: operationModeSleep}, + {offset: 0, mode: operationModeWrite, async: true}, + {mode: operationModeSleep}, + {mode: operationModeServeResume}, + }, + }, + { + name: "hugepage gated remove with concurrent write", + pagesize: header.HugepageSize, + numberOfPages: 2, + gated: true, + operations: []operation{ + {offset: 0, mode: operationModeRead}, + {mode: operationModeServePause}, + {offset: 0, mode: operationModeRemove, async: true}, + {mode: operationModeSleep}, + {offset: 0, mode: operationModeWrite, async: true}, + {mode: operationModeSleep}, + {mode: operationModeServeResume}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + h, err := configureCrossProcessTest(t, tt) + require.NoError(t, err) + + h.executeAll(t, tt.operations) + + states, err := h.pageStatesOnce() + require.NoError(t, err) + + // The page stays mapped until REMOVE is acked, so the concurrent + // write succeeds without triggering a MISSING fault. The handler + // only processes the REMOVE event. 
+ assert.ElementsMatch(t, []uint{0}, states.removed) + assert.Empty(t, states.faulted) + }) + } +} + +// TestWriteThenRemoveGated verifies the serve loop's ordering guarantee: +// REMOVE events are processed before pagefaults even when the MISSING pagefault +// was queued first. The write to a missing page triggers MISSING (queued first), +// then MADV_DONTNEED triggers REMOVE (queued second). When the handler resumes, +// it processes REMOVE first, then MISSING — the write is not skipped. +// +// See TestRemoveThenWriteGated for why this test is not parallel. +func TestWriteThenRemoveGated(t *testing.T) { + tests := []testConfig{ + { + name: "4k write then remove in same batch", + pagesize: header.PageSize, + numberOfPages: 2, + gated: true, + operations: []operation{ + {offset: 0, mode: operationModeRead}, + {mode: operationModeServePause}, + // MISSING for page 1 queued first + {offset: int64(header.PageSize), mode: operationModeWrite, async: true}, + {mode: operationModeSleep}, + // REMOVE for page 0 queued second + {offset: 0, mode: operationModeRemove, async: true}, + {mode: operationModeSleep}, + {mode: operationModeServeResume}, + }, + }, + { + name: "hugepage write then remove in same batch", + pagesize: header.HugepageSize, + numberOfPages: 2, + gated: true, + operations: []operation{ + {offset: 0, mode: operationModeRead}, + {mode: operationModeServePause}, + {offset: int64(header.HugepageSize), mode: operationModeWrite, async: true}, + {mode: operationModeSleep}, + {offset: 0, mode: operationModeRemove, async: true}, + {mode: operationModeSleep}, + {mode: operationModeServeResume}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + h, err := configureCrossProcessTest(t, tt) + require.NoError(t, err) + + h.executeAll(t, tt.operations) + + states, err := h.pageStatesOnce() + require.NoError(t, err) + + // Page 0 was removed + assert.Contains(t, states.removed, uint(0)) + // Page 1 was faulted by the write — not skipped + 
pageOffset := uint(tt.pagesize) + assert.Contains(t, states.faulted, pageOffset, + "write pagefault should not be skipped even when batched with REMOVE") + }) + } +} + +func removeOffset(offsets []uint, target uint) []uint { + result := make([]uint, 0, len(offsets)) + for _, o := range offsets { + if o != target { + result = append(result, o) + } + } + + return result +} diff --git a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go index 133af0f547..da2200163b 100644 --- a/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go +++ b/packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go @@ -21,6 +21,7 @@ import ( "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/uffd/fdexit" "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/uffd/memory" "github.com/e2b-dev/infra/packages/shared/pkg/logger" + "github.com/e2b-dev/infra/packages/shared/pkg/storage/header" ) var tracer = otel.Tracer("github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/uffd/userfaultfd") @@ -53,16 +54,45 @@ type Userfaultfd struct { pageSize uintptr pageTracker *pageTracker - // We use the settleRequests to guard the pageTracker so we can access a consistent state of the pageTracker after the requests are finished. - settleRequests sync.RWMutex - + // settleRequests guards the pageTracker and prefetchTracker so we can access a + // consistent state after in-flight requests have finished, and so REMOVE events + // can update the pageTracker without racing with concurrent faultPage workers. + settleRequests sync.RWMutex prefetchTracker *block.PrefetchTracker - wg errgroup.Group - // defaultCopyMode overrides the UFFDIO_COPY mode for all faults when non-zero. defaultCopyMode CULong + wg errgroup.Group + + // wakeupPipe is a self-pipe used to wake the poll loop when a goroutine + // defers a page fault. 
Without this, a deferred fault could be orphaned + // if no new UFFD events arrive to wake poll. + wakeupPipe [2]int + + // Test-only synchronisation hooks. Both default to nil and the nil + // branch costs a single un-predictable load + branch in the hot path, + // so they are effectively free in production. They MUST only be set + // from _test.go files. They let tests park a worker goroutine at a + // known point so a racing event (REMOVE, MISSING) can be issued + // deterministically before the worker proceeds. + // + // - beforeWorkerRLockHook: called as the very first thing in the + // worker goroutine, BEFORE settleRequests.RLock(). At this point + // the test holds the goroutine before it can claim the read lock, + // so a parallel REMOVE batch in the parent loop can take the + // write lock immediately and mutate page state. This is the + // window the production fix actually closes — the post-fix + // worker reads state under RLock, so it observes the REMOVE. + // + // - beforeFaultPageHook: called inside the worker AFTER RLock and + // AFTER the state-vs-source decision, but BEFORE the actual + // UFFDIO_COPY/UFFDIO_ZEROPAGE syscall. Lets a test simulate a + // slow data fetch / in-flight COPY so a parent madvise can race + // against an in-flight worker. 
+ beforeWorkerRLockHook func(addr uintptr) + beforeFaultPageHook func(addr uintptr) + logger logger.Logger } @@ -76,6 +106,11 @@ func NewUserfaultfdFromFd(fd uintptr, src block.Slicer, m *memory.Mapping, logge } } + var wakeupPipe [2]int + if err := syscall.Pipe2(wakeupPipe[:], syscall.O_NONBLOCK|syscall.O_CLOEXEC); err != nil { + return nil, fmt.Errorf("failed to create wakeup pipe: %w", err) + } + u := &Userfaultfd{ fd: Fd(fd), src: src, @@ -83,6 +118,7 @@ func NewUserfaultfdFromFd(fd uintptr, src block.Slicer, m *memory.Mapping, logge pageTracker: newPageTracker(uintptr(blockSize)), prefetchTracker: block.NewPrefetchTracker(blockSize), ma: m, + wakeupPipe: wakeupPipe, logger: logger, } @@ -94,8 +130,61 @@ func NewUserfaultfdFromFd(fd uintptr, src block.Slicer, m *memory.Mapping, logge return u, nil } -func (u *Userfaultfd) Close() error { - return u.fd.close() +// readEvents reads all available UFFD events from the file descriptor, +// returning removes and pagefaults separately. +func (u *Userfaultfd) readEvents(ctx context.Context) ([]*UffdRemove, []*UffdPagefault, error) { + // We are reusing the same buffer for all events, but that's fine, + // because getMsgArg, will make a copy of the actual event from `buf` + // and it's a pointer to this copy that we are returning to caller. + buf := make([]byte, unsafe.Sizeof(UffdMsg{})) + + var removes []*UffdRemove + var pagefaults []*UffdPagefault + + for { + n, err := syscall.Read(int(u.fd), buf) + if errors.Is(err, syscall.EINTR) { + u.logger.Debug(ctx, "uffd: interrupted read. Reading again") + + continue + } + + if errors.Is(err, syscall.EAGAIN) { + // EAGAIN means that we have drained all the available events for the file descriptor. + // We are done. + break + } + + if err != nil { + return nil, nil, fmt.Errorf("failed reading uffd: %w", err) + } + + // `Read` returned with 0 bytes actually read. No more events to read + // and the writing end has been closed. 
This should never happen, unless + // something (us or Firecracker) closes the file descriptor + // TODO: Ignore it for now, but maybe we should return an error(?) + if n == 0 { + break + } + + msg := (*UffdMsg)(unsafe.Pointer(&buf[0])) + + event := getMsgEvent(msg) + arg := getMsgArg(msg) + + switch event { + case UFFD_EVENT_PAGEFAULT: + v := *(*UffdPagefault)(unsafe.Pointer(&arg[0])) + pagefaults = append(pagefaults, &v) + case UFFD_EVENT_REMOVE: + v := *(*UffdRemove)(unsafe.Pointer(&arg[0])) + removes = append(removes, &v) + default: + return nil, nil, ErrUnexpectedEventType + } + } + + return removes, pagefaults, nil } func (u *Userfaultfd) Serve( @@ -105,6 +194,7 @@ func (u *Userfaultfd) Serve( pollFds := []unix.PollFd{ {Fd: int32(u.fd), Events: unix.POLLIN}, {Fd: fdExit.Reader(), Events: unix.POLLIN}, + {Fd: int32(u.wakeupPipe[0]), Events: unix.POLLIN}, } eagainCounter := newCounterReporter(u.logger, "uffd: eagain with no pagefaults (accumulated)") @@ -125,6 +215,8 @@ func (u *Userfaultfd) Serve( unix.POLLNVAL: "POLLNVAL", } + var deferred deferredFaults + for { if _, err := unix.Poll( pollFds, @@ -166,6 +258,11 @@ func (u *Userfaultfd) Serve( } } + // Drain the wakeup pipe if it fired (a goroutine deferred a fault). + if hasEvent(pollFds[2].Revents, unix.POLLIN) { + u.drainWakeupPipe() + } + uffdFd := pollFds[0] // Track uffd error events @@ -175,56 +272,41 @@ func (u *Userfaultfd) Serve( } } - if !hasEvent(uffdFd.Revents, unix.POLLIN) { - // Uffd is not ready for reading as there is nothing to read on the fd. 
- // https://github.com/firecracker-microvm/firecracker/issues/5056 - // https://elixir.bootlin.com/linux/v6.8.12/source/fs/userfaultfd.c#L1149 - // TODO: Check for all the errors - // - https://docs.kernel.org/admin-guide/mm/userfaultfd.html - // - https://elixir.bootlin.com/linux/v6.8.12/source/fs/userfaultfd.c - // - https://man7.org/linux/man-pages/man2/userfaultfd.2.html - // It might be possible to just check for data != 0 in the syscall.Read loop - // but I don't feel confident about doing that. - noDataCounter.Increase("POLLIN") - - continue - } - - buf := make([]byte, unsafe.Sizeof(UffdMsg{})) - + var removes []*UffdRemove var pagefaults []*UffdPagefault - for { - _, err := syscall.Read(int(u.fd), buf) - if err == syscall.EINTR { - u.logger.Debug(ctx, "uffd: interrupted read, reading again") - - continue - } - - if err == syscall.EAGAIN { - break - } + if hasEvent(uffdFd.Revents, unix.POLLIN) { + var err error + removes, pagefaults, err = u.readEvents(ctx) if err != nil { u.logger.Error(ctx, "uffd: read error", zap.Error(err)) return fmt.Errorf("failed to read: %w", err) } + } else { + noDataCounter.Increase("POLLIN") + } - msg := *(*UffdMsg)(unsafe.Pointer(&buf[0])) - - if msgEvent := getMsgEvent(&msg); msgEvent != UFFD_EVENT_PAGEFAULT { - u.logger.Error(ctx, "UFFD serve unexpected event type", zap.Any("event_type", msgEvent)) - - return ErrUnexpectedEventType + // First handle the UFFD_EVENT_REMOVE events. Take the settleRequests write lock to ensure that no + // other page or pre-fault operation is running concurrently. + // A goroutine from the previous batch or a prefault operation could still be executing + // setState(faulted) after its UFFDIO_COPY returned. If we process a REMOVE for the same + // page before that goroutine finishes, the goroutine's setState(faulted) would + // overwrite the removed state we just set. 
+ if len(removes) > 0 { + u.settleRequests.Lock() + for _, rm := range removes { + u.pageTracker.setState(uintptr(rm.start), uintptr(rm.end), removed) } - - arg := getMsgArg(&msg) - pagefault := *(*UffdPagefault)(unsafe.Pointer(&arg[0])) - pagefaults = append(pagefaults, &pagefault) + u.settleRequests.Unlock() } + // Collect deferred pagefaults from previous goroutines that got EAGAIN. + // The wakeup pipe ensures we don't sleep through these. + pagefaults = append(deferred.drain(), pagefaults...) + if len(pagefaults) == 0 { + // Woke up but nothing to do (e.g., only REMOVE events, or spurious wakeup). eagainCounter.Increase("EMPTY_DRAIN") continue @@ -233,169 +315,289 @@ func (u *Userfaultfd) Serve( eagainCounter.Log(ctx) noDataCounter.Log(ctx) - for _, pagefault := range pagefaults { - flags := pagefault.flags + for _, pf := range pagefaults { + // We don't handle minor page faults. + if pf.flags&UFFD_PAGEFAULT_FLAG_MINOR != 0 { + return errors.New("unexpected MINOR pagefault event, closing UFFD") + } - addr := getPagefaultAddress(pagefault) + // We don't handle write-protection page faults, we're using asynchronous write protection. + if pf.flags&UFFD_PAGEFAULT_FLAG_WP != 0 { + return errors.New("unexpected WP pagefault event, closing UFFD") + } + addr := getPagefaultAddress(pf) offset, err := u.ma.GetOffset(addr) if err != nil { - u.logger.Error(ctx, "UFFD serve get mapping error", zap.Error(err)) + u.logger.Error(ctx, "UFFD serve got mapping error", zap.Error(err)) return fmt.Errorf("failed to map: %w", err) } - // Handle write to missing page (WRITE flag) - // If the event has WRITE flag, it was a write to a missing page. - // For the write to be executed, we first need to copy the page from the source to the guest memory. 
- if flags&UFFD_PAGEFAULT_FLAG_WRITE != 0 { - u.wg.Go(func() error { - return u.faultPage(ctx, addr, offset, u.src, fdExit.SignalExit, block.Write) - }) - - continue - } - - // Handle read to missing page ("MISSING" flag) - // If the event has no flags, it was a read to a missing page and we need to copy the page from the source to the guest memory. - if flags == 0 { - u.wg.Go(func() error { - return u.faultPage(ctx, addr, offset, u.src, fdExit.SignalExit, block.Read) - }) - - continue - } - - // MINOR and WP flags are not expected as we don't register the uffd with these flags. - return fmt.Errorf("unexpected event type: %d, closing uffd", flags) + u.wg.Go(func() error { + // Test-only barrier: park the worker BEFORE it takes + // RLock. While parked, the parent loop is free to take + // settleRequests.Lock() to process REMOVE events, which + // is exactly the window the production fix had to close + // (pre-fix the worker had already captured a stale state + // snapshot in the parent loop). + if hook := u.beforeWorkerRLockHook; hook != nil { + hook(addr) + } + + // The RLock must be acquired inside the goroutine — and it must be acquired + // BEFORE we read the pageTracker / u.src state — so that the read+act+commit + // sequence (state lookup → faultPage → setState) is atomic with respect to + // any concurrent REMOVE batch (which takes settleRequests.Lock()). If the + // state read happened in the parent loop, a REMOVE could land between the + // read and the goroutine acquiring the RLock, and the goroutine would still + // commit `faulted` afterwards, overwriting `removed`. + // + // This also protects the read of u.src: in the future src could be swapped + // out under settleRequests; reading it under the RLock keeps that safe. + u.settleRequests.RLock() + defer u.settleRequests.RUnlock() + + var source block.Slicer + + switch state := u.pageTracker.get(addr); state { + case faulted: + // Skip faulting the page. 
This has already been faulted, either during pre-faulting + // or because we handled another page fault on the same address in the current + // iteration. It can only transition out of `faulted` via a UFFD_EVENT_REMOVE, which + // will mark the page as `removed`. + // For this to work correctly, the used pages cannot be swappable. + return nil + case removed: + // Fault the page as empty (no source). The page was MADV_DONTNEED'd; the + // kernel still expects an UFFDIO_COPY/ZEROPAGE ack for the original + // MISSING fault, otherwise the faulting thread stays blocked. + case missing: + source = u.src + default: + return fmt.Errorf("unexpected pageState: %#v", state) + } + + var accessType block.AccessType + + if pf.flags&UFFD_PAGEFAULT_FLAG_WRITE == 0 { + accessType = block.Read + } else { + accessType = block.Write + } + + // Test-only barrier: park the worker AFTER state has been + // read under RLock but BEFORE the actual UFFDIO_* syscall. + // Lets tests simulate a slow / in-flight COPY so the + // parent's madvise (and the subsequent REMOVE batch) can + // race against a worker that already holds RLock. + if hook := u.beforeFaultPageHook; hook != nil { + hook(addr) + } + + handled, err := u.faultPage( + ctx, + addr, + offset, + accessType, + source, + fdExit.SignalExit, + ) + if err != nil { + return err + } + + if handled { + u.pageTracker.setState(addr, addr+u.pageSize, faulted) + u.prefetchTracker.Add(offset, accessType) + } else { + deferred.push(pf) + u.signalWakeup() + } + + return nil + }) } } } -func (u *Userfaultfd) PrefetchData() block.PrefetchData { - // This will be at worst cancelled when the uffd is closed. - u.settleRequests.Lock() - // The locking here would work even without using defer (just lock-then-unlock the mutex), but at this point let's make it lock to the clone, - // so it is consistent even if there is a another uffd call after. 
- defer u.settleRequests.Unlock() - - return u.prefetchTracker.PrefetchData() -} - func (u *Userfaultfd) faultPage( ctx context.Context, addr uintptr, offset int64, + accessType block.AccessType, source block.Slicer, onFailure func() error, - accessType block.AccessType, -) error { +) (bool, error) { span := trace.SpanFromContext(ctx) - // The RLock must be called inside the goroutine to ensure RUnlock runs via defer, - // even if the errgroup is cancelled or the goroutine returns early. - // This guards against races between marking the page faulted / prefetched - // and another caller observing the pageTracker or prefetchTracker. - u.settleRequests.RLock() - defer u.settleRequests.RUnlock() - defer func() { if r := recover(); r != nil { u.logger.Error(ctx, "UFFD serve panic", zap.Any("pagesize", u.pageSize), zap.Any("panic", r)) } }() - var b []byte - var dataErr error - var attempt int + var writeErr error -retryLoop: - for attempt = range sliceMaxRetries + 1 { - b, dataErr = source.Slice(ctx, offset, int64(u.pageSize)) - if dataErr == nil { + mode := u.defaultCopyMode + if accessType == block.Read { + mode = UFFDIO_COPY_MODE_WP + } + + // Write to guest memory. nil data means zero-fill + switch { + case source == nil && u.pageSize == header.PageSize && accessType == block.Read: + // Firecracker uses anonymous mappings for 4K pages. Anonymous mappings can only + // be write protected once pages are populated. We need to enable write-protection + // *after* we serve the page fault. 
+ // + // To avoid the race condition, first serve the page without waking the thread + writeErr = u.fd.zero(addr, u.pageSize, UFFDIO_ZEROPAGE_MODE_DONTWAKE) + if writeErr != nil { break } - - if attempt >= sliceMaxRetries || ctx.Err() != nil { + // Then, write-protect the page + writeErr = u.fd.writeProtect(addr, u.pageSize, UFFDIO_WRITEPROTECT_MODE_WP) + if writeErr != nil { break } + // And, finally, wake up the faulting thread + writeErr = u.fd.wake(addr, u.pageSize) + case source == nil && u.pageSize == header.PageSize && accessType == block.Write: + // If this was a write access to a 4K page simply provide the zero page (clearing the WP bit) + // and wake up the thread in one step. + writeErr = u.fd.zero(addr, u.pageSize, 0) + case source == nil && u.pageSize == header.HugepageSize: + writeErr = u.fd.copy(addr, u.pageSize, header.EmptyHugePage, mode) + default: + var b []byte + var dataErr error + var attempt int + + retryLoop: + for attempt = range sliceMaxRetries + 1 { + b, dataErr = source.Slice(ctx, offset, int64(u.pageSize)) + if dataErr == nil { + break + } - u.logger.Warn(ctx, "UFFD serve slice error, retrying", - zap.Int("attempt", attempt+1), - zap.Int("max_attempts", sliceMaxRetries+1), - zap.Error(dataErr), - ) - - delay := min(sliceRetryBaseDelay<= sliceMaxRetries || ctx.Err() != nil { + break + } - backoff := time.NewTimer(delay + jitter) + u.logger.Warn(ctx, "UFFD serve slice error, retrying", + zap.Int("attempt", attempt+1), + zap.Int("max_attempts", sliceMaxRetries+1), + zap.Error(dataErr), + ) - select { - case <-ctx.Done(): - backoff.Stop() + delay := min(sliceRetryBaseDelay< 1 || (v.lastReleaseVersion.Major() == 1 && v.lastReleaseVersion.Minor() >= 14) +} diff --git a/packages/shared/pkg/featureflags/flags.go b/packages/shared/pkg/featureflags/flags.go index 83a4a0c11c..27cf349da8 100644 --- a/packages/shared/pkg/featureflags/flags.go +++ b/packages/shared/pkg/featureflags/flags.go @@ -119,6 +119,7 @@ var ( ExecutionMetricsOnWebhooksFlag = 
NewBoolFlag("execution-metrics-on-webhooks", false) // TODO: Remove NLT 20250315 SandboxLabelBasedSchedulingFlag = NewBoolFlag("sandbox-label-based-scheduling", false) OptimisticResourceAccountingFlag = NewBoolFlag("sandbox-placement-optimistic-resource-accounting", false) + FreePageReportingFlag = NewBoolFlag("free-page-reporting", false) ) type IntFlag struct { diff --git a/packages/shared/pkg/grpc/template-manager/template-manager.pb.go b/packages/shared/pkg/grpc/template-manager/template-manager.pb.go index 131823fd24..8c6ceae914 100644 --- a/packages/shared/pkg/grpc/template-manager/template-manager.pb.go +++ b/packages/shared/pkg/grpc/template-manager/template-manager.pb.go @@ -706,6 +706,7 @@ type TemplateConfig struct { Source isTemplateConfig_Source `protobuf_oneof:"source"` FromImageRegistry *FromImageRegistry `protobuf:"bytes,15,opt,name=fromImageRegistry,proto3,oneof" json:"fromImageRegistry,omitempty"` TeamID string `protobuf:"bytes,16,opt,name=teamID,proto3" json:"teamID,omitempty"` + FreePageReporting *bool `protobuf:"varint,17,opt,name=freePageReporting,proto3,oneof" json:"freePageReporting,omitempty"` } func (x *TemplateConfig) Reset() { @@ -862,6 +863,13 @@ func (x *TemplateConfig) GetTeamID() string { return "" } +func (x *TemplateConfig) GetFreePageReporting() bool { + if x != nil && x.FreePageReporting != nil { + return *x.FreePageReporting + } + return false +} + type isTemplateConfig_Source interface { isTemplateConfig_Source() } @@ -1432,7 +1440,7 @@ var file_template_manager_proto_rawDesc = []byte{ 0x03, 0x67, 0x63, 0x70, 0x12, 0x2c, 0x0a, 0x07, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x10, 0x2e, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x6c, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x79, 0x48, 0x00, 0x52, 0x07, 0x67, 0x65, 0x6e, 0x65, 0x72, - 0x61, 0x6c, 0x42, 0x06, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x22, 0x90, 0x05, 0x0a, 0x0e, 0x54, + 0x61, 0x6c, 0x42, 0x06, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 
0x22, 0xd9, 0x05, 0x0a, 0x0e, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x1e, 0x0a, 0x0a, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x49, 0x44, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x49, 0x44, 0x12, 0x18, 0x0a, @@ -1471,134 +1479,139 @@ var file_template_manager_proto_rawDesc = []byte{ 0x74, 0x72, 0x79, 0x48, 0x02, 0x52, 0x11, 0x66, 0x72, 0x6f, 0x6d, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x79, 0x88, 0x01, 0x01, 0x12, 0x16, 0x0a, 0x06, 0x74, 0x65, 0x61, 0x6d, 0x49, 0x44, 0x18, 0x10, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x74, 0x65, 0x61, - 0x6d, 0x49, 0x44, 0x42, 0x08, 0x0a, 0x06, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x42, 0x08, 0x0a, - 0x06, 0x5f, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x42, 0x14, 0x0a, 0x12, 0x5f, 0x66, 0x72, 0x6f, 0x6d, - 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x79, 0x22, 0xa3, 0x01, - 0x0a, 0x15, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, - 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x2b, 0x0a, 0x08, 0x74, 0x65, 0x6d, 0x70, 0x6c, - 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x0f, 0x2e, 0x54, 0x65, 0x6d, 0x70, - 0x6c, 0x61, 0x74, 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x52, 0x08, 0x74, 0x65, 0x6d, 0x70, - 0x6c, 0x61, 0x74, 0x65, 0x12, 0x23, 0x0a, 0x0a, 0x63, 0x61, 0x63, 0x68, 0x65, 0x53, 0x63, 0x6f, - 0x70, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x0a, 0x63, 0x61, 0x63, 0x68, - 0x65, 0x53, 0x63, 0x6f, 0x70, 0x65, 0x88, 0x01, 0x01, 0x12, 0x1d, 0x0a, 0x07, 0x76, 0x65, 0x72, - 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x48, 0x01, 0x52, 0x07, 0x76, 0x65, - 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x88, 0x01, 0x01, 0x42, 0x0d, 0x0a, 0x0b, 0x5f, 0x63, 0x61, 0x63, - 0x68, 0x65, 0x53, 0x63, 0x6f, 0x70, 0x65, 0x42, 0x0a, 0x0a, 0x08, 0x5f, 0x76, 0x65, 0x72, 0x73, - 0x69, 0x6f, 0x6e, 0x22, 0x8b, 0x03, 
0x0a, 0x15, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, - 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1e, 0x0a, - 0x0a, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x49, 0x44, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0a, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x49, 0x44, 0x12, 0x18, 0x0a, - 0x07, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x49, 0x44, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, - 0x62, 0x75, 0x69, 0x6c, 0x64, 0x49, 0x44, 0x12, 0x1b, 0x0a, 0x06, 0x6f, 0x66, 0x66, 0x73, 0x65, - 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x48, 0x00, 0x52, 0x06, 0x6f, 0x66, 0x66, 0x73, 0x65, - 0x74, 0x88, 0x01, 0x01, 0x12, 0x24, 0x0a, 0x05, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x18, 0x04, 0x20, - 0x01, 0x28, 0x0e, 0x32, 0x09, 0x2e, 0x4c, 0x6f, 0x67, 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x48, 0x01, - 0x52, 0x05, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x88, 0x01, 0x01, 0x12, 0x19, 0x0a, 0x05, 0x6c, 0x69, - 0x6d, 0x69, 0x74, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0d, 0x48, 0x02, 0x52, 0x05, 0x6c, 0x69, 0x6d, - 0x69, 0x74, 0x88, 0x01, 0x01, 0x12, 0x35, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x06, - 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, - 0x48, 0x03, 0x52, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x88, 0x01, 0x01, 0x12, 0x31, 0x0a, 0x03, - 0x65, 0x6e, 0x64, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, - 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, - 0x73, 0x74, 0x61, 0x6d, 0x70, 0x48, 0x04, 0x52, 0x03, 0x65, 0x6e, 0x64, 0x88, 0x01, 0x01, 0x12, - 0x31, 0x0a, 0x09, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x08, 0x20, 0x01, - 0x28, 0x0e, 0x32, 0x0e, 0x2e, 0x4c, 0x6f, 0x67, 0x73, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, - 0x6f, 0x6e, 0x48, 0x05, 0x52, 0x09, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 
0x69, 0x6f, 0x6e, 0x88, - 0x01, 0x01, 0x42, 0x09, 0x0a, 0x07, 0x5f, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x42, 0x08, 0x0a, - 0x06, 0x5f, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x42, 0x08, 0x0a, 0x06, 0x5f, 0x6c, 0x69, 0x6d, 0x69, - 0x74, 0x42, 0x08, 0x0a, 0x06, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x42, 0x06, 0x0a, 0x04, 0x5f, - 0x65, 0x6e, 0x64, 0x42, 0x0c, 0x0a, 0x0a, 0x5f, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, 0x6f, - 0x6e, 0x22, 0x56, 0x0a, 0x1a, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, 0x69, - 0x6c, 0x64, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, - 0x18, 0x0a, 0x07, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x49, 0x44, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x07, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x49, 0x44, 0x12, 0x1e, 0x0a, 0x0a, 0x74, 0x65, 0x6d, - 0x70, 0x6c, 0x61, 0x74, 0x65, 0x49, 0x44, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x74, - 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x49, 0x44, 0x22, 0xbb, 0x01, 0x0a, 0x15, 0x54, 0x65, - 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x4d, 0x65, 0x74, 0x61, 0x64, - 0x61, 0x74, 0x61, 0x12, 0x24, 0x0a, 0x0d, 0x72, 0x6f, 0x6f, 0x74, 0x66, 0x73, 0x53, 0x69, 0x7a, - 0x65, 0x4b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0d, 0x72, 0x6f, 0x6f, 0x74, - 0x66, 0x73, 0x53, 0x69, 0x7a, 0x65, 0x4b, 0x65, 0x79, 0x12, 0x26, 0x0a, 0x0e, 0x65, 0x6e, 0x76, - 0x64, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x4b, 0x65, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x0e, 0x65, 0x6e, 0x76, 0x64, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x4b, 0x65, - 0x79, 0x12, 0x24, 0x0a, 0x0d, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, - 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, - 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x2e, 0x0a, 0x12, 0x66, 0x69, 0x72, 0x65, 0x63, - 0x72, 0x61, 0x63, 0x6b, 0x65, 0x72, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x04, 0x20, - 0x01, 0x28, 
0x09, 0x52, 0x12, 0x66, 0x69, 0x72, 0x65, 0x63, 0x72, 0x61, 0x63, 0x6b, 0x65, 0x72, - 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x83, 0x02, 0x0a, 0x15, 0x54, 0x65, 0x6d, 0x70, - 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x4c, 0x6f, 0x67, 0x45, 0x6e, 0x74, 0x72, - 0x79, 0x12, 0x38, 0x0a, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, - 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x12, 0x18, 0x0a, 0x07, 0x6d, - 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, - 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x1f, 0x0a, 0x05, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x18, 0x03, - 0x20, 0x01, 0x28, 0x0e, 0x32, 0x09, 0x2e, 0x4c, 0x6f, 0x67, 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x52, - 0x05, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x12, 0x3a, 0x0a, 0x06, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, - 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x22, 0x2e, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, - 0x65, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x4c, 0x6f, 0x67, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x2e, 0x46, - 0x69, 0x65, 0x6c, 0x64, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x06, 0x66, 0x69, 0x65, 0x6c, - 0x64, 0x73, 0x1a, 0x39, 0x0a, 0x0b, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x45, 0x6e, 0x74, 0x72, - 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, - 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x57, 0x0a, - 0x19, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, - 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, - 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, 0x20, 
0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, - 0x73, 0x61, 0x67, 0x65, 0x12, 0x17, 0x0a, 0x04, 0x73, 0x74, 0x65, 0x70, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x09, 0x48, 0x00, 0x52, 0x04, 0x73, 0x74, 0x65, 0x70, 0x88, 0x01, 0x01, 0x42, 0x07, 0x0a, - 0x05, 0x5f, 0x73, 0x74, 0x65, 0x70, 0x22, 0x86, 0x02, 0x0a, 0x1b, 0x54, 0x65, 0x6d, 0x70, 0x6c, + 0x6d, 0x49, 0x44, 0x12, 0x31, 0x0a, 0x11, 0x66, 0x72, 0x65, 0x65, 0x50, 0x61, 0x67, 0x65, 0x52, + 0x65, 0x70, 0x6f, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x18, 0x11, 0x20, 0x01, 0x28, 0x08, 0x48, 0x03, + 0x52, 0x11, 0x66, 0x72, 0x65, 0x65, 0x50, 0x61, 0x67, 0x65, 0x52, 0x65, 0x70, 0x6f, 0x72, 0x74, + 0x69, 0x6e, 0x67, 0x88, 0x01, 0x01, 0x42, 0x08, 0x0a, 0x06, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, + 0x42, 0x08, 0x0a, 0x06, 0x5f, 0x66, 0x6f, 0x72, 0x63, 0x65, 0x42, 0x14, 0x0a, 0x12, 0x5f, 0x66, + 0x72, 0x6f, 0x6d, 0x49, 0x6d, 0x61, 0x67, 0x65, 0x52, 0x65, 0x67, 0x69, 0x73, 0x74, 0x72, 0x79, + 0x42, 0x14, 0x0a, 0x12, 0x5f, 0x66, 0x72, 0x65, 0x65, 0x50, 0x61, 0x67, 0x65, 0x52, 0x65, 0x70, + 0x6f, 0x72, 0x74, 0x69, 0x6e, 0x67, 0x22, 0xa3, 0x01, 0x0a, 0x15, 0x54, 0x65, 0x6d, 0x70, 0x6c, + 0x61, 0x74, 0x65, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x12, 0x2b, 0x0a, 0x08, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x0f, 0x2e, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x43, 0x6f, 0x6e, + 0x66, 0x69, 0x67, 0x52, 0x08, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x12, 0x23, 0x0a, + 0x0a, 0x63, 0x61, 0x63, 0x68, 0x65, 0x53, 0x63, 0x6f, 0x70, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x09, 0x48, 0x00, 0x52, 0x0a, 0x63, 0x61, 0x63, 0x68, 0x65, 0x53, 0x63, 0x6f, 0x70, 0x65, 0x88, + 0x01, 0x01, 0x12, 0x1d, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, + 0x01, 0x28, 0x09, 0x48, 0x01, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x88, 0x01, + 0x01, 0x42, 0x0d, 0x0a, 0x0b, 0x5f, 0x63, 0x61, 0x63, 0x68, 0x65, 0x53, 0x63, 0x6f, 
0x70, 0x65, + 0x42, 0x0a, 0x0a, 0x08, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x8b, 0x03, 0x0a, + 0x15, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1e, 0x0a, 0x0a, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, + 0x74, 0x65, 0x49, 0x44, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x74, 0x65, 0x6d, 0x70, + 0x6c, 0x61, 0x74, 0x65, 0x49, 0x44, 0x12, 0x18, 0x0a, 0x07, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x49, + 0x44, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x49, 0x44, + 0x12, 0x1b, 0x0a, 0x06, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, + 0x48, 0x00, 0x52, 0x06, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x88, 0x01, 0x01, 0x12, 0x24, 0x0a, + 0x05, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x09, 0x2e, 0x4c, + 0x6f, 0x67, 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x48, 0x01, 0x52, 0x05, 0x6c, 0x65, 0x76, 0x65, 0x6c, + 0x88, 0x01, 0x01, 0x12, 0x19, 0x0a, 0x05, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x18, 0x05, 0x20, 0x01, + 0x28, 0x0d, 0x48, 0x02, 0x52, 0x05, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x88, 0x01, 0x01, 0x12, 0x35, + 0x0a, 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, + 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, + 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x48, 0x03, 0x52, 0x05, 0x73, 0x74, 0x61, + 0x72, 0x74, 0x88, 0x01, 0x01, 0x12, 0x31, 0x0a, 0x03, 0x65, 0x6e, 0x64, 0x18, 0x07, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x48, 0x04, + 0x52, 0x03, 0x65, 0x6e, 0x64, 0x88, 0x01, 0x01, 0x12, 0x31, 0x0a, 0x09, 0x64, 0x69, 0x72, 0x65, + 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x08, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x0e, 0x2e, 0x4c, 0x6f, + 0x67, 0x73, 0x44, 0x69, 
0x72, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x48, 0x05, 0x52, 0x09, 0x64, + 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x88, 0x01, 0x01, 0x42, 0x09, 0x0a, 0x07, 0x5f, + 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x42, 0x08, 0x0a, 0x06, 0x5f, 0x6c, 0x65, 0x76, 0x65, 0x6c, + 0x42, 0x08, 0x0a, 0x06, 0x5f, 0x6c, 0x69, 0x6d, 0x69, 0x74, 0x42, 0x08, 0x0a, 0x06, 0x5f, 0x73, + 0x74, 0x61, 0x72, 0x74, 0x42, 0x06, 0x0a, 0x04, 0x5f, 0x65, 0x6e, 0x64, 0x42, 0x0c, 0x0a, 0x0a, + 0x5f, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x56, 0x0a, 0x1a, 0x54, 0x65, + 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x44, 0x65, 0x6c, 0x65, 0x74, + 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x62, 0x75, 0x69, 0x6c, + 0x64, 0x49, 0x44, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x62, 0x75, 0x69, 0x6c, 0x64, + 0x49, 0x44, 0x12, 0x1e, 0x0a, 0x0a, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x49, 0x44, + 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, 0x74, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, + 0x49, 0x44, 0x22, 0xbb, 0x01, 0x0a, 0x15, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, + 0x75, 0x69, 0x6c, 0x64, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x24, 0x0a, 0x0d, + 0x72, 0x6f, 0x6f, 0x74, 0x66, 0x73, 0x53, 0x69, 0x7a, 0x65, 0x4b, 0x65, 0x79, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x05, 0x52, 0x0d, 0x72, 0x6f, 0x6f, 0x74, 0x66, 0x73, 0x53, 0x69, 0x7a, 0x65, 0x4b, + 0x65, 0x79, 0x12, 0x26, 0x0a, 0x0e, 0x65, 0x6e, 0x76, 0x64, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, + 0x6e, 0x4b, 0x65, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, 0x65, 0x6e, 0x76, 0x64, + 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x4b, 0x65, 0x79, 0x12, 0x24, 0x0a, 0x0d, 0x6b, 0x65, + 0x72, 0x6e, 0x65, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x0d, 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, + 0x12, 0x2e, 0x0a, 0x12, 0x66, 0x69, 0x72, 0x65, 0x63, 0x72, 
0x61, 0x63, 0x6b, 0x65, 0x72, 0x56, + 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x12, 0x66, 0x69, + 0x72, 0x65, 0x63, 0x72, 0x61, 0x63, 0x6b, 0x65, 0x72, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, + 0x22, 0x83, 0x02, 0x0a, 0x15, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, 0x69, + 0x6c, 0x64, 0x4c, 0x6f, 0x67, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x38, 0x0a, 0x09, 0x74, 0x69, + 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, + 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, + 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, + 0x74, 0x61, 0x6d, 0x70, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x1f, + 0x0a, 0x05, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x09, 0x2e, + 0x4c, 0x6f, 0x67, 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x52, 0x05, 0x6c, 0x65, 0x76, 0x65, 0x6c, 0x12, + 0x3a, 0x0a, 0x06, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x22, 0x2e, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x4c, + 0x6f, 0x67, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x45, 0x6e, + 0x74, 0x72, 0x79, 0x52, 0x06, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x1a, 0x39, 0x0a, 0x0b, 0x46, + 0x69, 0x65, 0x6c, 0x64, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, + 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, + 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x57, 0x0a, 0x19, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, + 0x74, 0x65, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x61, + 
0x73, 0x6f, 0x6e, 0x12, 0x18, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x17, 0x0a, + 0x04, 0x73, 0x74, 0x65, 0x70, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x04, 0x73, + 0x74, 0x65, 0x70, 0x88, 0x01, 0x01, 0x42, 0x07, 0x0a, 0x05, 0x5f, 0x73, 0x74, 0x65, 0x70, 0x22, + 0x86, 0x02, 0x0a, 0x1b, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, 0x69, 0x6c, + 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, + 0x2b, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, + 0x13, 0x2e, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x53, + 0x74, 0x61, 0x74, 0x65, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x32, 0x0a, 0x08, + 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x16, + 0x2e, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x4d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, + 0x12, 0x36, 0x0a, 0x0a, 0x6c, 0x6f, 0x67, 0x45, 0x6e, 0x74, 0x72, 0x69, 0x65, 0x73, 0x18, 0x05, + 0x20, 0x03, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, + 0x75, 0x69, 0x6c, 0x64, 0x4c, 0x6f, 0x67, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x6c, 0x6f, + 0x67, 0x45, 0x6e, 0x74, 0x72, 0x69, 0x65, 0x73, 0x12, 0x37, 0x0a, 0x06, 0x72, 0x65, 0x61, 0x73, + 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, - 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x2b, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x13, 0x2e, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, - 0x65, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x53, 
0x74, 0x61, 0x74, 0x65, 0x52, 0x06, 0x73, 0x74, 0x61, - 0x74, 0x75, 0x73, 0x12, 0x32, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, - 0x42, 0x75, 0x69, 0x6c, 0x64, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x52, 0x08, 0x6d, - 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x36, 0x0a, 0x0a, 0x6c, 0x6f, 0x67, 0x45, 0x6e, - 0x74, 0x72, 0x69, 0x65, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x54, 0x65, - 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x4c, 0x6f, 0x67, 0x45, 0x6e, - 0x74, 0x72, 0x79, 0x52, 0x0a, 0x6c, 0x6f, 0x67, 0x45, 0x6e, 0x74, 0x72, 0x69, 0x65, 0x73, 0x12, - 0x37, 0x0a, 0x06, 0x72, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, 0x32, - 0x1a, 0x2e, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x53, - 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x48, 0x00, 0x52, 0x06, 0x72, - 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x88, 0x01, 0x01, 0x42, 0x09, 0x0a, 0x07, 0x5f, 0x72, 0x65, 0x61, - 0x73, 0x6f, 0x6e, 0x4a, 0x04, 0x08, 0x03, 0x10, 0x04, 0x4a, 0x04, 0x08, 0x04, 0x10, 0x05, 0x2a, - 0x34, 0x0a, 0x08, 0x4c, 0x6f, 0x67, 0x4c, 0x65, 0x76, 0x65, 0x6c, 0x12, 0x09, 0x0a, 0x05, 0x44, - 0x65, 0x62, 0x75, 0x67, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x49, 0x6e, 0x66, 0x6f, 0x10, 0x01, - 0x12, 0x08, 0x0a, 0x04, 0x57, 0x61, 0x72, 0x6e, 0x10, 0x02, 0x12, 0x09, 0x0a, 0x05, 0x45, 0x72, - 0x72, 0x6f, 0x72, 0x10, 0x03, 0x2a, 0x2a, 0x0a, 0x0d, 0x4c, 0x6f, 0x67, 0x73, 0x44, 0x69, 0x72, - 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, 0x0b, 0x0a, 0x07, 0x46, 0x6f, 0x72, 0x77, 0x61, 0x72, - 0x64, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, 0x42, 0x61, 0x63, 0x6b, 0x77, 0x61, 0x72, 0x64, 0x10, - 0x01, 0x2a, 0x3d, 0x0a, 0x12, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, 0x69, - 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x0c, 0x0a, 0x08, 0x42, 0x75, 
0x69, 0x6c, 0x64, - 0x69, 0x6e, 0x67, 0x10, 0x00, 0x12, 0x0a, 0x0a, 0x06, 0x46, 0x61, 0x69, 0x6c, 0x65, 0x64, 0x10, - 0x01, 0x12, 0x0d, 0x0a, 0x09, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x65, 0x64, 0x10, 0x02, - 0x32, 0xbe, 0x02, 0x0a, 0x0f, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x53, 0x65, 0x72, - 0x76, 0x69, 0x63, 0x65, 0x12, 0x40, 0x0a, 0x0e, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, - 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x12, 0x16, 0x2e, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, - 0x65, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x16, - 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, - 0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x12, 0x4b, 0x0a, 0x13, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, - 0x74, 0x65, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x2e, - 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, - 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1c, 0x2e, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, - 0x42, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, - 0x6e, 0x73, 0x65, 0x12, 0x4a, 0x0a, 0x13, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, - 0x75, 0x69, 0x6c, 0x64, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x12, 0x1b, 0x2e, 0x54, 0x65, 0x6d, - 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, + 0x61, 0x73, 0x6f, 0x6e, 0x48, 0x00, 0x52, 0x06, 0x72, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x88, 0x01, + 0x01, 0x42, 0x09, 0x0a, 0x07, 0x5f, 0x72, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x4a, 0x04, 0x08, 0x03, + 0x10, 0x04, 0x4a, 0x04, 0x08, 0x04, 0x10, 0x05, 0x2a, 0x34, 0x0a, 0x08, 0x4c, 0x6f, 0x67, 0x4c, + 0x65, 0x76, 0x65, 0x6c, 0x12, 0x09, 0x0a, 0x05, 0x44, 0x65, 0x62, 0x75, 0x67, 0x10, 0x00, 0x12, + 0x08, 0x0a, 0x04, 0x49, 0x6e, 0x66, 0x6f, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x57, 0x61, 0x72, + 0x6e, 0x10, 0x02, 
0x12, 0x09, 0x0a, 0x05, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x10, 0x03, 0x2a, 0x2a, + 0x0a, 0x0d, 0x4c, 0x6f, 0x67, 0x73, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x12, + 0x0b, 0x0a, 0x07, 0x46, 0x6f, 0x72, 0x77, 0x61, 0x72, 0x64, 0x10, 0x00, 0x12, 0x0c, 0x0a, 0x08, + 0x42, 0x61, 0x63, 0x6b, 0x77, 0x61, 0x72, 0x64, 0x10, 0x01, 0x2a, 0x3d, 0x0a, 0x12, 0x54, 0x65, + 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, 0x61, 0x74, 0x65, + 0x12, 0x0c, 0x0a, 0x08, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x69, 0x6e, 0x67, 0x10, 0x00, 0x12, 0x0a, + 0x0a, 0x06, 0x46, 0x61, 0x69, 0x6c, 0x65, 0x64, 0x10, 0x01, 0x12, 0x0d, 0x0a, 0x09, 0x43, 0x6f, + 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x65, 0x64, 0x10, 0x02, 0x32, 0xbe, 0x02, 0x0a, 0x0f, 0x54, 0x65, + 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x40, 0x0a, + 0x0e, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x12, + 0x16, 0x2e, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x43, 0x72, 0x65, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x16, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x12, - 0x50, 0x0a, 0x13, 0x49, 0x6e, 0x69, 0x74, 0x4c, 0x61, 0x79, 0x65, 0x72, 0x46, 0x69, 0x6c, 0x65, - 0x55, 0x70, 0x6c, 0x6f, 0x61, 0x64, 0x12, 0x1b, 0x2e, 0x49, 0x6e, 0x69, 0x74, 0x4c, 0x61, 0x79, - 0x65, 0x72, 0x46, 0x69, 0x6c, 0x65, 0x55, 0x70, 0x6c, 0x6f, 0x61, 0x64, 0x52, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x1a, 0x1c, 0x2e, 0x49, 0x6e, 0x69, 0x74, 0x4c, 0x61, 0x79, 0x65, 0x72, 0x46, - 0x69, 0x6c, 0x65, 0x55, 0x70, 0x6c, 0x6f, 0x61, 0x64, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, - 0x65, 0x42, 0x33, 0x5a, 0x31, 0x68, 0x74, 0x74, 0x70, 0x73, 0x3a, 0x2f, 0x2f, 0x67, 0x69, 0x74, - 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x65, 0x32, 0x62, 0x2d, 0x64, 0x65, 0x76, 0x2f, - 0x69, 0x6e, 0x66, 0x72, 0x61, 0x2f, 0x74, 0x65, 0x6d, 0x70, 
0x6c, 0x61, 0x74, 0x65, 0x2d, 0x6d, - 0x61, 0x6e, 0x61, 0x67, 0x65, 0x72, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x4b, 0x0a, 0x13, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, 0x69, 0x6c, 0x64, + 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x16, 0x2e, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, + 0x65, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1c, + 0x2e, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x74, + 0x61, 0x74, 0x75, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x4a, 0x0a, 0x13, + 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x44, 0x65, 0x6c, + 0x65, 0x74, 0x65, 0x12, 0x1b, 0x2e, 0x54, 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x42, 0x75, + 0x69, 0x6c, 0x64, 0x44, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, + 0x1a, 0x16, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, + 0x75, 0x66, 0x2e, 0x45, 0x6d, 0x70, 0x74, 0x79, 0x12, 0x50, 0x0a, 0x13, 0x49, 0x6e, 0x69, 0x74, + 0x4c, 0x61, 0x79, 0x65, 0x72, 0x46, 0x69, 0x6c, 0x65, 0x55, 0x70, 0x6c, 0x6f, 0x61, 0x64, 0x12, + 0x1b, 0x2e, 0x49, 0x6e, 0x69, 0x74, 0x4c, 0x61, 0x79, 0x65, 0x72, 0x46, 0x69, 0x6c, 0x65, 0x55, + 0x70, 0x6c, 0x6f, 0x61, 0x64, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1c, 0x2e, 0x49, + 0x6e, 0x69, 0x74, 0x4c, 0x61, 0x79, 0x65, 0x72, 0x46, 0x69, 0x6c, 0x65, 0x55, 0x70, 0x6c, 0x6f, + 0x61, 0x64, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x42, 0x33, 0x5a, 0x31, 0x68, 0x74, + 0x74, 0x70, 0x73, 0x3a, 0x2f, 0x2f, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, + 0x2f, 0x65, 0x32, 0x62, 0x2d, 0x64, 0x65, 0x76, 0x2f, 0x69, 0x6e, 0x66, 0x72, 0x61, 0x2f, 0x74, + 0x65, 0x6d, 0x70, 0x6c, 0x61, 0x74, 0x65, 0x2d, 0x6d, 0x61, 0x6e, 0x61, 0x67, 0x65, 0x72, 0x62, + 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var (