From 7c403079b65081ed239300f1e7edee2236dd18ad Mon Sep 17 00:00:00 2001 From: Dmitrii Creed Date: Wed, 20 May 2026 16:38:10 +0400 Subject: [PATCH 1/2] feat(k8s/vpa): expose controlledValues + fix controlledResources nesting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related fixes to cloudExtras.vpa generation, found while rightsizing the PAY-SPACE org's GKE Autopilot clusters. 1. Add ControlledValues field to VPAConfig. K8s VPA's controlledValues knob ("RequestsAndLimits" default, "RequestsOnly" optional) is currently not exposed by SC. Without it, VPA always scales the CPU limit proportionally with the request. Lowering minAllowed.cpu below ~250m therefore shrinks the container's CPU limit far enough that Django/gunicorn-style cold starts CPU-throttle, fail the startup probe, and get SIGKILL'd by kubelet — even though the actual workload has plenty of headroom in steady state. With controlledValues: RequestsOnly, VPA rewrites only requests at admission and leaves the deployment template's limits alone, so cold-start bursts use the (higher) template limit. 2. Move controlledResources from resourcePolicy into the containerPolicy entry. Per the VPA CRD (autoscaling.k8s.io/v1), controlledResources is a per-container field — it lives at resourcePolicy.containerPolicies[*].controlledResources, not at resourcePolicy.controlledResources. Before this commit SC wrote it at the wrong nesting level; k8s silently dropped it on admission. Verified by `kubectl explain vpa.spec.resourcePolicy.containerPolicies.controlledResources` and by reading a live VPA on a PAY-SPACE cluster that had controlledResources set in client.yaml but missing from the in-cluster spec. The new ControlledValues field is placed inside containerPolicy in the same fix. No schema break — the change adds one optional struct field (ControlledValues) and moves controlledResources into the containerPolicy entry. Because the old placement was silently dropped by k8s, the only behavioral change for existing configs is that a controlledResources value already set in client.yaml now takes effect. 
Existing tests in TestStackConfigCompose_Copy/VPA_configuration_in_CloudExtras extended to cover controlledValues round-trip. Usage in client.yaml after this lands: cloudExtras: vpa: enabled: true updateMode: "Auto" minAllowed: { cpu: "50m", memory: "64Mi" } maxAllowed: { cpu: "2", memory: "4Gi" } controlledResources: ["cpu", "memory"] controlledValues: "RequestsOnly" This unblocks PAY-SPACE/crypto#853's hotfix pattern: the 250m CPU floor we currently hold across the org to avoid the proportional shrink can drop to 50m once consumers adopt controlledValues: RequestsOnly. Frees ~15 CPU on the production cluster, which is currently at the 64-CPU global quota cap. Signed-off-by: Dmitrii Creed --- pkg/api/copy_test.go | 1 + pkg/clouds/k8s/kube_run.go | 11 ++++++++++- .../pulumi/kubernetes/simple_container.go | 19 +++++++++++++------ 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/pkg/api/copy_test.go b/pkg/api/copy_test.go index 2c4f11ca..38836907 100644 --- a/pkg/api/copy_test.go +++ b/pkg/api/copy_test.go @@ -224,6 +224,7 @@ func TestStackConfigCompose_Copy(t *testing.T) { "memory": "2Gi", }, "controlledResources": []interface{}{"cpu", "memory"}, + "controlledValues": "RequestsOnly", }, } cloudExtras := any(vpaConfig) diff --git a/pkg/clouds/k8s/kube_run.go b/pkg/clouds/k8s/kube_run.go index a7de93b2..84eb3b4c 100644 --- a/pkg/clouds/k8s/kube_run.go +++ b/pkg/clouds/k8s/kube_run.go @@ -117,8 +117,17 @@ type VPAConfig struct { MinAllowed *VPAResourceRequirements `json:"minAllowed" yaml:"minAllowed"` // MaxAllowed specifies maximum allowed resources MaxAllowed *VPAResourceRequirements `json:"maxAllowed" yaml:"maxAllowed"` - // ControlledResources specifies which resources VPA should control + // ControlledResources specifies which resources VPA should control. + // Per the VPA CRD this is a per-container field; SC places it inside each + // containerPolicy entry, not at resourcePolicy level. 
ControlledResources []string `json:"controlledResources" yaml:"controlledResources"` + // ControlledValues specifies which resource values VPA should control. + // One of "RequestsAndLimits" (default) or "RequestsOnly". Use "RequestsOnly" + // when the underlying deployment template's limits are sized for cold-start + // bursts (e.g. Django/gunicorn) and you don't want VPA to scale the limit + // proportionally with a lowered request — the proportional shrink causes + // CPU-throttle-induced startup probe failures. + ControlledValues *string `json:"controlledValues" yaml:"controlledValues"` } // VPAResourceRequirements defines resource requirements for VPA diff --git a/pkg/clouds/pulumi/kubernetes/simple_container.go b/pkg/clouds/pulumi/kubernetes/simple_container.go index fc3805e5..eaa8d583 100644 --- a/pkg/clouds/pulumi/kubernetes/simple_container.go +++ b/pkg/clouds/pulumi/kubernetes/simple_container.go @@ -994,17 +994,24 @@ func createVPA(ctx *sdk.Context, args *SimpleContainerArgs, deploymentName strin } // Add resource policy if specified - if args.VPA.MinAllowed != nil || args.VPA.MaxAllowed != nil || len(args.VPA.ControlledResources) > 0 { + if args.VPA.MinAllowed != nil || args.VPA.MaxAllowed != nil || len(args.VPA.ControlledResources) > 0 || args.VPA.ControlledValues != nil { resourcePolicy := map[string]interface{}{} - // Add controlled resources + // Build the container policy. Per the VPA CRD, controlledResources and + // controlledValues are per-container fields and live inside the + // containerPolicy entry — not at the resourcePolicy level. Placing them + // at resourcePolicy level (the previous behavior) caused k8s to silently + // drop them. 
+ containerPolicy := map[string]interface{}{ + "containerName": "*", + } + if len(args.VPA.ControlledResources) > 0 { - resourcePolicy["controlledResources"] = args.VPA.ControlledResources + containerPolicy["controlledResources"] = args.VPA.ControlledResources } - // Add container policies - containerPolicy := map[string]interface{}{ - "containerName": "*", + if args.VPA.ControlledValues != nil { + containerPolicy["controlledValues"] = lo.FromPtr(args.VPA.ControlledValues) } if args.VPA.MinAllowed != nil { From 36c470c28ac5ca1a5c3330531a424cd0324e5425 Mon Sep 17 00:00:00 2001 From: Dmitrii Creed Date: Wed, 20 May 2026 23:22:03 +0400 Subject: [PATCH 2/2] test(k8s/vpa): exercise ControlledValues + ControlledResources path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass-through smoke test that constructs a SimpleContainer with VPA configured for the full surface (minAllowed, maxAllowed, controlledResources, controlledValues) and verifies the resource creation succeeds without error. Pairs with the existing TestNewSimpleContainer_WithVPA, which only exercises the minimal enabled+updateMode shape. The exact in-cluster VPA spec shape (controlledResources and controlledValues living inside containerPolicy rather than at resourcePolicy level) is not asserted by this test; it relies on createVPA in simple_container.go, whose output shape was verified against the live K8s VPA CRD via kubectl explain. Addresses the Gemini review feedback on the parent commit: prior to this, no kubernetes-package test exercised the new ControlledValues code path at all. 
Signed-off-by: Dmitrii Creed --- .../kubernetes/simple_container_test.go | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/pkg/clouds/pulumi/kubernetes/simple_container_test.go b/pkg/clouds/pulumi/kubernetes/simple_container_test.go index a2441e17..095ada2a 100644 --- a/pkg/clouds/pulumi/kubernetes/simple_container_test.go +++ b/pkg/clouds/pulumi/kubernetes/simple_container_test.go @@ -94,6 +94,29 @@ func createVPATestArgs() *SimpleContainerArgs { return args } +// createVPATestArgsWithControlledValues exercises the full VPA surface area: +// minAllowed, maxAllowed, controlledResources (which lives inside the +// containerPolicy per the VPA CRD), and the controlledValues knob that lets +// callers opt out of VPA scaling limits proportionally with requests. +func createVPATestArgsWithControlledValues() *SimpleContainerArgs { + args := createBasicTestArgs() + args.VPA = &k8s.VPAConfig{ + Enabled: true, + UpdateMode: lo.ToPtr("Auto"), + MinAllowed: &k8s.VPAResourceRequirements{ + CPU: lo.ToPtr("50m"), + Memory: lo.ToPtr("64Mi"), + }, + MaxAllowed: &k8s.VPAResourceRequirements{ + CPU: lo.ToPtr("2"), + Memory: lo.ToPtr("4Gi"), + }, + ControlledResources: []string{"cpu", "memory"}, + ControlledValues: lo.ToPtr("RequestsOnly"), + } + return args +} + // createComplexTestArgs creates SimpleContainerArgs with many features enabled func createComplexTestArgs() *SimpleContainerArgs { args := createBasicTestArgs() @@ -332,6 +355,28 @@ func TestNewSimpleContainer_WithHPA(t *testing.T) { Expect(err).ToNot(HaveOccurred(), "Test should complete without errors") } +// TestNewSimpleContainer_WithVPA_ControlledValues exercises the new +// ControlledValues + ControlledResources fields on VPAConfig. Asserts the +// resource creation succeeds; the actual CRD shape (controlledValues + +// controlledResources living inside containerPolicy, not at resourcePolicy +// level) is enforced by simple_container.go's createVPA implementation. 
+func TestNewSimpleContainer_WithVPA_ControlledValues(t *testing.T) { + RegisterTestingT(t) + + mocks := NewSimpleContainerMocks() + args := createVPATestArgsWithControlledValues() + + err := pulumi.RunErr(func(ctx *pulumi.Context) error { + sc, err := NewSimpleContainer(ctx, args) + Expect(err).ToNot(HaveOccurred(), "SimpleContainer with VPA controlledValues should be created successfully") + Expect(sc).ToNot(BeNil(), "SimpleContainer should not be nil") + Expect(sc.Deployment).ToNot(BeNil(), "Deployment should not be nil") + return nil + }, pulumi.WithMocks("project", "stack", mocks)) + + Expect(err).ToNot(HaveOccurred(), "Test should complete without errors") +} + func TestNewSimpleContainer_WithVPA(t *testing.T) { RegisterTestingT(t)