From a53dc77de466f1d8d621370a1f6c9f9d0c568eb5 Mon Sep 17 00:00:00 2001 From: Surbhi Kakarya Date: Fri, 21 Nov 2025 20:01:27 +0000 Subject: [PATCH 01/14] This patch includes: 1. Device State: CLEAN, DIRTY, SCRUB, BUSY 2. DeviceStateUpdate RPC 3. DeviceChanged Event Signed-off-by: Surbhi Kakarya --- protect/control/v1/common.proto | 11 +++++++++++ protect/control/v1/control.proto | 13 +++++++++++++ 2 files changed, 24 insertions(+) diff --git a/protect/control/v1/common.proto b/protect/control/v1/common.proto index a785e39..55513e1 100644 --- a/protect/control/v1/common.proto +++ b/protect/control/v1/common.proto @@ -581,3 +581,14 @@ message OciRegistryAuthentication { string registry_token = 4; } } + +message PciDevice { + PciDeviceState state = 1; +} + +enum PciDeviceState { + CLEAN = 0; + DIRTY = 1; + SCRUB = 2; + BUSY = 3; +} diff --git a/protect/control/v1/control.proto b/protect/control/v1/control.proto index 54efefd..9e14a1f 100644 --- a/protect/control/v1/control.proto +++ b/protect/control/v1/control.proto @@ -55,6 +55,7 @@ service ControlService { rpc SetHostPowerManagementPolicy(SetHostPowerManagementPolicyRequest) returns (SetHostPowerManagementPolicyReply); rpc DialNetworkSocket(stream DialNetworkSocketRequest) returns (stream DialNetworkSocketReply); + rpc DeviceStateUpdate(DeviceStateRequest) returns (DeviceStateReply); } message GetHostStatusRequest {} @@ -73,6 +74,13 @@ message GetHostStatusReply { optional string protect_branch = 11; } +message DeviceStateRequest { + string name = 1; + protect.common.v1.PciDevice state = 2; +} + +message DeviceStateReply {} + message CreateZoneRequest { ZoneSpec spec = 1; } @@ -398,6 +406,7 @@ message WatchEventsReply { oneof event { ZoneChangedEvent zone_changed = 1; WorkloadChangedEvent workload_changed = 2; + DeviceChangedEvent device_changed = 3; } } @@ -409,6 +418,10 @@ message WorkloadChangedEvent { Workload workload = 1; } +message DeviceChangedEvent { + protect.common.v1.DeviceReferenceSpec device = 1; +} + 
message ReadZoneMetricsRequest { string zone_id = 1; } From 4dcd19df097cab1e6d5570cb20ddfe648592daf3 Mon Sep 17 00:00:00 2001 From: Surbhi Kakarya Date: Fri, 21 Nov 2025 20:21:54 +0000 Subject: [PATCH 02/14] Fixing the failure Signed-off-by: Surbhi Kakarya --- protect/control/v1/control.proto | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/protect/control/v1/control.proto b/protect/control/v1/control.proto index 9e14a1f..4a19f01 100644 --- a/protect/control/v1/control.proto +++ b/protect/control/v1/control.proto @@ -76,7 +76,7 @@ message GetHostStatusReply { message DeviceStateRequest { string name = 1; - protect.common.v1.PciDevice state = 2; + PciDevice state = 2; } message DeviceStateReply {} @@ -419,7 +419,7 @@ message WorkloadChangedEvent { } message DeviceChangedEvent { - protect.common.v1.DeviceReferenceSpec device = 1; + DeviceReferenceSpec device = 1; } message ReadZoneMetricsRequest { From 9f764bee115c87bf890ca61fbc66ff6f6e67167e Mon Sep 17 00:00:00 2001 From: Surbhi Kakarya Date: Fri, 21 Nov 2025 20:58:08 +0000 Subject: [PATCH 03/14] Fixing more failure Signed-off-by: Surbhi Kakarya --- protect/control/v1/common.proto | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/protect/control/v1/common.proto b/protect/control/v1/common.proto index 55513e1..e40dee2 100644 --- a/protect/control/v1/common.proto +++ b/protect/control/v1/common.proto @@ -583,12 +583,12 @@ message OciRegistryAuthentication { } message PciDevice { - PciDeviceState state = 1; + PciDeviceState state = 1; } enum PciDeviceState { - CLEAN = 0; - DIRTY = 1; - SCRUB = 2; - BUSY = 3; + PCI_DEVICE_STATE_CLEAN = 0; + PCI_DEVICE_STATE_DIRTY = 1; + PCI_DEVICE_STATE_SCRUB = 2; + PCI_DEVICE_STATE_BUSY = 3; } From f10e9378fe8757d1624f966b1562a6868015652c Mon Sep 17 00:00:00 2001 From: Alex Zenla Date: Thu, 15 Jan 2026 10:07:18 -0800 Subject: [PATCH 04/14] prototype of device database api --- protect/control/v1/common.proto | 50 
++++++++++++++++++++++---------- protect/control/v1/control.proto | 40 ++++++++++++++++++++----- 2 files changed, 68 insertions(+), 22 deletions(-) diff --git a/protect/control/v1/common.proto b/protect/control/v1/common.proto index e40dee2..a7aad5e 100644 --- a/protect/control/v1/common.proto +++ b/protect/control/v1/common.proto @@ -152,7 +152,7 @@ message AnnotationSpec { } message DeviceReferenceSpec { - string name = 1; + string id = 1; } message WorkloadBlockDeviceSpec { @@ -506,10 +506,41 @@ message OciImageProgressIndicationCompleted { bool is_bytes = 3; } -message DeviceInfo { +message PciDeviceSpec { + string location = 1; + bool permissive = 2; +} + +message BlockDeviceSpec { + string device_path = 1; +} + +message DeviceSpec { string name = 1; - bool claimed = 2; - string owner_zone = 3; + oneof device_type { + PciDeviceSpec pci = 2; + BlockDeviceSpec block = 3; + } + repeated string modules = 4; + repeated KernelModuleParameter module_parameters = 5; +} + +enum DeviceState { + DEVICE_STATE_UNKNOWN = 0; + DEVICE_STATE_AVAILABLE = 1; + DEVICE_STATE_TAINTED = 2; + DEVICE_STATE_USED = 3; +} + +message DeviceStatus { + DeviceState state = 1; + string zone_id = 2; +} + +message Device { + string id = 1; + DeviceSpec spec = 2; + DeviceStatus status = 3; } message ZoneScratchDiskSpec { @@ -581,14 +612,3 @@ message OciRegistryAuthentication { string registry_token = 4; } } - -message PciDevice { - PciDeviceState state = 1; -} - -enum PciDeviceState { - PCI_DEVICE_STATE_CLEAN = 0; - PCI_DEVICE_STATE_DIRTY = 1; - PCI_DEVICE_STATE_SCRUB = 2; - PCI_DEVICE_STATE_BUSY = 3; -} diff --git a/protect/control/v1/control.proto b/protect/control/v1/control.proto index 4a19f01..d863306 100644 --- a/protect/control/v1/control.proto +++ b/protect/control/v1/control.proto @@ -9,7 +9,11 @@ service ControlService { rpc SnoopIdm(SnoopIdmRequest) returns (stream SnoopIdmReply); rpc GetHostCpuTopology(GetHostCpuTopologyRequest) returns (GetHostCpuTopologyReply); + rpc 
CreateDevice(CreateDeviceRequest) returns (CreateDeviceReply); rpc ListDevices(ListDevicesRequest) returns (ListDevicesReply); + rpc UpdateDeviceState(UpdateDeviceStateRequest) returns (UpdateDeviceStateReply); + rpc DestroyDevice(DestroyDeviceRequest) returns (DestroyDeviceReply); + rpc ResolveDeviceIds(ResolveDeviceIdsRequest) returns (ResolveDeviceIdsReply); rpc CreateNetworkReservation(CreateNetworkReservationRequest) returns (CreateNetworkReservationReply); rpc DestroyNetworkReservation(DestroyNetworkReservationRequest) returns (DestroyNetworkReservationReply); @@ -55,7 +59,6 @@ service ControlService { rpc SetHostPowerManagementPolicy(SetHostPowerManagementPolicyRequest) returns (SetHostPowerManagementPolicyReply); rpc DialNetworkSocket(stream DialNetworkSocketRequest) returns (stream DialNetworkSocketReply); - rpc DeviceStateUpdate(DeviceStateRequest) returns (DeviceStateReply); } message GetHostStatusRequest {} @@ -74,12 +77,12 @@ message GetHostStatusReply { optional string protect_branch = 11; } -message DeviceStateRequest { - string name = 1; - PciDevice state = 2; +message UpdateDeviceStateRequest { + string id = 1; + DeviceState state = 2; } -message DeviceStateReply {} +message UpdateDeviceStateReply {} message CreateZoneRequest { ZoneSpec spec = 1; @@ -419,7 +422,7 @@ message WorkloadChangedEvent { } message DeviceChangedEvent { - DeviceReferenceSpec device = 1; + Device device = 1; } message ReadZoneMetricsRequest { @@ -493,7 +496,8 @@ message ListImagesReply { message ListDevicesRequest {} message ListDevicesReply { - repeated DeviceInfo devices = 1; + reserved 1; + repeated Device devices = 2; } message GetHostCpuTopologyRequest {} @@ -630,3 +634,25 @@ message DialNetworkSocketData { message DialNetworkSocketReply { DialNetworkSocketData data = 1; } + +message CreateDeviceRequest { + DeviceSpec spec = 1; +} + +message CreateDeviceReply { + string id = 1; +} + +message DestroyDeviceRequest { + string id = 1; +} + +message DestroyDeviceReply {} + 
+message ResolveDeviceIdsRequest { + string name = 1; +} + +message ResolveDeviceIdsReply { + string device_ids = 1; +} From c075ae5e6918e5a12bdb164bba8b8a70e66d5998 Mon Sep 17 00:00:00 2001 From: Alex Zenla Date: Thu, 15 Jan 2026 10:20:46 -0800 Subject: [PATCH 05/14] change field name to device_id --- protect/control/v1/control.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/protect/control/v1/control.proto b/protect/control/v1/control.proto index d863306..c2d7d38 100644 --- a/protect/control/v1/control.proto +++ b/protect/control/v1/control.proto @@ -640,7 +640,7 @@ message CreateDeviceRequest { } message CreateDeviceReply { - string id = 1; + string device_id = 1; } message DestroyDeviceRequest { From f616da27da81f3f3f87a1ec79e47237284ebd4c8 Mon Sep 17 00:00:00 2001 From: Alex Zenla Date: Thu, 15 Jan 2026 10:42:10 -0800 Subject: [PATCH 06/14] more fixes to device protos --- protect/control/v1/common.proto | 2 ++ protect/control/v1/control.proto | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/protect/control/v1/common.proto b/protect/control/v1/common.proto index a7aad5e..3227661 100644 --- a/protect/control/v1/common.proto +++ b/protect/control/v1/common.proto @@ -530,6 +530,8 @@ enum DeviceState { DEVICE_STATE_AVAILABLE = 1; DEVICE_STATE_TAINTED = 2; DEVICE_STATE_USED = 3; + DEVICE_STATE_DESTROYING = 4; + DEVICE_STATE_DESTROYED = 5; } message DeviceStatus { diff --git a/protect/control/v1/control.proto b/protect/control/v1/control.proto index c2d7d38..c48e472 100644 --- a/protect/control/v1/control.proto +++ b/protect/control/v1/control.proto @@ -78,7 +78,7 @@ message GetHostStatusReply { } message UpdateDeviceStateRequest { - string id = 1; + string device_id = 1; DeviceState state = 2; } @@ -644,7 +644,7 @@ message CreateDeviceReply { } message DestroyDeviceRequest { - string id = 1; + string device_id = 1; } message DestroyDeviceReply {} @@ -654,5 +654,5 @@ message ResolveDeviceIdsRequest { } message 
ResolveDeviceIdsReply { - string device_ids = 1; + repeated string device_ids = 1; } From 3b1018136bd1c642775a0c63fc9917d2145683a7 Mon Sep 17 00:00:00 2001 From: Alex Zenla Date: Thu, 15 Jan 2026 10:57:29 -0800 Subject: [PATCH 07/14] add get device rpc --- protect/control/v1/control.proto | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/protect/control/v1/control.proto b/protect/control/v1/control.proto index c48e472..f80a18d 100644 --- a/protect/control/v1/control.proto +++ b/protect/control/v1/control.proto @@ -11,6 +11,7 @@ service ControlService { rpc CreateDevice(CreateDeviceRequest) returns (CreateDeviceReply); rpc ListDevices(ListDevicesRequest) returns (ListDevicesReply); + rpc GetDevice(GetDeviceRequest) returns (GetDeviceReply); rpc UpdateDeviceState(UpdateDeviceStateRequest) returns (UpdateDeviceStateReply); rpc DestroyDevice(DestroyDeviceRequest) returns (DestroyDeviceReply); rpc ResolveDeviceIds(ResolveDeviceIdsRequest) returns (ResolveDeviceIdsReply); @@ -656,3 +657,11 @@ message ResolveDeviceIdsRequest { message ResolveDeviceIdsReply { repeated string device_ids = 1; } + +message GetDeviceRequest { + string device_id = 1; +} + +message GetDeviceReply { + Device device = 1; +} From bb698f8102dcad1d430870f82d9a6fe1b9df6d75 Mon Sep 17 00:00:00 2001 From: Surbhi Kakarya Date: Mon, 26 Jan 2026 20:21:39 +0000 Subject: [PATCH 08/14] Add pci device configurations to the pci spec. 
Signed-off-by: Surbhi Kakarya --- protect/control/v1/common.proto | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/protect/control/v1/common.proto b/protect/control/v1/common.proto index 3227661..a8c1aeb 100644 --- a/protect/control/v1/common.proto +++ b/protect/control/v1/common.proto @@ -509,6 +509,16 @@ message OciImageProgressIndicationCompleted { message PciDeviceSpec { string location = 1; bool permissive = 2; + bool msi_translate = 3; + bool power_management = 4; + PciDeviceRdmReservePolicy rdm_reserve_policy = 5; +} + +enum PciDeviceRdmReservePolicy { + PCI_DEVICE_RDM_RESERVE_POLICY_UNKNOWN = 0; + PCI_DEVICE_RDM_RESERVE_POLICY_STRICT = 1; + PCI_DEVICE_RDM_RESERVE_POLICY_RELAXED = 2; + } message BlockDeviceSpec { From f33b3624f641875e46f219c9da39454d762de988 Mon Sep 17 00:00:00 2001 From: Surbhi Kakarya Date: Tue, 3 Feb 2026 18:03:22 +0000 Subject: [PATCH 09/14] Adding "Creating" device to be used when a new device is created. This helps us distinguish between creating and available state where available state will be marked for "Clean Device" especially after scrubbing. Signed-off-by: Surbhi Kakarya --- protect/control/v1/common.proto | 1 + 1 file changed, 1 insertion(+) diff --git a/protect/control/v1/common.proto b/protect/control/v1/common.proto index a8c1aeb..3a518ca 100644 --- a/protect/control/v1/common.proto +++ b/protect/control/v1/common.proto @@ -542,6 +542,7 @@ enum DeviceState { DEVICE_STATE_USED = 3; DEVICE_STATE_DESTROYING = 4; DEVICE_STATE_DESTROYED = 5; + DEVICE_STATE_CREATING = 6; } message DeviceStatus { From ded7a2825e6b593fee60c18ed965410879dc16fa Mon Sep 17 00:00:00 2001 From: Surbhi Kakarya Date: Wed, 11 Feb 2026 17:31:44 +0000 Subject: [PATCH 10/14] Introduce a new Device State: Failed. This is marked when Scrub Zone or Workload fails due to any reason and prevents it from being marked Tainted, otherwise it ends in a device reconciler endless loop.
Signed-off-by: Surbhi Kakarya --- protect/control/v1/common.proto | 1 + 1 file changed, 1 insertion(+) diff --git a/protect/control/v1/common.proto b/protect/control/v1/common.proto index 3a518ca..74655eb 100644 --- a/protect/control/v1/common.proto +++ b/protect/control/v1/common.proto @@ -543,6 +543,7 @@ enum DeviceState { DEVICE_STATE_DESTROYING = 4; DEVICE_STATE_DESTROYED = 5; DEVICE_STATE_CREATING = 6; + DEVICE_STATE_FAILED = 7; } message DeviceStatus { From 17db572610a430d6ccc49f21c2d84cc1183f6ad9 Mon Sep 17 00:00:00 2001 From: Surbhi Kakarya Date: Thu, 12 Feb 2026 20:05:45 +0000 Subject: [PATCH 11/14] Minor modification Signed-off-by: Surbhi Kakarya --- protect/control/v1/common.proto | 1 - 1 file changed, 1 deletion(-) diff --git a/protect/control/v1/common.proto b/protect/control/v1/common.proto index 74655eb..822cadf 100644 --- a/protect/control/v1/common.proto +++ b/protect/control/v1/common.proto @@ -518,7 +518,6 @@ enum PciDeviceRdmReservePolicy { PCI_DEVICE_RDM_RESERVE_POLICY_UNKNOWN = 0; PCI_DEVICE_RDM_RESERVE_POLICY_STRICT = 1; PCI_DEVICE_RDM_RESERVE_POLICY_RELAXED = 2; - } message BlockDeviceSpec { From aa2bca7243922ac5ac87f23ba842736074e02d00 Mon Sep 17 00:00:00 2001 From: Surbhi Kakarya Date: Fri, 13 Feb 2026 16:59:11 +0000 Subject: [PATCH 12/14] DeviceErrorStatus support Signed-off-by: Surbhi Kakarya --- protect/control/v1/common.proto | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/protect/control/v1/common.proto b/protect/control/v1/common.proto index 822cadf..2403191 100644 --- a/protect/control/v1/common.proto +++ b/protect/control/v1/common.proto @@ -548,6 +548,11 @@ enum DeviceState { message DeviceStatus { DeviceState state = 1; string zone_id = 2; + DeviceErrorStatus error_status = 3; +} + +message DeviceErrorStatus { + string message = 1; } message Device { From ebf910fc57309d3226d3b770a4c70118dcaf2b18 Mon Sep 17 00:00:00 2001 From: Surbhi Kakarya Date: Thu, 16 Apr 2026 07:02:56 -0700 Subject: [PATCH 13/14] wip Signed-off-by: 
Surbhi Kakarya --- protect/control/v1/common.proto | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/protect/control/v1/common.proto b/protect/control/v1/common.proto index 2403191..498f411 100644 --- a/protect/control/v1/common.proto +++ b/protect/control/v1/common.proto @@ -330,7 +330,8 @@ message WorkloadSpec { string hostname = 10; repeated WorkloadBlockDeviceSpec block_devices = 11; optional int32 oom_score_adj = 12; - repeated WorkloadPciDeviceSpec pci_devices = 13; + repeated DeviceReferenceSpec devices = 13; + //repeated WorkloadPciDeviceSpec pci_devices = 13; } message CgroupLimit { From 8968c2259280542481023afa4f8dea3ccfa6e866 Mon Sep 17 00:00:00 2001 From: Surbhi Kakarya Date: Tue, 21 Apr 2026 12:58:16 -0700 Subject: [PATCH 14/14] wip Signed-off-by: Surbhi Kakarya --- protect/control/v1/common.proto | 11 +++++----- protect/control/v1/control.proto | 35 +++++++++++++++++++++----------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/protect/control/v1/common.proto b/protect/control/v1/common.proto index 498f411..4419c67 100644 --- a/protect/control/v1/common.proto +++ b/protect/control/v1/common.proto @@ -155,16 +155,16 @@ message DeviceReferenceSpec { string id = 1; } +message WorkloadPciDeviceSpec { + string location = 1; +} + message WorkloadBlockDeviceSpec { string target_path = 1; string device_path = 2; BlockDeviceMountOptions mount_options = 3; } -message WorkloadPciDeviceSpec { - string location = 1; -} - message BlockDeviceMountOptions { bool readonly = 1; string permissions = 2; @@ -331,7 +331,6 @@ message WorkloadSpec { repeated WorkloadBlockDeviceSpec block_devices = 11; optional int32 oom_score_adj = 12; repeated DeviceReferenceSpec devices = 13; - //repeated WorkloadPciDeviceSpec pci_devices = 13; } message CgroupLimit { @@ -548,7 +547,7 @@ enum DeviceState { message DeviceStatus { DeviceState state = 1; - string zone_id = 2; + string owner_id = 2; DeviceErrorStatus error_status = 3; } diff --git 
a/protect/control/v1/control.proto b/protect/control/v1/control.proto index f80a18d..b7cd082 100644 --- a/protect/control/v1/control.proto +++ b/protect/control/v1/control.proto @@ -14,7 +14,8 @@ service ControlService { rpc GetDevice(GetDeviceRequest) returns (GetDeviceReply); rpc UpdateDeviceState(UpdateDeviceStateRequest) returns (UpdateDeviceStateReply); rpc DestroyDevice(DestroyDeviceRequest) returns (DestroyDeviceReply); - rpc ResolveDeviceIds(ResolveDeviceIdsRequest) returns (ResolveDeviceIdsReply); + rpc ResolveAndValidateDevice(ResolveAndValidateDeviceRequest) returns (ResolveAndValidateDeviceReply); + rpc UpdateZoneDevices(UpdateZoneDevicesRequest) returns (UpdateZoneDevicesReply); rpc CreateNetworkReservation(CreateNetworkReservationRequest) returns (CreateNetworkReservationReply); rpc DestroyNetworkReservation(DestroyNetworkReservationRequest) returns (DestroyNetworkReservationReply); @@ -85,6 +86,21 @@ message UpdateDeviceStateRequest { message UpdateDeviceStateReply {} +message UpdateZoneDevicesRequest { + string zone_id = 1; + repeated DeviceReferenceSpec devices = 2; +} + +message GetDeviceRequest { + string device_id = 1; +} + +message GetDeviceReply { + Device device = 1; +} + +message UpdateZoneDevicesReply {} + message CreateZoneRequest { ZoneSpec spec = 1; } @@ -650,18 +666,13 @@ message DestroyDeviceRequest { message DestroyDeviceReply {} -message ResolveDeviceIdsRequest { - string name = 1; -} - -message ResolveDeviceIdsReply { - repeated string device_ids = 1; +message ResolveAndValidateDeviceRequest { + oneof DeviceRequest { + string name = 1; + string location = 2; + } } -message GetDeviceRequest { +message ResolveAndValidateDeviceReply { string device_id = 1; } - -message GetDeviceReply { - Device device = 1; -}