diff --git a/attestation-key-register/src/main.rs b/attestation-key-register/src/main.rs index a9d80a71..e946a650 100644 --- a/attestation-key-register/src/main.rs +++ b/attestation-key-register/src/main.rs @@ -16,7 +16,8 @@ use uuid::Uuid; use trusted_cluster_operator_lib::endpoints::ATTESTATION_KEY_REGISTER_RESOURCE; use trusted_cluster_operator_lib::{ - generate_owner_reference, get_trusted_execution_cluster, AttestationKey, AttestationKeySpec, + self, generate_owner_controller_reference, get_trusted_execution_cluster, AttestationKey, + AttestationKeySpec, }; #[derive(Parser)] @@ -64,15 +65,17 @@ async fn handle_registration( let api: Api = Api::default_namespaced(client.clone()); - // Get the TrustedExecutionCluster to use as owner reference + // TEC is owner-controller of the AttestationKey until the operator approves the key and transfers control to Machine, which then becomes the controller and manages lifecycle of the AttestationKey. let cluster = match get_trusted_execution_cluster(client.clone()).await { Ok(c) => c, Err(e) => return internal_error(e.context("Failed to get TrustedExecutionCluster")), }; - let owner_reference = match generate_owner_reference(&cluster) { + let owner_controller_reference = match generate_owner_controller_reference(&cluster) { Ok(o) => o, - Err(e) => return internal_error(e.context("Failed to generate owner reference")), + Err(e) => { + return internal_error(e.context("Failed to generate owner-controller reference")) + } }; match api.list(&Default::default()).await { @@ -104,7 +107,7 @@ async fn handle_registration( let attestation_key = AttestationKey { metadata: ObjectMeta { name: Some(name.clone()), - owner_references: Some(vec![owner_reference]), + owner_references: Some(vec![owner_controller_reference]), ..Default::default() }, spec: AttestationKeySpec { @@ -114,7 +117,17 @@ async fn handle_registration( status: None, }; - match api.create(&Default::default(), &attestation_key).await { + // Client side apply, as this is a one time operation (no reconciliation loop) for each attestation key, and one owner. SSA is mainly used for patching. Setting field manager, so that we can identify the source of the creation later for reconciliation by operator. + match api + .create( + &kube::api::PostParams { + field_manager: Some(trusted_cluster_operator_lib::FIELD_MANAGER.to_string()), + ..Default::default() + }, + &attestation_key, + ) + .await + { Ok(created) => { let name = created.metadata.name.unwrap_or_default(); info!("Successfully created AttestationKey: {name}",); diff --git a/compute-pcrs/src/main.rs b/compute-pcrs/src/main.rs index 0c1bfacf..548eb06b 100644 --- a/compute-pcrs/src/main.rs +++ b/compute-pcrs/src/main.rs @@ -12,6 +12,8 @@ use std::{fs::File, io::Read}; use trusted_cluster_operator_lib::{conditions::INSTALLED_REASON, reference_values::*, *}; +const FIELD_MANAGER: &str = "compute-pcrs"; + #[derive(Parser)] #[command(version, about)] struct Args { @@ -77,6 +79,16 @@ async fn main() -> Result<()> { let committed = committed_condition(INSTALLED_REASON, image.metadata.generation, &None); let conditions = Some(vec![committed]); let status = ApprovedImageStatus { conditions }; - update_status!(approved_images, &args.resource_name, status)?; + + // As operator also updates the status field, while requeuing every 10 seconds, we need to force this update to avoid race condition. + // TODO: Simplify ownership of this field. + update_status!( + approved_images, + &args.resource_name, + status, + ApprovedImage, + FIELD_MANAGER, + force + )?; Ok(()) } diff --git a/lib/src/lib.rs b/lib/src/lib.rs index 35405ade..cc3ac542 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -30,13 +30,31 @@ use conditions::*; use k8s_openapi::apimachinery::pkg::apis::meta::v1::{Condition, OwnerReference, Time}; use kube::Resource; +pub const FIELD_MANAGER: &str = "trusted-cluster-operator"; + +// 2 arms for updating status, one for normal updates and one for force updates. #[macro_export] macro_rules! update_status { - ($api:ident, $name:expr, $status:expr) => {{ - let patch = kube::api::Patch::Merge(serde_json::json!({"status": $status})); - $api.patch_status($name, &Default::default(), &patch).await + ($api:ident, $name:expr, $status:expr, $type:ty, $field_manager:expr) => {{ + let patch = kube::api::Patch::Apply(serde_json::json!({ + "apiVersion": <$type as kube::Resource>::api_version(&()), + "kind": <$type as kube::Resource>::kind(&()), + "status": $status + })); + let params = kube::api::PatchParams::apply($field_manager); + $api.patch_status($name, ¶ms, &patch).await + .map_err(Into::::into) + }}; + ($api:ident, $name:expr, $status:expr, $type:ty, $field_manager:expr, force) => {{ + let patch = kube::api::Patch::Apply(serde_json::json!({ + "apiVersion": <$type as kube::Resource>::api_version(&()), + "kind": <$type as kube::Resource>::kind(&()), + "status": $status + })); + let params = kube::api::PatchParams::apply($field_manager).force(); + $api.patch_status($name, ¶ms, &patch).await .map_err(Into::::into) - }} + }}; } pub fn condition_status(status: bool) -> String { @@ -108,8 +126,8 @@ pub fn committed_condition( } } -/// Generate an OwnerReference for any Kubernetes resource -pub fn generate_owner_reference>( +/// OwnerReference with `controller: true` for lifecycle and garbage collection. +pub fn generate_owner_controller_reference>( object: &T, ) -> anyhow::Result { let name = object.meta().name.clone(); diff --git a/lib/src/reference_values.rs b/lib/src/reference_values.rs index f262d37d..d21353d1 100644 --- a/lib/src/reference_values.rs +++ b/lib/src/reference_values.rs @@ -29,6 +29,9 @@ macro_rules! update_image_pcrs { let map = (PCR_CONFIG_FILE.to_string(), image_pcrs_json.to_string()); let data = std::collections::BTreeMap::from([map]); $map.data = Some(data); + + // Operator and compute-pcr's both write to this configmap, hence client-side apply makes sense here. + // TODO: Simplify ownership of this field. $api.replace(PCR_CONFIG_MAP, &Default::default(), &$map) .await? }; diff --git a/operator/src/attestation_key_register.rs b/operator/src/attestation_key_register.rs index 917d37d1..5d920343 100644 --- a/operator/src/attestation_key_register.rs +++ b/operator/src/attestation_key_register.rs @@ -14,7 +14,7 @@ use k8s_openapi::apimachinery::pkg::{ util::intstr::IntOrString, }; use kube::{ - Api, Client, Resource, + Api, Client, api::{ListParams, ObjectList, Patch, PatchParams}, runtime::{Controller, controller::Action, finalizer, finalizer::Event, watcher}, }; @@ -29,7 +29,7 @@ use trusted_cluster_operator_lib::{AttestationKey, AttestationKeyStatus, Machine use crate::conditions::attestation_key_approved_condition; use crate::trustee; use operator::{ - ControllerError, TLS_DIR, controller_error_policy, create_or_info_if_exists, read_certificate, + ControllerError, TLS_DIR, apply_resource, controller_error_policy, read_certificate, upsert_condition, }; @@ -97,7 +97,7 @@ pub async fn create_attestation_key_register_deployment( ..Default::default() }; - create_or_info_if_exists!(client, Deployment, deployment); + apply_resource!(client, Deployment, deployment); info!("Attestation key register deployment created successfully"); Ok(()) } @@ -133,7 +133,7 @@ pub async fn create_attestation_key_register_service( ..Default::default() }; - create_or_info_if_exists!(client, Service, service); + apply_resource!(client, Service, service); info!("Attestation key register service created successfully"); Ok(()) } @@ -209,35 +209,45 @@ async fn approve_ak(ak: &AttestationKey, machine: &Machine, client: Client) -> R if changed { let status = AttestationKeyStatus { conditions }; - update_status!(aks, &name, status)?; + update_status!( + aks, + &name, + status, + AttestationKey, + trusted_cluster_operator_lib::FIELD_MANAGER + )?; info!("Approved attestation key {name}"); } let machine_name = machine.metadata.name.clone().unwrap_or_default(); - let has_machine_owner = ak + let has_machine_owner_controller = ak .metadata .owner_references .as_ref() .map(|owners| { - owners - .iter() - .any(|owner| owner.kind == "Machine" && owner.name == machine_name) + owners.iter().any(|owner| { + owner.kind == "Machine" + && owner.name == machine_name + && owner.controller == Some(true) + }) }) .unwrap_or(false); - if !has_machine_owner { - let machine_owner_reference = - trusted_cluster_operator_lib::generate_owner_reference(machine)?; + if !has_machine_owner_controller { + let owner_controller_reference = + trusted_cluster_operator_lib::generate_owner_controller_reference(machine)?; + // Replacing the owner of the AttestationKey to the Machine controller, as now the AttestationKey is tied to the Machine. let patch = json!({ "metadata": { - "ownerReferences": [machine_owner_reference] + "ownerReferences": [owner_controller_reference] } }); + // This requires a client-side patch since merge patches replaces entire owners field. SSA would upsert, and cause issues where we might not cleanly remove the TEC owner reference. aks.patch(&name, &PatchParams::default(), &Patch::Merge(&patch)) .await?; - info!("Set Machine as owner of AttestationKey {name}"); + info!("Set Machine as owner-controller of AttestationKey {name}"); } let secret_name = name.clone(); @@ -248,12 +258,13 @@ async fn approve_ak(ak: &AttestationKey, machine: &Machine, client: Client) -> R let public_key_data = ByteString(ak.spec.public_key.as_bytes().to_vec()); let data = BTreeMap::from([("public_key".to_string(), public_key_data)]); - let owner_reference = trusted_cluster_operator_lib::generate_owner_reference(ak)?; + let owner_controller_reference = + trusted_cluster_operator_lib::generate_owner_controller_reference(ak)?; let secret = Secret { metadata: ObjectMeta { name: Some(secret_name.clone()), - owner_references: Some(vec![owner_reference]), + owner_references: Some(vec![owner_controller_reference]), finalizers: Some(vec![ATTESTATION_KEY_SECRET_FINALIZER.to_string()]), ..Default::default() }, @@ -261,7 +272,7 @@ async fn approve_ak(ak: &AttestationKey, machine: &Machine, client: Client) -> R ..Default::default() }; - create_or_info_if_exists!(client.clone(), Secret, secret); + apply_resource!(client.clone(), Secret, secret); info!("Created secret {secret_name} for attestation key {name} with finalizer"); } @@ -274,13 +285,16 @@ async fn secret_reconcile( ) -> Result { let secret_name = secret.metadata.name.clone().unwrap_or_default(); - // Only handle secrets owned by AttestationKey + // Only handle secrets controlled by an AttestationKey let is_ak_secret = secret .metadata .owner_references .as_ref() - .map(|owners| owners.iter().any(|owner| owner.kind == "AttestationKey")) - .unwrap_or(false); + .is_some_and(|owners| { + owners + .iter() + .any(|owner| owner.kind == "AttestationKey" && owner.controller == Some(true)) + }); if !is_ak_secret { return Ok(Action::await_change()); @@ -374,6 +388,7 @@ pub async fn launch_secret_ak_controller(client: Client) { #[cfg(test)] mod tests { use super::*; + use http::{Method, Request}; use trusted_cluster_operator_test_utils::mock_client::*; #[tokio::test] @@ -405,4 +420,243 @@ mod tests { |client| create_attestation_key_register_service(client, Default::default(), Some(80)); test_create_error(clos).await; } + + fn dummy_ak() -> AttestationKey { + AttestationKey { + metadata: ObjectMeta { + name: Some("ak-test".to_string()), + uid: Some("ak-uid".to_string()), + generation: Some(1), + ..Default::default() + }, + spec: trusted_cluster_operator_lib::AttestationKeySpec { + public_key: "test-key".to_string(), + uuid: Some("machine-uuid".to_string()), + }, + status: None, + } + } + + fn dummy_machine() -> Machine { + Machine { + metadata: ObjectMeta { + name: Some("machine-test".to_string()), + uid: Some("machine-uid".to_string()), + ..Default::default() + }, + spec: trusted_cluster_operator_lib::MachineSpec { + id: "machine-uuid".to_string(), + }, + status: None, + } + } + + #[tokio::test] + async fn test_approve_ak_full_flow() { + // approve_ak with no prior status, no Machine owner, and secret not existing: + // 1. PATCH /status (SSA status update). AttestationKey is approved. + // 2. PATCH owner transfer (Merge). Ownership transfered to Machine. + // 3. GET secret (check existence) + // 4. PATCH secret (apply_resource! SSA create) + let clos = async |req: Request<_>, ctr| match (ctr, req.method()) { + (0, &Method::PATCH) => { + assert!(req.uri().path().contains("/status")); + Ok(serde_json::to_string(&dummy_ak()).unwrap()) + } + (1, &Method::PATCH) => { + assert!(!req.uri().path().contains("/status")); + Ok(serde_json::to_string(&dummy_ak()).unwrap()) + } + (2, &Method::GET) => Err(http::StatusCode::NOT_FOUND), + (3, &Method::PATCH) => { + let body = get_body_string(req).await; + let v: serde_json::Value = serde_json::from_str(&body).unwrap(); + let owners = v["metadata"]["ownerReferences"] + .as_array() + .expect("Secret must have ownerReferences"); + assert_eq!( + owners[0]["kind"], "AttestationKey", + "Secret must be owned by AttestationKey" + ); + assert_eq!( + owners[0]["controller"], true, + "AttestationKey must be controller of Secret" + ); + assert_eq!( + owners[0]["uid"], "ak-uid", + "Secret owner UID must match AK UID" + ); + + // Asserting finalizers + let finalizers = v["metadata"]["finalizers"] + .as_array() + .expect("Secret must have finalizers"); + assert!( + finalizers.iter().any(|f| f + .as_str() + .unwrap() + .contains("attestationkey-secret-finalizer")), + "Secret must have the attestation key secret finalizer" + ); + let data = v["data"].as_object().expect("Secret must have data"); + assert!( + data.contains_key("public_key"), + "Secret data must contain public_key" + ); + Ok(serde_json::to_string(&Secret::default()).unwrap()) + } + + _ => panic!("unexpected API interaction: {req:?}, counter {ctr}"), + }; + count_check!(4, clos, |client| { + let ak = dummy_ak(); + let machine = dummy_machine(); + assert!(approve_ak(&ak, &machine, client).await.is_ok()); + }); + } + + #[tokio::test] + async fn test_approve_ak_already_approved_and_owned() { + // Building a pre-populated AttestationKey with the approved condition. This would prevent upsert from changing the status field, preventing the initial status PATCH. + // Further-more, we set owner of AttestationKey to the Machine, so that no owner transfer PATCH is needed. + // Further, get secret returns valid secret, so no secret PATCH is needed. + // Only 1 GET call to fetch secret is needed. + let mut ak = dummy_ak(); + let approve_reason = + trusted_cluster_operator_lib::conditions::ATTESTATION_KEY_MACHINE_APPROVE; + let existing_condition = crate::conditions::attestation_key_approved_condition( + approve_reason, + ak.metadata.generation, + &ak.status, + ); + ak.status = Some(AttestationKeyStatus { + conditions: Some(vec![existing_condition]), + }); + ak.metadata.owner_references = Some(vec![OwnerReference { + kind: "Machine".to_string(), + name: "machine-test".to_string(), + uid: "machine-uid".to_string(), + api_version: "trusted-execution-clusters.io/v1alpha1".to_string(), + controller: Some(true), + block_owner_deletion: Some(true), + }]); + + // No status or owner PATCH needed; only GET secret (exists) + let clos = async |req: Request<_>, ctr| match (ctr, req.method()) { + (0, &Method::GET) => Ok(serde_json::to_string(&Secret::default()).unwrap()), + _ => panic!("unexpected API interaction: {req:?}, counter {ctr}"), + }; + count_check!(1, clos, |client| { + let machine = dummy_machine(); + assert!(approve_ak(&ak, &machine, client).await.is_ok()); + }); + } + + #[tokio::test] + async fn test_approve_ak_status_update_error() { + let clos = async |req: Request<_>, ctr| match (ctr, req.method()) { + (0, &Method::PATCH) => Err(http::StatusCode::INTERNAL_SERVER_ERROR), + _ => panic!("unexpected API interaction: {req:?}, counter {ctr}"), + }; + count_check!(1, clos, |client| { + let ak = dummy_ak(); + let machine = dummy_machine(); + assert!(approve_ak(&ak, &machine, client).await.is_err()); + }); + } + + #[tokio::test] + async fn test_approve_ak_status_patch_contains_approved_condition() { + use kube::client::Body; + let clos = async |req: Request, ctr| match (ctr, req.method()) { + // Validates whether attestation key is immediately approved as per TOFU (Trust on first use) principles. + // Also makes sure that the approved condition is set to True, and the reason is MachineCreated. + (0, &Method::PATCH) => { + assert!( + req.uri().path().contains("/status"), + "First PATCH must target /status subresource" + ); + let body = get_body_string(req).await; + let v: serde_json::Value = serde_json::from_str(&body).unwrap(); + let conditions = v["status"]["conditions"] + .as_array() + .expect("Status body must contain conditions array"); + let approved_type = + trusted_cluster_operator_lib::conditions::ATTESTATION_KEY_APPROVED_CONDITION; + let approved = conditions.iter().find(|c| c["type"] == approved_type); + assert!( + approved.is_some(), + "Must contain the '{approved_type}' condition" + ); + let cond = approved.unwrap(); + assert_eq!(cond["status"], "True", "Approved condition must be True"); + assert_eq!( + cond["reason"], ATTESTATION_KEY_MACHINE_APPROVE, + "Reason must be MachineCreated" + ); + Ok(serde_json::to_string(&dummy_ak()).unwrap()) + } + (1, &Method::PATCH) => Ok(serde_json::to_string(&dummy_ak()).unwrap()), + (2, &Method::GET) => Err(http::StatusCode::NOT_FOUND), + (3, &Method::PATCH) => Ok(serde_json::to_string(&Secret::default()).unwrap()), + _ => panic!("unexpected API interaction: {req:?}, counter {ctr}"), + }; + count_check!(4, clos, |client| { + let ak = dummy_ak(); + let machine = dummy_machine(); + assert!(approve_ak(&ak, &machine, client).await.is_ok()); + }); + } + + // Makes sure that the owner transfer patch uses merge patch, and not SSA patch. + // SSA can't transfer ownership, and would cause issues where we might not cleanly remove the TEC owner reference. + #[tokio::test] + async fn test_approve_ak_owner_transfer_uses_merge_patch() { + use kube::client::Body; + let clos = async |req: Request, ctr| match (ctr, req.method()) { + (0, &Method::PATCH) => Ok(serde_json::to_string(&dummy_ak()).unwrap()), + (1, &Method::PATCH) => { + assert!( + !req.uri().path().contains("/status"), + "Owner transfer must not target /status" + ); + let query = req.uri().query().unwrap_or(""); + assert!( + !query.contains("fieldManager"), + "Merge patch must NOT use a field manager (not SSA): {query}" + ); + let content_type = req + .headers() + .get("content-type") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + assert!( + content_type.contains("merge-patch"), + "Owner transfer must use Merge patch, got content-type: {content_type}" + ); + let body = get_body_string(req).await; + let v: serde_json::Value = serde_json::from_str(&body).unwrap(); + let owners = v["metadata"]["ownerReferences"] + .as_array() + .expect("Merge patch must set ownerReferences"); + assert_eq!( + owners.len(), + 1, + "Must replace entire ownerReferences (not append)" + ); + assert_eq!(owners[0]["kind"], "Machine", "New owner must be Machine"); + assert_eq!(owners[0]["name"], "machine-test"); + assert_eq!(owners[0]["controller"], true, "Machine must be controller"); + Ok(serde_json::to_string(&dummy_ak()).unwrap()) + } + (2, &Method::GET) => Err(http::StatusCode::NOT_FOUND), + (3, &Method::PATCH) => Ok(serde_json::to_string(&Secret::default()).unwrap()), + _ => panic!("unexpected API interaction: {req:?}, counter {ctr}"), + }; + count_check!(4, clos, |client| { + let ak = dummy_ak(); + let machine = dummy_machine(); + assert!(approve_ak(&ak, &machine, client).await.is_ok()); + }); + } } diff --git a/operator/src/lib.rs b/operator/src/lib.rs index c5db0fd3..b8fd061e 100644 --- a/operator/src/lib.rs +++ b/operator/src/lib.rs @@ -18,8 +18,7 @@ use log::{info, warn}; use std::fmt::{Debug, Display}; use std::{sync::Arc, time::Duration}; -// Re-export common functions from the lib -pub use trusted_cluster_operator_lib::generate_owner_reference; +pub use trusted_cluster_operator_lib::generate_owner_controller_reference; #[derive(Clone)] pub struct RvContextData { @@ -46,6 +45,34 @@ pub async fn controller_info(res: Result) { } } +/// Applying a resource using Server-Side Apply (SSA). Creates if absent, updates owned fields if present. +#[macro_export] +macro_rules! apply_resource { + ($client:expr, $type:ident, $resource:ident) => { + $crate::apply_resource!( + $client, + $type, + $resource, + trusted_cluster_operator_lib::FIELD_MANAGER + ) + }; + ($client:expr, $type:ident, $resource:ident, $field_manager:expr) => {{ + let api: Api<$type> = kube::Api::default_namespaced($client); + let name = $resource.metadata.name.as_ref().unwrap().clone(); + let mut body = serde_json::to_value(&$resource).map_err(anyhow::Error::from)?; + body["apiVersion"] = serde_json::json!(<$type as kube::Resource>::api_version(&())); + body["kind"] = serde_json::json!(<$type as kube::Resource>::kind(&())); + let params = kube::api::PatchParams::apply($field_manager); + match api + .patch(&name, ¶ms, &kube::api::Patch::Apply(&body)) + .await + { + Ok(_) => info!("Applied {} {}", <$type as kube::Resource>::kind(&()), name), + Err(e) => return Err(e.into()), + } + }}; +} + #[macro_export] macro_rules! create_or_info_if_exists { ($client:expr, $type:ident, $resource:ident) => { diff --git a/operator/src/main.rs b/operator/src/main.rs index 0ab4603c..a8ab881b 100644 --- a/operator/src/main.rs +++ b/operator/src/main.rs @@ -16,7 +16,7 @@ use kube::runtime::watcher; use kube::{Api, Client}; use log::{error, info, warn}; -use operator::{generate_owner_reference, upsert_condition}; +use operator::{generate_owner_controller_reference, upsert_condition}; use trusted_cluster_operator_lib::{TrustedExecutionCluster, TrustedExecutionClusterStatus}; use trusted_cluster_operator_lib::{conditions::*, images::*, update_status}; @@ -81,7 +81,7 @@ async fn launch_rv_watchers( "First registration of TrustedExecutionCluster {name} by this operator. \ Launching reference value watchers." ); - let owner_reference = generate_owner_reference(&*cluster)?; + let owner_controller_reference = generate_owner_controller_reference(&*cluster)?; let pcrs_compute_image = get_image_or_env( &cluster.spec.pcrs_compute_image, RELATED_IMAGE_COMPUTE_PCRS, @@ -89,7 +89,7 @@ async fn launch_rv_watchers( ); let rv_ctx = RvContextData { client, - owner_reference: owner_reference.clone(), + owner_reference: owner_controller_reference.clone(), pcrs_compute_image, }; reference_values::launch_rv_image_controller(rv_ctx.clone()).await; @@ -125,7 +125,13 @@ async fn reconcile( installed_condition(uninstalling_reason, generation, existing_status); let changed = upsert_condition(&mut conditions, uninstall_condition); if changed { - update_status!(clusters, name, TrustedExecutionClusterStatus { conditions })?; + update_status!( + clusters, + name, + TrustedExecutionClusterStatus { conditions }, + TrustedExecutionCluster, + trusted_cluster_operator_lib::FIELD_MANAGER + )?; } return Ok(Action::await_change()); } @@ -148,7 +154,13 @@ async fn reconcile( installed_condition(NOT_INSTALLED_REASON_NON_UNIQUE, generation, existing_status); let changed = upsert_condition(&mut conditions, non_unique_condition); if changed { - update_status!(clusters, name, TrustedExecutionClusterStatus { conditions })?; + update_status!( + clusters, + name, + TrustedExecutionClusterStatus { conditions }, + TrustedExecutionCluster, + trusted_cluster_operator_lib::FIELD_MANAGER + )?; } return Ok(Action::requeue(Duration::from_secs(60))); } @@ -161,7 +173,13 @@ async fn reconcile( let status = TrustedExecutionClusterStatus { conditions: conditions.clone(), }; - update_status!(clusters, name, status)?; + update_status!( + clusters, + name, + status, + TrustedExecutionCluster, + trusted_cluster_operator_lib::FIELD_MANAGER + )?; } install_trustee_configuration(kube_client.clone(), &cluster).await?; @@ -171,7 +189,13 @@ async fn reconcile( let changed = upsert_condition(&mut conditions, installed_condition); if changed { let status = TrustedExecutionClusterStatus { conditions }; - update_status!(clusters, name, status)?; + update_status!( + clusters, + name, + status, + TrustedExecutionCluster, + trusted_cluster_operator_lib::FIELD_MANAGER + )?; } Ok(Action::await_change()) } @@ -180,7 +204,7 @@ async fn install_trustee_configuration( client: Client, cluster: &TrustedExecutionCluster, ) -> Result<()> { - let owner_reference = generate_owner_reference(cluster)?; + let owner_reference = generate_owner_controller_reference(cluster)?; let trustee_secret = &cluster.spec.trustee_secret; match trustee::generate_trustee_data(client.clone(), owner_reference.clone(), trustee_secret) @@ -222,7 +246,7 @@ async fn install_trustee_configuration( } async fn install_register_server(client: Client, cluster: &TrustedExecutionCluster) -> Result<()> { - let owner_reference = generate_owner_reference(cluster)?; + let owner_reference = generate_owner_controller_reference(cluster)?; let register_server_image = get_image_or_env( &cluster.spec.register_server_image, @@ -256,7 +280,7 @@ async fn install_attestation_key_register( client: Client, cluster: &TrustedExecutionCluster, ) -> Result<()> { - let owner_reference = generate_owner_reference(cluster)?; + let owner_reference = generate_owner_controller_reference(cluster)?; let attestation_key_register_image = get_image_or_env( &cluster.spec.attestation_key_register_image, @@ -481,6 +505,7 @@ mod tests { }; let clos = async |req: Request, _ctr| { + let is_status_patch = req.uri().path().ends_with("/status"); match *req.method() { Method::GET => { let object_list = ObjectList:: { @@ -490,7 +515,11 @@ mod tests { }; Ok(serde_json::to_string(&object_list).unwrap()) } - Method::POST => Ok(serde_json::to_string(&dummy_cluster()).unwrap()), + // Patches which update the status field, like installing secrets, config maps etc. + Method::PATCH if !is_status_patch => { + Ok(serde_json::to_string(&dummy_cluster()).unwrap()) + } + // Patches which update the status field, to check whether foreign conditions are present and not overwritten. Method::PATCH => { let body = req.into_body().collect_bytes().await.unwrap().to_vec(); let body = String::from_utf8_lossy(&body); diff --git a/operator/src/reference_values.rs b/operator/src/reference_values.rs index b8dcbad6..a1da3c1f 100644 --- a/operator/src/reference_values.rs +++ b/operator/src/reference_values.rs @@ -22,7 +22,7 @@ use kube::runtime::{ finalizer::Event, watcher, }; -use kube::{Api, Client, Resource}; +use kube::{Api, Client}; use log::{info, warn}; use oci_client::secrets::RegistryAuth; use oci_spec::image::ImageConfiguration; @@ -32,8 +32,8 @@ use std::{collections::BTreeMap, sync::Arc, time::Duration}; use crate::trustee::{self, get_image_pcrs}; use operator::{ - ControllerError, RvContextData, controller_error_policy, controller_info, - create_or_info_if_exists, upsert_condition, + ControllerError, RvContextData, apply_resource, controller_error_policy, controller_info, + upsert_condition, }; use trusted_cluster_operator_lib::{conditions::*, reference_values::*, *}; @@ -64,7 +64,7 @@ pub async fn create_pcrs_config_map(client: Client, owner_reference: OwnerRefere data: Some(empty_data), ..Default::default() }; - create_or_info_if_exists!(client, ConfigMap, config_map); + apply_resource!(client, ConfigMap, config_map); Ok(()) } @@ -127,7 +127,7 @@ async fn job_reconcile(job: Arc, ctx: Arc) -> Result = Api::default_namespaced(ctx.client.clone()); - // Foreground deletion: Delete the pod too + // Foreground client-side deletion: Delete the pod too let delete = jobs.delete(name, &DeleteParams::foreground()).await; delete.map_err(Into::::into)?; trustee::update_reference_values(Arc::unwrap_or_clone(ctx)).await?; @@ -174,7 +174,7 @@ async fn compute_fresh_pcrs( JOB_LABEL_KEY.to_string(), PCR_COMMAND_NAME.to_string(), )])), - owner_references: Some(vec![ctx.owner_reference]), + owner_references: Some(vec![ctx.owner_reference.clone()]), ..Default::default() }, spec: Some(JobSpec { @@ -192,7 +192,7 @@ async fn compute_fresh_pcrs( }), ..Default::default() }; - create_or_info_if_exists!(ctx.client, Job, job); + apply_resource!(ctx.client, Job, job); Ok(()) } @@ -283,14 +283,22 @@ async fn image_add_reconcile( info!("Adding owner reference from ApprovedImage {name} to TrustedExecutionCluster"); let patch = json!({ + "apiVersion": "trusted-execution-clusters.io/v1alpha1", + "kind": "ApprovedImage", "metadata": { + "name": name, "ownerReferences": [ctx.owner_reference] } }); + // SSA requires even apiVersion and kind to be present in the patch. let images: Api = Api::default_namespaced(kube_client.clone()); images - .patch(name, &PatchParams::default(), &Patch::Merge(&patch)) + .patch( + name, + &PatchParams::apply(trusted_cluster_operator_lib::FIELD_MANAGER), + &Patch::Apply(&patch), + ) .await .map_err(|e| { finalizer::Error::::ApplyFailed(anyhow::Error::from(e).into()) @@ -320,8 +328,17 @@ async fn image_add_reconcile( let changed = upsert_condition(&mut conditions, committed); if changed { let images: Api = Api::default_namespaced(kube_client); - update_status!(images, &name, ApprovedImageStatus { conditions }) - .map_err(|e| finalizer::Error::::ApplyFailed(e.into()))?; + // Forcing the update to avoid race condition with operator, which also updates the status field. + // TODO: Simplify ownership of this field. + update_status!( + images, + &name, + ApprovedImageStatus { conditions }, + ApprovedImage, + trusted_cluster_operator_lib::FIELD_MANAGER, + force + ) + .map_err(|e| finalizer::Error::::ApplyFailed(e.into()))?; } Ok(action) } @@ -425,16 +442,25 @@ mod tests { use k8s_openapi::apimachinery::pkg::apis::meta::v1::Time; use trusted_cluster_operator_test_utils::mock_client::*; + // Assert that the PCR config map is created with the correct owner reference and data. #[tokio::test] async fn test_create_pcrs_cm_success() { - let clos = |client| create_pcrs_config_map(client, Default::default()); - test_create_success::<_, _, ConfigMap>(clos).await; - } - - #[tokio::test] - async fn test_create_pcrs_cm_exists() { - let clos = |client| create_pcrs_config_map(client, Default::default()); - test_create_already_exists(clos).await; + use kube::client::Body; + let clos = async |req: Request, _| { + let body = get_body_string(req).await; + let v: serde_json::Value = serde_json::from_str(&body).unwrap(); + assert!(v["metadata"]["ownerReferences"].as_array().is_some()); + assert!(v["data"].as_object().unwrap().contains_key(PCR_CONFIG_FILE)); + Ok(serde_json::to_string(&ConfigMap::default()).unwrap()) + }; + count_check!(1, clos, |client| { + let owner = OwnerReference { + name: "my-tec".to_string(), + uid: "tec-uid".to_string(), + ..Default::default() + }; + assert!(create_pcrs_config_map(client, owner).await.is_ok()); + }); } #[tokio::test] @@ -465,7 +491,7 @@ mod tests { assert!(req.uri().path().contains(PCR_CONFIG_MAP)); Ok(serde_json::to_string(&dummy_pcrs_map()).unwrap()) } - (2, &Method::GET) | (3, &Method::PUT) => { + (2, &Method::GET) | (3, &Method::PATCH) => { assert!(req.uri().path().contains(trustee::TRUSTEE_DATA_MAP)); Ok(serde_json::to_string(&dummy_trustee_map()).unwrap()) } @@ -507,10 +533,30 @@ mod tests { ); } + // Assert that the compute-pcrs job is created with the correct owner reference and label. #[tokio::test] async fn test_compute_fresh_pcrs_success() { - let clos = |client| compute_fresh_pcrs(generate_rv_ctx(client), "image", "registry"); - test_create_success::<_, _, Job>(clos).await; + use kube::client::Body; + let clos = async |req: Request, _| { + let body = get_body_string(req).await; + let v: serde_json::Value = serde_json::from_str(&body).unwrap(); + assert!(v["metadata"]["ownerReferences"].as_array().is_some()); + assert_eq!(v["metadata"]["labels"]["kind"], "compute-pcrs"); + Ok(serde_json::to_string(&Job::default()).unwrap()) + }; + count_check!(1, clos, |client| { + let mut ctx = generate_rv_ctx(client); + ctx.owner_reference = OwnerReference { + name: "my-tec".to_string(), + uid: "tec-uid".to_string(), + ..Default::default() + }; + assert!( + compute_fresh_pcrs(ctx, "test-image", "quay.io/test") + .await + .is_ok() + ); + }); } #[tokio::test] @@ -529,7 +575,7 @@ mod tests { assert!(req.uri().path().contains(PCR_CONFIG_MAP)); Ok(serde_json::to_string(&dummy_pcrs_map()).unwrap()) } - (3, &Method::GET) | (4, &Method::PUT) => { + (3, &Method::GET) | (4, &Method::PATCH) => { assert!(req.uri().path().contains(trustee::TRUSTEE_DATA_MAP)); Ok(serde_json::to_string(&dummy_trustee_map()).unwrap()) } @@ -540,4 +586,112 @@ mod tests { assert!(disallow_image(ctx, "registry").await.is_ok()); }); } + + // SSA related unit test helpers. + + fn dummy_approved_image(name: &str, image: &str) -> ApprovedImage { + ApprovedImage { + metadata: ObjectMeta { + name: Some(name.to_string()), + generation: Some(1), + ..Default::default() + }, + spec: trusted_cluster_operator_lib::ApprovedImageSpec { + image: image.to_string(), + }, + status: None, + } + } + + #[tokio::test] + async fn test_image_add_reconcile_adopts_owner_and_updates_status() { + use kube::client::Body; + // Image is not owned yet, and is already committed in PCR map. + // 0: PATCH ApprovedImage (SSA owner adoption) + // 1-2: GET PCR_CONFIG_MAP + // 3: GET trustee-data, + // 4: PATCH trustee-data (Full data SSA apply with recomputed reference values) + // 5: PATCH /status (force SSA, as operator also writes to this.) + let clos = async |req: Request, ctr| match (ctr, req.method()) { + (0, &Method::PATCH) => { + assert!(!req.uri().path().contains("/status")); + let body = get_body_string(req).await; + let v: serde_json::Value = serde_json::from_str(&body).unwrap(); + assert_eq!(v["kind"], "ApprovedImage"); + assert!(v["metadata"]["ownerReferences"].as_array().is_some()); + Ok(serde_json::to_string(&dummy_approved_image("cos", "ref")).unwrap()) + } + (1, &Method::GET) | (2, &Method::GET) => { + Ok(serde_json::to_string(&dummy_pcrs_map()).unwrap()) + } + (3, &Method::GET) | (4, &Method::PATCH) => { + Ok(serde_json::to_string(&dummy_trustee_map()).unwrap()) + } + (5, &Method::PATCH) => { + assert!(req.uri().path().contains("/status")); + let query = req.uri().query().unwrap_or(""); + assert!( + query.contains("force=true"), + "Status patch must use force: {query}" + ); + Ok(serde_json::to_string(&dummy_approved_image("cos", "ref")).unwrap()) + } + _ => panic!("unexpected API interaction: {req:?}, counter {ctr}"), + }; + count_check!(6, clos, |client| { + let ctx = generate_rv_ctx(client); + let image = dummy_approved_image("cos", "ref"); + assert!(image_add_reconcile(ctx, &image).await.is_ok()); + }); + } + + #[tokio::test] + async fn test_image_add_reconcile_already_owned_skips_adoption() { + // Image already owned by TEC, skips owner adoption patch. + // Still processes handle_new_image (already committed) and updates status. + let clos = async |req: Request<_>, ctr| match (ctr, req.method()) { + (0, &Method::GET) | (1, &Method::GET) => { + assert!(req.uri().path().contains(PCR_CONFIG_MAP)); + Ok(serde_json::to_string(&dummy_pcrs_map()).unwrap()) + } + (2, &Method::GET) | (3, &Method::PATCH) => { + assert!(req.uri().path().contains(trustee::TRUSTEE_DATA_MAP)); + Ok(serde_json::to_string(&dummy_trustee_map()).unwrap()) + } + (4, &Method::PATCH) => { + assert!(req.uri().path().contains("/status")); + Ok(serde_json::to_string(&dummy_approved_image("cos", "ref")).unwrap()) + } + _ => panic!("unexpected API interaction: {req:?}, counter {ctr}"), + }; + count_check!(5, clos, |client| { + let mut ctx = generate_rv_ctx(client); + ctx.owner_reference = OwnerReference { + kind: "TrustedExecutionCluster".to_string(), + uid: "tec-uid".to_string(), + name: "tec-test".to_string(), + api_version: "trusted-execution-clusters.io/v1alpha1".to_string(), + ..Default::default() + }; + let mut image = dummy_approved_image("cos", "ref"); + image.metadata.owner_references = Some(vec![ctx.owner_reference.clone()]); + let result = image_add_reconcile(ctx, &image).await; + assert!(result.is_ok()); + }); + } + + #[tokio::test] + async fn test_image_add_reconcile_owner_adoption_error() { + // SSA owner adoption patch fails + let clos = async |req: Request<_>, ctr| match (ctr, req.method()) { + (0, &Method::PATCH) => Err(http::StatusCode::INTERNAL_SERVER_ERROR), + _ => panic!("unexpected API interaction: {req:?}, counter {ctr}"), + }; + count_check!(1, clos, |client| { + let ctx = generate_rv_ctx(client); + let image = dummy_approved_image("cos", "ref"); + let result = image_add_reconcile(ctx, &image).await; + assert!(result.is_err()); + }); + } } diff --git a/operator/src/register_server.rs b/operator/src/register_server.rs index 23b9badd..a29c56f8 100644 --- a/operator/src/register_server.rs +++ b/operator/src/register_server.rs @@ -18,7 +18,7 @@ use kube::runtime::{ finalizer, finalizer::Event, }; -use kube::{Api, Client, Resource}; +use kube::{Api, Client}; use log::info; use std::{collections::BTreeMap, sync::Arc}; @@ -85,7 +85,7 @@ pub async fn create_register_server_deployment( ..Default::default() }; - create_or_info_if_exists!(client, Deployment, deployment); + apply_resource!(client, Deployment, deployment); info!("Register server deployment created successfully"); Ok(()) } @@ -120,7 +120,7 @@ pub async fn create_register_server_service( ..Default::default() }; - create_or_info_if_exists!(client, Service, service); + apply_resource!(client, Service, service); info!("Register server service created successfully"); Ok(()) } @@ -137,7 +137,8 @@ async fn keygen_reconcile( let kube_client = Arc::unwrap_or_clone(client); let id = &machine.spec.id.clone(); async { - let owner_reference = generate_owner_reference(&Arc::unwrap_or_clone(machine))?; + let owner_reference = + generate_owner_controller_reference(&Arc::unwrap_or_clone(machine))?; trustee::generate_secret(kube_client.clone(), id, owner_reference).await?; trustee::mount_secret(kube_client, id).await } diff --git a/operator/src/trustee.rs b/operator/src/trustee.rs index 9cbe6a17..5c274369 100644 --- a/operator/src/trustee.rs +++ b/operator/src/trustee.rs @@ -19,11 +19,11 @@ use k8s_openapi::apimachinery::pkg::{ util::intstr::IntOrString, }; use kube::{ - Api, Client, Resource, + Api, Client, api::{ObjectMeta, Patch, PatchParams}, }; use log::info; -use operator::{RvContextData, TLS_DIR, create_or_info_if_exists, read_certificate}; +use operator::{RvContextData, TLS_DIR, apply_resource, read_certificate}; use serde::{Serialize, Serializer}; use serde_json::{Value::String as JsonString, json}; use std::collections::BTreeMap; @@ -102,8 +102,18 @@ pub async fn update_reference_values(ctx: RvContextData) -> Result<()> { let trustee_data = trustee_map.data.as_mut().context(err)?; trustee_data.insert(REFERENCE_VALUES_FILE.to_string(), rv_json); + let body = json!({ + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": {"name": TRUSTEE_DATA_MAP}, + "data": trustee_data + }); config_maps - .replace(TRUSTEE_DATA_MAP, &Default::default(), &trustee_map) + .patch( + TRUSTEE_DATA_MAP, + &PatchParams::apply(trusted_cluster_operator_lib::FIELD_MANAGER), + &Patch::Apply(&body), + ) .await?; info!("Recomputed reference values"); Ok(()) @@ -163,10 +173,17 @@ pub async fn do_mount_secret(client: Client, id: &str, add: bool) -> Result<()> let container = pod_spec.containers.get_mut(0).context(err)?; let vol_mounts = container.volume_mounts.get_or_insert_default(); + // Additional check to prevent duplicate mounts. Volume and volume mounts are added atomically, so a volume check suffices. if add { - let (volume, volume_mount) = generate_secret_volume(id); - pod_spec.volumes.get_or_insert_default().push(volume); - vol_mounts.push(volume_mount); + let already_mounted = pod_spec + .volumes + .as_ref() + .is_some_and(|vs| vs.iter().any(|v| v.name == id)); + if !already_mounted { + let (volume, volume_mount) = generate_secret_volume(id); + pod_spec.volumes.get_or_insert_default().push(volume); + vol_mounts.push(volume_mount); + } } else { let vol_result = pod_spec.volumes.as_mut().and_then(|vs| { let pos = vs.iter().position(|v| v.name == id); @@ -203,22 +220,27 @@ pub async fn update_attestation_keys(client: Client) -> Result<()> { return false; } + // Removing secrets that don't have an AttestationKey as owner-controller. secret .metadata .owner_references .as_ref() - .map(|owners| owners.iter().any(|owner| owner.kind == "AttestationKey")) - .unwrap_or(false) + .is_some_and(|owners| { + owners.iter().any(|owner| { + owner.kind == "AttestationKey" && owner.controller == Some(true) + }) + }) }) .filter_map(|secret| secret.metadata.name.clone()) .collect(); + // specs should be mutable, as we are patching the deployment. let deployments: Api = Api::default_namespaced(client); - let deployment = deployments.get(TRUSTEE_DEPLOYMENT).await?; + let mut deployment = deployments.get(TRUSTEE_DEPLOYMENT).await?; let err = format!("Deployment {TRUSTEE_DEPLOYMENT} existed, but had no spec"); - let depl_spec = deployment.spec.as_ref().context(err)?; + let depl_spec = deployment.spec.as_mut().context(err)?; let err = format!("Deployment {TRUSTEE_DEPLOYMENT} existed, but had no pod spec"); - let pod_spec = depl_spec.template.spec.as_ref().context(err)?; + let pod_spec = depl_spec.template.spec.as_mut().context(err)?; // Get existing volumes and volumeMounts, filtering out the attestation key volume let mut volumes: Vec = pod_spec @@ -233,7 +255,7 @@ pub async fn update_attestation_keys(client: Client) -> Result<()> { .unwrap_or_default(); let err = format!("Deployment {TRUSTEE_DEPLOYMENT} existed, but had no containers"); - let container = pod_spec.containers.first().context(err)?; + let container = pod_spec.containers.first_mut().context(err)?; let mut vol_mounts: Vec = container .volume_mounts .as_ref() @@ -290,31 +312,21 @@ pub async fn update_attestation_keys(client: Client) -> Result<()> { let vol_mounts_changed = container.volume_mounts.as_ref() != Some(&vol_mounts); if volumes_changed || vol_mounts_changed { - // Patch the deployment with updated volumes and volumeMounts - let patch = json!({ + pod_spec.volumes = Some(volumes); + container.volume_mounts = Some(vol_mounts); + + let body = json!({ "apiVersion": "apps/v1", "kind": "Deployment", - "metadata": { - "name": TRUSTEE_DEPLOYMENT - }, - "spec": { - "template": { - "spec": { - "volumes": volumes, - "containers": [{ - "name": "kbs", - "volumeMounts": vol_mounts - }] - } - } - } + "metadata": {"name": TRUSTEE_DEPLOYMENT}, + "spec": deployment.spec }); deployments .patch( TRUSTEE_DEPLOYMENT, - &PatchParams::apply("trusted-cluster-operator").force(), - &Patch::Apply(&patch), + &PatchParams::apply(trusted_cluster_operator_lib::FIELD_MANAGER), + &Patch::Apply(&body), ) .await?; info!("Successfully patched {TRUSTEE_DEPLOYMENT} with attestation key volumes"); @@ -342,7 +354,7 @@ pub async fn generate_secret( data: Some(data), ..Default::default() }; - create_or_info_if_exists!(client, Secret, secret); + apply_resource!(client, Secret, secret); Ok(()) } @@ -366,7 +378,7 @@ pub async fn generate_attestation_policy( data: Some(data), ..Default::default() }; - create_or_info_if_exists!(client, ConfigMap, config_map); + apply_resource!(client, ConfigMap, config_map); Ok(()) } @@ -415,7 +427,7 @@ pub async fn generate_trustee_data( data: Some(data), ..Default::default() }; - create_or_info_if_exists!(client, ConfigMap, config_map); + apply_resource!(client, ConfigMap, config_map); Ok(()) } @@ -445,7 +457,7 @@ pub async fn generate_kbs_service( }), ..Default::default() }; - create_or_info_if_exists!(client, Service, service); + apply_resource!(client, Service, service); Ok(()) } @@ -571,7 +583,7 @@ pub async fn generate_kbs_deployment( }), ..Default::default() }; - create_or_info_if_exists!(client, Deployment, deployment); + apply_resource!(client, Deployment, deployment); Ok(()) } @@ -635,7 +647,7 @@ mod tests { assert!(req.uri().path().contains(PCR_CONFIG_MAP)); Ok(serde_json::to_string(&dummy_pcrs_map()).unwrap()) } - (1, &Method::GET) | (2, &Method::PUT) => { + (1, &Method::GET) | (2, &Method::PATCH) => { assert!(req.uri().path().contains(TRUSTEE_DATA_MAP)); Ok(serde_json::to_string(&dummy_trustee_map()).unwrap()) } @@ -815,12 +827,6 @@ mod tests { test_create_success::<_, _, ConfigMap>(clos).await; } - #[tokio::test] - async fn test_generate_att_policy_already_exists() { - let clos = |client| generate_attestation_policy(client, Default::default()); - test_create_already_exists(clos).await; - } - #[tokio::test] async fn test_generate_att_policy_error() { let clos = |client| generate_attestation_policy(client, Default::default()); @@ -833,12 +839,6 @@ mod tests { test_create_success::<_, _, Secret>(clos).await; } - #[tokio::test] - async fn test_generate_secret_already_exists() { - let clos = |client| generate_secret(client, "id", Default::default()); - test_create_already_exists(clos).await; - } - #[tokio::test] async fn test_generate_secret_error() { let clos = |client| generate_secret(client, "id", Default::default()); @@ -851,12 +851,6 @@ mod tests { test_create_success::<_, _, ConfigMap>(clos).await; } - #[tokio::test] - async fn test_generate_trustee_data_already_exists() { - let clos = |client| generate_trustee_data(client, Default::default(), &None); - test_create_already_exists(clos).await; - } - #[tokio::test] async fn test_generate_trustee_data_error() { let clos = |client| generate_trustee_data(client, Default::default(), &None); @@ -886,4 +880,189 @@ mod tests { let clos = |client| generate_kbs_deployment(client, Default::default(), "image", &None); test_create_error(clos).await; } + + // SSA related unit tests. + + // No configmap keys are lost when updating the attestation keys. + #[tokio::test] + async fn test_update_rvs_preserves_all_configmap_keys() { + fn full_trustee_map() -> ConfigMap { + let mut m = dummy_trustee_map(); + m.data + .as_mut() + .unwrap() + .insert("kbs-config.toml".to_string(), "config-content".to_string()); + m.data + .as_mut() + .unwrap() + .insert("policy.rego".to_string(), "rego-content".to_string()); + m + } + + let clos = async |req: Request, ctr| match (ctr, req.method()) { + (0, &Method::GET) => Ok(serde_json::to_string(&dummy_pcrs_map()).unwrap()), + (1, &Method::GET) => Ok(serde_json::to_string(&full_trustee_map()).unwrap()), + (2, &Method::PATCH) => { + let body = get_body_string(req).await; + let v: serde_json::Value = serde_json::from_str(&body).unwrap(); + let data = v["data"].as_object().unwrap(); + assert!( + data.contains_key("kbs-config.toml"), + "SSA patch must retain kbs-config.toml" + ); + assert!( + data.contains_key("policy.rego"), + "SSA patch must retain policy.rego" + ); + assert!( + data.contains_key(REFERENCE_VALUES_FILE), + "SSA patch must contain reference-values.json" + ); + Ok(serde_json::to_string(&full_trustee_map()).unwrap()) + } + _ => panic!("unexpected API interaction: {req:?}, counter {ctr}"), + }; + count_check!(3, clos, |client| { + let ctx = generate_rv_ctx(client); + assert!(update_reference_values(ctx).await.is_ok()); + }); + } + + // If a volume is already mounted, it is not duplicated when mounting again. + #[tokio::test] + async fn test_mount_secret_idempotent_skips_duplicate() { + fn depl_with_existing_mount() -> Deployment { + let mut depl = dummy_deployment(); + let spec = depl.spec.as_mut().unwrap(); + let pod_spec = spec.template.spec.as_mut().unwrap(); + let (volume, volume_mount) = generate_secret_volume("already-there"); + pod_spec.volumes = Some(vec![volume]); + pod_spec.containers[0].volume_mounts = Some(vec![volume_mount]); + depl + } + + let clos = async |req: Request, ctr| match (ctr, req.method()) { + (0, &Method::GET) => Ok(serde_json::to_string(&depl_with_existing_mount()).unwrap()), + (1, &Method::PUT) => { + let body = get_body_string(req).await; + let v: serde_json::Value = serde_json::from_str(&body).unwrap(); + let volumes = v["spec"]["template"]["spec"]["volumes"].as_array().unwrap(); + let count = volumes + .iter() + .filter(|vol| vol["name"] == "already-there") + .count(); + assert_eq!(count, 1, "Volume must not be duplicated on re-mount"); + Ok(serde_json::to_string(&depl_with_existing_mount()).unwrap()) + } + _ => panic!("unexpected API interaction: {req:?}, counter {ctr}"), + }; + count_check!(2, clos, |client| { + assert!(mount_secret(client, "already-there").await.is_ok()); + }); + } + + fn dummy_ak_secret(name: &str) -> Secret { + Secret { + metadata: ObjectMeta { + name: Some(name.to_string()), + owner_references: Some(vec![OwnerReference { + kind: "AttestationKey".to_string(), + controller: Some(true), + ..Default::default() + }]), + ..Default::default() + }, + ..Default::default() + } + } + + fn dummy_deployment_with_volumes() -> Deployment { + let pod_spec = generate_kbs_pod_spec("test-image", None); + Deployment { + metadata: ObjectMeta { + name: Some(TRUSTEE_DEPLOYMENT.to_string()), + ..Default::default() + }, + spec: Some(DeploymentSpec { + template: PodTemplateSpec { + spec: Some(pod_spec), + ..Default::default() + }, + ..Default::default() + }), + ..Default::default() + } + } + + fn ak_secrets_json(names: &[&str]) -> String { + let list: kube::api::ObjectList = kube::api::ObjectList { + types: Default::default(), + metadata: Default::default(), + items: names.iter().map(|n| dummy_ak_secret(n)).collect(), + }; + serde_json::to_string(&list).unwrap() + } + + fn deployment_with_volumes_json() -> String { + serde_json::to_string(&dummy_deployment_with_volumes()).unwrap() + } + + #[tokio::test] + async fn test_update_attestation_keys_ssa_body() { + let clos = async |req: Request, ctr| match (ctr, req.method()) { + // Return dummy AK secrets. + (0, &Method::GET) => Ok(ak_secrets_json(&["ak-secret-1", "ak-secret-2"])), + (1, &Method::GET) => Ok(deployment_with_volumes_json()), + // The final patch should aggregate the AK secrets into one single projected volume. + (2, &Method::PATCH) => { + let body = get_body_string(req).await; + let v: serde_json::Value = serde_json::from_str(&body).unwrap(); + + // Projected volume aggregates all AK secrets + let volumes = v["spec"]["template"]["spec"]["volumes"].as_array().unwrap(); + let projected = volumes + .iter() + .find(|vol| vol["name"] == TRUSTED_AK_KEYS_VOLUME) + .expect("Must contain projected volume for AK keys"); + let sources = projected["projected"]["sources"].as_array().unwrap(); + assert_eq!( + sources.len(), + 2, + "Projected volume must aggregate both AK secrets" + ); // ak-secret-1 and ak-secret-2 + + // Base volumes are preserved (full-object SSA) + // Make sure the trustee-data and attestation-policy volumes are still present, in the process of updating the attestation keys and creating the projected volume. + let vol_names: Vec<_> = volumes.iter().filter_map(|v| v["name"].as_str()).collect(); + assert!( + vol_names.contains(&TRUSTEE_DATA_MAP), + "Must retain trustee-data volume" + ); + assert!( + vol_names.contains(&ATT_POLICY_MAP), + "Must retain attestation-policy volume" + ); + assert!( + vol_names.contains(&"resource-dir"), + "Must retain resource-dir volume" + ); + + // No server-populated metadata that would cause 400. Server populated metada should not be passed to the server client-side. + assert!( + v["metadata"].get("managedFields").is_none(), + "Must not contain managedFields" + ); + assert!( + v["metadata"].get("resourceVersion").is_none(), + "Must not contain resourceVersion" + ); + + Ok(deployment_with_volumes_json()) + } + _ => panic!("unexpected API interaction: {req:?}, counter {ctr}"), + }; + count_check!(3, clos, |client| { + assert!(update_attestation_keys(client).await.is_ok()); + }); + } } diff --git a/register-server/src/main.rs b/register-server/src/main.rs index ea920ded..dffdb7b7 100644 --- a/register-server/src/main.rs +++ b/register-server/src/main.rs @@ -24,9 +24,11 @@ use uuid::Uuid; use trusted_cluster_operator_lib::endpoints::*; use trusted_cluster_operator_lib::{ - generate_owner_reference, get_trusted_execution_cluster, Machine, MachineSpec, + generate_owner_controller_reference, get_trusted_execution_cluster, Machine, MachineSpec, }; +const FIELD_MANAGER: &str = "register-server"; + #[derive(Parser)] #[command(name = "register-server")] #[command(about = "HTTP server that generates Clevis PINs with random UUIDs")] @@ -179,18 +181,20 @@ async fn register_handler() -> impl IntoResponse { Err(e) => return internal_error(e.into()), }; - // Get the TrustedExecutionCluster to use as owner reference for the Machine + // Get the TrustedExecutionCluster to use as owner controller reference for the Machine let cluster = match get_trusted_execution_cluster(kube_client.clone()).await { Ok(c) => c, Err(e) => return internal_error(e.context("Failed to get TrustedExecutionCluster")), }; - let owner_reference = match generate_owner_reference(&cluster) { + let owner_controller_reference = match generate_owner_controller_reference(&cluster) { Ok(o) => o, - Err(e) => return internal_error(e.context("Failed to generate owner reference")), + Err(e) => { + return internal_error(e.context("Failed to generate owner controller reference")) + } }; - match create_machine(kube_client.clone(), &id, owner_reference).await { + match create_machine(kube_client.clone(), &id, owner_controller_reference).await { Ok(_) => info!("Machine created successfully: machine-{id}"), Err(e) => return internal_error(e.context("Failed to create machine")), } @@ -219,13 +223,13 @@ async fn register_handler() -> impl IntoResponse { async fn create_machine( client: Client, uuid: &str, - owner_reference: OwnerReference, + owner_controller_reference: OwnerReference, ) -> anyhow::Result<()> { let machine_name = format!("machine-{uuid}"); let machine = Machine { metadata: ObjectMeta { name: Some(machine_name.clone()), - owner_references: Some(vec![owner_reference]), + owner_references: Some(vec![owner_controller_reference]), ..Default::default() }, spec: MachineSpec { @@ -235,7 +239,13 @@ async fn create_machine( }; let machines: Api = Api::default_namespaced(client); - machines.create(&Default::default(), &machine).await?; + let params = kube::api::PostParams { + field_manager: Some(FIELD_MANAGER.to_string()), + ..Default::default() + }; + + // Client side apply, as this is a one time operation for each machine (no reconciliation loop), and SSA is mainly used for patching. Setting field manager, so that we can identify the source of the creation later for reconciliation by operator. + machines.create(¶ms, &machine).await?; info!("Created Machine: {machine_name} with UUID: {uuid}"); Ok(()) } @@ -357,9 +367,31 @@ mod tests { #[tokio::test] async fn test_create_machine() { - let clos = async |_, _| Ok(serde_json::to_string(&dummy_machine()).unwrap()); + use http::{Method, Request}; + let clos = async |req: Request<_>, _| { + assert_eq!(req.method(), Method::POST, "Must use POST, not SSA PATCH"); + let query = req.uri().query().unwrap_or(""); + assert!( + query.contains(&format!("fieldManager={FIELD_MANAGER}")), + "Field manager must be '{FIELD_MANAGER}': {query}" + ); + + let body = get_body_string(req).await; + let v: serde_json::Value = serde_json::from_str(&body).unwrap(); + assert_eq!(v["metadata"]["name"], "machine-my-uuid"); + assert_eq!(v["spec"]["id"], "my-uuid"); + + let owners = v["metadata"]["ownerReferences"] + .as_array() + .expect("Machine must have ownerReferences"); + assert_eq!(owners.len(), 1); + assert_eq!(owners[0]["kind"], "TrustedExecutionCluster"); + assert_eq!(owners[0]["controller"], true, "TEC must be controller"); + assert_eq!(owners[0]["blockOwnerDeletion"], true); + Ok(serde_json::to_string(&dummy_machine()).unwrap()) + }; count_check!(1, clos, |client| { - assert!(create_machine(client, "test", dummy_owner_reference()) + assert!(create_machine(client, "my-uuid", dummy_owner_reference()) .await .is_ok()); }); diff --git a/test_utils/src/mock_client.rs b/test_utils/src/mock_client.rs index 1f084e18..a6af0bbe 100644 --- a/test_utils/src/mock_client.rs +++ b/test_utils/src/mock_client.rs @@ -129,21 +129,6 @@ pub async fn test_create_success< }); } -pub async fn test_create_already_exists< - F: Fn(Client) -> S, - S: Future>, ->( - create: F, -) { - let clos = async |req: Request<_>, _| match req { - r if r.method() == Method::POST => Err(StatusCode::CONFLICT), - _ => panic!("unexpected API interaction: {req:?}"), - }; - count_check!(1, clos, |client| { - assert!(create(client).await.is_ok()); - }); -} - async fn test_error< F: Fn(Client) -> S, S: Future>, @@ -165,7 +150,7 @@ pub async fn test_create_error S, S: Future, _| match req.method() { - &Method::POST => Err(StatusCode::INTERNAL_SERVER_ERROR), + &Method::POST | &Method::PATCH => Err(StatusCode::INTERNAL_SERVER_ERROR), _ => panic!("unexpected API interaction: {req:?}"), }; test_error(create, clos).await; diff --git a/tests/trusted_execution_cluster.rs b/tests/trusted_execution_cluster.rs index c9263ce3..dceb4df4 100644 --- a/tests/trusted_execution_cluster.rs +++ b/tests/trusted_execution_cluster.rs @@ -5,12 +5,16 @@ use compute_pcrs_lib::{Part, Pcr}; use k8s_openapi::api::apps::v1::Deployment; use k8s_openapi::api::core::v1::{ConfigMap, Secret}; -use kube::{Api, api::DeleteParams}; +use kube::{ + Api, + api::{DeleteParams, PostParams}, +}; use std::time::Duration; use trusted_cluster_operator_lib::conditions::NOT_COMMITTED_REASON_PENDING; use trusted_cluster_operator_lib::reference_values::ImagePcrs; use trusted_cluster_operator_lib::{ - ApprovedImage, AttestationKey, Machine, TrustedExecutionCluster, generate_owner_reference, + ApprovedImage, AttestationKey, FIELD_MANAGER, Machine, TrustedExecutionCluster, + generate_owner_controller_reference, }; use trusted_cluster_operator_test_utils::*; @@ -28,10 +32,9 @@ named_test!( let tec_api: Api = Api::namespaced(client.clone(), namespace); let tec = tec_api.get(name).await?; - let owner_reference = generate_owner_reference(&tec)?; + let owner_controller_reference = generate_owner_controller_reference(&tec)?; - // Create a test Machine with TEC as owner reference. We need to set the owner reference - // manually since the machine is not created directly by the operator. + // Create a test Machine with TEC as owner-controller reference. We need to set the owner reference manually since the machine is not created directly by the operator, but by register-server. let machine_uuid = uuid::Uuid::new_v4().to_string(); let machine_name = format!("test-machine-{}", &machine_uuid[..8]); @@ -40,7 +43,7 @@ named_test!( metadata: k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta { name: Some(machine_name.clone()), namespace: Some(namespace.to_string()), - owner_references: Some(vec![owner_reference.clone()]), + owner_references: Some(vec![owner_controller_reference.clone()]), ..Default::default() }, spec: trusted_cluster_operator_lib::MachineSpec { @@ -49,7 +52,11 @@ named_test!( status: None, }; - machines.create(&Default::default(), &machine).await?; + let pp = PostParams { + field_manager: Some(FIELD_MANAGER.to_string()), + ..Default::default() + }; + machines.create(&pp, &machine).await?; test_ctx.info(format!("Created test Machine: {machine_name}")); // Create an AttestationKey with the same uuid as the Machine @@ -70,9 +77,11 @@ named_test!( status: None, }; - attestation_keys - .create(&Default::default(), &attestation_key) - .await?; + let pp = PostParams { + field_manager: Some(FIELD_MANAGER.to_string()), + ..Default::default() + }; + attestation_keys.create(&pp, &attestation_key).await?; test_ctx.info(format!( "Created test AttestationKey: {ak_name} with uuid: {machine_uuid}", )); @@ -87,6 +96,8 @@ named_test!( .poll_async(|| { let ak_api = attestation_keys.clone(); let ak_name_clone = ak_name.clone(); + let machine_name_clone = machine_name.clone(); + async move { let ak = ak_api.get(&ak_name_clone).await?; @@ -108,6 +119,26 @@ named_test!( )); } + // On approval of attestation key, the operator transfers ownership and control of the AttestationKey to the Machine. Validating that it happens below. + let has_machine_controller = ak + .metadata + .owner_references + .as_ref() + .map(|refs| { + refs.iter().any(|r| { + r.kind == "Machine" + && r.name == machine_name_clone + && r.controller == Some(true) + }) + }) + .unwrap_or(false); + + if !has_machine_controller { + return Err(anyhow::anyhow!( + "AttestationKey does not have Machine as controller yet" + )); + } + Ok(()) } }) @@ -290,7 +321,7 @@ async fn test_attestation_key_lifecycle() -> anyhow::Result<()> { let tec_api: Api = Api::namespaced(client.clone(), namespace); let tec = tec_api.get(tec_name).await?; - let owner_reference = generate_owner_reference(&tec)?; + let owner_controller_reference = generate_owner_controller_reference(&tec)?; let machine_uuid = uuid::Uuid::new_v4().to_string(); @@ -302,7 +333,7 @@ async fn test_attestation_key_lifecycle() -> anyhow::Result<()> { metadata: k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta { name: Some(ak_name.clone()), namespace: Some(namespace.to_string()), - owner_references: Some(vec![owner_reference.clone()]), + owner_references: Some(vec![owner_controller_reference.clone()]), ..Default::default() }, spec: trusted_cluster_operator_lib::AttestationKeySpec { @@ -312,9 +343,8 @@ async fn test_attestation_key_lifecycle() -> anyhow::Result<()> { status: None, }; - attestation_keys - .create(&Default::default(), &attestation_key) - .await?; + let pp = PostParams { field_manager: Some(FIELD_MANAGER.to_string()), ..Default::default() }; + attestation_keys.create(&pp, &attestation_key).await?; test_ctx.info(format!( "Created test AttestationKey: {ak_name} with uuid: {machine_uuid}", )); @@ -325,7 +355,7 @@ async fn test_attestation_key_lifecycle() -> anyhow::Result<()> { metadata: k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta { name: Some(machine_name.clone()), namespace: Some(namespace.to_string()), - owner_references: Some(vec![owner_reference.clone()]), + owner_references: Some(vec![owner_controller_reference.clone()]), ..Default::default() }, spec: trusted_cluster_operator_lib::MachineSpec { @@ -334,7 +364,8 @@ async fn test_attestation_key_lifecycle() -> anyhow::Result<()> { status: None, }; - machines.create(&Default::default(), &machine).await?; + let pp = PostParams { field_manager: Some(FIELD_MANAGER.to_string()), ..Default::default() }; + machines.create(&pp, &machine).await?; test_ctx.info(format!( "Created test Machine: {machine_name} with uuid: {machine_uuid}", )); @@ -373,40 +404,44 @@ async fn test_attestation_key_lifecycle() -> anyhow::Result<()> { )); } - // Check for owner reference to the Machine - let has_machine_owner_ref = ak + // Check for owner-controller reference to the Machine + let has_machine_owner_controller_ref = ak .metadata .owner_references .as_ref() - .map(|owner_refs| { - owner_refs.iter().any(|owner_ref| { - owner_ref.kind == "Machine" && owner_ref.name == machine_name_clone + .map(|owner_controller_refs| { + owner_controller_refs.iter().any(|owner_ref| { + owner_ref.kind == "Machine" + && owner_ref.name == machine_name_clone + && owner_ref.controller == Some(true) }) }) .unwrap_or(false); - if !has_machine_owner_ref { + if !has_machine_owner_controller_ref { return Err(anyhow::anyhow!( - "AttestationKey does not have owner reference to Machine yet" + "AttestationKey does not have Machine as owner-controller yet" )); } - // Check that a Secret with the same name exists and has the AttestationKey as owner + // Check that a Secret with the same name exists and has the AttestationKey as owner-controller let secret = secrets.get(&ak_name_clone).await?; - let has_ak_owner_ref = secret + let has_ak_owner_controller_ref = secret .metadata .owner_references .as_ref() - .map(|owner_refs| { - owner_refs.iter().any(|owner_ref| { - owner_ref.kind == "AttestationKey" && owner_ref.name == ak_name_clone + .map(|owner_controller_refs| { + owner_controller_refs.iter().any(|owner_ref| { + owner_ref.kind == "AttestationKey" + && owner_ref.name == ak_name_clone + && owner_ref.controller == Some(true) }) }) .unwrap_or(false); - if !has_ak_owner_ref { + if !has_ak_owner_controller_ref { return Err(anyhow::anyhow!( - "Secret does not have owner reference to AttestationKey yet" + "Secret does not have AttestationKey as owner-controller yet" )); } @@ -415,9 +450,9 @@ async fn test_attestation_key_lifecycle() -> anyhow::Result<()> { }) .await?; - test_ctx.info(format!( - "AttestationKey successfully approved with owner reference to Machine: {machine_name} and Secret created" - )); + test_ctx.info( + "AttestationKey successfully approved with Machine as owner-controller and Secret created", + ); // Delete the Machine let dp = DeleteParams::default(); @@ -444,7 +479,7 @@ async fn test_nonexistent_approved_image() -> anyhow::Result<()> { let namespace = test_ctx.namespace(); let images: Api = Api::namespaced(client.clone(), namespace); - images.create(&Default::default(), &ApprovedImage { + images.create(&PostParams { field_manager: Some(FIELD_MANAGER.to_string()), ..Default::default() }, &ApprovedImage { metadata: k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta { name: Some("coreos1".to_string()), namespace: Some(namespace.to_string()),