Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions attestation-key-register/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ use uuid::Uuid;

use trusted_cluster_operator_lib::endpoints::ATTESTATION_KEY_REGISTER_RESOURCE;
use trusted_cluster_operator_lib::{
generate_owner_reference, get_trusted_execution_cluster, AttestationKey, AttestationKeySpec,
self, generate_owner_controller_reference, get_trusted_execution_cluster, AttestationKey,
AttestationKeySpec,
};

#[derive(Parser)]
Expand Down Expand Up @@ -64,15 +65,17 @@ async fn handle_registration(

let api: Api<AttestationKey> = Api::default_namespaced(client.clone());

// Get the TrustedExecutionCluster to use as owner reference
// TEC is owner-controller of the AttestationKey until the operator approves the key and transfers control to Machine, which then becomes the controller and manages lifecycle of the AttestationKey.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's an owner-controller?

@SpaceFace02 SpaceFace02 Jun 3, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, we had the function generate_owner_reference which set controller to true.

This is ambiguous, because not all owners are controllers. A k8s object can have only 1 controller in its set of owner references (here). It can however have multiple non-controller linking owner references.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we keep these renames and comment changes, I think they should be a separate commit. not sure if we do though, cc @alicefr

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, please try to generate clean commits it gets easier to review

let cluster = match get_trusted_execution_cluster(client.clone()).await {
Ok(c) => c,
Err(e) => return internal_error(e.context("Failed to get TrustedExecutionCluster")),
};

let owner_reference = match generate_owner_reference(&cluster) {
let owner_controller_reference = match generate_owner_controller_reference(&cluster) {
Ok(o) => o,
Err(e) => return internal_error(e.context("Failed to generate owner reference")),
Err(e) => {
return internal_error(e.context("Failed to generate owner-controller reference"))
}
};

match api.list(&Default::default()).await {
Expand Down Expand Up @@ -104,7 +107,7 @@ async fn handle_registration(
let attestation_key = AttestationKey {
metadata: ObjectMeta {
name: Some(name.clone()),
owner_references: Some(vec![owner_reference]),
owner_references: Some(vec![owner_controller_reference]),
..Default::default()
},
spec: AttestationKeySpec {
Expand All @@ -114,7 +117,17 @@ async fn handle_registration(
status: None,
};

match api.create(&Default::default(), &attestation_key).await {
// Client side apply, as this is a one time operation (no reconciliation loop) for each attestation key, and one owner. SSA is mainly used for patching. Setting field manager, so that we can identify the source of the creation later for reconciliation by operator.
match api
.create(
&kube::api::PostParams {
field_manager: Some(trusted_cluster_operator_lib::FIELD_MANAGER.to_string()),
..Default::default()
},
&attestation_key,
)
.await
{
Ok(created) => {
let name = created.metadata.name.unwrap_or_default();
info!("Successfully created AttestationKey: {name}",);
Expand Down
14 changes: 13 additions & 1 deletion compute-pcrs/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ use std::{fs::File, io::Read};

use trusted_cluster_operator_lib::{conditions::INSTALLED_REASON, reference_values::*, *};

const FIELD_MANAGER: &str = "compute-pcrs";

#[derive(Parser)]
#[command(version, about)]
struct Args {
Expand Down Expand Up @@ -77,6 +79,16 @@ async fn main() -> Result<()> {
let committed = committed_condition(INSTALLED_REASON, image.metadata.generation, &None);
let conditions = Some(vec![committed]);
let status = ApprovedImageStatus { conditions };
update_status!(approved_images, &args.resource_name, status)?;

// As operator also updates the status field, while requeuing every 10 seconds, we need to force this update to avoid race condition.
// TODO: Simplify ownership of this field.
update_status!(
approved_images,
&args.resource_name,
status,
ApprovedImage,
FIELD_MANAGER,
force
)?;
Ok(())
}
30 changes: 24 additions & 6 deletions lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,31 @@ use conditions::*;
use k8s_openapi::apimachinery::pkg::apis::meta::v1::{Condition, OwnerReference, Time};
use kube::Resource;

pub const FIELD_MANAGER: &str = "trusted-cluster-operator";

// 2 arms for updating status, one for normal updates and one for force updates.
#[macro_export]
macro_rules! update_status {
($api:ident, $name:expr, $status:expr) => {{
let patch = kube::api::Patch::Merge(serde_json::json!({"status": $status}));
$api.patch_status($name, &Default::default(), &patch).await
($api:ident, $name:expr, $status:expr, $type:ty, $field_manager:expr) => {{
let patch = kube::api::Patch::Apply(serde_json::json!({
"apiVersion": <$type as kube::Resource>::api_version(&()),
"kind": <$type as kube::Resource>::kind(&()),
"status": $status
}));
let params = kube::api::PatchParams::apply($field_manager);
$api.patch_status($name, &params, &patch).await
.map_err(Into::<anyhow::Error>::into)
}};
($api:ident, $name:expr, $status:expr, $type:ty, $field_manager:expr, force) => {{

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this sets the component (operator, compute-pcrs) as field manager for the status, but also just uses force the only time that it could actually conflict. I don't really see the downside of having just one field manager by that point but I'm open to hearing @alicefr's opinion before you refactor.

@SpaceFace02 SpaceFace02 Jun 3, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, we had multiple writers to the Status Field of the ApprovedImage. the ComputePCR also updated the status with committed_condition (while status as PCR's are computing), and the operator requeued every 10 seconds to check whether the computation had completed and then updated the same field.

To fix this, ideally we should have only the operator update the status of the field. We can discuss this design decision in detail with Alice.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really see the downside of having just one field manager by that point

Field manager is inherently a SSA concept, initially we had 2 writers to the same field, and updated it client side. There was no concept of field managers, because the client patch would overwrite any changes, even if written previously by another writer.

@alicefr alicefr Jun 4, 2026

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To fix this, ideally we should have only the operator update the status of the field. We can discuss this design decision in detail with Alice.

Yeah, we need to revisit the design of the approved images ideally, there should be a single writer. Let's discuss it today

let patch = kube::api::Patch::Apply(serde_json::json!({
"apiVersion": <$type as kube::Resource>::api_version(&()),
"kind": <$type as kube::Resource>::kind(&()),
"status": $status
}));
let params = kube::api::PatchParams::apply($field_manager).force();
$api.patch_status($name, &params, &patch).await
.map_err(Into::<anyhow::Error>::into)
}}
}};
}

pub fn condition_status(status: bool) -> String {
Expand Down Expand Up @@ -108,8 +126,8 @@ pub fn committed_condition(
}
}

/// Generate an OwnerReference for any Kubernetes resource
pub fn generate_owner_reference<T: Resource<DynamicType = ()>>(
/// OwnerReference with `controller: true` for lifecycle and garbage collection.
pub fn generate_owner_controller_reference<T: Resource<DynamicType = ()>>(
Comment on lines -111 to +130

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure why this is necessary, it's not like there's a new function that this function's name must disambiguate from

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mainly to disambiguate between a controller owner, and a linking owner.

object: &T,
) -> anyhow::Result<OwnerReference> {
let name = object.meta().name.clone();
Expand Down
3 changes: 3 additions & 0 deletions lib/src/reference_values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ macro_rules! update_image_pcrs {
let map = (PCR_CONFIG_FILE.to_string(), image_pcrs_json.to_string());
let data = std::collections::BTreeMap::from([map]);
$map.data = Some(data);

// Operator and compute-pcr's both write to this configmap, hence client-side apply makes sense here.
// TODO: Simplify ownership of this field.
$api.replace(PCR_CONFIG_MAP, &Default::default(), &$map)
.await?
};
Expand Down
Loading