diff --git a/.github/workflows/e2e-tests.yml b/.github/workflows/e2e-tests.yml
index 21aefe28..8870e597 100644
--- a/.github/workflows/e2e-tests.yml
+++ b/.github/workflows/e2e-tests.yml
@@ -41,6 +41,42 @@ jobs:
     environment: e2e-testing
 
     steps:
+      - name: Clean runner disk
+        run: |
+          set -euo pipefail
+
+          echo "Disk usage before cleanup:"
+          df -h
+
+          # Self-hosted runners keep state across jobs. Remove stale E2E
+          # artifacts before checkout/build so jobs do not fail with ENOSPC.
+          # Every removal is best-effort: a failed cleanup must not fail the job.
+          rm -rf /tmp/aks-flex-node-e2e-* || true
+          rm -rf /tmp/opencode/* || true
+
+          # Go creates module cache directories read-only, so a plain rm -rf
+          # fails with "Permission denied" and "|| true" would hide that no
+          # space was reclaimed. Restore owner write permission first.
+          if [ -d "${HOME}/go/pkg/mod" ]; then
+            chmod -R u+w "${HOME}/go/pkg/mod" || true
+          fi
+          rm -rf "${HOME}/go/pkg/mod" "${HOME}/.cache/go-build" || true
+
+          # Prune unused container data from whichever runtimes are installed
+          # on this runner.
+          if command -v docker >/dev/null 2>&1; then
+            docker system prune -af --volumes || true
+          fi
+          if command -v crictl >/dev/null 2>&1; then
+            sudo crictl rmi --prune || true
+          fi
+
+          # Remove archived systemd journal files older than one day.
+          sudo journalctl --vacuum-time=1d || true
+
+          echo "Disk usage after cleanup:"
+          df -h
+
       - name: Checkout code
         uses: actions/checkout@v4
 
diff --git a/hack/e2e/README.md b/hack/e2e/README.md
index b86b9018..04213a40 100644
--- a/hack/e2e/README.md
+++ b/hack/e2e/README.md
@@ -77,6 +77,7 @@ Additional environment variables:
 |----------|---------|-------------|
 | `E2E_SSH_KEY_FILE` | auto-detected | SSH public key used for VM access. |
 | `E2E_WORK_DIR` | `/tmp/aks-flex-node-e2e` | Working directory for state, configs, and logs. |
+| `E2E_KUBECONFIG` | `$E2E_WORK_DIR/kubeconfig` | Per-run kubeconfig path. Defaults to an isolated file instead of the runner-global kubeconfig. |
 | `E2E_KUBERNETES_VERSION` | `1.35.0` | Kubernetes version used in generated node configs. |
 | `E2E_CONTAINERD_VERSION` | `2.0.4` | Containerd version used in generated node configs. |
 | `E2E_RUNC_VERSION` | `1.1.12` | Runc version used in generated node configs. 
| diff --git a/hack/e2e/lib/common.sh b/hack/e2e/lib/common.sh index 10c6611d..45a74d52 100755 --- a/hack/e2e/lib/common.sh +++ b/hack/e2e/lib/common.sh @@ -173,6 +173,10 @@ load_config() { E2E_BINARY="${E2E_BINARY:-}" E2E_HELPER_BINARY="${E2E_HELPER_BINARY:-}" + # Keep E2E runs isolated from stale or corrupt runner-global kubeconfig state. + E2E_KUBECONFIG="${E2E_KUBECONFIG:-${E2E_WORK_DIR}/kubeconfig}" + export KUBECONFIG="${E2E_KUBECONFIG}" + # Skip cleanup for debugging E2E_SKIP_CLEANUP="${E2E_SKIP_CLEANUP:-0}" @@ -198,6 +202,7 @@ load_config() { log_info " Subscription: ${AZURE_SUBSCRIPTION_ID}" log_info " Name Suffix: ${E2E_NAME_SUFFIX}" log_info " Agent Pool: ${E2E_TARGET_AGENT_POOL_NAME}" + log_info " Kubeconfig: ${KUBECONFIG}" log_info " Skip Cleanup: ${E2E_SKIP_CLEANUP}" } diff --git a/hack/e2e/lib/node-join-kubeadm.sh b/hack/e2e/lib/node-join-kubeadm.sh index da348161..1e14c2a1 100644 --- a/hack/e2e/lib/node-join-kubeadm.sh +++ b/hack/e2e/lib/node-join-kubeadm.sh @@ -13,6 +13,11 @@ set -euo pipefail [[ -n "${_E2E_NODE_JOIN_KUBEADM_LOADED:-}" ]] && return 0 readonly _E2E_NODE_JOIN_KUBEADM_LOADED=1 +# Kubeadm-style bootstrap tokens must carry kubeadm's default bootstrap group. +# AKS Flex Node also uses system:bootstrappers:aks-flex-node for E2E CSR +# authorization, so this flow grants and emits both groups. 
+readonly kubeadmBootstrapGroup="system:bootstrappers:kubeadm:default-node-token" + # shellcheck disable=SC1091 source "$(dirname "${BASH_SOURCE[0]}")/common.sh" @@ -46,6 +51,9 @@ subjects: - apiGroup: rbac.authorization.k8s.io kind: Group name: system:bootstrappers:aks-flex-node +- apiGroup: rbac.authorization.k8s.io + kind: Group + name: ${kubeadmBootstrapGroup} --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding @@ -59,6 +67,9 @@ subjects: - apiGroup: rbac.authorization.k8s.io kind: Group name: system:bootstrappers:aks-flex-node +- apiGroup: rbac.authorization.k8s.io + kind: Group + name: ${kubeadmBootstrapGroup} --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding @@ -85,6 +96,9 @@ subjects: - apiGroup: rbac.authorization.k8s.io kind: Group name: system:bootstrappers:aks-flex-node +- apiGroup: rbac.authorization.k8s.io + kind: Group + name: ${kubeadmBootstrapGroup} --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role @@ -110,6 +124,9 @@ subjects: - kind: Group apiGroup: rbac.authorization.k8s.io name: system:bootstrappers:aks-flex-node +- kind: Group + apiGroup: rbac.authorization.k8s.io + name: ${kubeadmBootstrapGroup} --- apiVersion: rbac.authorization.k8s.io/v1 kind: Role @@ -135,6 +152,9 @@ subjects: - kind: Group apiGroup: rbac.authorization.k8s.io name: system:bootstrappers:aks-flex-node +- kind: Group + apiGroup: rbac.authorization.k8s.io + name: ${kubeadmBootstrapGroup} --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole @@ -157,6 +177,9 @@ subjects: - kind: Group apiGroup: rbac.authorization.k8s.io name: system:bootstrappers:aks-flex-node +- kind: Group + apiGroup: rbac.authorization.k8s.io + name: ${kubeadmBootstrapGroup} EOF # Publish the ConfigMaps that kubeadm join reads during its preflight phase. 
@@ -246,7 +269,7 @@ stringData: expiration: "${expiration}" usage-bootstrap-authentication: "true" usage-bootstrap-signing: "true" - auth-extra-groups: "system:bootstrappers:aks-flex-node" + auth-extra-groups: "system:bootstrappers:aks-flex-node,${kubeadmBootstrapGroup}" EOF echo "${bootstrap_token}"