Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions e2e/scenario_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2429,6 +2429,41 @@ func Test_Ubuntu2404_SecureTLSBootstrapping_BootstrapToken_Fallback(t *testing.T
})
}

// Test_Ubuntu2204_SecureTLSBootstrapping_APIServerIPEnvVar validates that the
// CSE shell code (configureAndStartSecureTLSBootstrapping in cse_config.sh)
// resolves the API server IP at provisioning time and writes it as
// APISERVER_IP=<addr> into /etc/default/secure-tls-bootstrap.
//
// Tracking: AB#38327357. The companion STLS client change in
// Azure/aks-secure-tls-bootstrap reads this env var and dials the IP literal
// directly so the gRPC dns:/// resolver is never consulted on retries.
//
// This test validates only the AgentBaker side (the env var is correctly
// populated). The end-to-end "DNS blackhole, STLS still succeeds" test
// requires the new STLS client binary baked into the VHD and is tracked as
// a follow-up.
func Test_Ubuntu2204_SecureTLSBootstrapping_APIServerIPEnvVar(t *testing.T) {
RunScenario(t, &Scenario{
Description: "validates that CSE writes APISERVER_IP into /etc/default/secure-tls-bootstrap so STLS can dial without DNS",
Config: Config{
Cluster: ClusterKubenet,
VHD: config.VHDUbuntu2204Gen2Containerd,
BootstrapConfigMutator: func(_ *Cluster, nbc *datamodel.NodeBootstrappingConfiguration) {
nbc.SecureTLSBootstrappingConfig = &datamodel.SecureTLSBootstrappingConfig{
Enabled: true,
}
},
Validator: func(ctx context.Context, s *Scenario) {
// The env-var line must be present and assign a non-empty value.
// The resolver block falls back through IMDS tag -> getent ahostsv4
// -> getent ahostsv6 and writes nothing if all sources fail, so a
// missing line would indicate a hard regression.
ValidateFileHasContent(ctx, s, "/etc/default/secure-tls-bootstrap", "APISERVER_IP=")
},
},
})
}

func Test_Ubuntu2404Gen2_GPUNoDriver(t *testing.T) {
RunScenario(t, &Scenario{
Description: "Tests that a GPU-enabled node using the Ubuntu 2404 VHD opting for skipping gpu driver installation can be properly bootstrapped",
Expand Down
46 changes: 46 additions & 0 deletions parts/linux/cloud-init/artifacts/cse_config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -548,13 +548,59 @@ configureAndStartSecureTLSBootstrapping() {
BOOTSTRAP_CLIENT_FLAGS="${BOOTSTRAP_CLIENT_FLAGS} --deadline=${SECURE_TLS_BOOTSTRAPPING_DEADLINE}"
fi

# AB#38327357: resolve the apiserver IP locally and hand it to STLS via the
# APISERVER_IP env var so the client can dial the literal IP and skip the
# gRPC dns:/// resolver. If anything fails the var stays empty, the line
# is omitted, and STLS falls back to its existing FQDN dial path. Best
# effort only — must never fail CSE.
APISERVER_IP=""
case "${API_SERVER_NAME}" in
''|*[!0-9.]*)
# Not a plain IPv4 literal. Try the IMDS aksAPIServerIPAddress tag
# (private clusters only — same source reconcile-private-hosts.sh
# uses for privatelink FQDNs), then DNS via getent.
case "${API_SERVER_NAME}" in
*.privatelink.*)
APISERVER_IP=$(curl -sSL -m 5 -H "Metadata: true" \
"http://169.254.169.254/metadata/instance/compute/tags?api-version=2019-03-11&format=text" 2>/dev/null \
| tr ';' '\n' \
| awk -F: 'tolower($1) == "aksapiserveripaddress" { print $2; exit }')
Comment on lines +564 to +567
# Discard IMDS values that are not plausible IP literals so
# we fall through to getent instead of short-circuiting on
# an invalid (or absent) tag.
case "${APISERVER_IP}" in
''|*[!0-9a-fA-F:.]*) APISERVER_IP="" ;;
esac
;;
esac
if [ -z "${APISERVER_IP}" ] && [ -n "${API_SERVER_NAME}" ]; then
APISERVER_IP=$(getent ahostsv4 "${API_SERVER_NAME}" 2>/dev/null | awk '/STREAM/ { print $1; exit }')
fi
if [ -z "${APISERVER_IP}" ] && [ -n "${API_SERVER_NAME}" ]; then
APISERVER_IP=$(getent ahostsv6 "${API_SERVER_NAME}" 2>/dev/null | awk '/STREAM/ { print $1; exit }')
fi
# Final sanity: discard anything that isn't a plausible IP literal.
# The STLS client also validates with net.ParseIP, but reject early
# so we don't write garbage into the EnvironmentFile.
case "${APISERVER_IP}" in
''|*[!0-9a-fA-F:.]*) APISERVER_IP="" ;;
esac
;;
*)
APISERVER_IP="${API_SERVER_NAME}"
;;
esac

mkdir -p "$(dirname "${SECURE_TLS_BOOTSTRAPPING_DEFAULT_FILE}")"
touch "${SECURE_TLS_BOOTSTRAPPING_DEFAULT_FILE}"
chmod 0600 "${SECURE_TLS_BOOTSTRAPPING_DEFAULT_FILE}"
echo "BOOTSTRAP_FLAGS=${BOOTSTRAP_CLIENT_FLAGS}" > "${SECURE_TLS_BOOTSTRAPPING_DEFAULT_FILE}"
if [ -n "${AZURE_ENVIRONMENT_FILEPATH}" ]; then
echo "AZURE_ENVIRONMENT_FILEPATH=${AZURE_ENVIRONMENT_FILEPATH}" >> "${SECURE_TLS_BOOTSTRAPPING_DEFAULT_FILE}"
fi
if [ -n "${APISERVER_IP}" ]; then
echo "APISERVER_IP=${APISERVER_IP}" >> "${SECURE_TLS_BOOTSTRAPPING_DEFAULT_FILE}"
fi

mkdir -p "$(dirname "${SECURE_TLS_BOOTSTRAPPING_DROP_IN}")"
touch "${SECURE_TLS_BOOTSTRAPPING_DROP_IN}"
Expand Down
145 changes: 145 additions & 0 deletions spec/parts/linux/cloud-init/artifacts/cse_config_spec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1227,6 +1227,17 @@ SETUP_EOF
echo "chmod $@"
}

# AB#38327357: stub external resolvers so the default tests do not depend on
# CI DNS / IMDS reachability. Per-test overrides further down provide
# specific responses for the new IP-resolution cases.
curl() {
  # Default stub for the IMDS lookup: print nothing and exit with status 1 so
  # specs never depend on a reachable metadata endpoint. Individual examples
  # redefine curl when they need a canned response.
  false
}

getent() {
  # Default stub for DNS lookups: print nothing and exit with status 2 so
  # specs never depend on the resolver of the machine running them.
  # Individual examples redefine getent when they need a canned answer.
  (exit 2)
}

cleanup() {
rm -rf "$SECURE_TLS_BOOTSTRAPPING_DROP_IN_DIR"
rm -rf "$SECURE_TLS_BOOTSTRAPPING_DEFAULT_FILE_DIR"
Expand All @@ -1250,6 +1261,7 @@ SETUP_EOF
The contents of file "secure-tls-bootstrap.service.d/10-securetlsbootstrap.conf" should include "WantedBy=kubelet.service"
The contents of file "default/secure-tls-bootstrap" should include 'BOOTSTRAP_FLAGS=--aad-resource=6dae42f8-4368-4678-94ff-3960e28e3630 --apiserver-fqdn=fqdn --cloud-provider-config=/etc/kubernetes/azure.json'
The contents of file "default/secure-tls-bootstrap" should not include 'AZURE_ENVIRONMENT_FILEPATH'
The contents of file "default/secure-tls-bootstrap" should not include 'APISERVER_IP='
The status should be success
End

Expand All @@ -1262,6 +1274,7 @@ SETUP_EOF
The output should include "systemctlEnableAndStartNoBlock secure-tls-bootstrap 30"
The contents of file "default/secure-tls-bootstrap" should include 'BOOTSTRAP_FLAGS=--aad-resource=6dae42f8-4368-4678-94ff-3960e28e3630 --apiserver-fqdn=fqdn --cloud-provider-config=/etc/kubernetes/azure.json'
The contents of file "default/secure-tls-bootstrap" should include 'AZURE_ENVIRONMENT_FILEPATH=/etc/kubernetes/akscustom.json'
The contents of file "default/secure-tls-bootstrap" should not include 'APISERVER_IP='
The status should be success
End

Expand Down Expand Up @@ -1291,6 +1304,138 @@ SETUP_EOF
The contents of file "default/secure-tls-bootstrap" should include 'BOOTSTRAP_FLAGS=--aad-resource=custom-resource --apiserver-fqdn=fqdn --cloud-provider-config=/etc/kubernetes/azure.json --user-assigned-identity-id=custom-identity-id --validate-kubeconfig-timeout=custom-validate-kubeconfig-timeout --get-access-token-timeout=custom-get-access-token-timeout --get-instance-data-timeout=custom-get-instance-data-timeout --get-nonce-timeout=custom-get-nonce-timeout --get-attested-data-timeout=custom-get-attested-data-timeout --get-credential-timeout=custom-get-credential-timeout --deadline=custom-deadline'
The status should be success
End

# AB#38327357: APISERVER_IP resolution coverage. The new resolver runs
# before the EnvironmentFile is written so STLS can dial the apiserver
# IP directly and bypass gRPC's dns resolver when node DNS is broken.
It 'should write APISERVER_IP as-is when API_SERVER_NAME is already an IPv4 literal'
# Pass-through case: the name consists solely of digits and dots, so the
# function under test copies it into APISERVER_IP without consulting curl or
# getent.
# Echo the arguments so the example can assert on how the unit is started.
systemctlEnableAndStartNoBlock() {
echo "systemctlEnableAndStartNoBlock $@"
}
API_SERVER_NAME="10.0.0.5"
When call configureAndStartSecureTLSBootstrapping
The output should include "systemctlEnableAndStartNoBlock secure-tls-bootstrap 30"
The contents of file "default/secure-tls-bootstrap" should include 'APISERVER_IP=10.0.0.5'
The status should be success
End

It 'should resolve APISERVER_IP via getent ahostsv4 when DNS works'
# DNS case for a non-privatelink FQDN: the IMDS lookup is skipped and the
# address comes from the first STREAM line that getent ahostsv4 prints.
# Echo the arguments so the example can assert on how the unit is started.
systemctlEnableAndStartNoBlock() {
echo "systemctlEnableAndStartNoBlock $@"
}
# Answer only ahostsv4 queries; the DGRAM and RAW lines verify that the
# function under test keeps just the STREAM entry.
getent() {
# First arg is the database (ahostsv4 / ahostsv6).
if [ "$1" = "ahostsv4" ]; then
printf '10.0.0.6 STREAM example.hcp.eastus.azmk8s.io\n10.0.0.6 DGRAM\n10.0.0.6 RAW\n'
return 0
fi
return 2
}
API_SERVER_NAME="example.hcp.eastus.azmk8s.io"
When call configureAndStartSecureTLSBootstrapping
The output should include "systemctlEnableAndStartNoBlock secure-tls-bootstrap 30"
The contents of file "default/secure-tls-bootstrap" should include 'APISERVER_IP=10.0.0.6'
The status should be success
End

It 'should fall back to getent ahostsv6 when ahostsv4 has no answer'
# IPv6 fallback: the ahostsv4 query prints nothing, so APISERVER_IP is still
# empty and the function under test retries with ahostsv6.
# Echo the arguments so the example can assert on how the unit is started.
systemctlEnableAndStartNoBlock() {
echo "systemctlEnableAndStartNoBlock $@"
}
# Answer only ahostsv6 queries; every other database fails with status 2 and
# no output.
getent() {
if [ "$1" = "ahostsv6" ]; then
printf '2603:1030::1 STREAM v6only.hcp.eastus.azmk8s.io\n'
return 0
fi
return 2
}
API_SERVER_NAME="v6only.hcp.eastus.azmk8s.io"
When call configureAndStartSecureTLSBootstrapping
The output should include "systemctlEnableAndStartNoBlock secure-tls-bootstrap 30"
The contents of file "default/secure-tls-bootstrap" should include 'APISERVER_IP=2603:1030::1'
The status should be success
End

It 'should prefer the IMDS aksAPIServerIPAddress tag for privatelink FQDNs'
# Privatelink case: the name matches *.privatelink.*, so the function under
# test queries IMDS first and uses the aksAPIServerIPAddress tag value.
# Echo the arguments so the example can assert on how the unit is started.
systemctlEnableAndStartNoBlock() {
echo "systemctlEnableAndStartNoBlock $@"
}
curl() {
# IMDS returns key:value pairs separated by semicolons.
echo "aksAPIServerIPAddress:10.224.0.4;otherTag:someValue"
return 0
}
getent() {
# Not expected to run once IMDS supplies an address. The function under test
# invokes getent with 2>/dev/null, so this message would be discarded; a
# regression that skips the IMDS value would instead surface through the
# APISERVER_IP=10.224.0.4 assertion below, because this stub prints nothing
# on stdout.
echo "getent should not have been called" >&2
return 1
}
API_SERVER_NAME="example.privatelink.eastus.azmk8s.io"
When call configureAndStartSecureTLSBootstrapping
The output should include "systemctlEnableAndStartNoBlock secure-tls-bootstrap 30"
The contents of file "default/secure-tls-bootstrap" should include 'APISERVER_IP=10.224.0.4'
The status should be success
End

It 'should fall back to DNS when IMDS returns no aksAPIServerIPAddress tag'
# Privatelink case where IMDS answers but carries no aksAPIServerIPAddress
# tag: the tag lookup yields an empty value, so the function under test moves
# on to getent ahostsv4.
# Echo the arguments so the example can assert on how the unit is started.
systemctlEnableAndStartNoBlock() {
echo "systemctlEnableAndStartNoBlock $@"
}
# Successful IMDS response that only contains unrelated tags.
curl() {
echo "otherTag:someValue;anotherTag:moreData"
return 0
}
# Answer only ahostsv4 queries; every other database fails with status 2.
getent() {
if [ "$1" = "ahostsv4" ]; then
printf '10.224.0.5 STREAM example.privatelink.eastus.azmk8s.io\n'
return 0
fi
return 2
}
API_SERVER_NAME="example.privatelink.eastus.azmk8s.io"
When call configureAndStartSecureTLSBootstrapping
The output should include "systemctlEnableAndStartNoBlock secure-tls-bootstrap 30"
The contents of file "default/secure-tls-bootstrap" should include 'APISERVER_IP=10.224.0.5'
The status should be success
End

It 'should omit APISERVER_IP when every resolver fails'
# Failure case: no source yields an address, so the EnvironmentFile must
# still be written with BOOTSTRAP_FLAGS but without an APISERVER_IP line, and
# the function must still succeed.
# Echo the arguments so the example can assert on how the unit is started.
systemctlEnableAndStartNoBlock() {
echo "systemctlEnableAndStartNoBlock $@"
}
# curl and getent are the Describe-level stubs that fail with no output. This
# name does not match *.privatelink.*, so only the getent lookups are
# exercised here; the curl stub is never reached.
API_SERVER_NAME="unresolvable.example.com"
When call configureAndStartSecureTLSBootstrapping
The output should include "systemctlEnableAndStartNoBlock secure-tls-bootstrap 30"
The contents of file "default/secure-tls-bootstrap" should include 'BOOTSTRAP_FLAGS='
The contents of file "default/secure-tls-bootstrap" should not include 'APISERVER_IP='
The status should be success
End

It 'should reject IMDS responses that are not plausible IP literals'
# Validation case: the IMDS tag value contains characters outside the set
# allowed for IP literals (hex digits, ':' and '.'), so the function under
# test must discard it and continue with the getent lookup.
# Echo the arguments so the example can assert on how the unit is started.
systemctlEnableAndStartNoBlock() {
echo "systemctlEnableAndStartNoBlock $@"
}
curl() {
# Garbage / injected value masquerading as a tag value.
echo "aksAPIServerIPAddress:not-an-ip!@#"
return 0
}
# Answer only ahostsv4 queries; every other database fails with status 2.
getent() {
if [ "$1" = "ahostsv4" ]; then
printf '10.224.0.9 STREAM example.privatelink.eastus.azmk8s.io\n'
return 0
fi
return 2
}
API_SERVER_NAME="example.privatelink.eastus.azmk8s.io"
When call configureAndStartSecureTLSBootstrapping
The output should include "systemctlEnableAndStartNoBlock secure-tls-bootstrap 30"
# Garbage discarded, falls through to getent.
The contents of file "default/secure-tls-bootstrap" should include 'APISERVER_IP=10.224.0.9'
The contents of file "default/secure-tls-bootstrap" should not include 'APISERVER_IP=not-an-ip'
The status should be success
End
End

Describe 'configureKubeletAndKubectl'
Expand Down
Loading