diff --git a/.gitignore b/.gitignore index 8d070fa5..faffaa3b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ certs/ -builddir/ \ No newline at end of file +builddir/ + diff --git a/.tekton/build-dm-verity-image-debug.yaml b/.tekton/build-dm-verity-image-debug.yaml index 144ed8b1..21ca0188 100644 --- a/.tekton/build-dm-verity-image-debug.yaml +++ b/.tekton/build-dm-verity-image-debug.yaml @@ -59,12 +59,12 @@ spec: name: varlibcontainers steps: - name: use-trusted-artifact - image: quay.io/konflux-ci/build-trusted-artifacts:latest@sha256:4689f88dd253bd1feebf57f1a76a5a751880f739000719cd662bbdc76990a7fd + image: quay.io/konflux-ci/build-trusted-artifacts:latest@sha256:15d7dc86012e41b10d1eb37679ec03ee75c96436224fadd0938a49dc537aa4ad args: - use - $(params.SOURCE_ARTIFACT)=/var/workdir/source - name: download-rhel-image - image: quay.io/konflux-ci/hermeto:0.29.0@sha256:f577e0399953471df7a9826c1550aef83d28e8b35f76dd65a193441822b629ee + image: quay.io/konflux-ci/hermeto:0.47.0@sha256:22a12fb6220c21c2b4e4b4ea33dfdc4a0294dc3071ebcf30570604b54078f792 env: - name: REDHAT_OFFLINE_TOKEN valueFrom: @@ -101,7 +101,7 @@ spec: curl -X GET "${download_url}" -H "Authorization: Bearer ${token}" --output "${filepath}" echo "${RHEL_IMAGE_CHECKSUM}" "${filepath}" | sha256sum --check - name: build - image: quay.io/konflux-ci/buildah-task:latest@sha256:b82d465a06c926882d02b721cf8a8476048711332749f39926a01089cf85a3f9 + image: quay.io/konflux-ci/buildah-task:latest@sha256:4c470b5a153c4acd14bf4f8731b5e36c61d7faafe09c2bf376bb81ce84aa5709 computeResources: limits: memory: 512Mi @@ -571,7 +571,7 @@ spec: - mountPath: /activation-key name: activation-key - name: sbom-generate - image: quay.io/konflux-ci/mobster@sha256:45298b363ff4b96a084bf77a627b3e23471dcfb821eab55a3fa49a60f0ac43f3 + image: quay.io/konflux-ci/mobster@sha256:a2feb71e321a0164820fe7171e564242c75125d8b62d213d3ba3423cf8fa678b script: | #!/bin/bash set -euo pipefail @@ -600,7 +600,7 @@ spec: mkdir -p /tmp/auth && 
select-oci-auth "$(cat "$(results.IMAGE_REFERENCE.path)")" > /tmp/auth/config.json DOCKER_CONFIG=/tmp/auth cosign attach sbom --sbom sbom.json --type "$SBOM_TYPE" "$(cat "$(results.IMAGE_REFERENCE.path)")" - name: report-sbom-url - image: quay.io/konflux-ci/yq:latest@sha256:15d0238843d954ee78c9c190705eb8b36f6e52c31434183c37d99a80841a635a + image: quay.io/konflux-ci/yq:latest@sha256:466005c667e6e9ea19fd4738275f71a13f89382f6233c581d5e952a41ccb3b42 script: | #!/bin/bash REPO=${OUTPUT_IMAGE%:*} diff --git a/.tekton/osc-dm-verity-image-debug-pull-request.yaml b/.tekton/osc-dm-verity-image-debug-pull-request.yaml index fe81dff2..9c79b3c4 100644 --- a/.tekton/osc-dm-verity-image-debug-pull-request.yaml +++ b/.tekton/osc-dm-verity-image-debug-pull-request.yaml @@ -10,13 +10,13 @@ metadata: pipelinesascode.tekton.dev/max-keep-runs: "3" pipelinesascode.tekton.dev/on-cel-expression: event == "pull_request" && target_branch - == "main" + == "osc-release-v1.11" creationTimestamp: null labels: - appstudio.openshift.io/application: osc-dm-verity-image-debug - appstudio.openshift.io/component: osc-dm-verity-image-debug + appstudio.openshift.io/application: osc-dm-verity-image-debug-v1-11 + appstudio.openshift.io/component: osc-dm-verity-image-debug-v1-11 pipelines.appstudio.openshift.io/type: build - name: osc-dm-verity-image-debug-on-pull-request + name: osc-dm-verity-image-debug-v1-11-on-pull-request namespace: ose-osc-tenant spec: params: @@ -25,7 +25,7 @@ spec: - name: revision value: "{{revision}}" - name: output-image - value: quay.io/redhat-user-workloads/ose-osc-tenant/osc-dm-verity-image-debug:on-pr-{{revision}} + value: quay.io/redhat-user-workloads/ose-osc-tenant/osc-dm-verity-image-debug-v1-11:on-pr-{{revision}} - name: image-expires-after value: 5d taskRunSpecs: @@ -40,7 +40,7 @@ spec: pipelineRef: name: build-pipeline-debug taskRunTemplate: - serviceAccountName: build-pipeline-osc-dm-verity-image-debug + serviceAccountName: 
build-pipeline-osc-dm-verity-image-debug-v1-11 workspaces: - name: git-auth secret: diff --git a/.tekton/osc-dm-verity-image-debug-push.yaml b/.tekton/osc-dm-verity-image-debug-push.yaml index 14214851..57ccd1e2 100644 --- a/.tekton/osc-dm-verity-image-debug-push.yaml +++ b/.tekton/osc-dm-verity-image-debug-push.yaml @@ -8,11 +8,11 @@ metadata: pipelinesascode.tekton.dev/cancel-in-progress: "false" pipelinesascode.tekton.dev/max-keep-runs: "3" pipelinesascode.tekton.dev/on-cel-expression: event == "push" && target_branch - == "main" + == "osc-release-v1.11" creationTimestamp: null labels: - appstudio.openshift.io/application: osc-dm-verity-image-debug - appstudio.openshift.io/component: osc-dm-verity-image-debug + appstudio.openshift.io/application: osc-dm-verity-image-debug-v1-11 + appstudio.openshift.io/component: osc-dm-verity-image-debug-v1-11 pipelines.appstudio.openshift.io/type: build name: osc-dm-verity-image-debug-on-push namespace: ose-osc-tenant @@ -23,7 +23,7 @@ spec: - name: revision value: '{{revision}}' - name: output-image - value: quay.io/redhat-user-workloads/ose-osc-tenant/osc-dm-verity-image-debug:{{revision}} + value: quay.io/redhat-user-workloads/ose-osc-tenant/osc-dm-verity-image-debug-v1-11:{{revision}} taskRunSpecs: - pipelineTaskName: build-vm-image stepSpecs: @@ -36,7 +36,7 @@ spec: pipelineRef: name: build-pipeline-debug taskRunTemplate: - serviceAccountName: build-pipeline-osc-dm-verity-image-debug + serviceAccountName: build-pipeline-osc-dm-verity-image-debug-v1-11 workspaces: - name: git-auth secret: diff --git a/.tekton/osc-dm-verity-image-pull-request.yaml b/.tekton/osc-dm-verity-image-pull-request.yaml index e9b9376e..50598e07 100644 --- a/.tekton/osc-dm-verity-image-pull-request.yaml +++ b/.tekton/osc-dm-verity-image-pull-request.yaml @@ -10,13 +10,13 @@ metadata: pipelinesascode.tekton.dev/max-keep-runs: "3" pipelinesascode.tekton.dev/on-cel-expression: event == "pull_request" && target_branch - == "main" + == "osc-release-v1.11" 
creationTimestamp: null labels: - appstudio.openshift.io/application: openshift-sandboxed-containers - appstudio.openshift.io/component: osc-dm-verity-image + appstudio.openshift.io/application: openshift-sandboxed-containers-v1-11 + appstudio.openshift.io/component: osc-dm-verity-image-v1-11 pipelines.appstudio.openshift.io/type: build - name: osc-dm-verity-image-on-pull-request + name: osc-dm-verity-image-v1-11-on-pull-request namespace: ose-osc-tenant spec: params: @@ -25,7 +25,7 @@ spec: - name: revision value: "{{revision}}" - name: output-image - value: quay.io/redhat-user-workloads/ose-osc-tenant/osc-dm-verity-image:on-pr-{{revision}} + value: quay.io/redhat-user-workloads/ose-osc-tenant/osc-dm-verity-image-v1-11:on-pr-{{revision}} - name: image-expires-after value: 5d - name: dockerfile @@ -42,7 +42,7 @@ spec: pipelineRef: name: build-pipeline taskRunTemplate: - serviceAccountName: build-pipeline-osc-dm-verity-image + serviceAccountName: build-pipeline-osc-dm-verity-image-v1-11 workspaces: - name: git-auth secret: diff --git a/.tekton/osc-dm-verity-image-push.yaml b/.tekton/osc-dm-verity-image-push.yaml index 567f4250..58429d5a 100644 --- a/.tekton/osc-dm-verity-image-push.yaml +++ b/.tekton/osc-dm-verity-image-push.yaml @@ -8,13 +8,13 @@ metadata: pipelinesascode.tekton.dev/cancel-in-progress: "false" pipelinesascode.tekton.dev/max-keep-runs: "3" pipelinesascode.tekton.dev/on-cel-expression: event == "push" && target_branch - == "main" + == "osc-release-v1.11" creationTimestamp: null labels: - appstudio.openshift.io/application: openshift-sandboxed-containers - appstudio.openshift.io/component: osc-dm-verity-image + appstudio.openshift.io/application: openshift-sandboxed-containers-v1-11 + appstudio.openshift.io/component: osc-dm-verity-image-v1-11 pipelines.appstudio.openshift.io/type: build - name: osc-dm-verity-image-on-push + name: osc-dm-verity-image-v1-11-on-push namespace: ose-osc-tenant spec: params: @@ -23,7 +23,7 @@ spec: - name: revision value: 
'{{revision}}' - name: output-image - value: quay.io/redhat-user-workloads/ose-osc-tenant/osc-dm-verity-image:{{revision}} + value: quay.io/redhat-user-workloads/ose-osc-tenant/osc-dm-verity-image-v1-11:{{revision}} - name: dockerfile value: Dockerfile taskRunSpecs: @@ -38,7 +38,7 @@ spec: pipelineRef: name: build-pipeline taskRunTemplate: - serviceAccountName: build-pipeline-osc-dm-verity-image + serviceAccountName: build-pipeline-osc-dm-verity-image-v1-11 workspaces: - name: git-auth secret: diff --git a/BRIDGE_SETUP.md b/BRIDGE_SETUP.md new file mode 100644 index 00000000..47919f02 --- /dev/null +++ b/BRIDGE_SETUP.md @@ -0,0 +1,431 @@ +# Bridge Setup Guide for Direct VM-to-Container Communication + +## Azure Private DNS Integration + +The bridge setup scripts now support automatic hostname registration in Azure Private DNS zones. This enables hostname-based service discovery across your peer pod infrastructure. + +### Prerequisites + +1. **Azure Private DNS Zone**: Create a private DNS zone (e.g., `spark.local`) +2. **Service Principal**: Create a service principal with appropriate permissions +3. **Required Permissions**: Service principal needs **Private DNS Zone Contributor** role on the DNS zone + +### Configuration + +Add the following to your `aa.toml` configuration file: + +```toml +# Hostname for this VM (will be registered as hostname.spark.local) +hostname = "spark-master" + +# Azure credentials for DNS registration +azure_client_id = "your-client-id" +azure_client_secret = "your-client-secret" +azure_tenant_id = "your-tenant-id" +azure_subscription_id = "your-subscription-id" +``` + +### How It Works + +When the server bridge setup script runs: + +1. **Hostname Configuration**: Sets the system hostname and configures DHCP to send it +2. **Azure Authentication**: Logs in using the service principal credentials +3. **DNS Registration**: Creates/updates an A record mapping `hostname.spark.local` to the VM's IP +4. 
**Verification**: Confirms the DNS record was created successfully + +### DNS Record Format + +The script creates A records in the format: +``` +hostname.spark.local → VM_IP_ADDRESS +``` + +Example: +``` +spark-master.spark.local → 10.0.1.10 +spark-worker-1.spark.local → 10.0.1.11 +spark-worker-2.spark.local → 10.0.1.12 +``` + +### Service Principal Setup + +Create a service principal with DNS permissions: + +```bash +# Create service principal +az ad sp create-for-rbac --name "peer-pods-dns-updater" \ + --role "Private DNS Zone Contributor" \ + --scopes /subscriptions/{subscription-id}/resourceGroups/{resource-group}/providers/Microsoft.Network/privateDnsZones/{dns-zone-name} + +# Output will include: +# - appId (use as azure_client_id) +# - password (use as azure_client_secret) +# - tenant (use as azure_tenant_id) +``` + +### Troubleshooting DNS Registration + +Check the script logs for DNS registration status: +```bash +# View systemd service logs +sudo journalctl -u azure-bridge-server.service -f + +# Common issues: +# - "Azure CLI not found": Install Azure CLI +# - "Authentication failed": Check service principal credentials +# - "Failed to create DNS A record": Verify service principal has correct permissions +# - "Private DNS zone not found": Verify zone name and resource group +``` + +### Installing Azure CLI + +If Azure CLI is not installed on the VM image: + +```bash +# Install Azure CLI +curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash + +# Verify installation +az --version +``` + +### Benefits + +- **Automatic Registration**: VMs automatically register their hostnames on boot +- **Service Discovery**: Applications can use hostnames instead of IPs +- **Dynamic Updates**: DNS records update if VM IPs change +- **Centralized Management**: All hostname mappings in one Azure Private DNS zone +- **No Manual Configuration**: Eliminates need to manually maintain DNS records + +This guide documents the complete setup for enabling direct communication 
between peer pod containers bypassing the Kubernetes VXLAN overlay network. + +## Architecture Overview + +``` +Client Container (192.168.0.101) + ↓ eth1 +Client Bridge (192.168.0.51) + ↓ eth0 +Client VM (192.168.0.10) + ↓ Azure VNET +Server VM (192.168.0.11) + ↓ iptables DNAT +Server Bridge (192.168.0.50) + ↓ eth1 +Server Container (10.129.2.25) +``` + +## Server VM Setup + +### 0. Configure Hostname via DHCP (Optional but Recommended) + +This step configures the VM to send its hostname via DHCP and sets the system hostname. This helps with identification and can enable hostname-based communication if DNS is configured. + +```bash +# Find your eth0 connection name +nmcli connection show + +# Configure to send hostname via DHCP (connection name is typically "Wired connection 1") +sudo nmcli connection modify "Wired connection 1" \ + ipv4.dhcp-send-hostname yes \ + ipv4.dhcp-hostname "server-peerpod" + +# Apply changes +sudo nmcli connection down "Wired connection 1" +sudo nmcli connection up "Wired connection 1" + +# Verify DHCP hostname configuration +nmcli connection show "Wired connection 1" | grep dhcp-hostname + +# Set the system hostname permanently +sudo hostnamectl set-hostname server-peerpod + +# Verify hostname +hostnamectl +hostname +hostname -f +``` + +**Note**: If your DHCP server supports dynamic DNS updates, this hostname will be registered automatically. This is particularly useful when using an external DHCP server in the same VNET. + +### 1. 
Create Bridge Network + +```bash +# Create veth pair in podns namespace +sudo ip netns exec podns ip link add eth1 type veth peer name veth-azure +sudo ip netns exec podns ip addr add 192.168.0.100/24 dev eth1 +sudo ip netns exec podns ip link set eth1 up +sudo ip netns exec podns ip link set veth-azure netns 1 + +# Create bridge in host namespace +sudo ip link add br-azure type bridge +sudo ip link set veth-azure master br-azure +sudo ip link set veth-azure up +sudo ip link set br-azure up +sudo ip addr add 192.168.0.50/24 dev br-azure +``` + +### 2. Configure Routing + +```bash +# Add route in container namespace +sudo ip netns exec podns ip route add 192.168.0.0/24 dev eth1 + +# Add specific host route +sudo ip route add 192.168.0.100/32 dev br-azure + +# Delete conflicting subnet route +sudo ip route del 192.168.0.0/24 dev br-azure + +# Add route to Client container +sudo ip route add 192.168.0.101/32 via 192.168.0.10 dev eth0 +``` + +### 3. Enable Proxy ARP and Forwarding + +```bash +sudo sysctl -w net.ipv4.conf.all.proxy_arp=1 +sudo sysctl -w net.ipv4.conf.br-azure.proxy_arp=1 +sudo sysctl -w net.ipv4.ip_forward=1 +``` + +### 4. Configure iptables DNAT Rules + +```bash +# First DNAT: VM IP → Bridge IP +sudo iptables -t nat -A PREROUTING -d 192.168.0.11 -p tcp --dport 8080 -j DNAT --to-destination 192.168.0.100:8080 +sudo iptables -t nat -A OUTPUT -d 192.168.0.11 -p tcp --dport 8080 -j DNAT --to-destination 192.168.0.100:8080 + +# Second DNAT: Bridge IP → Container IP +sudo iptables -t nat -A PREROUTING -d 192.168.0.100 -p tcp --dport 8080 -j DNAT --to-destination 10.129.2.25:8080 +sudo iptables -t nat -A OUTPUT -d 192.168.0.100 -p tcp --dport 8080 -j DNAT --to-destination 10.129.2.25:8080 + +# MASQUERADE for return traffic +sudo iptables -t nat -A POSTROUTING -d 10.129.2.25 -p tcp --dport 8080 -j MASQUERADE +``` + +## Client VM Setup + +### 0. 
Configure Hostname via DHCP (Optional but Recommended) + +This step configures the VM to send its hostname via DHCP and sets the system hostname. + +```bash +# Find your eth0 connection name +nmcli connection show + +# Configure to send hostname via DHCP (connection name is typically "Wired connection 1") +sudo nmcli connection modify "Wired connection 1" \ + ipv4.dhcp-send-hostname yes \ + ipv4.dhcp-hostname "client-peerpod" + +# Apply changes +sudo nmcli connection down "Wired connection 1" +sudo nmcli connection up "Wired connection 1" + +# Verify DHCP hostname configuration +nmcli connection show "Wired connection 1" | grep dhcp-hostname + +# Set the system hostname permanently +sudo hostnamectl set-hostname client-peerpod + +# Verify hostname +hostnamectl +hostname +hostname -f +``` + +**Note**: This allows the client VM to be identified by hostname rather than just IP address, which is useful for logging and troubleshooting. + +### 1. Create Bridge Network + +```bash +# Create veth pair with different IP (192.168.0.101) +sudo ip netns exec podns ip link add eth1 type veth peer name veth-azure +sudo ip netns exec podns ip addr add 192.168.0.101/24 dev eth1 +sudo ip netns exec podns ip link set eth1 up +sudo ip netns exec podns ip link set veth-azure netns 1 + +# Create bridge +sudo ip link add br-azure type bridge +sudo ip link set veth-azure master br-azure +sudo ip link set veth-azure up +sudo ip link set br-azure up +sudo ip addr add 192.168.0.51/24 dev br-azure +``` + +### 2. Configure Routing + +```bash +# Add route in container namespace +sudo ip netns exec podns ip route add 192.168.0.0/24 dev eth1 + +# Add specific host route +sudo ip route add 192.168.0.101/32 dev br-azure + +# Delete conflicting subnet route +sudo ip route del 192.168.0.0/24 dev br-azure +``` + +### 3. 
Enable Proxy ARP and Forwarding + +```bash +sudo sysctl -w net.ipv4.conf.all.proxy_arp=1 +sudo sysctl -w net.ipv4.conf.br-azure.proxy_arp=1 +sudo sysctl -w net.ipv4.ip_forward=1 +``` + +### 4. Configure iptables for Forwarding + +```bash +# Allow forwarding between bridge and eth0 +sudo iptables -I FORWARD -i br-azure -o eth0 -j ACCEPT +sudo iptables -I FORWARD -i eth0 -o br-azure -j ACCEPT + +# MASQUERADE outgoing traffic from container +sudo iptables -t nat -A POSTROUTING -s 192.168.0.101/32 -o eth0 -j MASQUERADE +``` + +## Test Cases + +### Test 1: Server VM ↔ Server Container + +```bash +# On Server VM + +# VM → Container +curl http://192.168.0.11:8080 +# Expected: "Hello from VM IP" + +# Container → VM (start test server first) +python3 -m http.server 9090 & +sudo ip netns exec podns curl http://192.168.0.11:9090 +# Expected: Directory listing +kill %1 +``` + +### Test 2: Client VM ↔ Client Container + +```bash +# On Client VM + +# VM → Container bridge +ping -c 2 192.168.0.101 +# Expected: 0% packet loss + +# Container → VM bridge +sudo ip netns exec podns ping -c 2 192.168.0.51 +# Expected: 0% packet loss +``` + +### Test 3: Server VM ↔ Client VM + +```bash +# From Server VM +curl http://192.168.0.10:9090 +# (Requires test server running on Client VM) + +# From Client VM +curl http://192.168.0.11:8080 +# Expected: "Hello from VM IP" +``` + +### Test 4: Client Container → Server Container (Main Goal) + +```bash +# On Client VM + +# Ping test +sudo ip netns exec podns ping -c 2 192.168.0.11 +# Expected: 0% packet loss + +# HTTP test +sudo ip netns exec podns curl http://192.168.0.11:8080 +# Expected: "Hello from VM IP" +``` + +## Verification Commands + +### Check Bridge Status + +```bash +# On both VMs +ip link show | grep -E "br-azure|veth-azure" +bridge link show +``` + +### Check IP Addresses + +```bash +# On both VMs +ip addr show br-azure +sudo ip netns exec podns ip addr show eth1 +``` + +### Check Routes + +```bash +# On both VMs +ip route show | grep 
192.168.0 +sudo ip netns exec podns ip route show | grep 192.168.0 +``` + +### Check iptables Rules + +```bash +# On Server VM +sudo iptables -t nat -L PREROUTING -n -v | grep 8080 +sudo iptables -t nat -L OUTPUT -n -v | grep 8080 +sudo iptables -t nat -L POSTROUTING -n -v | grep 8080 + +# On Client VM +sudo iptables -L FORWARD -n -v +sudo iptables -t nat -L POSTROUTING -n -v +``` + +### Check Proxy ARP + +```bash +# On both VMs +sysctl net.ipv4.conf.all.proxy_arp +sysctl net.ipv4.conf.br-azure.proxy_arp +sysctl net.ipv4.ip_forward +``` + +## Troubleshooting + +### Issue: Ping works but curl fails +- Check iptables DNAT rules exist +- Verify nginx is running: `sudo ip netns exec podns ss -tlnp | grep 8080` +- Check iptables counters: `sudo iptables -t nat -L -n -v | grep 8080` + +### Issue: Connection hangs +- Flush conntrack table: `sudo conntrack -F` +- Check if conntrack table is full: `dmesg | grep conntrack` + +### Issue: ARP resolution fails +- Enable proxy ARP on all interfaces +- Check ARP entries: `sudo ip netns exec podns ip neigh show` +- Manually add ARP if needed: `sudo ip netns exec podns ip neigh add <IP> lladdr <MAC> dev eth1` + +### Issue: Route conflicts +- Ensure no `192.168.0.0/24 dev br-azure` route exists +- Only specific host routes should use br-azure +- Azure VNET traffic should use eth0 + +## Key Points + +1. **Different Bridge IPs**: Server uses 192.168.0.100, Client uses 192.168.0.101 +2. **MASQUERADE Required**: Client needs MASQUERADE for outgoing traffic +3. **Bidirectional Routes**: Server needs route to Client's bridge IP via Client VM +4. **Proxy ARP**: Essential for ARP resolution across namespaces +5. **DNAT Chain**: Server uses double DNAT (VM IP → Bridge IP → Container IP) +6. 
**No Subnet Routes**: Delete auto-created `192.168.0.0/24 dev br-azure` routes + +## Success Criteria + +✅ Client container can curl Server container on port 8080 +✅ Traffic bypasses Kubernetes VXLAN overlay +✅ Direct VM-to-VM container communication established +✅ Port 8080 accessible via Azure VM private IP \ No newline at end of file diff --git a/HOSTNAME_RESOLUTION.md b/HOSTNAME_RESOLUTION.md new file mode 100644 index 00000000..94bd85f1 --- /dev/null +++ b/HOSTNAME_RESOLUTION.md @@ -0,0 +1,323 @@ +# Azure Container Hostname Resolution + +## Overview + +This document describes the dynamic hostname resolution system implemented for Azure confidential containers. The system enables containers to resolve custom hostnames by automatically configuring `/etc/hosts` entries within container namespaces after the kata-agent has fully initialized. + +## Architecture + +The hostname resolution system consists of three main components: + +1. **Dispatcher Service** - Determines the network role and triggers appropriate setup +2. **Server-side Resolution** - Resolves hostnames using existing `/etc/hosts` entries +3. **Client-side Resolution** - Resolves hostnames dynamically using DNS/curl + +### Component Flow + +``` +azure-bridge.service (Dispatcher) + ↓ + ├─→ [server role] → azure-bridge-server.service → azure-hostname-resolution-server.service + └─→ [client role] → azure-bridge-client.service → azure-hostname-resolution-client.service +``` + +## Configuration + +### aa.toml Parameters + +The system reads configuration from `/run/peerpod/aa.toml`: + +```toml +# Network role: "server" or "client" +bridge_role = "server" + +# Hostname to resolve +hostname = "example.hostname.com" +``` + +## Systemd Services + +### 1. azure-bridge.service + +**Purpose**: Main dispatcher that determines network role and triggers appropriate setup scripts. 
+ +**File**: [`scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-bridge.service`](scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-bridge.service) + +```ini +[Unit] +Description=Azure Bridge Setup Dispatcher +After=kata-agent.service process-user-data.service +Wants=kata-agent.service process-user-data.service + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/usr/local/sbin/setup-azure-bridge.sh +Restart=on-failure +RestartSec=5s + +[Install] +WantedBy=multi-user.target +``` + +**Script**: [`scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-azure-bridge.sh`](scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-azure-bridge.sh) + +- Reads `bridge_role` from aa.toml +- Dispatches to server or client setup scripts +- Automatically triggers hostname resolution after bridge setup + +### 2. azure-hostname-resolution-server.service + +**Purpose**: Resolves hostnames on server-side containers using existing `/etc/hosts` entries. + +**File**: [`scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-hostname-resolution-server.service`](scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-hostname-resolution-server.service) + +```ini +[Unit] +Description=Azure Container Hostname Resolution +After=azure-bridge-server.service kata-agent.service +Requires=azure-bridge-server.service +ConditionPathExists=/run/peerpod/aa.toml + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/usr/local/sbin/setup-hostname-resolution-server.sh +Restart=on-failure +RestartSec=10s +StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=multi-user.target +``` + +**Script**: [`scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-hostname-resolution-server.sh`](scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-hostname-resolution-server.sh) + +**Key Features**: +- Waits for container network interfaces (eth0, eth1) to be ready +- Monitors kata-agent for container process spawning (Java, sleep, pause) +- Reads hostname IP from host's 
`/etc/hosts` +- Injects hostname entry into container's `/etc/hosts` using `nsenter` + +### 3. azure-hostname-resolution-client.service + +**Purpose**: Resolves hostnames on client-side containers using dynamic DNS resolution. + +**File**: [`scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-hostname-resolution-client.service`](scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-hostname-resolution-client.service) + +```ini +[Unit] +Description=Azure Client Container Hostname Resolution +After=azure-bridge-client.service kata-agent.service +Requires=azure-bridge-client.service +ConditionPathExists=/run/peerpod/aa.toml + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/usr/local/sbin/setup-hostname-resolution-client.sh +Restart=on-failure +RestartSec=10s +StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=multi-user.target +``` + +**Script**: [`scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-hostname-resolution-client.sh`](scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-hostname-resolution-client.sh) + +**Key Features**: +- Waits for container network interfaces (eth0, eth1) to be ready +- Monitors kata-agent for container process spawning +- Resolves hostname dynamically using `curl` with retry logic (20 attempts, 2s delay) +- Extracts IPv4 address from curl verbose output +- Injects hostname entry into container's `/etc/hosts` using `nsenter` + +## Implementation Details + +### Container Process Detection + +Both server and client scripts wait for kata-agent to spawn container processes before attempting hostname resolution. This ensures the container namespace is fully initialized. + +**Detection Strategy** (in priority order): +1. Java processes (`grep -i 'java'`) +2. Sleep infinity processes (`grep 'sleep infinity'`) +3. 
Pause processes (`grep '/pause'`) + +**Timeout**: 120 seconds (2 minutes) + +### Server-Side Resolution + +**Method**: Read from host's `/etc/hosts` + +```bash +HOSTNAME_IP=$(grep -E "^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+[[:space:]]+${HOSTNAME}" /etc/hosts | awk '{print $1}') +``` + +**Advantages**: +- Fast and reliable +- No network dependency +- Uses pre-configured entries from bridge setup + +### Client-Side Resolution + +**Method**: Dynamic DNS resolution using curl + +```bash +HOSTNAME_IP=$(curl -v "http://${HOSTNAME}" 2>&1 | grep -oP 'IPv4: \K[\d.]+' | head -n1) +``` + +**Retry Logic**: +- Maximum attempts: 20 +- Retry delay: 2 seconds +- Total timeout: ~40 seconds + +**Advantages**: +- Works without pre-configured entries +- Handles dynamic DNS scenarios +- Robust retry mechanism + +### Namespace Entry Injection + +Both scripts use `nsenter` to inject hostname entries into the container's `/etc/hosts`: + +```bash +nsenter -t ${CONTAINER_PID} -a sh -c "echo '${HOSTNAME_IP} ${HOSTNAME}' >> /etc/hosts" +``` + +**Flags**: +- `-t`: Target PID +- `-a`: Enter all namespaces (mount, UTS, IPC, net, PID) + +## Service Dependencies + +``` +kata-agent.service + ↓ +azure-bridge.service (dispatcher) + ↓ + ├─→ azure-bridge-server.service + │ ↓ + │ azure-hostname-resolution-server.service + │ + └─→ azure-bridge-client.service + ↓ + azure-hostname-resolution-client.service +``` + +## Logging and Debugging + +All services log to systemd journal. View logs using: + +```bash +# View dispatcher logs +journalctl -u azure-bridge.service + +# View server hostname resolution logs +journalctl -u azure-hostname-resolution-server.service + +# View client hostname resolution logs +journalctl -u azure-hostname-resolution-client.service + +# Follow logs in real-time +journalctl -u azure-hostname-resolution-server.service -f +``` + +## Troubleshooting + +### Hostname Not Resolved + +1. 
**Check aa.toml configuration**: + ```bash + cat /run/peerpod/aa.toml + ``` + Verify `hostname` and `bridge_role` are set correctly. + +2. **Check service status**: + ```bash + systemctl status azure-hostname-resolution-server.service + systemctl status azure-hostname-resolution-client.service + ``` + +3. **Verify container process detection**: + ```bash + ip netns exec podns ps aux + ``` + +4. **Check container's /etc/hosts**: + ```bash + CONTAINER_PID=$(ip netns exec podns ps aux | grep -i java | head -n1 | awk '{print $2}') + nsenter -t ${CONTAINER_PID} -a cat /etc/hosts + ``` + +### Service Fails to Start + +1. **Check dependencies**: + - Ensure `azure-bridge-server.service` or `azure-bridge-client.service` completed successfully + - Verify `/run/peerpod/aa.toml` exists + +2. **Check network interfaces**: + ```bash + ip netns exec podns ip link show + ``` + +3. **Manual script execution**: + ```bash + /usr/local/sbin/setup-hostname-resolution-server.sh + # or + /usr/local/sbin/setup-hostname-resolution-client.sh + ``` + +### Client Resolution Timeout + +If client-side resolution fails after 20 attempts: + +1. **Verify DNS resolution**: + ```bash + nslookup ${HOSTNAME} + dig ${HOSTNAME} + ``` + +2. **Test curl manually**: + ```bash + curl -v "http://${HOSTNAME}" + ``` + +3. 
**Check network connectivity**: + ```bash + ping ${HOSTNAME} + ``` + +## Security Considerations + +- Scripts run with root privileges (required for `nsenter`) +- Hostname entries are only added if not already present (prevents duplicates) +- Services use `ConditionPathExists` to ensure aa.toml is present +- Retry mechanisms prevent indefinite hanging + +## Related Documentation + +- [Bridge Setup Documentation](BRIDGE_SETUP.md) - Network bridge configuration +- [DHCP Setup Documentation](scripts/coco/podvm/DHCP-SETUP.md) - DHCP configuration for containers + +## Files Modified/Created + +### Systemd Service Files +- `scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-bridge.service` +- `scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-hostname-resolution-server.service` +- `scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-hostname-resolution-client.service` + +### Setup Scripts +- `scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-azure-bridge.sh` +- `scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-hostname-resolution-server.sh` +- `scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-hostname-resolution-client.sh` + +## Future Enhancements + +- Support for multiple hostname entries +- IPv6 support +- Configurable retry parameters +- Health check endpoints +- Metrics collection \ No newline at end of file diff --git a/README.md b/README.md index 8303fe76..4c6a1fba 100644 --- a/README.md +++ b/README.md @@ -1,46 +1,123 @@ # How to create a dm-verity image via container 1. Download official RHEL ISO and build a CVM with `helpers/rhel10-dm-root.ks`: + +Change `QCOW2_NAME` every time you rebuild the image. +Install libvirt and place the downloaded RHEL ISO image at the path given by `ISO_PATH`. 
+ ``` -ISO_PATH=rhel-10.0-x86_64-dvd.iso +ISO_PATH=/var/lib/libvirt/images/rhel-10.1-x86_64-dvd.iso KS_LOCATION=helpers/rhel10-dm-root.ks -QCOW2_NAME=my-image +QCOW2_NAME=trial-image-12 + +virt-install --virt-type kvm --os-variant rhel10.0 --arch x86_64 --boot uefi,firmware.feature0.name=secure-boot,firmware.feature0.enabled=no,firmware.feature1.name=enrolled-keys,firmware.feature1.enabled=no --name $QCOW2_NAME --memory 8192 --location $ISO_PATH --disk bus=scsi,size=15 --initrd-inject=$KS_LOCATION --nographics --extra-args "console=ttyS0 inst.ks=file:/rhel10-dm-root.ks" --transient --debug -virt-install --virt-type kvm --os-variant rhel10.0 --arch x86_64 --boot uefi --name $QCOW2_NAME --memory 8192 --location $ISO_PATH --disk bus=scsi,size=7 --initrd-inject=$KS_LOCATION --nographics --extra-args "console=ttyS0 inst.ks=file:/rhel10-dm-root.ks" --transient ``` -Image will be stored in `~/.local/share/libvirt/images/$QCOW2_NAME.qcow2` +Image will be stored in `/var/lib/libvirt/images/$QCOW2_NAME.qcow2` + -2. Do custom modifications in the image +2. Run the automation script from the repository root directory: -3. Optional: if not available, generate private key, PEM and DER certificates using `helpers/create-certs.sh`. This is only needed if secureboot has to be enabled. ``` -Usage: ./helpers/create-certs.sh -Usage: ./helpers/create-certs.sh help +export ACTIVATION_KEY= +export ORG_ID= +export ROOT_PASSWORD=1234 +./example_run.sh +``` +The image referred to above is located at `/var/lib/libvirt/images/$QCOW2_NAME.qcow2`. -The purpose of this script is to create a private key and public DER and PEM certs. -The only input command is to specify where to store the key and certs. +3. Convert the image to a container image: -Options (define them as variable): -SB_CERT_NAME: optional - name of the secureboot certificate added into the gallery. Default: My custom certificate ``` +cd helpers/build-container/ +cp /var/lib/libvirt/images/$QCOW2_NAME.qcow2 . 
+podman build -t coco-podvm --build-arg PODVM_IMAGE_SRC=$QCOW2_NAME.qcow2 . +``` + +Or use the build script: -4. Build the container (if `dnf install` fails, make sure podman has logged into your RHEL account) ``` -sudo podman build my-coco-podvm . +cd helpers/build-container/ +./build.sh ``` -5. Export the following mandatory variables +4. Push to your registry: + ``` -QCOW2=path/where/qcow2/is +podman tag localhost/coco-podvm:latest +podman push ``` -And if certificates are being used: + +As a result, the input image will contain coco-components and be dm-verity protected. + +Next, update the main repository that contains the ARO cluster details and the YAML files: continue from step 5 of `Debug_Image_Creation.md` in the PeerPods repository. + + +5. Optionally, upload the image to an Azure image gallery yourself using `azure/upload-azure.sh`. To use that script, define the following variables (the usage message is also available by running `azure/upload-azure.sh help`): + +``` +Usage: azure/upload-azure.sh [] +Usage: azure/upload-azure.sh help +Usage: azure/upload-azure.sh [] +Usage: azure/upload-azure.sh help + +The purpose of this script is to take a disk and: +1. convert the disk into vhd +2. if DER_CERTIFICATE is defined, create a deployment with a custom secureboot certificate +3. upload the vhd to Azure +4. create an Azure image gallery with that disk + +Upload options (define them as variable): +AZURE_RESOURCE_GROUP: mandatory - az resource group where to create the gallery +AZURE_REGION: optional - az region where to create the gallery. Default: eastus +IMAGE_GALLERY_NAME: optional - az gallery name. Default: my_gallery +IMAGE_DEFINITION_NAME: optional - az image definition name. Default: podvm-image +IMAGE_DEFINITION_PUBLISHER: optional - az image definition publisher. Default: MyPublisher +IMAGE_DEFINITION_OFFER: optional - az image definition offer. Default: My-PodVM +IMAGE_DEFINITION_SKU: optional - az image definition sku.
Default: My-PodVM +IMAGE_VERSION: optional - az image version. Default: 1.0.0 +IMAGE_BLOB_NAME: optional - az image storage blob name. Default: dm-verity +AZURE_SB_TEMPLATE: optional - az deployment template to automatically fill. Default: ./azure/azure-sb-template.json +AZURE_DEPLOYMENT_NAME: optional - az deployment name. Default: my-deployment +UPLOAD_SCRIPT_LOCATION: optional - location of the upload-azure.sh script. Default: ./azure/upload-azure.sh ``` -IMAGE_CERTIFICATE_PEM=path/where/pem_cert/is -IMAGE_PRIVATE_KEY=path/where/private_key/is +The script will print as last line the full Azure Image ID. + +--- + +## Additional Configuration Options + +The `scripts/create-verity-podvm.sh` script (used by `example_run.sh`) supports these optional environment variables: + +The purpose of this script is to take a disk and: +1. convert the disk into vhd +2. if DER_CERTIFICATE is defined, create a deployment with a custom secureboot certificate +3. upload the vhd to Azure +4. create an Azure image gallery with that disk + +Upload options (define them as variable): +AZURE_RESOURCE_GROUP: mandatory - az resource group where to create the gallery +AZURE_REGION: optional - az region where to create the gallery. Default: eastus +IMAGE_GALLERY_NAME: optional - az gallery name. Default: my_gallery +IMAGE_DEFINITION_NAME: optional - az image definition name. Default: podvm-image +IMAGE_DEFINITION_PUBLISHER: optional - az image definition publisher. Default: MyPublisher +IMAGE_DEFINITION_OFFER: optional - az image definition offer. Default: My-PodVM +IMAGE_DEFINITION_SKU: optional - az image definition sku. Default: My-PodVM +IMAGE_VERSION: optional - az image version. Default: 1.0.0 +IMAGE_BLOB_NAME: optional - az image storage blob name. Default: dm-verity +AZURE_SB_TEMPLATE: optional - az deployment template to automatically fill. Default: ./azure/azure-sb-template.json +AZURE_DEPLOYMENT_NAME: optional - az deployment name. 
Default: my-deployment +UPLOAD_SCRIPT_LOCATION: optional - location of the upload-azure.sh script. Default: ./azure/upload-azure.sh ``` +The script will print as last line the full Azure Image ID. + +--- + +## Additional Configuration Options + +The `scripts/create-verity-podvm.sh` script (used by `example_run.sh`) supports these optional environment variables: -6. Optionally, define additional variables used by `scripts/create-verity-podvm.sh` running inside the container: (usage message available also with `create-verity-podvm.sh help`) ``` Usage: ./create-verity-podvm.sh Usage: ./create-verity-podvm.sh help @@ -74,46 +151,21 @@ ROOT_PASSWORD: optional - set root's password. Default: disabled ``` -7. Run the container. To add the optional exported variables, just add `-e YOUR_VAR=$YOUR_VAR`. -``` -sudo podman run --rm \ - --privileged \ - -v $QCOW2:/disk.qcow2 \ - -v $IMAGE_CERTIFICATE_PEM:/public.pem \ - -v $IMAGE_PRIVATE_KEY:/private.key \ - -v /lib/modules:/lib/modules \ - --user 0 \ - --security-opt=apparmor=unconfined \ - --security-opt=seccomp=unconfined \ - --mount type=bind,source=/dev,target=/dev \ - --mount type=bind,source=/run/udev,target=/run/udev \ - coco-podvm -``` -As a result, the input image will contain coco-components and be dm-verity protected. +## Optional: Generate certificates for secureboot + +If not available, generate private key, PEM and DER certificates using `helpers/create-certs.sh`. This is only needed if secureboot has to be enabled. +## Optional: Generate certificates for secureboot -8. Optionally, upload yourself the image on Azure image gallery using `azure/upload-azure.sh`. In order to use that script, define the following variables (usage message available also by running `azure/upload-azure.sh help`): +If not available, generate private key, PEM and DER certificates using `helpers/create-certs.sh`. This is only needed if secureboot has to be enabled. 
``` -Usage: azure/upload-azure.sh [] -Usage: azure/upload-azure.sh help +Usage: ./helpers/create-certs.sh +Usage: ./helpers/create-certs.sh help +Usage: ./helpers/create-certs.sh +Usage: ./helpers/create-certs.sh help -The purpose of this script is to take a disk and: -1. convert the disk into vhd -2. if DER_CERTIFICATE is defined, create a deployment with a custom secureboot certificate -3. upload the vhd to Azure -4. create an Azure image gallery with that disk +The purpose of this script is to create a private key and public DER and PEM certs. +The only input command is to specify where to store the key and certs. + +Options (define them as variable): +SB_CERT_NAME: optional - name of the secureboot certificate added into the gallery. Default: My custom certificate -Upload options (define them as variable): -AZURE_RESOURCE_GROUP: mandatory - az resource group where to create the gallery -AZURE_REGION: optional - az region where to create the gallery. Default: eastus -IMAGE_GALLERY_NAME: optional - az gallery name. Default: my_gallery -IMAGE_DEFINITION_NAME: optional - az image definition name. Default: podvm-image -IMAGE_DEFINITION_PUBLISHER: optional - az image definition publisher. Default: MyPublisher -IMAGE_DEFINITION_OFFER: optional - az image definition offer. Default: My-PodVM -IMAGE_DEFINITION_SKU: optional - az image definition sku. Default: My-PodVM -IMAGE_VERSION: optional - az image version. Default: 1.0.0 -IMAGE_BLOB_NAME: optional - az image storage blob name. Default: dm-verity -AZURE_SB_TEMPLATE: optional - az deployment template to automatically fill. Default: ./azure/azure-sb-template.json -AZURE_DEPLOYMENT_NAME: optional - az deployment name. Default: my-deployment -UPLOAD_SCRIPT_LOCATION: optional - location of the upload-azure.sh script. Default: ./azure/upload-azure.sh -``` -The script will print as last line the full Azure Image ID. 
diff --git a/example_run.sh b/example_run.sh index 73f02403..41fafda4 100755 --- a/example_run.sh +++ b/example_run.sh @@ -21,7 +21,7 @@ fi [[ -n "${ACTIVATION_KEY}" && -n "${ORG_ID}" ]] && sudo -E podman secret create activation_key --env ACTIVATION_KEY && sudo -E podman secret create org_id --env ORG_ID && \ SM_SECRET_RUN_CMD="--secret activation_key,type=env,target=ACTIVATION_KEY --secret org_id,type=env,target=ORG_ID " -sudo -E podman run --rm \ +sudo -E podman run --rm --network=host \ --privileged \ -v $QCOW2:/disk.qcow2 \ $CERT_OPTIONS \ diff --git a/scripts/coco/podvm/DHCP-SETUP.md b/scripts/coco/podvm/DHCP-SETUP.md new file mode 100644 index 00000000..2963f136 --- /dev/null +++ b/scripts/coco/podvm/DHCP-SETUP.md @@ -0,0 +1,178 @@ +# DHCP Server Setup for Azure Bridge Network + +## Overview + +The Azure bridge scripts now use DHCP for dynamic IP allocation instead of static IPs. This enables scaling to tens of thousands of VMs without IP exhaustion. + +## Network Design + +- **Bridge Subnet**: `172.16.0.0/12` (1,048,574 available IPs) +- **Avoids conflicts with**: + - Flannel CNI: `10.244.0.0/16` + - OVN-Kubernetes: `10.128.0.0/14` + - Azure CNI: Uses Azure VNET subnet (typically `10.0.0.0/8` or `192.168.0.0/16`) + - AWS VPC CNI: Uses VPC CIDR + +## DHCP Server Requirements + +### Option 1: Single DHCP Server (Small-Medium Scale) + +**Recommended for**: Up to 100,000 VMs + +```bash +# Install dnsmasq on a dedicated VM in Azure VNET +sudo dnf install -y dnsmasq + +# Configure /etc/dnsmasq.conf +interface=eth0 +bind-interfaces +dhcp-range=172.16.0.1,172.31.255.254,255.240.0.0,10m +dhcp-option=3 # No default gateway (VMs use Azure VNET routing) +dhcp-leasefile=/var/lib/dnsmasq/dnsmasq.leases +log-dhcp +log-queries + +# Enable and start +sudo systemctl enable --now dnsmasq +``` + +### Option 2: Sharded DHCP Servers (Large Scale) + +**Recommended for**: 100,000+ VMs + +Deploy multiple DHCP servers, each handling a portion of the IP range: + +```bash +# Server 1: 
172.16.0.0/16 +dhcp-range=172.16.0.1,172.16.255.254,255.255.0.0,10m + +# Server 2: 172.17.0.0/16 +dhcp-range=172.17.0.1,172.17.255.254,255.255.0.0,10m + +# Server 3: 172.18.0.0/16 +dhcp-range=172.18.0.1,172.18.255.254,255.255.0.0,10m + +# ... up to Server 16: 172.31.0.0/16 +``` + +Use Azure Load Balancer or DHCP relay agents to distribute requests. + +### Option 3: ISC DHCP Server (Enterprise) + +```bash +# Install ISC DHCP +sudo dnf install -y dhcp-server + +# Configure /etc/dhcp/dhcpd.conf +subnet 172.16.0.0 netmask 255.240.0.0 { + range 172.16.0.1 172.31.255.254; + default-lease-time 600; + max-lease-time 7200; + # No routers option - VMs use Azure VNET routing +} + +# Enable and start +sudo systemctl enable --now dhcpd +``` + +## High Availability Setup + +For production, deploy DHCP servers in HA configuration: + +```bash +# Primary DHCP server +dhcp-range=172.16.0.1,172.31.255.254,255.240.0.0,10m + +# Secondary DHCP server (failover) +dhcp-range=172.16.0.1,172.31.255.254,255.240.0.0,10m +``` + +Use Azure Availability Sets or Zones to ensure redundancy. + +## Network Configuration + +### Azure VNET Setup + +1. **Create a dedicated subnet** for DHCP server(s): + ``` + DHCP Subnet: 10.0.255.0/24 + ``` + +2. **Ensure connectivity** between DHCP subnet and PodVM subnets + +3. **Configure NSG rules**: + - Allow UDP 67 (DHCP server) + - Allow UDP 68 (DHCP client) + +### Firewall Rules on DHCP Server + +```bash +# Allow DHCP traffic +sudo firewall-cmd --permanent --add-service=dhcp +sudo firewall-cmd --reload +``` + +## Monitoring and Troubleshooting + +### Check DHCP Leases + +**dnsmasq**: +```bash +cat /var/lib/dnsmasq/dnsmasq.leases +``` + +**ISC DHCP**: +```bash +cat /var/lib/dhcpd/dhcpd.leases +``` + +### Monitor DHCP Requests + +```bash +# dnsmasq +sudo journalctl -u dnsmasq -f + +# ISC DHCP +sudo journalctl -u dhcpd -f +``` + +### Test DHCP from PodVM + +```bash +# On PodVM, test DHCP request +sudo dhclient -v eth1 +``` + +### Common Issues + +1. 
**DHCP timeout**: Check network connectivity and NSG rules +2. **IP exhaustion**: Reduce lease time or add more DHCP servers +3. **Lease conflicts**: Ensure DHCP servers don't have overlapping ranges + +## Scaling Considerations + +| VMs | DHCP Servers | Lease Time | IP Pool Size | +|-----|--------------|------------|--------------| +| 1K-10K | 1 | 10 min | 172.16.0.0/16 (65K IPs) | +| 10K-100K | 2-4 | 5 min | 172.16.0.0/12 (1M IPs) | +| 100K+ | 8-16 (sharded) | 5 min | 172.16.0.0/12 (1M IPs) | + +## Lease Reclamation + +With short lease times (5-10 minutes), IPs are automatically reclaimed when VMs are deleted. No manual intervention needed. + +## Security Considerations + +1. **DHCP snooping**: Enable on Azure NSG to prevent rogue DHCP servers +2. **IP reservation**: Reserve specific IPs for critical services +3. **Rate limiting**: Limit DHCP requests per source to prevent DoS + +## Migration from Static IPs + +The scripts automatically detect and use DHCP. No changes needed to existing deployments - new VMs will use DHCP, old VMs continue with static IPs until recreated. 
+ +## References + +- dnsmasq documentation: http://www.thekelleys.org.uk/dnsmasq/doc.html +- ISC DHCP documentation: https://www.isc.org/dhcp/ +- Azure VNET documentation: https://docs.microsoft.com/azure/virtual-network/ \ No newline at end of file diff --git a/scripts/coco/podvm/luks-config.tar.gz b/scripts/coco/podvm/luks-config.tar.gz index 0772032c..af98292c 100644 Binary files a/scripts/coco/podvm/luks-config.tar.gz and b/scripts/coco/podvm/luks-config.tar.gz differ diff --git a/scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-bridge-client.service b/scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-bridge-client.service new file mode 100644 index 00000000..1e0e0764 --- /dev/null +++ b/scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-bridge-client.service @@ -0,0 +1,16 @@ +[Unit] +Description=Azure Bridge Setup for Client Container with Hostname Resolution +After=kata-agent.service network-online.target +Wants=kata-agent.service network-online.target + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/usr/local/sbin/setup-azure-bridge-client.sh +Restart=on-failure +RestartSec=5s +StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=multi-user.target \ No newline at end of file diff --git a/scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-bridge-server.service b/scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-bridge-server.service new file mode 100644 index 00000000..1dd65713 --- /dev/null +++ b/scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-bridge-server.service @@ -0,0 +1,17 @@ +[Unit] +Description=Azure Bridge Setup for Server Container with Hostname Resolution +After=kata-agent.service network-online.target +Wants=kata-agent.service network-online.target + +[Service] +Type=oneshot +RemainAfterExit=yes +EnvironmentFile=-/etc/default/azure-bridge +ExecStart=/usr/local/sbin/setup-azure-bridge-server.sh +Restart=on-failure +RestartSec=5s +StandardOutput=journal +StandardError=journal + 
+[Install] +WantedBy=multi-user.target diff --git a/scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-bridge.service b/scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-bridge.service new file mode 100644 index 00000000..8bc0eaee --- /dev/null +++ b/scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-bridge.service @@ -0,0 +1,14 @@ +[Unit] +Description=Azure Bridge Setup Dispatcher +After=kata-agent.service process-user-data.service +Wants=kata-agent.service process-user-data.service + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/usr/local/sbin/setup-azure-bridge.sh +Restart=on-failure +RestartSec=5s + +[Install] +WantedBy=multi-user.target \ No newline at end of file diff --git a/scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-hostname-resolution-client.service b/scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-hostname-resolution-client.service new file mode 100644 index 00000000..77ac635d --- /dev/null +++ b/scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-hostname-resolution-client.service @@ -0,0 +1,17 @@ +[Unit] +Description=Azure Client Container Hostname Resolution +After=azure-bridge-client.service kata-agent.service +Requires=azure-bridge-client.service +ConditionPathExists=/run/peerpod/aa.toml + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/usr/local/sbin/setup-hostname-resolution-client.sh +Restart=on-failure +RestartSec=10s +StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=multi-user.target diff --git a/scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-hostname-resolution-server.service b/scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-hostname-resolution-server.service new file mode 100644 index 00000000..35231f70 --- /dev/null +++ b/scripts/coco/podvm/luks-scratch/etc/systemd/system/azure-hostname-resolution-server.service @@ -0,0 +1,18 @@ +[Unit] +Description=Azure Container Hostname Resolution +After=azure-bridge-server.service 
kata-agent.service +Requires=azure-bridge-server.service +# Only run after bridge setup is complete and kata-agent is stable +ConditionPathExists=/run/peerpod/aa.toml + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/usr/local/sbin/setup-hostname-resolution-server.sh +Restart=on-failure +RestartSec=10s +StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=multi-user.target diff --git a/scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-azure-bridge-client.sh b/scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-azure-bridge-client.sh new file mode 100644 index 00000000..486b607e --- /dev/null +++ b/scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-azure-bridge-client.sh @@ -0,0 +1,174 @@ +#!/bin/bash +set -e + +# Static bridge topology for direct VM-to-container communication +# Client host bridge IP: 192.168.0.51 +# Client container bridge IP: 192.168.0.101 +CLIENT_HOST_BRIDGE_IP="192.168.0.51" +CLIENT_CONTAINER_BRIDGE_IP="192.168.0.101" +BRIDGE_SUBNET="192.168.0.0/24" + +AA_CONFIG_FILE="/run/peerpod/aa.toml" + +# Get VM's dynamic IP from eth0 +VM_IP=$(ip -4 addr show eth0 | grep -oP '(?<=inet\s)\d+(\.\d+){3}') +echo "Detected client VM IP: $VM_IP" + +# Read hostname from aa.toml if present +# Client only needs hostname - DNS resolution will happen through the network +if [ -f "$AA_CONFIG_FILE" ]; then + HOSTNAME=$(sed -n 's/^[[:space:]]*hostname[[:space:]]*=[[:space:]]*["'\'']\{0,1\}\([^"'\'']*\)["'\'']\{0,1\}[[:space:]]*$/\1/p' "$AA_CONFIG_FILE" | head -n1 | tr -d '[:space:]') + + if [ -n "$HOSTNAME" ]; then + echo "Hostname found in aa.toml: $HOSTNAME" + echo "Client will use hostname '$HOSTNAME' to reach server (DNS resolution via network)" + else + echo "No hostname found in aa.toml - client will use direct IP addressing" + fi +else + echo "aa.toml not found at $AA_CONFIG_FILE" +fi + +# Wait for kata-agent/container namespace to exist (max 60s) +echo "Waiting for container namespace to start..." 
+for i in {1..60}; do + if ip netns list | grep -q podns; then + echo "podns namespace found" + break + fi + sleep 1 +done + +if ! ip netns list | grep -q podns; then + echo "ERROR: podns namespace not found" + exit 1 +fi + +# Check if already configured +if ip netns exec podns ip link show eth1 &>/dev/null; then + echo "Client bridge already configured, skipping setup" + exit 0 +fi + +# Create veth pair in podns namespace +echo "Creating client veth pair..." +ip netns exec podns ip link add eth1 type veth peer name veth-azure || true +ip netns exec podns ip link set eth1 up +ip netns exec podns ip link set veth-azure netns 1 + +# Create bridge in host namespace +echo "Creating client bridge..." +ip link add br-azure type bridge 2>/dev/null || true +ip link set veth-azure master br-azure 2>/dev/null || true +ip link set veth-azure up +ip link set br-azure up + +# Configure static bridge IPs +echo "Configuring static client bridge addresses..." +ip addr flush dev br-azure 2>/dev/null || true +ip addr add ${CLIENT_HOST_BRIDGE_IP}/24 dev br-azure 2>/dev/null || true + +ip netns exec podns ip addr flush dev eth1 2>/dev/null || true +ip netns exec podns ip addr add ${CLIENT_CONTAINER_BRIDGE_IP}/24 dev eth1 +echo "Client container bridge IP: $CLIENT_CONTAINER_BRIDGE_IP" +echo "Client host bridge IP: $CLIENT_HOST_BRIDGE_IP" + +# Configure routing exactly per documented setup +echo "Configuring client routes..." +ip netns exec podns ip route add ${BRIDGE_SUBNET} dev eth1 2>/dev/null || true +ip route add ${CLIENT_CONTAINER_BRIDGE_IP}/32 dev br-azure 2>/dev/null || true +ip route del ${BRIDGE_SUBNET} dev br-azure 2>/dev/null || true + +# Enable proxy ARP and forwarding +echo "Enabling proxy ARP and forwarding..." +sysctl -w net.ipv4.conf.all.proxy_arp=1 +sysctl -w net.ipv4.conf.br-azure.proxy_arp=1 +sysctl -w net.ipv4.ip_forward=1 + +# Configure iptables for forwarding and outbound masquerade +echo "Configuring client iptables..." +if ! 
iptables -C FORWARD -i br-azure -o eth0 -j ACCEPT 2>/dev/null; then + iptables -I FORWARD -i br-azure -o eth0 -j ACCEPT +fi + +if ! iptables -C FORWARD -i eth0 -o br-azure -j ACCEPT 2>/dev/null; then + iptables -I FORWARD -i eth0 -o br-azure -j ACCEPT +fi + +if ! iptables -t nat -C POSTROUTING -s ${CLIENT_CONTAINER_BRIDGE_IP}/32 -o eth0 -j MASQUERADE 2>/dev/null; then + iptables -t nat -A POSTROUTING -s ${CLIENT_CONTAINER_BRIDGE_IP}/32 -o eth0 -j MASQUERADE +fi + +echo "Azure client bridge setup completed successfully" +echo "Client VM IP: $VM_IP" +echo "Client host bridge IP: $CLIENT_HOST_BRIDGE_IP" +echo "Client container bridge IP: $CLIENT_CONTAINER_BRIDGE_IP" +echo "Bridge subnet: $BRIDGE_SUBNET" + +# Resolve hostname in container namespace (after bridge setup) +# if [ -n "$HOSTNAME" ]; then +# echo "Resolving hostname in container namespace..." + +# # Log ps aux for debugging +# echo "=== Current processes (ps aux) ===" +# ps aux +# echo "==================================" + +# # Get the hostname's IP using getent (works at VM level) +# HOSTNAME_IP=$(getent hosts "${HOSTNAME}" | awk '{print $1}' | head -n1) + +# if [ -n "$HOSTNAME_IP" ]; then +# echo "Resolved ${HOSTNAME} to ${HOSTNAME_IP}" + +# # Find container process in podns namespace with retry logic +# echo "Looking for container process in podns namespace..." 
+ +# # Log processes in podns namespace +# echo "=== Processes in podns namespace ===" +# ip netns exec podns ps aux || echo "Failed to list podns processes" +# echo "====================================" + +# CONTAINER_PID="" +# MAX_WAIT=90 # Wait up to 90 seconds + +# for attempt in $(seq 1 $MAX_WAIT); do +# # Try Java first +# CONTAINER_PID=$(ip netns exec podns ps aux 2>/dev/null | grep -i 'java' | grep -v grep | head -n1 | awk '{print $2}') + +# # Try sleep infinity +# if [ -z "$CONTAINER_PID" ]; then +# CONTAINER_PID=$(ip netns exec podns ps aux 2>/dev/null | grep 'sleep infinity' | grep -v grep | head -n1 | awk '{print $2}') +# fi + +# # Try pause +# if [ -z "$CONTAINER_PID" ]; then +# CONTAINER_PID=$(ip netns exec podns ps aux 2>/dev/null | grep '/pause' | grep -v grep | head -n1 | awk '{print $2}') +# fi + +# if [ -n "$CONTAINER_PID" ]; then +# echo "✓ Found container process PID: ${CONTAINER_PID} (attempt $attempt)" +# break +# fi + +# if [ $attempt -eq $MAX_WAIT ]; then +# echo "Warning: Container process not found after ${MAX_WAIT}s, skipping /etc/hosts update" +# exit 0 # Exit successfully - bridge is configured +# fi + +# sleep 1 +# done + +# # Now update /etc/hosts if we found the process +# if [ -n "$CONTAINER_PID" ]; then +# if ! 
nsenter -t ${CONTAINER_PID} -a sh -c "grep -q '${HOSTNAME}' /etc/hosts" 2>/dev/null; then +# nsenter -t ${CONTAINER_PID} -a sh -c "echo '${HOSTNAME_IP} ${HOSTNAME}' >> /etc/hosts" +# echo "✓ Added ${HOSTNAME} -> ${HOSTNAME_IP} to container's /etc/hosts" +# else +# echo "Hostname already exists in container's /etc/hosts" +# fi +# fi +# else +# echo "Warning: Could not resolve hostname ${HOSTNAME}" +# fi +# fi + diff --git a/scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-azure-bridge-server.sh b/scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-azure-bridge-server.sh new file mode 100644 index 00000000..7abfc936 --- /dev/null +++ b/scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-azure-bridge-server.sh @@ -0,0 +1,294 @@ +#!/bin/bash +set -e + +# Static bridge topology for direct VM-to-container communication +# Server host bridge IP: 192.168.0.50 +# Server container bridge IP: 192.168.0.100 +# Remote client VM/container IPs are learned from aa.toml when present. +SERVER_HOST_BRIDGE_IP="192.168.0.50" +SERVER_CONTAINER_BRIDGE_IP="192.168.0.100" +BRIDGE_SUBNET="192.168.0.0/24" + + +AA_CONFIG_FILE="/run/peerpod/aa.toml" + +# Get VM's dynamic IP from eth0 +VM_IP=$(ip -4 addr show eth0 | grep -oP '(?<=inet\s)\d+(\.\d+){3}') +echo "Detected VM IP: $VM_IP" + +# Read hostname, ports, and Azure credentials from aa.toml if present +HOSTNAME="" +PORTS=() + + +if [ -f "$AA_CONFIG_FILE" ]; then + # Extract hostname + HOSTNAME=$(sed -n 's/^[[:space:]]*hostname[[:space:]]*=[[:space:]]*["'\'']\{0,1\}\([^"'\'']*\)["'\'']\{0,1\}[[:space:]]*$/\1/p' "$AA_CONFIG_FILE" | head -n1 | tr -d '[:space:]') + + # Extract ports (supports both single port and array format) + # Format: ports = [8080, 8081, 8082] or ports = 8080 + PORTS_LINE=$(sed -n 's/^[[:space:]]*ports[[:space:]]*=[[:space:]]*\(.*\)$/\1/p' "$AA_CONFIG_FILE" | head -n1) + if [ -n "$PORTS_LINE" ]; then + # Remove brackets, quotes, and split by comma + PORTS_CLEAN=$(echo "$PORTS_LINE" | tr -d '[]"'\''' | tr ',' ' ') + 
read -ra PORTS <<< "$PORTS_CLEAN" + echo "Ports found in aa.toml: ${PORTS[*]}" + fi + + + + if [ -n "$HOSTNAME" ]; then + echo "Hostname found in aa.toml: $HOSTNAME" + + # Set the system hostname permanently first + echo "Setting system hostname..." + if hostnamectl set-hostname "$HOSTNAME"; then + echo "✓ System hostname set to: $HOSTNAME" + + # Verify hostname configuration + echo "Verifying hostname configuration:" + echo " hostname: $(hostname)" + echo " hostname -f: $(hostname -f 2>/dev/null || echo 'N/A')" + else + echo "Warning: Failed to set system hostname" + fi + + # Configure hostname via DHCP using nmcli + echo "Configuring hostname via DHCP..." + CONNECTION_NAME="Wired connection 1" + + # Check if connection exists + if nmcli connection show "$CONNECTION_NAME" &>/dev/null; then + echo "Found connection: $CONNECTION_NAME" + + # Configure DHCP to send hostname + echo "Modifying connection to send hostname via DHCP..." + if nmcli connection modify "$CONNECTION_NAME" \ + ipv4.dhcp-send-hostname yes \ + ipv4.dhcp-hostname "$HOSTNAME"; then + echo "✓ Configured DHCP to send hostname: $HOSTNAME" + + # Apply changes by restarting the connection + echo "Applying network configuration changes..." + if nmcli connection down "$CONNECTION_NAME" && nmcli connection up "$CONNECTION_NAME"; then + echo "✓ Network connection restarted" + + # Wait a moment for DHCP to complete + sleep 2 + + # Verify the configuration + echo "Verifying DHCP hostname configuration:" + nmcli connection show "$CONNECTION_NAME" | grep "dhcp-hostname" || true + else + echo "Warning: Failed to restart network connection" + fi + else + echo "ERROR: Failed to configure DHCP hostname via nmcli" + fi + else + echo "Warning: Connection '$CONNECTION_NAME' not found" + echo "Available connections:" + nmcli connection show + fi + + # Add hostname to /etc/hosts mapping to VM IP + if ! 
grep -q "^${VM_IP}[[:space:]].*${HOSTNAME}" /etc/hosts; then + echo "${VM_IP} ${HOSTNAME}" >> /etc/hosts + echo "✓ Added hostname mapping to /etc/hosts: ${VM_IP} ${HOSTNAME}" + else + echo "Hostname mapping already exists in /etc/hosts" + fi + + echo "Hostname configuration completed (skipping Azure DNS zone registration)" + else + echo "No hostname found in aa.toml" + fi +else + echo "aa.toml not found at $AA_CONFIG_FILE" +fi + +# If no ports specified, default to 8080 for backward compatibility +if [ ${#PORTS[@]} -eq 0 ]; then + echo "No ports found in aa.toml, using default port 8080" + PORTS=(8080) +fi + +# Wait for kata-agent and container to start (max 60s) +echo "Waiting for container to start..." +for i in {1..60}; do + if ip netns list | grep -q podns; then + echo "podns namespace found" + break + fi + sleep 1 +done + +# Wait for container to get IP (max 30s) +for i in {1..30}; do + CONTAINER_IP=$(ip netns exec podns ip -4 addr show eth0 2>/dev/null | grep -oP '(?<=inet\s)\d+(\.\d+){3}' || echo "") + if [ -n "$CONTAINER_IP" ]; then + echo "Container IP detected: $CONTAINER_IP" + break + fi + sleep 1 +done + +if [ -z "$CONTAINER_IP" ]; then + echo "ERROR: Could not detect container IP" + exit 1 +fi + +# Check if already configured +if ip netns exec podns ip link show eth1 &>/dev/null; then + echo "Bridge already configured, skipping setup" + exit 0 +fi + +# Create veth pair in podns namespace +echo "Creating veth pair..." +ip netns exec podns ip link add eth1 type veth peer name veth-azure || true +ip netns exec podns ip link set eth1 up +ip netns exec podns ip link set veth-azure netns 1 + +# Create bridge in host namespace +echo "Creating bridge..." +ip link add br-azure type bridge 2>/dev/null || true +ip link set veth-azure master br-azure 2>/dev/null || true +ip link set veth-azure up +ip link set br-azure up + +# Configure static bridge IPs +echo "Configuring static server bridge addresses..." 
+ip addr flush dev br-azure 2>/dev/null || true +ip addr add ${SERVER_HOST_BRIDGE_IP}/24 dev br-azure 2>/dev/null || true + +ip netns exec podns ip addr flush dev eth1 2>/dev/null || true +ip netns exec podns ip addr add ${SERVER_CONTAINER_BRIDGE_IP}/24 dev eth1 +echo "Server container bridge IP: $SERVER_CONTAINER_BRIDGE_IP" +echo "Server host bridge IP: $SERVER_HOST_BRIDGE_IP" + +# Configure routing +echo "Configuring routes..." +ip netns exec podns ip route add ${BRIDGE_SUBNET} dev eth1 2>/dev/null || true +ip route add ${SERVER_CONTAINER_BRIDGE_IP}/32 dev br-azure 2>/dev/null || true +ip route del ${BRIDGE_SUBNET} dev br-azure 2>/dev/null || true + +# Enable proxy ARP and forwarding +echo "Enabling proxy ARP and forwarding..." +sysctl -w net.ipv4.conf.all.proxy_arp=1 +sysctl -w net.ipv4.conf.br-azure.proxy_arp=1 +sysctl -w net.ipv4.ip_forward=1 + +# Configure iptables DNAT rules for all ports +echo "Configuring iptables for ports: ${PORTS[*]}" +for PORT in "${PORTS[@]}"; do + # Trim whitespace + PORT=$(echo "$PORT" | xargs) + + echo "Setting up iptables rules for port $PORT..." + + # VM IP -> Server container bridge IP + if ! iptables -t nat -C PREROUTING -d ${VM_IP} -p tcp --dport ${PORT} -j DNAT --to-destination ${SERVER_CONTAINER_BRIDGE_IP}:${PORT} 2>/dev/null; then + iptables -t nat -A PREROUTING -d ${VM_IP} -p tcp --dport ${PORT} -j DNAT --to-destination ${SERVER_CONTAINER_BRIDGE_IP}:${PORT} + fi + + if ! iptables -t nat -C OUTPUT -d ${VM_IP} -p tcp --dport ${PORT} -j DNAT --to-destination ${SERVER_CONTAINER_BRIDGE_IP}:${PORT} 2>/dev/null; then + iptables -t nat -A OUTPUT -d ${VM_IP} -p tcp --dport ${PORT} -j DNAT --to-destination ${SERVER_CONTAINER_BRIDGE_IP}:${PORT} + fi + + # Server container bridge IP -> Container IP + if ! 
iptables -t nat -C PREROUTING -d ${SERVER_CONTAINER_BRIDGE_IP} -p tcp --dport ${PORT} -j DNAT --to-destination ${CONTAINER_IP}:${PORT} 2>/dev/null; then + iptables -t nat -A PREROUTING -d ${SERVER_CONTAINER_BRIDGE_IP} -p tcp --dport ${PORT} -j DNAT --to-destination ${CONTAINER_IP}:${PORT} + fi + + if ! iptables -t nat -C OUTPUT -d ${SERVER_CONTAINER_BRIDGE_IP} -p tcp --dport ${PORT} -j DNAT --to-destination ${CONTAINER_IP}:${PORT} 2>/dev/null; then + iptables -t nat -A OUTPUT -d ${SERVER_CONTAINER_BRIDGE_IP} -p tcp --dport ${PORT} -j DNAT --to-destination ${CONTAINER_IP}:${PORT} + fi + + # MASQUERADE for container traffic + if ! iptables -t nat -C POSTROUTING -d ${CONTAINER_IP} -p tcp --dport ${PORT} -j MASQUERADE 2>/dev/null; then + iptables -t nat -A POSTROUTING -d ${CONTAINER_IP} -p tcp --dport ${PORT} -j MASQUERADE + fi +done + +echo "Azure bridge setup completed successfully" +echo "VM IP: $VM_IP" +echo "Server container bridge IP: $SERVER_CONTAINER_BRIDGE_IP" +echo "Server host bridge IP: $SERVER_HOST_BRIDGE_IP" +echo "Container IP: $CONTAINER_IP" +echo "Bridge subnet: $BRIDGE_SUBNET" +echo "Exposed ports: ${PORTS[*]}" + +# Resolve hostname in container namespace (after bridge setup) +# if [ -n "$HOSTNAME" ]; then +# echo "Resolving hostname in container namespace..." + +# # Log ps aux for debugging +# echo "=== Current processes (ps aux) ===" +# ps aux +# echo "==================================" + +# # Retry logic for hostname resolution (server resolves first, then client) +# HOSTNAME_IP="" +# MAX_RETRIES=10 +# RETRY_DELAY=2 + +# for attempt in $(seq 1 $MAX_RETRIES); do +# echo "Attempt $attempt/$MAX_RETRIES: Resolving hostname ${HOSTNAME}..." +# HOSTNAME_IP=$(curl -v "http://${HOSTNAME}" 2>&1 | grep -oP 'Trying \K[\d.]+' | head -n1) + +# if [ -n "$HOSTNAME_IP" ]; then +# echo "✓ Resolved ${HOSTNAME} to ${HOSTNAME_IP} on attempt $attempt" +# break +# else +# echo "Failed to resolve ${HOSTNAME}, retrying in ${RETRY_DELAY}s..." 
+# sleep $RETRY_DELAY +# fi +# done + +# if [ -n "$HOSTNAME_IP" ]; then +# echo "Successfully resolved ${HOSTNAME} to ${HOSTNAME_IP}" + +# # Find Java process in podns namespace +# echo "Looking for Java process in podns namespace..." + +# # Log processes in podns namespace +# echo "=== Processes in podns namespace ===" +# ip netns exec podns ps aux || echo "Failed to list podns processes" +# echo "====================================" + +# # Try to find Java process in podns namespace +# JAVA_PID=$(ip netns exec podns ps aux | grep -i 'java' | grep -v grep | head -n1 | awk '{print $2}') + +# # If Java not found, try sleep infinity +# if [ -z "$JAVA_PID" ]; then +# echo "Java process not found, trying 'sleep infinity'..." +# JAVA_PID=$(ip netns exec podns ps aux | grep 'sleep infinity' | grep -v grep | head -n1 | awk '{print $2}') +# fi + +# # If sleep not found, try pause +# if [ -z "$JAVA_PID" ]; then +# echo "Sleep process not found, trying 'pause'..." +# JAVA_PID=$(ip netns exec podns ps aux | grep '/pause' | grep -v grep | head -n1 | awk '{print $2}') +# fi + +# if [ -n "$JAVA_PID" ]; then +# echo "Found container process PID: ${JAVA_PID}" + +# # Check if hostname already exists in container's /etc/hosts +# if ! 
nsenter -t ${JAVA_PID} -a sh -c "grep -q '${HOSTNAME}' /etc/hosts" 2>/dev/null; then +# # Add hostname to container's /etc/hosts +# nsenter -t ${JAVA_PID} -a sh -c "echo '${HOSTNAME_IP} ${HOSTNAME}' >> /etc/hosts" +# echo "✓ Added ${HOSTNAME} -> ${HOSTNAME_IP} to container's /etc/hosts" +# else +# echo "Hostname already exists in container's /etc/hosts" +# fi +# else +# echo "Warning: Container process not found in podns namespace, skipping container /etc/hosts update" +# fi +# else +# echo "Warning: Could not resolve hostname ${HOSTNAME} after $MAX_RETRIES attempts" +# fi +# fi + +
diff --git a/scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-azure-bridge.sh b/scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-azure-bridge.sh
new file mode 100644
index 00000000..343758d0
--- /dev/null
+++ b/scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-azure-bridge.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# Dispatch Azure bridge setup according to the bridge_role key in aa.toml.
+#
+# Runs the server or client bridge script followed by the matching
+# hostname-resolution script. Exits 0 without doing anything when aa.toml or
+# the bridge_role key is missing, and 1 when the role is not recognised.
+set -e
+
+AA_CONFIG_FILE="/run/peerpod/aa.toml"
+
+echo "Azure bridge role dispatcher starting"
+
+if [ ! -f "$AA_CONFIG_FILE" ]; then
+    echo "No aa.toml found at $AA_CONFIG_FILE, skipping azure bridge setup"
+    exit 0
+fi
+
+# Take the first `bridge_role = <value>` line; the value may be bare or wrapped
+# in single or double quotes.
+ROLE=$(sed -n 's/^[[:space:]]*bridge_role[[:space:]]*=[[:space:]]*["'\'']\{0,1\}\([^"'\'']*\)["'\'']\{0,1\}[[:space:]]*$/\1/p' "$AA_CONFIG_FILE" | head -n1 | tr -d '[:space:]')
+
+if [ -z "$ROLE" ]; then
+    echo "No bridge_role key found in $AA_CONFIG_FILE, skipping azure bridge setup"
+    exit 0
+fi
+
+echo "Detected network role from $AA_CONFIG_FILE bridge_role key: $ROLE"
+
+# With `set -e`, a failing bridge script aborts before hostname resolution runs.
+case "$ROLE" in
+    server)
+        /usr/local/sbin/setup-azure-bridge-server.sh
+        echo "Bridge setup complete, starting hostname resolution for server..."
+        /usr/local/sbin/setup-hostname-resolution-server.sh
+        ;;
+    client)
+        /usr/local/sbin/setup-azure-bridge-client.sh
+        echo "Bridge setup complete, starting hostname resolution for client..."
+        /usr/local/sbin/setup-hostname-resolution-client.sh
+        ;;
+    *)
+        echo "ERROR: Invalid network role '$ROLE'. Expected 'server' or 'client'."
+        exit 1
+        ;;
+esac
+# Made with Bob
diff --git a/scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-hostname-resolution-client.sh b/scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-hostname-resolution-client.sh
new file mode 100644
index 00000000..c23ac5e8
--- /dev/null
+++ b/scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-hostname-resolution-client.sh
@@ -0,0 +1,126 @@
+#!/bin/bash
+# Add the hostname configured in aa.toml to the container's /etc/hosts (client
+# role). The address is taken from the verbose trace of a curl request that
+# this script issues from its own namespace.
+set -e
+
+AA_CONFIG_FILE="/run/peerpod/aa.toml"
+
+# Read hostname from aa.toml
+HOSTNAME=""
+if [ -f "$AA_CONFIG_FILE" ]; then
+    HOSTNAME=$(sed -n 's/^[[:space:]]*hostname[[:space:]]*=[[:space:]]*["'\'']\{0,1\}\([^"'\'']*\)["'\'']\{0,1\}[[:space:]]*$/\1/p' "$AA_CONFIG_FILE" | head -n1 | tr -d '[:space:]')
+fi
+
+if [ -z "$HOSTNAME" ]; then
+    echo "No hostname found in aa.toml, skipping hostname resolution"
+    exit 0
+fi
+
+echo "Resolving hostname in container namespace: ${HOSTNAME}"
+
+# Wait for container namespace to be fully ready
+echo "Waiting for container to be fully operational..."
+INTERFACES_READY=0
+for i in {1..30}; do
+    if ip netns exec podns ip link show eth0 &>/dev/null && \
+        ip netns exec podns ip link show eth1 &>/dev/null; then
+        echo "Container network interfaces ready"
+        INTERFACES_READY=1
+        break
+    fi
+    sleep 1
+done
+# Best effort: keep going as before, but make the timeout visible in the logs.
+if [ "$INTERFACES_READY" -ne 1 ]; then
+    echo "Warning: eth0/eth1 not both present in podns after 30s, continuing anyway"
+fi
+
+# Wait for kata-agent to spawn container processes (indicates kata-agent is ready)
+echo "Waiting for kata-agent to spawn container processes..."
+KATA_AGENT_PID=""
+CONTAINER_PID=""
+MAX_WAIT=120 # Wait up to 2 minutes
+
+# NOTE(review): `ip netns exec` switches only the network namespace, so the
+# `ps aux` calls below probably list every process visible to this script, not
+# just those in podns - confirm the PID found really belongs to the container.
+for attempt in $(seq 1 $MAX_WAIT); do
+    # Find kata-agent process
+    KATA_AGENT_PID=$(ps aux | grep -E 'kata-agent|agent-ctl' | grep -v grep | head -n1 | awk '{print $2}')
+
+    if [ -n "$KATA_AGENT_PID" ]; then
+        echo "Found kata-agent PID: ${KATA_AGENT_PID} (attempt $attempt)"
+
+        # Check if kata-agent has spawned container processes in podns namespace
+        # Try Java first
+        CONTAINER_PID=$(ip netns exec podns ps aux 2>/dev/null | grep -i 'java' | grep -v grep | head -n1 | awk '{print $2}')
+
+        # Try sleep infinity
+        if [ -z "$CONTAINER_PID" ]; then
+            CONTAINER_PID=$(ip netns exec podns ps aux 2>/dev/null | grep 'sleep infinity' | grep -v grep | head -n1 | awk '{print $2}')
+        fi
+
+        # Try pause
+        if [ -z "$CONTAINER_PID" ]; then
+            CONTAINER_PID=$(ip netns exec podns ps aux 2>/dev/null | grep '/pause' | grep -v grep | head -n1 | awk '{print $2}')
+        fi
+
+        if [ -n "$CONTAINER_PID" ]; then
+            echo "✓ Container process found in podns namespace: PID ${CONTAINER_PID}"
+            echo "✓ Kata-agent has completed container initialization"
+            break
+        fi
+    fi
+
+    if [ $attempt -eq $MAX_WAIT ]; then
+        echo "Warning: Container process not spawned by kata-agent after ${MAX_WAIT}s"
+        exit 1
+    fi
+
+    sleep 1
+done
+
+# Resolve the hostname from curl's verbose trace. Some curl releases report the
+# address as "IPv4: <addr>", others only as "Trying <addr>:<port>", so accept
+# both. The timeouts stop an unreachable peer from stalling an attempt, and the
+# response body is discarded so it cannot be mistaken for trace output.
+echo "Resolving hostname using curl..."
+HOSTNAME_IP=""
+MAX_RETRIES=20
+RETRY_DELAY=2
+
+for attempt in $(seq 1 $MAX_RETRIES); do
+    echo "Attempt $attempt/$MAX_RETRIES: Resolving hostname ${HOSTNAME}..."
+    HOSTNAME_IP=$(curl -sv --connect-timeout 5 --max-time 10 -o /dev/null "http://${HOSTNAME}" 2>&1 | grep -oP '(IPv4: |Trying )\K[\d.]+' | head -n1)
+
+    if [ -n "$HOSTNAME_IP" ]; then
+        echo "✓ Resolved ${HOSTNAME} to ${HOSTNAME_IP} on attempt $attempt"
+        break
+    else
+        echo "Failed to resolve ${HOSTNAME}, retrying in ${RETRY_DELAY}s..."
+        sleep $RETRY_DELAY
+    fi
+done
+
+if [ -z "$HOSTNAME_IP" ]; then
+    echo "Warning: Could not resolve hostname ${HOSTNAME} after $MAX_RETRIES attempts"
+    exit 1
+fi
+
+echo "Successfully resolved ${HOSTNAME} to ${HOSTNAME_IP}"
+echo "Using container process PID: ${CONTAINER_PID}"
+
+# Update /etc/hosts in container. The values are passed as positional
+# arguments instead of being spliced into the sh -c string, and grep -F matches
+# the name literally rather than treating its dots as regex wildcards.
+if [ -n "$CONTAINER_PID" ]; then
+    if ! nsenter -t "${CONTAINER_PID}" -a sh -c 'grep -qF -- "$1" /etc/hosts' sh "${HOSTNAME}" 2>/dev/null; then
+        nsenter -t "${CONTAINER_PID}" -a sh -c 'echo "$1 $2" >> /etc/hosts' sh "${HOSTNAME_IP}" "${HOSTNAME}"
+        echo "✓ Added ${HOSTNAME} -> ${HOSTNAME_IP} to container's /etc/hosts"
+    else
+        echo "Hostname already exists in container's /etc/hosts"
+    fi
+fi
+
+echo "Client hostname resolution completed successfully"
diff --git a/scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-hostname-resolution-server.sh b/scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-hostname-resolution-server.sh
new file mode 100644
index 00000000..51a9aefe
--- /dev/null
+++ b/scripts/coco/podvm/luks-scratch/usr/local/sbin/setup-hostname-resolution-server.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# Copy the /etc/hosts entry for the hostname configured in aa.toml into the
+# container's /etc/hosts (server role).
+set -e
+
+AA_CONFIG_FILE="/run/peerpod/aa.toml"
+
+# Read hostname from aa.toml
+HOSTNAME=""
+if [ -f "$AA_CONFIG_FILE" ]; then
+    HOSTNAME=$(sed -n 's/^[[:space:]]*hostname[[:space:]]*=[[:space:]]*["'\'']\{0,1\}\([^"'\'']*\)["'\'']\{0,1\}[[:space:]]*$/\1/p' "$AA_CONFIG_FILE" | head -n1 | tr -d '[:space:]')
+fi
+
+if [ -z "$HOSTNAME" ]; then
+    echo "No hostname found in aa.toml, skipping hostname resolution"
+    exit 0
+fi
+
+echo "Resolving hostname in container namespace: ${HOSTNAME}"
+
+# Wait for container namespace to be fully ready
+echo "Waiting for container to be fully operational..."
+INTERFACES_READY=0
+for i in {1..30}; do
+    if ip netns exec podns ip link show eth0 &>/dev/null && \
+        ip netns exec podns ip link show eth1 &>/dev/null; then
+        echo "Container network interfaces ready"
+        INTERFACES_READY=1
+        break
+    fi
+    sleep 1
+done
+# Best effort: keep going as before, but make the timeout visible in the logs.
+if [ "$INTERFACES_READY" -ne 1 ]; then
+    echo "Warning: eth0/eth1 not both present in podns after 30s, continuing anyway"
+fi
+
+# Wait for kata-agent to spawn container processes (indicates kata-agent is ready)
+echo "Waiting for kata-agent to spawn container processes..."
+KATA_AGENT_PID=""
+CONTAINER_PID=""
+MAX_WAIT=120 # Wait up to 2 minutes
+
+# NOTE(review): `ip netns exec` switches only the network namespace, so the
+# `ps aux` calls below probably list every process visible to this script, not
+# just those in podns - confirm the PID found really belongs to the container.
+for attempt in $(seq 1 $MAX_WAIT); do
+    # Find kata-agent process
+    KATA_AGENT_PID=$(ps aux | grep -E 'kata-agent|agent-ctl' | grep -v grep | head -n1 | awk '{print $2}')
+
+    if [ -n "$KATA_AGENT_PID" ]; then
+        echo "Found kata-agent PID: ${KATA_AGENT_PID} (attempt $attempt)"
+
+        # Check if kata-agent has spawned container processes in podns namespace
+        # Try Java first
+        CONTAINER_PID=$(ip netns exec podns ps aux 2>/dev/null | grep -i 'java' | grep -v grep | head -n1 | awk '{print $2}')
+
+        # Try sleep infinity
+        if [ -z "$CONTAINER_PID" ]; then
+            CONTAINER_PID=$(ip netns exec podns ps aux 2>/dev/null | grep 'sleep infinity' | grep -v grep | head -n1 | awk '{print $2}')
+        fi
+
+        # Try pause
+        if [ -z "$CONTAINER_PID" ]; then
+            CONTAINER_PID=$(ip netns exec podns ps aux 2>/dev/null | grep '/pause' | grep -v grep | head -n1 | awk '{print $2}')
+        fi
+
+        if [ -n "$CONTAINER_PID" ]; then
+            echo "✓ Container process found in podns namespace: PID ${CONTAINER_PID}"
+            echo "✓ Kata-agent has completed container initialization"
+            break
+        fi
+    fi
+
+    if [ $attempt -eq $MAX_WAIT ]; then
+        echo "Warning: Container process not spawned by kata-agent after ${MAX_WAIT}s"
+        exit 1
+    fi
+
+    sleep 1
+done
+
+# Get hostname IP from /etc/hosts (already set by bridge setup script).
+# The name is compared as an exact field, so its dots are not regex wildcards
+# and it is found whether it is the first name on the line or a later alias.
+echo "Reading hostname IP from /etc/hosts..."
+HOSTNAME_IP=$(awk -v host="$HOSTNAME" '$1 ~ /^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$/ { for (i = 2; i <= NF; i++) { if ($i ~ /^#/) break; if ($i == host) { print $1; exit } } }' /etc/hosts || true)
+
+if [ -z "$HOSTNAME_IP" ]; then
+    echo "Warning: Could not find hostname ${HOSTNAME} in /etc/hosts"
+    exit 1
+fi
+
+echo "Found ${HOSTNAME} -> ${HOSTNAME_IP} in /etc/hosts"
+echo "Using container process PID: ${CONTAINER_PID}"
+
+# Check if hostname already exists in container's /etc/hosts. The values are
+# passed as positional arguments instead of being spliced into the sh -c
+# string, and grep -F matches the name literally rather than as a regex.
+if ! nsenter -t "${CONTAINER_PID}" -a sh -c 'grep -qF -- "$1" /etc/hosts' sh "${HOSTNAME}" 2>/dev/null; then
+    # Add hostname to container's /etc/hosts
+    nsenter -t "${CONTAINER_PID}" -a sh -c 'echo "$1 $2" >> /etc/hosts' sh "${HOSTNAME_IP}" "${HOSTNAME}"
+    echo "✓ Added ${HOSTNAME} -> ${HOSTNAME_IP} to container's /etc/hosts"
+else
+    echo "Hostname already exists in container's /etc/hosts"
+fi
+
+echo "Hostname resolution completed successfully"
diff --git a/scripts/coco/podvm/podvm-binaries.tar.gz b/scripts/coco/podvm/podvm-binaries.tar.gz new file mode 100644 index 00000000..6512a32c Binary files /dev/null and b/scripts/coco/podvm/podvm-binaries.tar.gz differ diff --git a/scripts/coco/podvm/podvm_maker.sh b/scripts/coco/podvm/podvm_maker.sh index 05387a3d..5a3c2dae 100755 --- a/scripts/coco/podvm/podvm_maker.sh +++ b/scripts/coco/podvm/podvm_maker.sh @@ -7,6 +7,7 @@ else dnf config-manager --add-repo=https://mirror.stream.centos.org/10-stream/AppStream/x86_64/os/ && dnf install -y --nogpgcheck afterburn e2fsprogs && dnf clean all && dnf config-manager --set-disabled "*centos*" fi + cat < /etc/systemd/system/afterburn-checkin.service [Unit] ConditionKernelCommandLine= @@ -23,12 +24,24 @@ tar -xzvf /tmp/pause-bundle.tar.gz -C / # TODO: move to payload ?
tar -xzvf /tmp/luks-config.tar.gz -C / +# Make scripts executable +chmod +x /usr/local/sbin/create-scratch.sh +[ -f /usr/local/sbin/setup-azure-bridge.sh ] && chmod +x /usr/local/sbin/setup-azure-bridge.sh +[ -f /usr/local/sbin/setup-azure-bridge-server.sh ] && chmod +x /usr/local/sbin/setup-azure-bridge-server.sh +[ -f /usr/local/sbin/setup-azure-bridge-client.sh ] && chmod +x /usr/local/sbin/setup-azure-bridge-client.sh +[ -f /usr/local/sbin/setup-hostname-resolution-client.sh ] && chmod +x /usr/local/sbin/setup-hostname-resolution-client.sh +[ -f /usr/local/sbin/setup-hostname-resolution-server.sh ] && chmod +x /usr/local/sbin/setup-hostname-resolution-server.sh + dnf remove -y cloud-init WALinuxAgent # fixes a failure of the podns@netns service, paths differ due to Selinux equivalency rules semanage fcontext -a -t bin_t /usr/bin/ip && restorecon -v /usr/sbin/ip systemctl enable /etc/systemd/system/luks-scratch.service +systemctl enable kata-agent.service +[ -f /etc/systemd/system/azure-bridge.service ] && systemctl enable azure-bridge.service + + # Configuration to make PCR values to be printed at boot cat < /usr/libexec/gen-issue diff --git a/task/build-dm-verity-image/0.1/build-dm-verity-image.yaml b/task/build-dm-verity-image/0.1/build-dm-verity-image.yaml index 85a04d45..effeed24 100644 --- a/task/build-dm-verity-image/0.1/build-dm-verity-image.yaml +++ b/task/build-dm-verity-image/0.1/build-dm-verity-image.yaml @@ -59,12 +59,12 @@ spec: name: varlibcontainers steps: - name: use-trusted-artifact - image: quay.io/konflux-ci/build-trusted-artifacts:latest@sha256:4689f88dd253bd1feebf57f1a76a5a751880f739000719cd662bbdc76990a7fd + image: quay.io/konflux-ci/build-trusted-artifacts:latest@sha256:15d7dc86012e41b10d1eb37679ec03ee75c96436224fadd0938a49dc537aa4ad args: - use - $(params.SOURCE_ARTIFACT)=/var/workdir/source - name: download-rhel-image - image: quay.io/konflux-ci/hermeto:0.29.0@sha256:f577e0399953471df7a9826c1550aef83d28e8b35f76dd65a193441822b629ee + 
image: quay.io/konflux-ci/hermeto:0.47.0@sha256:22a12fb6220c21c2b4e4b4ea33dfdc4a0294dc3071ebcf30570604b54078f792 env: - name: REDHAT_OFFLINE_TOKEN valueFrom: @@ -101,7 +101,7 @@ spec: curl -X GET "${download_url}" -H "Authorization: Bearer ${token}" --output "${filepath}" echo "${RHEL_IMAGE_CHECKSUM}" "${filepath}" | sha256sum --check - name: build - image: quay.io/konflux-ci/buildah-task:latest@sha256:b82d465a06c926882d02b721cf8a8476048711332749f39926a01089cf85a3f9 + image: quay.io/konflux-ci/buildah-task:latest@sha256:4c470b5a153c4acd14bf4f8731b5e36c61d7faafe09c2bf376bb81ce84aa5709 computeResources: limits: memory: 512Mi @@ -572,7 +572,7 @@ spec: - mountPath: /activation-key name: activation-key - name: sbom-generate - image: quay.io/konflux-ci/mobster@sha256:45298b363ff4b96a084bf77a627b3e23471dcfb821eab55a3fa49a60f0ac43f3 + image: quay.io/konflux-ci/mobster@sha256:a2feb71e321a0164820fe7171e564242c75125d8b62d213d3ba3423cf8fa678b script: | #!/bin/bash set -euo pipefail @@ -601,7 +601,7 @@ spec: mkdir -p /tmp/auth && select-oci-auth "$(cat "$(results.IMAGE_REFERENCE.path)")" > /tmp/auth/config.json DOCKER_CONFIG=/tmp/auth cosign attach sbom --sbom sbom.json --type "$SBOM_TYPE" "$(cat "$(results.IMAGE_REFERENCE.path)")" - name: report-sbom-url - image: quay.io/konflux-ci/yq:latest@sha256:15d0238843d954ee78c9c190705eb8b36f6e52c31434183c37d99a80841a635a + image: quay.io/konflux-ci/yq:latest@sha256:466005c667e6e9ea19fd4738275f71a13f89382f6233c581d5e952a41ccb3b42 script: | #!/bin/bash REPO=${OUTPUT_IMAGE%:*}