From a03f3c31b25332c8b52343ec48f34dda86d2508a Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Tue, 28 Apr 2026 15:02:08 +0200 Subject: [PATCH 01/24] WIP feat: add gatewayapi support Signed-off-by: Benjamin Ritter --- Makefile | 3 +- compose.yaml | 11 +++++++ deploy_control_plane.yaml | 2 ++ inventories/group_vars/all/control_plane.yaml | 2 +- inventories/group_vars/control_plane/dex.yaml | 2 +- .../group_vars/control_plane/ingress.yaml | 3 ++ .../group_vars/control_plane/metal.yml | 18 +++++++++++- roles/gateway/defaults/main.yml | 8 +++++ roles/gateway/files/gatewayclass.yaml | 6 ++++ roles/gateway/tasks/main.yml | 22 ++++++++++++++ roles/gateway/templates/envoyproxy.yaml | 14 +++++++++ roles/gateway/templates/gateway.yaml | 29 +++++++++++++++++++ 12 files changed, 116 insertions(+), 4 deletions(-) create mode 100644 roles/gateway/defaults/main.yml create mode 100644 roles/gateway/files/gatewayclass.yaml create mode 100644 roles/gateway/tasks/main.yml create mode 100644 roles/gateway/templates/envoyproxy.yaml create mode 100644 roles/gateway/templates/gateway.yaml diff --git a/Makefile b/Makefile index 190c64a4..8a23f19b 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ KINDCONFIG := $(or $(KINDCONFIG),control-plane/kind.yaml) KUBECONFIG := $(shell pwd)/.kubeconfig METALCTL_HMAC := $(or $(METALCTL_HMAC),metal-admin) -METALCTL_API_URL := $(or $(METALCTL_API_URL),http://api.172.17.0.1.nip.io:8080/metal) +METALCTL_API_URL := $(or $(METALCTL_API_URL),http://api.172.18.0.42.nip.io:8080/metal) MKE2FS_CONFIG := $(shell pwd)/mke2fs.conf # Default values @@ -124,6 +124,7 @@ control-plane-bake: --config $(KINDCONFIG) \ --kubeconfig $(KUBECONFIG); fi $(MAKE) create-proxy-registries + docker compose up -d --force-recreate cloud-provider-kind .PHONY: partition partition: partition-bake diff --git a/compose.yaml b/compose.yaml index a2f0d224..2ae1c6de 100644 --- a/compose.yaml +++ b/compose.yaml @@ -128,6 +128,17 @@ services: - REGISTRY_PROXY_TTL=168h - 
REGISTRY_STORAGE_DELETE_ENABLED=true - OTEL_TRACES_EXPORTER=none + cloud-provider-kind: + image: registry.k8s.io/cloud-provider-kind/cloud-controller-manager:v0.10.0 + restart: always + network_mode: kind + command: + # v0.10.0 of cloud controller does not support tcproutes, since it does not support the experimental gateway api channel + # using envoy-gateway deployed via roles/gateway instead + - --gateway-channel + - disabled + volumes: + - /var/run/docker.sock:/var/run/docker.sock volumes: proxy-docker: proxy-gcr: diff --git a/deploy_control_plane.yaml b/deploy_control_plane.yaml index bd0f0241..1db87690 100644 --- a/deploy_control_plane.yaml +++ b/deploy_control_plane.yaml @@ -6,6 +6,8 @@ roles: - name: ansible-common tags: always + - name: gateway + tags: gateway - name: ingress-controller tags: ingress-controller - name: metal-roles/control-plane/roles/prepare diff --git a/inventories/group_vars/all/control_plane.yaml b/inventories/group_vars/all/control_plane.yaml index 1a93b331..b6ee893a 100644 --- a/inventories/group_vars/all/control_plane.yaml +++ b/inventories/group_vars/all/control_plane.yaml @@ -1,6 +1,6 @@ --- metal_control_plane_provider_tenant: metal-stack -metal_control_plane_ingress_dns: 172.17.0.1.nip.io +metal_control_plane_ingress_dns: 172.18.0.42.nip.io metal_control_plane_stage_name: test metal_control_plane_namespace: metal-control-plane metal_control_plane_image_pull_policy: Always diff --git a/inventories/group_vars/control_plane/dex.yaml b/inventories/group_vars/control_plane/dex.yaml index 6866f208..d1d8ce2d 100644 --- a/inventories/group_vars/control_plane/dex.yaml +++ b/inventories/group_vars/control_plane/dex.yaml @@ -8,7 +8,7 @@ auth_dex_static_clients: name: "metal-stack" secret: secret redirectURIs: - - 'http://v2.api.172.17.0.1.nip.io:8080/auth/oidc/callback' + - 'http://v2.api.172.18.0.42.nip.io:8080/auth/oidc/callback' auth_dex_static_passwords: - email: admin@metal-stack.io diff --git 
a/inventories/group_vars/control_plane/ingress.yaml b/inventories/group_vars/control_plane/ingress.yaml index 0ef8644b..4fe93dcf 100644 --- a/inventories/group_vars/control_plane/ingress.yaml +++ b/inventories/group_vars/control_plane/ingress.yaml @@ -2,3 +2,6 @@ ingress_tcp_service_exposals: "4150": "{{ metal_control_plane_namespace }}/nsqd:4150" "50051": "{{ metal_control_plane_namespace }}/metal-api:50051" +gateway_tcp_listeners: + nsq: 4150 + metal-api: 50051 \ No newline at end of file diff --git a/inventories/group_vars/control_plane/metal.yml b/inventories/group_vars/control_plane/metal.yml index 6e914d6e..6ffb0dd1 100644 --- a/inventories/group_vars/control_plane/metal.yml +++ b/inventories/group_vars/control_plane/metal.yml @@ -3,7 +3,23 @@ metal_set_resource_limits: no metal_check_api_health_endpoint: http://api.{{ metal_control_plane_ingress_dns }}:8080/metal/v1/health metal_api_headscale_control_plane_address: "http://headscale.{{ metal_control_plane_ingress_dns }}:8080" -# metal_helm_chart_local_path: /helm-charts/charts/metal-control-plane +metal_helm_chart_local_path: /helm-charts/charts/metal-control-plane + +metal_deploy_ingress: false + +metal_httproute: + enabled: true + parentRefs: + - name: metal-control-plane + namespace: "{{ metal_control_plane_namespace }}" + sectionName: http + httpsRedirect: + enabled: false + # redirectParentRefs: + # - name: metal-control-plane + # namespace: "{{ metal_control_plane_namespace }}" + # sectionName: http + metal_api_pdb_min_available: 1 metal_api_replicas: 1 diff --git a/roles/gateway/defaults/main.yml b/roles/gateway/defaults/main.yml new file mode 100644 index 00000000..5bb8dff6 --- /dev/null +++ b/roles/gateway/defaults/main.yml @@ -0,0 +1,8 @@ +--- +gateway_namespace: "{{ metal_control_plane_namespace }}" +gateway_http_port: 8080 +gateway_https_port: 4443 + +gateway_tcp_listeners: {} + # nsq: 4150 + # metal-api: 50051 diff --git a/roles/gateway/files/gatewayclass.yaml 
b/roles/gateway/files/gatewayclass.yaml new file mode 100644 index 00000000..a750b9fd --- /dev/null +++ b/roles/gateway/files/gatewayclass.yaml @@ -0,0 +1,6 @@ +apiVersion: gateway.networking.k8s.io/v1 +kind: GatewayClass +metadata: + name: eg +spec: + controllerName: gateway.envoyproxy.io/gatewayclass-controller \ No newline at end of file diff --git a/roles/gateway/tasks/main.yml b/roles/gateway/tasks/main.yml new file mode 100644 index 00000000..34e4a059 --- /dev/null +++ b/roles/gateway/tasks/main.yml @@ -0,0 +1,22 @@ +- name: Deploy envoy-gateway + kubernetes.core.helm: + release_name: envoy-gateway + release_namespace: envoy-gateway-system + create_namespace: true + chart_ref: "oci://docker.io/envoyproxy/gateway-helm" + chart_version: "{{ envoy_gateway_chart_version | default(omit) }}" + # values: "{{ lookup('template', 'values.yaml') | from_yaml }}" + wait: true +- name: Deploy Envoy GatewayClass + kubernetes.core.k8s: + definition: "{{ lookup('file', 'gatewayclass.yaml') | from_yaml }}" +- name: Deploy metal-control-plane nsq_namespace + kubernetes.core.k8s: + kind: Namespace + name: "{{ metal_control_plane_namespace }}" +- name: Deploy metal-control-plane EnvoyProxy + kubernetes.core.k8s: + definition: "{{ lookup('template', 'envoyproxy.yaml') | from_yaml }}" +- name: Deploy metal-control-plane Gateway + kubernetes.core.k8s: + definition: "{{ lookup('template', 'gateway.yaml') | from_yaml }}" \ No newline at end of file diff --git a/roles/gateway/templates/envoyproxy.yaml b/roles/gateway/templates/envoyproxy.yaml new file mode 100644 index 00000000..ce3b4700 --- /dev/null +++ b/roles/gateway/templates/envoyproxy.yaml @@ -0,0 +1,14 @@ +--- +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: EnvoyProxy +metadata: + name: metal-control-plane + namespace: "{{ gateway_namespace }}" +spec: + provider: + type: Kubernetes + kubernetes: + envoyService: + externalTrafficPolicy: Local + type: LoadBalancer + loadBalancerIP: 172.18.0.42 \ No newline at end of file diff 
--git a/roles/gateway/templates/gateway.yaml b/roles/gateway/templates/gateway.yaml new file mode 100644 index 00000000..ae971a43 --- /dev/null +++ b/roles/gateway/templates/gateway.yaml @@ -0,0 +1,29 @@ +--- +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: metal-control-plane + namespace: {{ gateway_namespace }} +spec: + gatewayClassName: eg + addresses: + - value: 172.18.0.42 + infrastructure: + parametersRef: + group: gateway.envoyproxy.io + kind: EnvoyProxy + name: metal-control-plane + listeners: + - protocol: HTTP + port: {{ gateway_http_port }} + name: http + hostname: "*.{{ metal_control_plane_ingress_dns }}" + - protocol: HTTPS + port: {{ gateway_https_port }} + name: https + hostname: "*.{{ metal_control_plane_ingress_dns }}" +{% for name, port in gateway_tcp_listeners.items() %} + - protocol: TCP + name: {{ name }} + port: {{ port }} +{% endfor %} From 7f467b07abc5effbe85f5b340bbd6a3d9d3e937f Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Wed, 29 Apr 2026 11:11:53 +0200 Subject: [PATCH 02/24] feat: add https gateway listener with self-signed cert Signed-off-by: Benjamin Ritter --- files/certs/default-gateway/server.json | 19 +++++++++++++++++++ roles/gateway/tasks/main.yml | 12 ++++++++++++ roles/gateway/templates/gateway.yaml | 8 +++++++- scripts/roll_certs.sh | 3 ++- 4 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 files/certs/default-gateway/server.json diff --git a/files/certs/default-gateway/server.json b/files/certs/default-gateway/server.json new file mode 100644 index 00000000..4a2b87e8 --- /dev/null +++ b/files/certs/default-gateway/server.json @@ -0,0 +1,19 @@ +{ + "CN": "default-gateway", + "hosts": [ + "*.nip.io" + ], + "key": { + "algo": "rsa", + "size": 4096 + }, + "names": [ + { + "C": "DE", + "L": "Munich", + "O": "metal-stack", + "OU": "DevOps", + "ST": "Bavaria" + } + ] +} diff --git a/roles/gateway/tasks/main.yml b/roles/gateway/tasks/main.yml index 34e4a059..3bc063cf 100644 --- 
a/roles/gateway/tasks/main.yml +++ b/roles/gateway/tasks/main.yml @@ -17,6 +17,18 @@ - name: Deploy metal-control-plane EnvoyProxy kubernetes.core.k8s: definition: "{{ lookup('template', 'envoyproxy.yaml') | from_yaml }}" +- name: Deploy default-tls certificate secret + kubernetes.core.k8s: + definition: + apiVersion: v1 + kind: Secret + type: kubernetes.io/tls + metadata: + name: default-tls + namespace: "{{ gateway_namespace }}" + data: + tls.crt: "{{ lookup('file', 'certs/default-gateway/server.pem') | b64encode }}" + tls.key: "{{ lookup('file', 'certs/default-gateway/server-key.pem') | b64encode }}" - name: Deploy metal-control-plane Gateway kubernetes.core.k8s: definition: "{{ lookup('template', 'gateway.yaml') | from_yaml }}" \ No newline at end of file diff --git a/roles/gateway/templates/gateway.yaml b/roles/gateway/templates/gateway.yaml index ae971a43..f1fdd297 100644 --- a/roles/gateway/templates/gateway.yaml +++ b/roles/gateway/templates/gateway.yaml @@ -3,7 +3,7 @@ apiVersion: gateway.networking.k8s.io/v1 kind: Gateway metadata: name: metal-control-plane - namespace: {{ gateway_namespace }} + namespace: "{{ gateway_namespace }}" spec: gatewayClassName: eg addresses: @@ -22,6 +22,12 @@ spec: port: {{ gateway_https_port }} name: https hostname: "*.{{ metal_control_plane_ingress_dns }}" + tls: + mode: Terminate + certificateRefs: + - group: "" + kind: Secret + name: default-tls {% for name, port in gateway_tcp_listeners.items() %} - protocol: TCP name: {{ name }} diff --git a/scripts/roll_certs.sh b/scripts/roll_certs.sh index 7fc34acd..13487d85 100755 --- a/scripts/roll_certs.sh +++ b/scripts/roll_certs.sh @@ -13,7 +13,8 @@ rm *.csr for component in \ grpc \ - masterdata-api; do + masterdata-api \ + default-gateway; do pushd $component echo "generating $component certs" From c847feb72478a3d5cc867e1d5d5279719af6b693 Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Wed, 29 Apr 2026 11:12:52 +0200 Subject: [PATCH 03/24] feat: enable zitadel httproute 
Signed-off-by: Benjamin Ritter --- inventories/group_vars/control_plane/zitadel.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/inventories/group_vars/control_plane/zitadel.yaml b/inventories/group_vars/control_plane/zitadel.yaml index f45059d2..cbb76e36 100644 --- a/inventories/group_vars/control_plane/zitadel.yaml +++ b/inventories/group_vars/control_plane/zitadel.yaml @@ -6,6 +6,10 @@ zitadel_port: 8080 zitadel_skip_verify_tls: true zitadel_insecure: true +zitadel_httproute_enabled: true +zitadel_httproute_parent_refs: +- name: metal-control-plane + zitadel_init_config: static_users: - first_name: Olli From 8ba623231c515e4b4835a729da3705542a56a709 Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Thu, 30 Apr 2026 15:40:37 +0200 Subject: [PATCH 04/24] feat: move kind and sonic containerlab to dedicated network Signed-off-by: Benjamin Ritter --- Makefile | 9 +++++++-- compose.yaml | 5 ++++- control-plane/kind.yaml | 2 +- deploy_gardener.yaml | 2 +- docs/overview-kamaji.drawio.svg | 4 ++-- docs/overview.drawio.svg | 4 ++-- env.sh | 1 + files/certs/grpc/server.json | 2 +- files/dev_images.yaml | 4 ++-- files/startup-config/leaf01_4.4.3.json | 2 +- files/startup-config/leaf01_4.5.1.json | 2 +- files/startup-config/leaf02_4.4.3.json | 2 +- files/startup-config/leaf02_4.5.1.json | 2 +- inventories/group_vars/all/control_plane.yaml | 2 +- inventories/group_vars/control_plane/dex.yaml | 2 +- .../group_vars/control_plane/gardener/gardenlet.yaml | 2 +- .../group_vars/control_plane/gardener/operator.yaml | 4 ++-- inventories/group_vars/control_plane/metal.yml | 2 +- inventories/group_vars/control_plane/minio.yaml | 2 +- inventories/group_vars/control_plane/powerdns.yaml | 4 ++-- inventories/group_vars/control_plane/zitadel.yaml | 2 +- inventories/group_vars/leaves/main.yaml | 2 +- inventories/group_vars/partition/common.yaml | 2 +- inventories/group_vars/partition/router.yaml | 2 +- mini-lab.sonic.yaml | 2 +- roles/gateway/templates/envoyproxy.yaml | 2 +- 
roles/gateway/templates/gateway.yaml | 2 +- 27 files changed, 41 insertions(+), 32 deletions(-) diff --git a/Makefile b/Makefile index 8a23f19b..8a673c4e 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ KINDCONFIG := $(or $(KINDCONFIG),control-plane/kind.yaml) KUBECONFIG := $(shell pwd)/.kubeconfig METALCTL_HMAC := $(or $(METALCTL_HMAC),metal-admin) -METALCTL_API_URL := $(or $(METALCTL_API_URL),http://api.172.18.0.42.nip.io:8080/metal) +METALCTL_API_URL := $(or $(METALCTL_API_URL),http://api.172.42.0.42.nip.io:8080/metal) MKE2FS_CONFIG := $(shell pwd)/mke2fs.conf # Default values @@ -26,6 +26,8 @@ MINI_LAB_VM_IMAGE := $(or $(MINI_LAB_VM_IMAGE),ghcr.io/metal-stack/mini-lab-vms: MINI_LAB_SONIC_IMAGE := $(or $(MINI_LAB_SONIC_IMAGE),ghcr.io/metal-stack/mini-lab-sonic:latest) MINI_LAB_DELL_SONIC_VERSION := $(or $(MINI_LAB_DELL_SONIC_VERSION),4.5.1) +MINI_LAB_INTERNAL_NETWORK=mini_lab_internal + MACHINE_OS=debian-12.0 MAX_RETRIES := 30 @@ -117,6 +119,8 @@ create-proxy-registries: .PHONY: control-plane-bake control-plane-bake: + + @if ! docker network ls | grep -q mini_lab_internal; then docker network create mini_lab_internal --gateway 172.42.0.1 --ip-range=172.42.0.0/24 --subnet=172.42.0.0/24 --ipv6=false ; fi @if ! which kind > /dev/null; then echo "kind needs to be installed"; exit 1; fi @if ! kind get clusters | grep metal-control-plane > /dev/null; then \ kind create cluster $(KIND_ARGS) \ @@ -167,6 +171,7 @@ env: .PHONY: cleanup cleanup: cleanup-control-plane cleanup-partition + docker network rm --force mini_lab_internal .PHONY: cleanup-control-plane cleanup-control-plane: @@ -439,7 +444,7 @@ build-dell-sonic: fetch-virtual-kubeconfig: # TODO: it's hard to get the latest issued generic kubeconfig secret... 
just take the first result for now kubectl --kubeconfig=$(KUBECONFIG) get secret -n garden $(shell kubectl --kubeconfig=$(KUBECONFIG) get secret -n garden -l managed-by=secrets-manager,manager-identity=gardener-operator,name=generic-token-kubeconfig --no-headers | awk '{ print $$1 }') -o jsonpath='{.data.kubeconfig}' | base64 -d > .virtual-kubeconfig - @kubectl --kubeconfig=.virtual-kubeconfig config set-cluster garden --server=https://api.gardener-kube-apiserver.172.17.0.1.nip.io:4443 + @kubectl --kubeconfig=.virtual-kubeconfig config set-cluster garden --server=https://api.gardener-kube-apiserver.172.42.0.1.nip.io:4443 @kubectl --kubeconfig=.virtual-kubeconfig config set-credentials garden --token=$(shell kubectl --kubeconfig=$(KUBECONFIG) get secret -n garden shoot-access-virtual-garden -o jsonpath='{.data.token}' | base64 -d) @kubectl --kubeconfig=$(KUBECONFIG) config unset users.garden @kubectl --kubeconfig=$(KUBECONFIG) config unset contexts.garden diff --git a/compose.yaml b/compose.yaml index 2ae1c6de..41989089 100644 --- a/compose.yaml +++ b/compose.yaml @@ -131,7 +131,10 @@ services: cloud-provider-kind: image: registry.k8s.io/cloud-provider-kind/cloud-controller-manager:v0.10.0 restart: always - network_mode: kind + networks: + - kind + environment: + - KIND_EXPERIMENTAL_DOCKER_NETWORK=${KIND_EXPERIMENTAL_DOCKER_NETWORK:-kind} command: # v0.10.0 of cloud controller does not support tcproutes, since it does not support the experimental gateway api channel # using envoy-gateway deployed via roles/gateway instead diff --git a/control-plane/kind.yaml b/control-plane/kind.yaml index a55b4aaf..c0af5d47 100644 --- a/control-plane/kind.yaml +++ b/control-plane/kind.yaml @@ -2,7 +2,7 @@ kind: Cluster apiVersion: kind.x-k8s.io/v1alpha4 networking: apiServerPort: 6443 - apiServerAddress: 0.0.0.0 + apiServerAddress: 172.42.0.1 nodes: - role: control-plane extraMounts: diff --git a/deploy_gardener.yaml b/deploy_gardener.yaml index 43eb72d4..888c6f8c 100644 --- 
a/deploy_gardener.yaml +++ b/deploy_gardener.yaml @@ -81,7 +81,7 @@ status: loadBalancer: ingress: - - ip: "172.17.0.1" + - ip: "172.42.0.1" tags: gardener - name: Expose istio gateway through ingress-nginx (for local environments) diff --git a/docs/overview-kamaji.drawio.svg b/docs/overview-kamaji.drawio.svg index f6e083c1..e9b2b33e 100644 --- a/docs/overview-kamaji.drawio.svg +++ b/docs/overview-kamaji.drawio.svg @@ -470,13 +470,13 @@
- docker network 172.17.0.1/16 + docker network 172.42.0.1/16
- docker network 172.17.0.1/16 + docker network 172.42.0.1/16 diff --git a/docs/overview.drawio.svg b/docs/overview.drawio.svg index 1267de68..fdb8f9b9 100644 --- a/docs/overview.drawio.svg +++ b/docs/overview.drawio.svg @@ -470,13 +470,13 @@
- docker network 172.17.0.1/16 + docker network 172.42.0.1/16
- docker network 172.17.0.1/16 + docker network 172.42.0.1/16 diff --git a/env.sh b/env.sh index a01de375..daac37ec 100755 --- a/env.sh +++ b/env.sh @@ -24,4 +24,5 @@ DEPLOYMENT_BASE_IMAGE_TAG=${DEPLOYMENT_BASE_IMAGE_TAG} CI=${CI:=false} DOCKER_HUB_USER=${DOCKER_HUB_USER:=} DOCKER_HUB_TOKEN=${DOCKER_HUB_TOKEN:=} +KIND_EXPERIMENTAL_DOCKER_NETWORK=${MINI_LAB_INTERNAL_NETWORK:=} EOF diff --git a/files/certs/grpc/server.json b/files/certs/grpc/server.json index 17ba2a53..e1b8bc35 100644 --- a/files/certs/grpc/server.json +++ b/files/certs/grpc/server.json @@ -1,7 +1,7 @@ { "CN": "metal-api", "hosts": [ - "172.17.0.1", + "172.42.0.42", "203.0.113.1" ], "key": { diff --git a/files/dev_images.yaml b/files/dev_images.yaml index e0699518..91b8285c 100644 --- a/files/dev_images.yaml +++ b/files/dev_images.yaml @@ -1,6 +1,6 @@ --- # Do not change these values metal_api_image_tag: dev -metal_core_image_name: 172.17.0.1:5000/metalstack/metal-core +metal_core_image_name: 172.42.0.42:5000/metalstack/metal-core metal_core_image_tag: dev -metal_hammer_image_url: http://172.17.0.1:20015/metal-hammer-initrd.img.lz4 +metal_hammer_image_url: http://172.42.0.42:20015/metal-hammer-initrd.img.lz4 diff --git a/files/startup-config/leaf01_4.4.3.json b/files/startup-config/leaf01_4.4.3.json index 622b5df9..60393ec3 100644 --- a/files/startup-config/leaf01_4.4.3.json +++ b/files/startup-config/leaf01_4.4.3.json @@ -11,7 +11,7 @@ } }, "DNS_SERVER": { - "172.17.0.1": {}, + "172.42.0.1": {}, "1.1.1.1": {}, "1.0.0.1": {} }, diff --git a/files/startup-config/leaf01_4.5.1.json b/files/startup-config/leaf01_4.5.1.json index cd4224ac..78106fe8 100644 --- a/files/startup-config/leaf01_4.5.1.json +++ b/files/startup-config/leaf01_4.5.1.json @@ -11,7 +11,7 @@ } }, "DNS_SERVER": { - "172.17.0.1": {}, + "172.42.0.1": {}, "1.1.1.1": {}, "1.0.0.1": {} }, diff --git a/files/startup-config/leaf02_4.4.3.json b/files/startup-config/leaf02_4.4.3.json index 5a8d4a78..7359f574 100644 --- 
a/files/startup-config/leaf02_4.4.3.json +++ b/files/startup-config/leaf02_4.4.3.json @@ -11,7 +11,7 @@ } }, "DNS_SERVER": { - "172.17.0.1": {}, + "172.42.0.1": {}, "1.1.1.1": {}, "1.0.0.1": {} }, diff --git a/files/startup-config/leaf02_4.5.1.json b/files/startup-config/leaf02_4.5.1.json index 41c8f118..335ef382 100644 --- a/files/startup-config/leaf02_4.5.1.json +++ b/files/startup-config/leaf02_4.5.1.json @@ -11,7 +11,7 @@ } }, "DNS_SERVER": { - "172.17.0.1": {}, + "172.42.0.1": {}, "1.1.1.1": {}, "1.0.0.1": {} }, diff --git a/inventories/group_vars/all/control_plane.yaml b/inventories/group_vars/all/control_plane.yaml index b6ee893a..d1eec945 100644 --- a/inventories/group_vars/all/control_plane.yaml +++ b/inventories/group_vars/all/control_plane.yaml @@ -1,6 +1,6 @@ --- metal_control_plane_provider_tenant: metal-stack -metal_control_plane_ingress_dns: 172.18.0.42.nip.io +metal_control_plane_ingress_dns: 172.42.0.42.nip.io metal_control_plane_stage_name: test metal_control_plane_namespace: metal-control-plane metal_control_plane_image_pull_policy: Always diff --git a/inventories/group_vars/control_plane/dex.yaml b/inventories/group_vars/control_plane/dex.yaml index d1d8ce2d..f09642d9 100644 --- a/inventories/group_vars/control_plane/dex.yaml +++ b/inventories/group_vars/control_plane/dex.yaml @@ -8,7 +8,7 @@ auth_dex_static_clients: name: "metal-stack" secret: secret redirectURIs: - - 'http://v2.api.172.18.0.42.nip.io:8080/auth/oidc/callback' + - 'http://v2.api.172.42.0.42.nip.io:8080/auth/oidc/callback' auth_dex_static_passwords: - email: admin@metal-stack.io diff --git a/inventories/group_vars/control_plane/gardener/gardenlet.yaml b/inventories/group_vars/control_plane/gardener/gardenlet.yaml index 8fed1d0f..fe02bb70 100644 --- a/inventories/group_vars/control_plane/gardener/gardenlet.yaml +++ b/inventories/group_vars/control_plane/gardener/gardenlet.yaml @@ -1,5 +1,5 @@ --- -gardener_gardenlet_default_dns_domain: "gardener.172.17.0.1.nip.io" 
+gardener_gardenlet_default_dns_domain: "gardener.172.42.0.1.nip.io" gardener_gardenlet_default_dns_provider: powerdns gardener_gardenlet_default_dns_credentials: apiKey: "{{ powerdns_api_key | b64encode }}" diff --git a/inventories/group_vars/control_plane/gardener/operator.yaml b/inventories/group_vars/control_plane/gardener/operator.yaml index 5cddad2e..842293b6 100644 --- a/inventories/group_vars/control_plane/gardener/operator.yaml +++ b/inventories/group_vars/control_plane/gardener/operator.yaml @@ -1,5 +1,5 @@ --- -gardener_operator_ingress_dns_domain: "gardener.172.17.0.1.nip.io" +gardener_operator_ingress_dns_domain: "gardener.172.42.0.1.nip.io" gardener_operator_backup_infrastructure: provider: S3 @@ -17,7 +17,7 @@ gardener_operator_backup_infrastructure_secret: s3ForcePathStyle: "{{ 'true' | b64encode }}" # enable mini-lab patches -gardener_operator_patch_istio_ingress_gateway_service_ip: 172.17.0.1 +gardener_operator_patch_istio_ingress_gateway_service_ip: 172.42.0.1 gardener_operator_expose_virtual_garden_through_ingress_nginx: true # for local setups this should be sufficient diff --git a/inventories/group_vars/control_plane/metal.yml b/inventories/group_vars/control_plane/metal.yml index 6ffb0dd1..6b7acd93 100644 --- a/inventories/group_vars/control_plane/metal.yml +++ b/inventories/group_vars/control_plane/metal.yml @@ -39,7 +39,7 @@ metal_apiserver_oidc_discovery_url: https://zitadel.{{ metal_control_plane_ingre metal_apiserver_oidc_end_session_url: "https://zitadel.{{ metal_control_plane_ingress_dns }}:4443/oidc/v1/end_session" metal_apiserver_redis_password: change-me-soon -metal_apiserver_admin_subjects: "admin@metal-stack.zitadel.172.17.0.1.nip.io@openid-connect" +metal_apiserver_admin_subjects: "admin@metal-stack.zitadel.172.42.0.1.nip.io@openid-connect" metal_api_images: - id: firewall-ubuntu-3.0 diff --git a/inventories/group_vars/control_plane/minio.yaml b/inventories/group_vars/control_plane/minio.yaml index 73801c7c..219fe31c 100644 --- 
a/inventories/group_vars/control_plane/minio.yaml +++ b/inventories/group_vars/control_plane/minio.yaml @@ -2,4 +2,4 @@ minio_root_user: mini-lab minio_root_password: change-me -minio_dns_name: minio.172.17.0.1.nip.io +minio_dns_name: minio.172.42.0.42.nip.io diff --git a/inventories/group_vars/control_plane/powerdns.yaml b/inventories/group_vars/control_plane/powerdns.yaml index bde65e72..595f6d51 100644 --- a/inventories/group_vars/control_plane/powerdns.yaml +++ b/inventories/group_vars/control_plane/powerdns.yaml @@ -10,11 +10,11 @@ powerdns_load_balancer_dns_name: "ns.{{ metal_control_plane_ingress_dns }}" powerdns_api_dns_name: "powerdns-api.{{ metal_control_plane_ingress_dns }}" powerdns_zones: - - name: "gardener.172.17.0.1.nip.io." + - name: "gardener.172.42.0.42.nip.io." kind: Master nameservers: - "{{ powerdns_load_balancer_dns_name }}." - - name: "gardener-kube-apiserver.172.17.0.1.nip.io." + - name: "gardener-kube-apiserver.172.42.0.42.nip.io." kind: Master nameservers: - "{{ powerdns_load_balancer_dns_name }}." 
diff --git a/inventories/group_vars/control_plane/zitadel.yaml b/inventories/group_vars/control_plane/zitadel.yaml index cbb76e36..7f0cf208 100644 --- a/inventories/group_vars/control_plane/zitadel.yaml +++ b/inventories/group_vars/control_plane/zitadel.yaml @@ -27,4 +27,4 @@ zitadel_init_config: # later id will be added but currently not possible with zitadel id: metal-stack name: metal-stack - redirect_uri: http://v2.api.172.17.0.1.nip.io:8080/auth/openid-connect/callback + redirect_uri: http://v2.api.172.42.0.42.nip.io:8080/auth/openid-connect/callback diff --git a/inventories/group_vars/leaves/main.yaml b/inventories/group_vars/leaves/main.yaml index fbc83ac4..e1d1e596 100644 --- a/inventories/group_vars/leaves/main.yaml +++ b/inventories/group_vars/leaves/main.yaml @@ -12,7 +12,7 @@ sonic_config_frr_render: false sonic_config_loopback_address: "{{ lo }}" sonic_config_mgmt_interface: ip: "{{ ansible_host }}/16`" - gateway_address: "172.17.0.1" + gateway_address: "172.42.0.1" sonic_config_mgmt_vrf: false sonic_config_nameservers: "{{ router_nameservers }}" diff --git a/inventories/group_vars/partition/common.yaml b/inventories/group_vars/partition/common.yaml index 61da08a0..d91f43df 100644 --- a/inventories/group_vars/partition/common.yaml +++ b/inventories/group_vars/partition/common.yaml @@ -14,4 +14,4 @@ metal_partition_metal_api_grpc_ca_cert: "{{ lookup('file', 'certs/ca.pem') }}" metal_partition_metal_api_grpc_client_cert: "{{ lookup('file', 'certs/grpc/client.pem') }}" metal_partition_metal_api_grpc_client_key: "{{ lookup('file', 'certs/grpc/client-key.pem') }}" -metal_partition_mgmt_gateway: 172.17.0.1 +metal_partition_mgmt_gateway: 172.42.0.42 diff --git a/inventories/group_vars/partition/router.yaml b/inventories/group_vars/partition/router.yaml index bdcbc460..a85d2998 100644 --- a/inventories/group_vars/partition/router.yaml +++ b/inventories/group_vars/partition/router.yaml @@ -1,5 +1,5 @@ --- router_nameservers: - - 172.17.0.1 + - 172.42.0.1 - 
1.1.1.1 - 1.0.0.1 diff --git a/mini-lab.sonic.yaml b/mini-lab.sonic.yaml index 5d46a128..a14ad7e0 100644 --- a/mini-lab.sonic.yaml +++ b/mini-lab.sonic.yaml @@ -2,7 +2,7 @@ name: mini-lab prefix: "" mgmt: - network: bridge + network: mini_lab_internal topology: defaults: diff --git a/roles/gateway/templates/envoyproxy.yaml b/roles/gateway/templates/envoyproxy.yaml index ce3b4700..edb7c0fe 100644 --- a/roles/gateway/templates/envoyproxy.yaml +++ b/roles/gateway/templates/envoyproxy.yaml @@ -11,4 +11,4 @@ spec: envoyService: externalTrafficPolicy: Local type: LoadBalancer - loadBalancerIP: 172.18.0.42 \ No newline at end of file + loadBalancerIP: 172.42.0.42 \ No newline at end of file diff --git a/roles/gateway/templates/gateway.yaml b/roles/gateway/templates/gateway.yaml index f1fdd297..8bcf5ccf 100644 --- a/roles/gateway/templates/gateway.yaml +++ b/roles/gateway/templates/gateway.yaml @@ -7,7 +7,7 @@ metadata: spec: gatewayClassName: eg addresses: - - value: 172.18.0.42 + - value: 172.42.0.42 infrastructure: parametersRef: group: gateway.envoyproxy.io From 06054706b529312c5d619b74fb869baf202c902c Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Mon, 4 May 2026 14:55:58 +0200 Subject: [PATCH 05/24] fix: bind zitadel only to https listener Signed-off-by: Benjamin Ritter --- inventories/group_vars/control_plane/zitadel.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/inventories/group_vars/control_plane/zitadel.yaml b/inventories/group_vars/control_plane/zitadel.yaml index 7f0cf208..10fc1776 100644 --- a/inventories/group_vars/control_plane/zitadel.yaml +++ b/inventories/group_vars/control_plane/zitadel.yaml @@ -9,6 +9,7 @@ zitadel_insecure: true zitadel_httproute_enabled: true zitadel_httproute_parent_refs: - name: metal-control-plane + sectionName: https zitadel_init_config: static_users: From 540904dc0872e83bfab42ed199945ee693e56c26 Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Mon, 4 May 2026 14:56:48 +0200 Subject: [PATCH 06/24] feat: expose 
metal-api gRPC endpoint Signed-off-by: Benjamin Ritter --- inventories/group_vars/control_plane/ingress.yaml | 2 +- inventories/group_vars/control_plane/metal.yml | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/inventories/group_vars/control_plane/ingress.yaml b/inventories/group_vars/control_plane/ingress.yaml index 4fe93dcf..fe68a6b6 100644 --- a/inventories/group_vars/control_plane/ingress.yaml +++ b/inventories/group_vars/control_plane/ingress.yaml @@ -4,4 +4,4 @@ ingress_tcp_service_exposals: "50051": "{{ metal_control_plane_namespace }}/metal-api:50051" gateway_tcp_listeners: nsq: 4150 - metal-api: 50051 \ No newline at end of file + metal-api-grpc: 50051 \ No newline at end of file diff --git a/inventories/group_vars/control_plane/metal.yml b/inventories/group_vars/control_plane/metal.yml index 6b7acd93..ae01c253 100644 --- a/inventories/group_vars/control_plane/metal.yml +++ b/inventories/group_vars/control_plane/metal.yml @@ -20,6 +20,12 @@ metal_httproute: # namespace: "{{ metal_control_plane_namespace }}" # sectionName: http +metal_tcproute: + enabled: true + parentRefs: + - name: metal-control-plane + namespace: "{{ metal_control_plane_namespace }}" + sectionName: metal-api-grpc metal_api_pdb_min_available: 1 metal_api_replicas: 1 @@ -39,7 +45,7 @@ metal_apiserver_oidc_discovery_url: https://zitadel.{{ metal_control_plane_ingre metal_apiserver_oidc_end_session_url: "https://zitadel.{{ metal_control_plane_ingress_dns }}:4443/oidc/v1/end_session" metal_apiserver_redis_password: change-me-soon -metal_apiserver_admin_subjects: "admin@metal-stack.zitadel.172.42.0.1.nip.io@openid-connect" +metal_apiserver_admin_subjects: "admin@metal-stack.zitadel.172.42.0.42.nip.io@openid-connect" metal_api_images: - id: firewall-ubuntu-3.0 From 3bf3f045490fa99eabec467f90bb85254ba8ccf5 Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Mon, 4 May 2026 14:57:13 +0200 Subject: [PATCH 07/24] feat: expose nsq endpoint Signed-off-by: Benjamin Ritter 
--- inventories/group_vars/control_plane/nsq.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/inventories/group_vars/control_plane/nsq.yaml b/inventories/group_vars/control_plane/nsq.yaml index b8983676..c7a1f802 100644 --- a/inventories/group_vars/control_plane/nsq.yaml +++ b/inventories/group_vars/control_plane/nsq.yaml @@ -9,3 +9,8 @@ nsq_certs_client_cert: "{{ lookup('file', 'certs/nsq/client.crt') }}" nsq_certs_ca_cert: "{{ lookup('file', 'certs/ca.pem') }}" nsq_broadcast_address: nsqd + +nsq_tcproute_enabled: true +nsq_tcproute_parent_refs: +- name: metal-control-plane + sectionName: nsq From 9df6b85020341d0fcb521e43b832bf1d0b6cb1bd Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Mon, 4 May 2026 15:18:33 +0200 Subject: [PATCH 08/24] fix: improve naming consistency Signed-off-by: Benjamin Ritter --- .../group_vars/control_plane/metal.yml | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/inventories/group_vars/control_plane/metal.yml b/inventories/group_vars/control_plane/metal.yml index ae01c253..f446df4b 100644 --- a/inventories/group_vars/control_plane/metal.yml +++ b/inventories/group_vars/control_plane/metal.yml @@ -7,25 +7,17 @@ metal_helm_chart_local_path: /helm-charts/charts/metal-control-plane metal_deploy_ingress: false -metal_httproute: - enabled: true - parentRefs: +metal_httproute_enabled: true +metal_httproute_parent_refs: - name: metal-control-plane namespace: "{{ metal_control_plane_namespace }}" sectionName: http - httpsRedirect: - enabled: false - # redirectParentRefs: - # - name: metal-control-plane - # namespace: "{{ metal_control_plane_namespace }}" - # sectionName: http - -metal_tcproute: - enabled: true - parentRefs: - - name: metal-control-plane - namespace: "{{ metal_control_plane_namespace }}" - sectionName: metal-api-grpc + +metal_tcproute_enabled: true +metal_tcproute_parent_refs: +- name: metal-control-plane + namespace: "{{ metal_control_plane_namespace }}" + sectionName: 
metal-api-grpc metal_api_pdb_min_available: 1 metal_api_replicas: 1 From 611f35378730d9d0509dbccc25acb6a5e506534a Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Mon, 4 May 2026 16:28:39 +0200 Subject: [PATCH 09/24] fix: use valid hosts for gateway certificates Signed-off-by: Benjamin Ritter --- files/certs/default-gateway/server.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/files/certs/default-gateway/server.json b/files/certs/default-gateway/server.json index 4a2b87e8..fb3a27a2 100644 --- a/files/certs/default-gateway/server.json +++ b/files/certs/default-gateway/server.json @@ -1,7 +1,8 @@ { "CN": "default-gateway", "hosts": [ - "*.nip.io" + "api.172.42.0.42.nip.io", + "v2.api.172.42.0.42.nip.io" ], "key": { "algo": "rsa", From 4fd0126f6276c32f394a6372ea2450ae7171800e Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Thu, 7 May 2026 11:28:00 +0200 Subject: [PATCH 10/24] fix: move gateway configuration into their respective sections Signed-off-by: Benjamin Ritter --- .../group_vars/control_plane/metal.yml | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/inventories/group_vars/control_plane/metal.yml b/inventories/group_vars/control_plane/metal.yml index f446df4b..c7980a60 100644 --- a/inventories/group_vars/control_plane/metal.yml +++ b/inventories/group_vars/control_plane/metal.yml @@ -7,14 +7,16 @@ metal_helm_chart_local_path: /helm-charts/charts/metal-control-plane metal_deploy_ingress: false -metal_httproute_enabled: true -metal_httproute_parent_refs: - - name: metal-control-plane - namespace: "{{ metal_control_plane_namespace }}" - sectionName: http - -metal_tcproute_enabled: true -metal_tcproute_parent_refs: +metal_api_httproute_enabled: true +metal_api_httproute_hostnames: +- "{{ metal_ingress_dns }}" +metal_api_httproute_parent_refs: +- name: metal-control-plane + namespace: "{{ metal_control_plane_namespace }}" + sectionName: http + +metal_api_tcproute_enabled: true 
+metal_api_tcproute_parent_refs: - name: metal-control-plane namespace: "{{ metal_control_plane_namespace }}" sectionName: metal-api-grpc @@ -39,6 +41,14 @@ metal_apiserver_oidc_end_session_url: "https://zitadel.{{ metal_control_plane_in metal_apiserver_redis_password: change-me-soon metal_apiserver_admin_subjects: "admin@metal-stack.zitadel.172.42.0.42.nip.io@openid-connect" +metal_apiserver_httproute_enabled: true +metal_apiserver_httproute_hostnames: +- "{{ metal_ingress_v2_dns }}" +metal_apiserver_httproute_parent_refs: +- name: metal-control-plane + namespace: "{{ metal_control_plane_namespace }}" + sectionName: http + metal_api_images: - id: firewall-ubuntu-3.0 name: Firewall 3 Ubuntu From baaae61bbf0e207283155d7b3062dfd6fe2f5b9f Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Fri, 8 May 2026 11:29:07 +0200 Subject: [PATCH 11/24] fix: undo changes to gardener ingress ips Signed-off-by: Benjamin Ritter --- inventories/group_vars/control_plane/minio.yaml | 2 +- inventories/group_vars/control_plane/powerdns.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/inventories/group_vars/control_plane/minio.yaml b/inventories/group_vars/control_plane/minio.yaml index 219fe31c..0e7566ee 100644 --- a/inventories/group_vars/control_plane/minio.yaml +++ b/inventories/group_vars/control_plane/minio.yaml @@ -2,4 +2,4 @@ minio_root_user: mini-lab minio_root_password: change-me -minio_dns_name: minio.172.42.0.42.nip.io +minio_dns_name: minio.172.42.0.1.nip.io diff --git a/inventories/group_vars/control_plane/powerdns.yaml b/inventories/group_vars/control_plane/powerdns.yaml index 595f6d51..d001532f 100644 --- a/inventories/group_vars/control_plane/powerdns.yaml +++ b/inventories/group_vars/control_plane/powerdns.yaml @@ -10,11 +10,11 @@ powerdns_load_balancer_dns_name: "ns.{{ metal_control_plane_ingress_dns }}" powerdns_api_dns_name: "powerdns-api.{{ metal_control_plane_ingress_dns }}" powerdns_zones: - - name: "gardener.172.42.0.42.nip.io." 
+ - name: "gardener.172.42.0.1.nip.io." kind: Master nameservers: - "{{ powerdns_load_balancer_dns_name }}." - - name: "gardener-kube-apiserver.172.42.0.42.nip.io." + - name: "gardener-kube-apiserver.172.42.0.1.nip.io." kind: Master nameservers: - "{{ powerdns_load_balancer_dns_name }}." From 3485b417f0fb04a688f2e9a10351d8d0b14b5c9c Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Fri, 8 May 2026 16:21:41 +0200 Subject: [PATCH 12/24] fix: fix incorrect use of gateway instead of ingress controller Signed-off-by: Benjamin Ritter --- deploy_gardener.yaml | 2 +- inventories/group_vars/all/control_plane.yaml | 3 ++- inventories/group_vars/control_plane/dex.yaml | 2 +- .../control_plane/gardener/operator.yaml | 2 +- inventories/group_vars/control_plane/metal.yml | 16 ++++++---------- .../group_vars/control_plane/zitadel.yaml | 2 +- inventories/group_vars/partition/common.yaml | 2 +- roles/gateway/templates/gateway.yaml | 4 ++-- roles/kamaji/templates/metallb-kind-ip-pool.yaml | 2 +- 9 files changed, 16 insertions(+), 19 deletions(-) diff --git a/deploy_gardener.yaml b/deploy_gardener.yaml index 888c6f8c..9cfad385 100644 --- a/deploy_gardener.yaml +++ b/deploy_gardener.yaml @@ -13,7 +13,7 @@ name: shoot-info namespace: kube-system data: - nodeNetwork: 172.18.0.0/16 + nodeNetwork: 172.42.0.0/16 podNetwork: 10.244.0.0/24 serviceNetwork: 10.96.0.0/16 tags: gardener diff --git a/inventories/group_vars/all/control_plane.yaml b/inventories/group_vars/all/control_plane.yaml index d1eec945..a0f045fe 100644 --- a/inventories/group_vars/all/control_plane.yaml +++ b/inventories/group_vars/all/control_plane.yaml @@ -1,6 +1,7 @@ --- metal_control_plane_provider_tenant: metal-stack -metal_control_plane_ingress_dns: 172.42.0.42.nip.io +metal_control_plane_ingress_dns: 172.42.0.1.nip.io +metal_control_plane_gateway_dns: 172.42.0.42.nip.io metal_control_plane_stage_name: test metal_control_plane_namespace: metal-control-plane metal_control_plane_image_pull_policy: Always diff --git 
a/inventories/group_vars/control_plane/dex.yaml b/inventories/group_vars/control_plane/dex.yaml index f09642d9..f370f762 100644 --- a/inventories/group_vars/control_plane/dex.yaml +++ b/inventories/group_vars/control_plane/dex.yaml @@ -8,7 +8,7 @@ auth_dex_static_clients: name: "metal-stack" secret: secret redirectURIs: - - 'http://v2.api.172.42.0.42.nip.io:8080/auth/oidc/callback' + - 'http://v2.api.{{ metal_control_plane_gateway_dns }}:8080/auth/oidc/callback' auth_dex_static_passwords: - email: admin@metal-stack.io diff --git a/inventories/group_vars/control_plane/gardener/operator.yaml b/inventories/group_vars/control_plane/gardener/operator.yaml index 842293b6..06f6d525 100644 --- a/inventories/group_vars/control_plane/gardener/operator.yaml +++ b/inventories/group_vars/control_plane/gardener/operator.yaml @@ -1,5 +1,5 @@ --- -gardener_operator_ingress_dns_domain: "gardener.172.42.0.1.nip.io" +gardener_operator_ingress_dns_domain: "gardener.{{ metal_control_plane_ingress_dns }}" gardener_operator_backup_infrastructure: provider: S3 diff --git a/inventories/group_vars/control_plane/metal.yml b/inventories/group_vars/control_plane/metal.yml index c7980a60..1ec0f1dc 100644 --- a/inventories/group_vars/control_plane/metal.yml +++ b/inventories/group_vars/control_plane/metal.yml @@ -1,15 +1,13 @@ --- metal_set_resource_limits: no -metal_check_api_health_endpoint: http://api.{{ metal_control_plane_ingress_dns }}:8080/metal/v1/health -metal_api_headscale_control_plane_address: "http://headscale.{{ metal_control_plane_ingress_dns }}:8080" +metal_check_api_health_endpoint: http://api.{{ metal_control_plane_gateway_dns }}:8080/metal/v1/health +metal_api_headscale_control_plane_address: "http://headscale.{{ metal_control_plane_gateway_dns }}:8080" metal_helm_chart_local_path: /helm-charts/charts/metal-control-plane metal_deploy_ingress: false metal_api_httproute_enabled: true -metal_api_httproute_hostnames: -- "{{ metal_ingress_dns }}" metal_api_httproute_parent_refs: - 
name: metal-control-plane namespace: "{{ metal_control_plane_namespace }}" @@ -32,18 +30,16 @@ metal_api_nsq_tcp_address: nsqd:4150 metal_apiserver_pdb_min_available: 1 metal_apiserver_enabled: true -metal_apiserver_url: http://v2.api.{{ metal_control_plane_ingress_dns }}:8080 +metal_apiserver_url: http://v2.api.{{ metal_control_plane_gateway_dns }}:8080 metal_apiserver_oidc_secret_name: zitadel-client-credentials -metal_apiserver_oidc_discovery_url: https://zitadel.{{ metal_control_plane_ingress_dns }}:4443/.well-known/openid-configuration -metal_apiserver_oidc_end_session_url: "https://zitadel.{{ metal_control_plane_ingress_dns }}:4443/oidc/v1/end_session" +metal_apiserver_oidc_discovery_url: https://zitadel.{{ metal_control_plane_gateway_dns }}:4443/.well-known/openid-configuration +metal_apiserver_oidc_end_session_url: "https://zitadel.{{ metal_control_plane_gateway_dns }}:4443/oidc/v1/end_session" metal_apiserver_redis_password: change-me-soon -metal_apiserver_admin_subjects: "admin@metal-stack.zitadel.172.42.0.42.nip.io@openid-connect" +metal_apiserver_admin_subjects: "admin@metal-stack.zitadel.{{ metal_control_plane_gateway_dns }}@openid-connect" metal_apiserver_httproute_enabled: true -metal_apiserver_httproute_hostnames: -- "{{ metal_ingress_v2_dns }}" metal_apiserver_httproute_parent_refs: - name: metal-control-plane namespace: "{{ metal_control_plane_namespace }}" diff --git a/inventories/group_vars/control_plane/zitadel.yaml b/inventories/group_vars/control_plane/zitadel.yaml index 10fc1776..ad229877 100644 --- a/inventories/group_vars/control_plane/zitadel.yaml +++ b/inventories/group_vars/control_plane/zitadel.yaml @@ -1,6 +1,6 @@ --- zitadel_endpoint: zitadel.{{ metal_control_plane_namespace }}.svc.cluster.local -zitadel_external_domain: zitadel.{{ metal_control_plane_ingress_dns }} +zitadel_external_domain: zitadel.{{ metal_control_plane_gateway_dns }} zitadel_ingress_dns: https://{{ zitadel_external_domain }}:4443 zitadel_port: 8080 
zitadel_skip_verify_tls: true diff --git a/inventories/group_vars/partition/common.yaml b/inventories/group_vars/partition/common.yaml index d91f43df..57761405 100644 --- a/inventories/group_vars/partition/common.yaml +++ b/inventories/group_vars/partition/common.yaml @@ -3,7 +3,7 @@ metal_partition_timezone: Europe/Berlin metal_partition_id: mini-lab metal_partition_metal_api_protocol: http -metal_partition_metal_api_addr: api.{{ metal_control_plane_ingress_dns }} +metal_partition_metal_api_addr: api.{{ metal_control_plane_gateway_dns }} metal_partition_metal_api_port: 8080 metal_partition_metal_api_basepath: /metal/ metal_partition_metal_api_hmac_edit_key: metal-edit diff --git a/roles/gateway/templates/gateway.yaml b/roles/gateway/templates/gateway.yaml index 8bcf5ccf..697ce745 100644 --- a/roles/gateway/templates/gateway.yaml +++ b/roles/gateway/templates/gateway.yaml @@ -17,11 +17,11 @@ spec: - protocol: HTTP port: {{ gateway_http_port }} name: http - hostname: "*.{{ metal_control_plane_ingress_dns }}" + hostname: "*.{{ metal_control_plane_gateway_dns }}" - protocol: HTTPS port: {{ gateway_https_port }} name: https - hostname: "*.{{ metal_control_plane_ingress_dns }}" + hostname: "*.{{ metal_control_plane_gateway_dns }}" tls: mode: Terminate certificateRefs: diff --git a/roles/kamaji/templates/metallb-kind-ip-pool.yaml b/roles/kamaji/templates/metallb-kind-ip-pool.yaml index 45034660..eeea201c 100644 --- a/roles/kamaji/templates/metallb-kind-ip-pool.yaml +++ b/roles/kamaji/templates/metallb-kind-ip-pool.yaml @@ -6,7 +6,7 @@ metadata: namespace: metallb-system spec: addresses: - - 172.18.255.200-172.18.255.250 + - 172.42.255.200-172.42.255.250 --- apiVersion: metallb.io/v1beta1 kind: L2Advertisement From f84c000cc8a29b6a64d91d67cd6f6920be024d86 Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Fri, 8 May 2026 16:48:01 +0200 Subject: [PATCH 13/24] feat: remove ingress-nginx exposed ports Signed-off-by: Benjamin Ritter --- control-plane/kind.yaml | 4 ---- 
inventories/group_vars/control_plane/ingress.yaml | 3 --- 2 files changed, 7 deletions(-) diff --git a/control-plane/kind.yaml b/control-plane/kind.yaml index c0af5d47..018c5454 100644 --- a/control-plane/kind.yaml +++ b/control-plane/kind.yaml @@ -13,10 +13,6 @@ nodes: hostPort: 4443 - containerPort: 8080 hostPort: 8080 - - containerPort: 4150 - hostPort: 4150 - - containerPort: 50051 - hostPort: 50051 # if you want to run gardener operator + metal-stack, you need more pods kubeadmConfigPatches: - | diff --git a/inventories/group_vars/control_plane/ingress.yaml b/inventories/group_vars/control_plane/ingress.yaml index fe68a6b6..d7a9fd25 100644 --- a/inventories/group_vars/control_plane/ingress.yaml +++ b/inventories/group_vars/control_plane/ingress.yaml @@ -1,7 +1,4 @@ --- -ingress_tcp_service_exposals: - "4150": "{{ metal_control_plane_namespace }}/nsqd:4150" - "50051": "{{ metal_control_plane_namespace }}/metal-api:50051" gateway_tcp_listeners: nsq: 4150 metal-api-grpc: 50051 \ No newline at end of file From d91c7b3ab81524868abcff4bee123f451823b525 Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Mon, 11 May 2026 18:28:26 +0200 Subject: [PATCH 14/24] fix: change mgmt network to mini_lab_internal Signed-off-by: Benjamin Ritter --- mini-lab.dell_sonic.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mini-lab.dell_sonic.yaml b/mini-lab.dell_sonic.yaml index 91bf0990..d1342437 100644 --- a/mini-lab.dell_sonic.yaml +++ b/mini-lab.dell_sonic.yaml @@ -2,7 +2,7 @@ name: mini-lab prefix: "" mgmt: - network: bridge + network: mini_lab_internal topology: defaults: From 18566496057a07b315398ec44c5f77397c954f5c Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Wed, 20 May 2026 21:25:33 +0200 Subject: [PATCH 15/24] feat: add sonic-vpp image --- .github/workflows/base-image.yaml | 1 + Makefile | 1 + images/sonic/Dockerfile | 4 ++-- images/sonic/base-202511-vpp/Dockerfile | 25 +++++++++++++++++++++++++ 4 files changed, 29 insertions(+), 2 
deletions(-) create mode 100644 images/sonic/base-202511-vpp/Dockerfile diff --git a/.github/workflows/base-image.yaml b/.github/workflows/base-image.yaml index 039e6f01..83ca6040 100644 --- a/.github/workflows/base-image.yaml +++ b/.github/workflows/base-image.yaml @@ -16,6 +16,7 @@ jobs: - name: 202311 - name: 202411 - name: 202505 + - name: 202511-vpp steps: - name: Log in to the container registry diff --git a/Makefile b/Makefile index 8a673c4e..7f5556e9 100644 --- a/Makefile +++ b/Makefile @@ -418,6 +418,7 @@ build-sonic-base: docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202311 images/sonic/base-202311 docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202411 images/sonic/base-202411 docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202505 images/sonic/base-202505 + docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202511 images/sonic/base-202511-vpp ## DEV TARGETS ## diff --git a/images/sonic/Dockerfile b/images/sonic/Dockerfile index fe262415..af042be8 100644 --- a/images/sonic/Dockerfile +++ b/images/sonic/Dockerfile @@ -14,8 +14,8 @@ RUN apt-get update && \ qemu-system-x86 \ telnet -COPY --from=ghcr.io/metal-stack/mini-lab-sonic-base:202505 /sonic-vs.img /sonic-vs.img -COPY --from=ghcr.io/metal-stack/mini-lab-sonic-base:202505 /frr-pythontools.deb /frr-pythontools.deb +COPY --from=ghcr.io/metal-stack/mini-lab-sonic-base:202511-vpp /sonic-vs.img /sonic-vs.img +COPY --from=ghcr.io/metal-stack/mini-lab-sonic-base:202511-vpp /frr-pythontools.deb /frr-pythontools.deb ENTRYPOINT ["/launch.py"] diff --git a/images/sonic/base-202511-vpp/Dockerfile b/images/sonic/base-202511-vpp/Dockerfile new file mode 100644 index 00000000..32daf97e --- /dev/null +++ b/images/sonic/base-202511-vpp/Dockerfile @@ -0,0 +1,25 @@ +# Check: https://sonic-build.azurewebsites.net/ui/sonic/pipelines +ARG SONIC_BASE_URL=https://sonic-build.azurewebsites.net/api/sonic/artifacts?branchName=202511&definitionId=2818&artifactName=sonic-buildimage.vpp +ARG 
SONIC_IMG_URL=${SONIC_BASE_URL}&target=target%2Fsonic-vpp.img.gz +ARG FRR_RELOAD_URL=${SONIC_BASE_URL}&target=target%2Fdebs%2Fbookworm%2Ffrr-pythontools_10.4.1-sonic-0_all.deb + +FROM docker.io/library/busybox:stable AS download + +ARG SONIC_IMG_URL +ARG FRR_RELOAD_URL + +ADD "${SONIC_IMG_URL}" /sonic-vs.img.gz +ADD "${FRR_RELOAD_URL}" /frr-pythontools.deb + +RUN gunzip /sonic-vs.img.gz + +FROM scratch + +ARG SONIC_IMG_URL +ARG FRR_RELOAD_URL + +LABEL sonic-img-url=${SONIC_IMG_URL} \ + frr-reload-url=${FRR_RELOAD_URL} + +COPY --from=download /frr-pythontools.deb /frr-pythontools.deb +COPY --from=download /sonic-vs.img /sonic-vs.img From 2625f17e30d4ef4718d89372808e92622d6ad561 Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Sat, 23 May 2026 10:59:08 +0200 Subject: [PATCH 16/24] WIP feat: working sonic-vpp example it is hold together by hopes and dreams, is slow as all hell, but it works Signed-off-by: Benjamin Ritter --- Makefile | 13 ++++-- images/sonic/launch.py | 101 +++++++++++++++++++++++++++++++++-------- 2 files changed, 91 insertions(+), 23 deletions(-) diff --git a/Makefile b/Makefile index 7f5556e9..be25e84d 100644 --- a/Makefile +++ b/Makefile @@ -23,7 +23,7 @@ ANSIBLE_DISPLAY_SKIPPED_HOSTS=false MINI_LAB_FLAVOR := $(or $(MINI_LAB_FLAVOR),sonic) MINI_LAB_VM_IMAGE := $(or $(MINI_LAB_VM_IMAGE),ghcr.io/metal-stack/mini-lab-vms:latest) -MINI_LAB_SONIC_IMAGE := $(or $(MINI_LAB_SONIC_IMAGE),ghcr.io/metal-stack/mini-lab-sonic:latest) +MINI_LAB_SONIC_IMAGE := $(or $(MINI_LAB_SONIC_IMAGE),ghcr.io/metal-stack/mini-lab-sonic:202511-vpp) MINI_LAB_DELL_SONIC_VERSION := $(or $(MINI_LAB_DELL_SONIC_VERSION),4.5.1) MINI_LAB_INTERNAL_NETWORK=mini_lab_internal @@ -141,7 +141,7 @@ ifeq ($(CI),true) docker pull $(MINI_LAB_SONIC_IMAGE) endif ifneq ($(filter $(MINI_LAB_FLAVOR),dell_sonic capms),$(MINI_LAB_FLAVOR)) - docker pull $(MINI_LAB_SONIC_IMAGE) + #docker pull $(MINI_LAB_SONIC_IMAGE) endif @if ! 
sudo $(CONTAINERLAB) --topo $(LAB_TOPOLOGY) inspect | grep -i leaf01 > /dev/null; then \ sudo --preserve-env=MINI_LAB_SONIC_IMAGE --preserve-env=MINI_LAB_DELL_SONIC_VERSION --preserve-env=MINI_LAB_VM_IMAGE $(CONTAINERLAB) deploy --topo $(LAB_TOPOLOGY) --reconfigure && \ @@ -418,7 +418,14 @@ build-sonic-base: docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202311 images/sonic/base-202311 docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202411 images/sonic/base-202411 docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202505 images/sonic/base-202505 - docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202511 images/sonic/base-202511-vpp + docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202511-vpp images/sonic/base-202511-vpp + +.PHONY: build-sonic +build-sonic: + docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202311 images/sonic/base-202311 + docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202411 images/sonic/base-202411 + docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202505 images/sonic/base-202505 + docker build -t ghcr.io/metal-stack/mini-lab-sonic-base:202511-vpp images/sonic/base-202511-vpp ## DEV TARGETS ## diff --git a/images/sonic/launch.py b/images/sonic/launch.py index c8c3ee35..0eeccf56 100755 --- a/images/sonic/launch.py +++ b/images/sonic/launch.py @@ -10,6 +10,7 @@ import struct import subprocess import sys +import telnetlib import time from typing import Callable @@ -48,7 +49,15 @@ def guestfs(self) -> GuestFS: g = guestfs.GuestFS(python_return_dict=True) g.add_drive_opts(filename=self._disk, format="qcow2", readonly=False) g.launch() - g.mount('/dev/sda3', '/') + # SONiC stores its rootfs as a read-only squashfs at /image-*/fs.squashfs; + # the sibling rw/ tree only holds overlay overrides. Use mkmountpoint so + # we can expose both: the writable partition under /disk and the base + # rootfs (loop-mounted from fs.squashfs) under /rootfs. 
+ g.mkmountpoint('/disk') + g.mkmountpoint('/rootfs') + g.mount('/dev/sda3', '/disk') + image = g.glob_expand('/disk/image-*')[0] + g.mount_loop(image + 'fs.squashfs', '/rootfs') return g def start(self) -> None: @@ -78,7 +87,7 @@ def start(self) -> None: with open(f'/sys/class/net/{iface}/address', 'r') as f: mac = f.read().strip() cmd.append('-device') - cmd.append(f'virtio-net-pci,netdev=hn{i},mac={mac}') + cmd.append(f'e1000,netdev=hn{i},mac={mac}') cmd.append(f'-netdev') cmd.append(f'tap,id=hn{i},ifname=tap{i},script=/mirror_tap_to_front_panel.sh,downscript=no') @@ -89,9 +98,9 @@ def wait(self) -> None: def initial_configuration(g: GuestFS, hwsku: str) -> None: - image = g.glob_expand('/image-*')[0] + image = g.glob_expand('/disk/image-*')[0] - g.rm(image + 'platform/firsttime') + # g.rm(image + 'platform/firsttime') systemd_system = image + 'rw/etc/systemd/system/' sonic_target_wants = systemd_system + 'sonic.target.wants/' @@ -122,13 +131,16 @@ def initial_configuration(g: GuestFS, hwsku: str) -> None: g.ln_s(linkname=systemd_system + 'watchdog-control.service', target='/dev/null') sonic_share = image + 'rw/usr/share/sonic/' - hwsku_dir = image + 'rw' + VS_DEVICES_PATH + hwsku - g.mkdir_p(hwsku_dir) - - g.write(path=image + 'rw' + VS_DEVICES_PATH + 'default_sku', content=f'{hwsku} empty'.encode('utf-8')) - g.ln_s(linkname=sonic_share + 'hwsku', target=VS_DEVICES_PATH + hwsku) - g.ln_s(linkname=sonic_share + 'platform', target=VS_DEVICES_PATH) + # Reads come from the read-only rootfs (loop-mounted squashfs); writes + # would have to target image + 'rw' + VS_DEVICES_PATH + hwsku. + hwsku_dir = '/rootfs' + VS_DEVICES_PATH + hwsku + hwsku_dir_rw = image + 'rw' + VS_DEVICES_PATH + hwsku + #g.mkdir_p(hwsku_dir) + # The lanemap.ini file is used by the virtual switch image to assign front panels to the Linux interfaces ethX. + # This assignment will later also be used by the script mirror_tap_to_front_panel.sh. 
+ # g.download(remotefilename=hwsku_dir + '/port_config.ini', filename='/port_config.ini') + # g.download(remotefilename=hwsku_dir + '/lanemap.ini', filename='/lanemap.ini') ifaces = get_ethernet_interfaces() # The port_config.ini file contains the assignment of front panels to lanes. port_config = parse_port_config() @@ -138,8 +150,9 @@ def initial_configuration(g: GuestFS, hwsku: str) -> None: with open('/lanemap.ini', 'w') as f: f.write('\n'.join(lanemap)) - g.copy_in(localpath='/lanemap.ini', remotedir=hwsku_dir) - g.copy_in(localpath='/port_config.ini', remotedir=hwsku_dir) + g.mkdir_p(hwsku_dir_rw) + g.copy_in(localpath='/lanemap.ini', remotedir=hwsku_dir_rw) + g.copy_in(localpath='/port_config.ini', remotedir=hwsku_dir_rw) etc_sonic = image + 'rw/etc/sonic/' g.mkdir_p(etc_sonic) @@ -149,7 +162,6 @@ def initial_configuration(g: GuestFS, hwsku: str) -> None: PLATFORM=x86_64-kvm_x86_64-r0 HWSKU={hwsku} DEVICE_TYPE=LeafRouter - ASIC_TYPE=vs '''.encode('utf-8') g.write(path=etc_sonic + 'sonic-environment', content=sonic_environment) @@ -164,7 +176,7 @@ def initial_configuration(g: GuestFS, hwsku: str) -> None: config_db['PORT'] = ports config_db_json = json.dumps(config_db, indent=4, sort_keys=True) - g.write(path=etc_sonic + 'config_db.json', content=config_db_json.encode('utf-8')) + g.write(path=image + 'rw/golden_config_db.json', content=config_db_json.encode('utf-8')) if os.path.exists('/authorized_keys'): g.mkdir_p(image + 'rw/root/.ssh') @@ -181,7 +193,7 @@ def main(): logger = logging.getLogger() name = os.getenv('CLAB_LABEL_CLAB_NODE_NAME', default='switch') - smp = os.getenv('QEMU_SMP', default='2') + smp = os.getenv('QEMU_SMP', default='8') memory = os.getenv('QEMU_MEMORY', default='2048') interfaces = int(os.getenv('CLAB_INTFS', 0)) + 1 hwsku = os.getenv('HWSKU', default='Accton-AS7726-32X') @@ -203,6 +215,8 @@ def main(): logger.info('Start QEMU') vm.start() + apply_golden_config_via_serial(logger) + # SONiC will start sending LLDP packets after 
PortConfigDone is set in APPL database logger.info('Wait until eth0 has an IPv4 address') sniff(iface='eth0', filter='ether proto 0x88cc', stop_filter=has_an_IPv4_address('eth0'), store=0) @@ -219,6 +233,57 @@ def handle_exit(signal, frame): sys.exit(0) +def apply_golden_config_via_serial(logger) -> None: + logger.info('Connecting to SONiC serial console on 127.0.0.1:5000') + while True: + try: + tn = telnetlib.Telnet('127.0.0.1', 5000, timeout=600) + break + except ConnectionRefusedError: + time.sleep(1) + + def send(data: bytes, *, redact: bool = False) -> None: + display = '***' if redact else data.rstrip(b'\n').decode('utf-8', errors='replace') + logger.info(f'serial> {display}') + tn.write(data) + + def read_until(marker: bytes, timeout: int) -> str: + text = tn.read_until(marker, timeout=timeout).decode('utf-8', errors='replace') + for line in text.splitlines(): + stripped = line.rstrip() + if stripped: + logger.info(f'serial< {stripped}') + return text + + logger.info('Waiting for login prompt') + read_until(b'login: ', timeout=600) + send(b'admin\n') + + read_until(b'Password: ', timeout=60) + send(b'YourPaSsWoRd\n', redact=True) + + read_until(b'$ ', timeout=60) + + # hacked together system readiness check since show system-health does not work in virtual sonic + # stolen from https://github.com/sonic-net/sonic-utilities/blob/master/config/main.py + logger.info('Waiting for systemctl is-system-running to return running') + while True: + send(b'sudo systemctl is-system-running\n') + text = read_until(b'$ ', timeout=30) + if any(line.strip() == 'running' for line in text.splitlines()): + break + time.sleep(5) + + logger.info('Installing golden config_db.json') + send(b'sudo cp /golden_config_db.json /etc/sonic/config_db.json \n') + read_until(b'$ ', timeout=60) + + logger.info('Rebooting SONiC to apply golden config') + send(b'sudo reboot\n') + + tn.close() + + def wait_until_all_interfaces_are_connected(interfaces: int) -> None: while True: i = 0 @@ -356,12 
+421,8 @@ def create_config_db(hwsku: str) -> dict: 'alias': 'eth0', 'admin_status': 'up' } - }, - 'VERSIONS': { - 'DATABASE': { - 'VERSION': 'version_202311_03' - } } + } From c28abed50c1027914e0f5ce906ad972e1450747d Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Sun, 24 May 2026 14:57:06 +0200 Subject: [PATCH 17/24] WIP feat: booting sonic-vpp and make up succeeds current issue: DHCP discovers from machines do not arrive Signed-off-by: Benjamin Ritter --- deploy_partition.yaml | 6 +- images/sonic/README.md | 8 ++ images/sonic/launch.py | 56 ++++---- images/sonic/port_config.ini | 125 +----------------- .../group_vars/all/release_vector.yaml | 2 +- inventories/group_vars/leaves/main.yaml | 2 +- mini-lab.sonic.yaml | 8 +- roles/sonic/tasks/main.yaml | 46 +++---- 8 files changed, 72 insertions(+), 181 deletions(-) create mode 100644 images/sonic/README.md diff --git a/deploy_partition.yaml b/deploy_partition.yaml index 705d707a..5909531d 100644 --- a/deploy_partition.yaml +++ b/deploy_partition.yaml @@ -7,7 +7,7 @@ - name: Wait for system to become reachable ansible.builtin.wait_for_connection: delay: 10 - timeout: 50 + timeout: 300 roles: - name: ansible-common tags: always @@ -136,10 +136,6 @@ hosts: leaves any_errors_fatal: true become: true - pre_tasks: - - name: Wait some time - pause: - seconds: 120 roles: - name: ansible-common tags: always diff --git a/images/sonic/README.md b/images/sonic/README.md new file mode 100644 index 00000000..bdec0df1 --- /dev/null +++ b/images/sonic/README.md @@ -0,0 +1,8 @@ +# Virtual Sonic Images + +We use sonic-vpp to emulate SONiC switches. It is running in kvm inside a containerlab container. To provide better emulation accuracy we use sonic-vpp, which uses the Vector Packet Processor (VPP) to emulate something like a switch ASIC, like the Broadcom Tomahawk 3 used in our Edgecore Accton AS7726-32X workhorse we use in production.
We migrated to sonic-vpp because the sonic-vs image used mostly netlink primitives, which behaved differently than an ASIC driven through SONiCs SAI layer. It's slower but still sane. + + +# Configuration knobs + +You can edit the port_config.ini to add more ports. Keep the number as low as possible. It will put less strain on your system because it will spawn fewer VPP worker threads. You will have to set up the switch from scratch afterwards, since VPP will generate some configuration on first startup. \ No newline at end of file diff --git a/images/sonic/launch.py b/images/sonic/launch.py index 0eeccf56..b99e19a6 100755 --- a/images/sonic/launch.py +++ b/images/sonic/launch.py @@ -87,7 +87,7 @@ def start(self) -> None: with open(f'/sys/class/net/{iface}/address', 'r') as f: mac = f.read().strip() cmd.append('-device') - cmd.append(f'e1000,netdev=hn{i},mac={mac}') + cmd.append(f'virtio-net-pci,netdev=hn{i},mac={mac}') cmd.append(f'-netdev') cmd.append(f'tap,id=hn{i},ifname=tap{i},script=/mirror_tap_to_front_panel.sh,downscript=no') @@ -110,32 +110,31 @@ def initial_configuration(g: GuestFS, hwsku: str) -> None: g.copy_in(localpath='/frr-pythontools.deb', remotedir=image + 'rw/') # Workaround: Speed up lldp startup by remove hardcoded wait of 90 seconds - g.ln_s(linkname=systemd_system + 'aaastatsd.timer', target='/dev/null') # Radius - g.ln_s(linkname=systemd_system + 'featured.timer', target='/dev/null') # Feature handling not necessary - g.ln_s(linkname=systemd_system + 'hostcfgd.timer', target='/dev/null') # After boot Host configuration - g.ln_s(linkname=systemd_system + 'rasdaemon.timer', target='/dev/null') # After boot Host configuration - g.ln_s(linkname=systemd_system + 'tacacs-config.timer', target='/dev/null') # After boot Host configuration + # g.ln_s(linkname=systemd_system + 'aaastatsd.timer', target='/dev/null') # Radius + # g.ln_s(linkname=systemd_system + 'featured.timer', target='/dev/null') # Feature handling not necessary + # 
g.ln_s(linkname=systemd_system + 'hostcfgd.timer', target='/dev/null') # After boot Host configuration + # g.ln_s(linkname=systemd_system + 'rasdaemon.timer', target='/dev/null') # After boot Host configuration + # g.ln_s(linkname=systemd_system + 'tacacs-config.timer', target='/dev/null') # After boot Host configuration # Started by featured - g.ln_s(linkname=sonic_target_wants + 'lldp.service', target='/lib/systemd/system/lldp.service') - g.ln_s(linkname=systemd_system + 'pmon.service', target='/lib/systemd/system/pmon.service') - g.ln_s(linkname=sonic_target_wants + 'pmon.service', target='/lib/systemd/system/pmon.service') + # g.ln_s(linkname=sonic_target_wants + 'lldp.service', target='/lib/systemd/system/lldp.service') + # g.ln_s(linkname=systemd_system + 'pmon.service', target='/lib/systemd/system/pmon.service') + # g.ln_s(linkname=sonic_target_wants + 'pmon.service', target='/lib/systemd/system/pmon.service') # Workaround: Only useful for BackEndToRRouter - g.ln_s(linkname=systemd_system + 'backend-acl.service', target='/dev/null') + # g.ln_s(linkname=systemd_system + 'backend-acl.service', target='/dev/null') # Workaround: We don't need LACP - g.ln_s(linkname=systemd_system + 'teamd.service', target='/dev/null') + # g.ln_s(linkname=systemd_system + 'teamd.service', target='/dev/null') # Workaround: Python module sonic_platform not present on vs images g.ln_s(linkname=systemd_system + 'system-health.service', target='/dev/null') g.ln_s(linkname=systemd_system + 'watchdog-control.service', target='/dev/null') sonic_share = image + 'rw/usr/share/sonic/' - # Reads come from the read-only rootfs (loop-mounted squashfs); writes - # would have to target image + 'rw' + VS_DEVICES_PATH + hwsku. 
- hwsku_dir = '/rootfs' + VS_DEVICES_PATH + hwsku + platform_dir = image + 'rw' + VS_DEVICES_PATH hwsku_dir_rw = image + 'rw' + VS_DEVICES_PATH + hwsku - #g.mkdir_p(hwsku_dir) + g.mkdir_p(platform_dir) + g.write(path=platform_dir + '/default_sku', content=f'{hwsku} empty'.encode('utf-8')) # The lanemap.ini file is used by the virtual switch image to assign front panels to the Linux interfaces ethX. # This assignment will later also be used by the script mirror_tap_to_front_panel.sh. @@ -156,14 +155,15 @@ def initial_configuration(g: GuestFS, hwsku: str) -> None: etc_sonic = image + 'rw/etc/sonic/' g.mkdir_p(etc_sonic) - sonic_version = image.removeprefix('/image-').removesuffix('/') - sonic_environment = f''' - SONIC_VERSION=${sonic_version} - PLATFORM=x86_64-kvm_x86_64-r0 - HWSKU={hwsku} - DEVICE_TYPE=LeafRouter - '''.encode('utf-8') - g.write(path=etc_sonic + 'sonic-environment', content=sonic_environment) + # sonic_version = image.removeprefix('/image-').removesuffix('/') + # sonic_environment = f''' + # SONIC_VERSION=${sonic_version} + # PLATFORM=x86_64-kvm_x86_64-r0 + # HWSKU={hwsku} + # DEVICE_TYPE=LeafRouter + # ASIC_TYPE=vpp + # '''.encode('utf-8') + # g.write(path=etc_sonic + 'sonic-environment', content=sonic_environment) config_db = create_config_db(hwsku) ports = {} @@ -193,8 +193,8 @@ def main(): logger = logging.getLogger() name = os.getenv('CLAB_LABEL_CLAB_NODE_NAME', default='switch') - smp = os.getenv('QEMU_SMP', default='8') - memory = os.getenv('QEMU_MEMORY', default='2048') + smp = os.getenv('QEMU_SMP', default='4') + memory = os.getenv('QEMU_MEMORY', default='4096') interfaces = int(os.getenv('CLAB_INTFS', 0)) + 1 hwsku = os.getenv('HWSKU', default='Accton-AS7726-32X') @@ -421,6 +421,12 @@ def create_config_db(hwsku: str) -> dict: 'alias': 'eth0', 'admin_status': 'up' } + }, + 'LLDP': { + 'GLOBAL': { + 'enabled': 'true', + 'hello_time': '10' + } } } diff --git a/images/sonic/port_config.ini b/images/sonic/port_config.ini index 
acc1f3d2..2dfa3d87 100644 --- a/images/sonic/port_config.ini +++ b/images/sonic/port_config.ini @@ -1,123 +1,4 @@ # name lanes alias index speed -Ethernet0 1 Eth1/1 1 25000 -Ethernet1 2 Eth1/2 1 25000 -Ethernet2 3 Eth1/3 1 25000 -Ethernet3 4 Eth1/4 1 25000 -Ethernet4 5 Eth2/1 2 25000 -Ethernet5 6 Eth2/2 2 25000 -Ethernet6 7 Eth2/3 2 25000 -Ethernet7 8 Eth2/4 2 25000 -Ethernet8 9 Eth3/1 3 25000 -Ethernet9 10 Eth3/2 3 25000 -Ethernet10 11 Eth3/3 3 25000 -Ethernet11 12 Eth3/4 3 25000 -Ethernet12 13 Eth4/1 4 25000 -Ethernet13 14 Eth4/2 4 25000 -Ethernet14 15 Eth4/3 4 25000 -Ethernet15 16 Eth4/4 4 25000 -Ethernet16 17 Eth5/1 5 25000 -Ethernet17 18 Eth5/2 5 25000 -Ethernet18 19 Eth5/3 5 25000 -Ethernet19 20 Eth5/4 5 25000 -Ethernet20 21 Eth6/1 6 25000 -Ethernet21 22 Eth6/2 6 25000 -Ethernet22 23 Eth6/3 6 25000 -Ethernet23 24 Eth6/4 6 25000 -Ethernet24 25 Eth7/1 7 25000 -Ethernet25 26 Eth7/2 7 25000 -Ethernet26 27 Eth7/3 7 25000 -Ethernet27 28 Eth7/4 7 25000 -Ethernet28 29 Eth8/1 8 25000 -Ethernet29 30 Eth8/2 8 25000 -Ethernet30 31 Eth8/3 8 25000 -Ethernet31 32 Eth8/4 8 25000 -Ethernet32 33 Eth9/1 9 25000 -Ethernet33 34 Eth9/2 9 25000 -Ethernet34 35 Eth9/3 9 25000 -Ethernet35 36 Eth9/4 9 25000 -Ethernet36 37 Eth10/1 10 25000 -Ethernet37 38 Eth10/2 10 25000 -Ethernet38 39 Eth10/3 10 25000 -Ethernet39 40 Eth10/4 10 25000 -Ethernet40 41 Eth11/1 11 25000 -Ethernet41 42 Eth11/2 11 25000 -Ethernet42 43 Eth11/3 11 25000 -Ethernet43 44 Eth11/4 11 25000 -Ethernet44 45 Eth12/1 12 25000 -Ethernet45 46 Eth12/2 12 25000 -Ethernet46 47 Eth12/3 12 25000 -Ethernet47 48 Eth12/4 12 25000 -Ethernet48 49 Eth13/1 13 25000 -Ethernet49 50 Eth13/2 13 25000 -Ethernet50 51 Eth13/3 13 25000 -Ethernet51 52 Eth13/4 13 25000 -Ethernet52 53 Eth14/1 14 25000 -Ethernet53 54 Eth14/2 14 25000 -Ethernet54 55 Eth14/3 14 25000 -Ethernet55 56 Eth14/4 14 25000 -Ethernet56 57 Eth15/1 15 25000 -Ethernet57 58 Eth15/2 15 25000 -Ethernet58 59 Eth15/3 15 25000 -Ethernet59 60 Eth15/4 15 25000 -Ethernet60 61 Eth16/1 16 
25000 -Ethernet61 62 Eth16/2 16 25000 -Ethernet62 63 Eth16/3 16 25000 -Ethernet63 64 Eth16/4 16 25000 -Ethernet64 65 Eth17/1 17 25000 -Ethernet65 66 Eth17/2 17 25000 -Ethernet66 67 Eth17/3 17 25000 -Ethernet67 68 Eth17/4 17 25000 -Ethernet68 69 Eth18/1 18 25000 -Ethernet69 70 Eth18/2 18 25000 -Ethernet70 71 Eth18/3 18 25000 -Ethernet71 72 Eth18/4 18 25000 -Ethernet72 73 Eth19/1 19 25000 -Ethernet73 74 Eth19/2 19 25000 -Ethernet74 75 Eth19/3 19 25000 -Ethernet75 76 Eth19/4 19 25000 -Ethernet76 77 Eth20/1 20 25000 -Ethernet77 78 Eth20/2 20 25000 -Ethernet78 79 Eth20/3 20 25000 -Ethernet79 80 Eth20/4 20 25000 -Ethernet80 81 Eth21/1 21 25000 -Ethernet81 82 Eth21/2 21 25000 -Ethernet82 83 Eth21/3 21 25000 -Ethernet83 84 Eth21/4 21 25000 -Ethernet84 85 Eth22/1 22 25000 -Ethernet85 86 Eth22/2 22 25000 -Ethernet86 87 Eth22/3 22 25000 -Ethernet87 88 Eth22/4 22 25000 -Ethernet88 89 Eth23/1 23 25000 -Ethernet89 90 Eth23/2 23 25000 -Ethernet90 91 Eth23/3 23 25000 -Ethernet91 92 Eth23/4 23 25000 -Ethernet92 93 Eth24/1 24 25000 -Ethernet93 94 Eth24/2 24 25000 -Ethernet94 95 Eth24/3 24 25000 -Ethernet95 96 Eth24/4 24 25000 -Ethernet96 97 Eth25/1 25 25000 -Ethernet97 98 Eth25/2 25 25000 -Ethernet98 99 Eth25/3 25 25000 -Ethernet99 100 Eth25/4 25 25000 -Ethernet100 101 Eth26/1 26 25000 -Ethernet101 102 Eth26/2 26 25000 -Ethernet102 103 Eth26/3 26 25000 -Ethernet103 104 Eth26/4 26 25000 -Ethernet104 105 Eth27/1 27 25000 -Ethernet105 106 Eth27/2 27 25000 -Ethernet106 107 Eth27/3 27 25000 -Ethernet107 108 Eth27/4 27 25000 -Ethernet108 109 Eth28/1 28 25000 -Ethernet109 110 Eth28/2 28 25000 -Ethernet110 111 Eth28/3 28 25000 -Ethernet111 112 Eth28/4 28 25000 -Ethernet112 113 Eth29/1 29 25000 -Ethernet113 114 Eth29/2 29 25000 -Ethernet114 115 Eth29/3 29 25000 -Ethernet115 116 Eth29/4 29 25000 -Ethernet116 117 Eth30/1 30 25000 -Ethernet117 118 Eth30/2 30 25000 -Ethernet118 119 Eth30/3 30 25000 -Ethernet119 120 Eth30/4 30 25000 -Ethernet120 121,122,123,124 Eth31 31 100000 -Ethernet124 
125,126,127,128 Eth32 32 100000 +Ethernet0 1,2,3,4 Eth1 1 100000 +Ethernet4 5,6,7,8 Eth2 2 100000 +Ethernet8 121,122,123,124 Eth3 3 100000 \ No newline at end of file diff --git a/inventories/group_vars/all/release_vector.yaml b/inventories/group_vars/all/release_vector.yaml index f801fffc..daec2675 100644 --- a/inventories/group_vars/all/release_vector.yaml +++ b/inventories/group_vars/all/release_vector.yaml @@ -1,5 +1,5 @@ --- -metal_stack_release_version: develop +metal_stack_release_version: v0.22.11 metal_stack_release_vectors: - url: oci://ghcr.io/metal-stack/releases:{{ metal_stack_release_version }} diff --git a/inventories/group_vars/leaves/main.yaml b/inventories/group_vars/leaves/main.yaml index e1d1e596..765c3bec 100644 --- a/inventories/group_vars/leaves/main.yaml +++ b/inventories/group_vars/leaves/main.yaml @@ -4,7 +4,7 @@ dhcp_listening_interfaces: metal_core_cidr_mask: 25 metal_core_spine_uplinks: - - Ethernet120 + - Ethernet8 sonic_config_docker_routing_config_mode: split-unified sonic_config_frr_render: false diff --git a/mini-lab.sonic.yaml b/mini-lab.sonic.yaml index a14ad7e0..c29fd11a 100644 --- a/mini-lab.sonic.yaml +++ b/mini-lab.sonic.yaml @@ -58,7 +58,7 @@ topology: mtu: 9000 - endpoints: ["leaf01:Ethernet0", "machine01:lan0"] - endpoints: ["leaf02:Ethernet0", "machine01:lan1"] - - endpoints: ["leaf01:Ethernet1", "machine02:lan0"] - - endpoints: ["leaf02:Ethernet1", "machine02:lan1"] - - endpoints: ["leaf01:Ethernet120", "exit:eth1"] - - endpoints: ["leaf02:Ethernet120", "exit:eth2"] + - endpoints: ["leaf01:Ethernet4", "machine02:lan0"] + - endpoints: ["leaf02:Ethernet4", "machine02:lan1"] + - endpoints: ["leaf01:Ethernet8", "exit:eth1"] + - endpoints: ["leaf02:Ethernet8", "exit:eth2"] diff --git a/roles/sonic/tasks/main.yaml b/roles/sonic/tasks/main.yaml index c8ee8460..5966245c 100644 --- a/roles/sonic/tasks/main.yaml +++ b/roles/sonic/tasks/main.yaml @@ -2,33 +2,33 @@ - name: Install frr-pythontools ansible.builtin.import_tasks: 
frr-reload.yaml -- name: Fix Network Performance - ansible.builtin.import_tasks: fix-network-performance.yaml +# - name: Fix Network Performance +# ansible.builtin.import_tasks: fix-network-performance.yaml -- name: Set lldp tx-interval to 10 - ansible.builtin.command: lldpcli configure lldp tx-interval 10 - retries: 10 - delay: 3 - register: result - until: result.rc == 0 +# - name: Set lldp tx-interval to 10 +# ansible.builtin.command: lldpcli configure lldp tx-interval 10 +# retries: 10 +# delay: 3 +# register: result +# until: result.rc == 0 -- name: Activate IP MASQUERADE on eth0 - ansible.builtin.iptables: - chain: POSTROUTING - jump: MASQUERADE - out_interface: eth0 - table: nat +# - name: Activate IP MASQUERADE on eth0 +# ansible.builtin.iptables: +# chain: POSTROUTING +# jump: MASQUERADE +# out_interface: eth0 +# table: nat -- name: Activate ipv4 forwarding on eth0 - ansible.posix.sysctl: - name: net.ipv4.conf.eth0.forwarding - reload: no - sysctl_set: yes - value: "1" +# - name: Activate ipv4 forwarding on eth0 +# ansible.posix.sysctl: +# name: net.ipv4.conf.eth0.forwarding +# reload: no +# sysctl_set: yes +# value: "1" -# We need to fill some values for the sonic-exporter (uses the STATE_DB) -- name: Mock sonic platform for kvm - ansible.builtin.import_tasks: mock-platform.yaml +# # We need to fill some values for the sonic-exporter (uses the STATE_DB) +# - name: Mock sonic platform for kvm +# ansible.builtin.import_tasks: mock-platform.yaml # ntp restarting for monitoring -> otherwise some NodeTimeOutOfSync error - name: restart chrony From 4f39ef8b8a7f419bcf84fc6d65b18172fb021d76 Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Sun, 24 May 2026 16:31:53 +0200 Subject: [PATCH 18/24] WIP fix: faster bootup Signed-off-by: Benjamin Ritter --- images/sonic/launch.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/images/sonic/launch.py b/images/sonic/launch.py index b99e19a6..526ed224 100755 --- a/images/sonic/launch.py +++ 
b/images/sonic/launch.py @@ -275,11 +275,11 @@ def read_until(marker: bytes, timeout: int) -> str: time.sleep(5) logger.info('Installing golden config_db.json') - send(b'sudo cp /golden_config_db.json /etc/sonic/config_db.json \n') + send(b'sudo config reload -f -y /golden_config_db.json \n') read_until(b'$ ', timeout=60) - logger.info('Rebooting SONiC to apply golden config') - send(b'sudo reboot\n') + #logger.info('Rebooting SONiC to apply golden config') + #send(b'sudo reboot\n') tn.close() From 52682a207f831d7046f508d46f0214a7ec0ebe59 Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Tue, 26 May 2026 07:42:58 +0200 Subject: [PATCH 19/24] WIP feat: wire up SONiC DHCP relay Signed-off-by: Benjamin Ritter --- inventories/group_vars/leaves/main.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/inventories/group_vars/leaves/main.yaml b/inventories/group_vars/leaves/main.yaml index 765c3bec..27828bd6 100644 --- a/inventories/group_vars/leaves/main.yaml +++ b/inventories/group_vars/leaves/main.yaml @@ -20,6 +20,10 @@ sonic_config_nameservers: "{{ router_nameservers }}" sonic_config_vlans: - id: 4000 ip: "{{ metal_core_cidr }}" + dhcp_servers: + - "{{ lo }}" + dhcp_relay_link_select: "enable" # not wired up in metal-roles/sonic-configdb-utils + dhcp_relay_src_intf: "Loopback0" # not wired up in metal-roles/sonic-configdb-utils sonic_config_vtep: enabled: true @@ -30,3 +34,10 @@ sonic_config_ntp: - 1.europe.pool.ntp.org - 2.europe.pool.ntp.org - 3.europe.pool.ntp.org + +sonic_config_features: + dhcp_relay: + enabled: true + auto_restart: true + +# TODO: config dhcpv4_relay add --source-interface Loopback0 --link-selection enable --dhcpv4-servers 10.0.1.1 Vlan4000 \ No newline at end of file From 03144b6a48b43cfbcf3f2ce1cdcc2ab8a96172d5 Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Thu, 28 May 2026 11:03:25 +0200 Subject: [PATCH 20/24] fix: go back to virtio-net-pci devices e1000 did not play well with vpp due to tx/rx queue count mismatch 
(vpp expected two, e1000 had one, second tx queue did overflow since it never got emptied by the virtual nic) Signed-off-by: Benjamin Ritter --- images/sonic/launch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/images/sonic/launch.py b/images/sonic/launch.py index 526ed224..c4fb0959 100755 --- a/images/sonic/launch.py +++ b/images/sonic/launch.py @@ -87,7 +87,7 @@ def start(self) -> None: with open(f'/sys/class/net/{iface}/address', 'r') as f: mac = f.read().strip() cmd.append('-device') - cmd.append(f'virtio-net-pci,netdev=hn{i},mac={mac}') + cmd.append(f'virtio-net-pci,netdev=hn{i},mac={mac},mq=off,host_mtu=9216') cmd.append(f'-netdev') cmd.append(f'tap,id=hn{i},ifname=tap{i},script=/mirror_tap_to_front_panel.sh,downscript=no') From 97c3a4c733ad8699d50c58bd6f8614ff83fff534 Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Thu, 28 May 2026 11:22:26 +0200 Subject: [PATCH 21/24] feat: move dhcp server off the sonic switch troubleshooting steps to isolate why the dhcp return path via dhcp relay does not work Signed-off-by: Benjamin Ritter --- Makefile | 6 ++++- deploy_partition.yaml | 41 ++++++++++++++++++++++------------- inventories/partition.yaml | 12 ++++++++-- mini-lab.sonic.yaml | 13 +++++++++++ scripts/build_ubuntu_image.sh | 5 +++++ 5 files changed, 59 insertions(+), 18 deletions(-) create mode 100755 scripts/build_ubuntu_image.sh diff --git a/Makefile b/Makefile index be25e84d..c5fcbb52 100644 --- a/Makefile +++ b/Makefile @@ -136,7 +136,11 @@ partition: partition-bake .PHONY: partition-bake partition-bake: external_network - docker pull $(MINI_LAB_VM_IMAGE) + docker pull $(MINI_LAB_VM_IMAGE) + if ! 
docker inspect vrnetlab/canonical_ubuntu:jammy; then \ + ./scripts/build_ubuntu_image.sh; \ + fi + ifeq ($(CI),true) docker pull $(MINI_LAB_SONIC_IMAGE) endif diff --git a/deploy_partition.yaml b/deploy_partition.yaml index 5909531d..28ff04fb 100644 --- a/deploy_partition.yaml +++ b/deploy_partition.yaml @@ -65,19 +65,30 @@ sysctl_set: yes value: "1" -- name: Deploy dhcp server on leaf01 (Community SONiC) - hosts: leaf01:!dell_sonic - pre_tasks: - - name: Temporary workaround for EOL debian bullseye backports repository (using archive.debian.org) - lineinfile: - path: /etc/apt/sources.list - search_string: deb [arch=amd64] http://deb.debian.org/debian/ bullseye-backports main contrib non-free - line: deb [arch=amd64] http://archive.debian.org/debian/ bullseye-backports main contrib non-free - roles: - - name: ansible-common - tags: always - - name: metal-roles/partition/roles/dhcp - tags: dhcp +- name: Install docker on management server + hosts: managementserver + become: true + tasks: + - name: Install docker + ansible.builtin.apt: + name: docker.io + state: present + update_cache: true + + - name: Enable and start docker + ansible.builtin.systemd: + name: docker + enabled: true + state: started + +# - name: Deploy dhcp server +# hosts: managementserver +# become: true +# roles: +# - name: ansible-common +# tags: always +# - name: metal-roles/partition/roles/dhcp +# tags: dhcp # FIXME: For some reason, the first docker pull always fails on dell_sonic but succeeds on second attempt. 
# Investigate the cause and remove this play @@ -91,8 +102,8 @@ pull: true failed_when: false -- name: Deploy pixiecore on leaf01 - hosts: leaf01 +- name: Deploy pixiecore + hosts: managementserver become: true roles: - name: ansible-common diff --git a/inventories/partition.yaml b/inventories/partition.yaml index fb3aa8a3..0aab7788 100644 --- a/inventories/partition.yaml +++ b/inventories/partition.yaml @@ -9,11 +9,19 @@ partition: children: dell_sonic: leaves: + managementserver: + vars: + pixie_server_ip: "{{ hostvars['managementserver'].ansible_host }}" + dhcp_server_ip: "10.0.0.21" dell_sonic: vars: ansible_group_priority: 10 +managementserver: + hosts: + managementserver: + leaves: hosts: leaf01: @@ -37,8 +45,8 @@ leaves: asn: 4200000012 metal_core_cidr: 10.0.1.128/{{ metal_core_cidr_mask }} vars: - pixie_server_ip: 10.0.1.1 - dhcp_server_ip: 10.0.1.1 + pixie_server_ip: "{{ hostvars['managementserver'].ansible_host }}" + dhcp_server_ip: 10.0.0.21 ansible_python_interpreter: /usr/bin/python ansible_user: root diff --git a/mini-lab.sonic.yaml b/mini-lab.sonic.yaml index c29fd11a..570788f2 100644 --- a/mini-lab.sonic.yaml +++ b/mini-lab.sonic.yaml @@ -27,6 +27,19 @@ topology: - sh /root/network.sh mini_lab_ext: kind: bridge + managementserver: + kind: generic_vm + image: vrnetlab/canonical_ubuntu:jammy + cpu: 1 + memory: 512Mb + env: + QEMU_MEMORY: 512 + kea: + kind: linux + network-mode: container:exit # join the network namespace of the exit node + image: docker.cloudsmith.io/isc/docker/kea-dhcp4:2.6.0 + binds: + - files/kea.json:/etc/kea/kea-dhcp4.conf leaf01: group: leaves image: ${MINI_LAB_SONIC_IMAGE} diff --git a/scripts/build_ubuntu_image.sh b/scripts/build_ubuntu_image.sh new file mode 100755 index 00000000..142a0ad1 --- /dev/null +++ b/scripts/build_ubuntu_image.sh @@ -0,0 +1,5 @@ +git clone --depth=1 https://github.com/srl-labs/vrnetlab +cd vrnetlab/ubuntu +make +git reset --hard +cd - \ No newline at end of file From 273a9da4b7dff7854d2ddb49eb7efd4f99375c1e
Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Thu, 28 May 2026 11:24:24 +0200 Subject: [PATCH 22/24] feat: use sonic-vpp master branch build includes https://github.com/sonic-net/sonic-platform-vpp/pull/212 and https://github.com/sonic-net/sonic-platform-vpp/pull/220 for troubleshooting reasons Signed-off-by: Benjamin Ritter --- images/sonic/Dockerfile | 4 ++-- images/sonic/{base-202511-vpp => base-vpp}/Dockerfile | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) rename images/sonic/{base-202511-vpp => base-vpp}/Dockerfile (81%) diff --git a/images/sonic/Dockerfile b/images/sonic/Dockerfile index af042be8..1c530758 100644 --- a/images/sonic/Dockerfile +++ b/images/sonic/Dockerfile @@ -14,8 +14,8 @@ RUN apt-get update && \ qemu-system-x86 \ telnet -COPY --from=ghcr.io/metal-stack/mini-lab-sonic-base:202511-vpp /sonic-vs.img /sonic-vs.img -COPY --from=ghcr.io/metal-stack/mini-lab-sonic-base:202511-vpp /frr-pythontools.deb /frr-pythontools.deb +COPY --from=ghcr.io/metal-stack/mini-lab-sonic-base:master-vpp /sonic-vs.img /sonic-vs.img +COPY --from=ghcr.io/metal-stack/mini-lab-sonic-base:master-vpp /frr-pythontools.deb /frr-pythontools.deb ENTRYPOINT ["/launch.py"] diff --git a/images/sonic/base-202511-vpp/Dockerfile b/images/sonic/base-vpp/Dockerfile similarity index 81% rename from images/sonic/base-202511-vpp/Dockerfile rename to images/sonic/base-vpp/Dockerfile index 32daf97e..277cda85 100644 --- a/images/sonic/base-202511-vpp/Dockerfile +++ b/images/sonic/base-vpp/Dockerfile @@ -1,7 +1,8 @@ # Check: https://sonic-build.azurewebsites.net/ui/sonic/pipelines -ARG SONIC_BASE_URL=https://sonic-build.azurewebsites.net/api/sonic/artifacts?branchName=202511&definitionId=2818&artifactName=sonic-buildimage.vpp +ARG SONIC_BRANCH=master +ARG SONIC_BASE_URL=https://sonic-build.azurewebsites.net/api/sonic/artifacts?branchName=${SONIC_BRANCH}&definitionId=2818&artifactName=sonic-buildimage.vpp ARG SONIC_IMG_URL=${SONIC_BASE_URL}&target=target%2Fsonic-vpp.img.gz 
-ARG FRR_RELOAD_URL=${SONIC_BASE_URL}&target=target%2Fdebs%2Fbookworm%2Ffrr-pythontools_10.4.1-sonic-0_all.deb +ARG FRR_RELOAD_URL=${SONIC_BASE_URL}&target=target%2Fdebs%2Fbookworm%2Ffrr-pythontools_10.5.4-sonic-0_all.deb FROM docker.io/library/busybox:stable AS download From 92553f0be82f42d9850b37e67cf786e94181d629 Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Thu, 28 May 2026 13:09:39 +0200 Subject: [PATCH 23/24] feat: clean up sonic launch.py Removed hacks that just about worked in sonic-vs but broke in sonic-vpp: - sonic-vpp requires first-time init to generate VPP config files from the hwsku lanemap and port config. If skipped, the syncd container, in which VPP runs, will crash immediately - /etc/sonic/sonic-environment is generated on first boot from /usr/share/sonic/device/x86_64-kvm_x86_64-r0/default_sku, so writing it serves no purpose now that first boot is re-enabled - switch to telnetlib3, because telnetlib is deprecated and was removed from the standard library in Python 3.13 Signed-off-by: Benjamin Ritter --- images/sonic/Dockerfile | 4 +++ images/sonic/launch.py | 64 +++++++---------------------------- images/sonic/requirements.txt | 1 + 3 files changed, 17 insertions(+), 52 deletions(-) create mode 100644 images/sonic/requirements.txt diff --git a/images/sonic/Dockerfile b/images/sonic/Dockerfile index 1c530758..cafff02e 100644 --- a/images/sonic/Dockerfile +++ b/images/sonic/Dockerfile @@ -9,11 +9,15 @@ RUN apt-get update && \ iproute2 \ linux-image-cloud-amd64 \ python3 \ + python3-pip \ python3-guestfs \ python3-scapy \ qemu-system-x86 \ telnet +COPY requirements.txt / +RUN pip install --break-system-packages -r requirements.txt + COPY --from=ghcr.io/metal-stack/mini-lab-sonic-base:master-vpp /sonic-vs.img /sonic-vs.img COPY --from=ghcr.io/metal-stack/mini-lab-sonic-base:master-vpp /frr-pythontools.deb /frr-pythontools.deb diff --git a/images/sonic/launch.py b/images/sonic/launch.py index c4fb0959..9fd25a24 100755 --- a/images/sonic/launch.py +++ b/images/sonic/launch.py @@ -10,7 +10,7 @@ import struct
import subprocess import sys -import telnetlib +import telnetlib3 import time from typing import Callable @@ -99,72 +99,35 @@ def wait(self) -> None: def initial_configuration(g: GuestFS, hwsku: str) -> None: image = g.glob_expand('/disk/image-*')[0] - - # g.rm(image + 'platform/firsttime') - - systemd_system = image + 'rw/etc/systemd/system/' - sonic_target_wants = systemd_system + 'sonic.target.wants/' - g.mkdir_p(sonic_target_wants) - # Copy frr-pythontools into the image + g.mkdir_p(image + 'rw/') g.copy_in(localpath='/frr-pythontools.deb', remotedir=image + 'rw/') - # Workaround: Speed up lldp startup by remove hardcoded wait of 90 seconds - # g.ln_s(linkname=systemd_system + 'aaastatsd.timer', target='/dev/null') # Radius - # g.ln_s(linkname=systemd_system + 'featured.timer', target='/dev/null') # Feature handling not necessary - # g.ln_s(linkname=systemd_system + 'hostcfgd.timer', target='/dev/null') # After boot Host configuration - # g.ln_s(linkname=systemd_system + 'rasdaemon.timer', target='/dev/null') # After boot Host configuration - # g.ln_s(linkname=systemd_system + 'tacacs-config.timer', target='/dev/null') # After boot Host configuration - # Started by featured - # g.ln_s(linkname=sonic_target_wants + 'lldp.service', target='/lib/systemd/system/lldp.service') - # g.ln_s(linkname=systemd_system + 'pmon.service', target='/lib/systemd/system/pmon.service') - # g.ln_s(linkname=sonic_target_wants + 'pmon.service', target='/lib/systemd/system/pmon.service') - - # Workaround: Only useful for BackEndToRRouter - # g.ln_s(linkname=systemd_system + 'backend-acl.service', target='/dev/null') - - # Workaround: We don't need LACP - # g.ln_s(linkname=systemd_system + 'teamd.service', target='/dev/null') - # Workaround: Python module sonic_platform not present on vs images + systemd_system = image + 'rw/etc/systemd/system/' + g.mkdir_p(systemd_system) g.ln_s(linkname=systemd_system + 'system-health.service', target='/dev/null') g.ln_s(linkname=systemd_system + 
'watchdog-control.service', target='/dev/null') sonic_share = image + 'rw/usr/share/sonic/' platform_dir = image + 'rw' + VS_DEVICES_PATH - hwsku_dir_rw = image + 'rw' + VS_DEVICES_PATH + hwsku g.mkdir_p(platform_dir) g.write(path=platform_dir + '/default_sku', content=f'{hwsku} empty'.encode('utf-8')) # The lanemap.ini file is used by the virtual switch image to assign front panels to the Linux interfaces ethX. # This assignment will later also be used by the script mirror_tap_to_front_panel.sh. - # g.download(remotefilename=hwsku_dir + '/port_config.ini', filename='/port_config.ini') - # g.download(remotefilename=hwsku_dir + '/lanemap.ini', filename='/lanemap.ini') + # Dynamic breakouts are not implemented in sonic-vs/sonic-vpp ifaces = get_ethernet_interfaces() - # The port_config.ini file contains the assignment of front panels to lanes. port_config = parse_port_config() - # The lanemap.ini file is used by the virtual switch image to assign front panels to the Linux interfaces ethX. - # This assignment will later also be used by the script mirror_tap_to_front_panel.sh. 
lanemap = create_lanemap(port_config, ifaces) with open('/lanemap.ini', 'w') as f: f.write('\n'.join(lanemap)) + hwsku_dir_rw = image + 'rw' + VS_DEVICES_PATH + hwsku g.mkdir_p(hwsku_dir_rw) g.copy_in(localpath='/lanemap.ini', remotedir=hwsku_dir_rw) g.copy_in(localpath='/port_config.ini', remotedir=hwsku_dir_rw) - etc_sonic = image + 'rw/etc/sonic/' - g.mkdir_p(etc_sonic) - # sonic_version = image.removeprefix('/image-').removesuffix('/') - # sonic_environment = f''' - # SONIC_VERSION=${sonic_version} - # PLATFORM=x86_64-kvm_x86_64-r0 - # HWSKU={hwsku} - # DEVICE_TYPE=LeafRouter - # ASIC_TYPE=vpp - # '''.encode('utf-8') - # g.write(path=etc_sonic + 'sonic-environment', content=sonic_environment) - config_db = create_config_db(hwsku) ports = {} for iface in ifaces: @@ -176,7 +139,7 @@ def initial_configuration(g: GuestFS, hwsku: str) -> None: config_db['PORT'] = ports config_db_json = json.dumps(config_db, indent=4, sort_keys=True) - g.write(path=image + 'rw/golden_config_db.json', content=config_db_json.encode('utf-8')) + g.write(path=image + 'rw/init_config_db.json', content=config_db_json.encode('utf-8')) if os.path.exists('/authorized_keys'): g.mkdir_p(image + 'rw/root/.ssh') @@ -215,7 +178,7 @@ def main(): logger.info('Start QEMU') vm.start() - apply_golden_config_via_serial(logger) + apply_init_config_via_serial(logger) # SONiC will start sending LLDP packets after PortConfigDone is set in APPL database logger.info('Wait until eth0 has an IPv4 address') @@ -233,11 +196,11 @@ def handle_exit(signal, frame): sys.exit(0) -def apply_golden_config_via_serial(logger) -> None: +def apply_init_config_via_serial(logger) -> None: logger.info('Connecting to SONiC serial console on 127.0.0.1:5000') while True: try: - tn = telnetlib.Telnet('127.0.0.1', 5000, timeout=600) + tn = telnetlib3.Telnet('127.0.0.1', 5000, timeout=600) break except ConnectionRefusedError: time.sleep(1) @@ -274,13 +237,10 @@ def read_until(marker: bytes, timeout: int) -> str: break time.sleep(5) - 
logger.info('Installing golden config_db.json') - send(b'sudo config reload -f -y /golden_config_db.json \n') + logger.info('Installing intial config_db.json') + send(b'sudo config reload -f -y /init_config_db.json \n') read_until(b'$ ', timeout=60) - #logger.info('Rebooting SONiC to apply golden config') - #send(b'sudo reboot\n') - tn.close() diff --git a/images/sonic/requirements.txt b/images/sonic/requirements.txt new file mode 100644 index 00000000..4973e51b --- /dev/null +++ b/images/sonic/requirements.txt @@ -0,0 +1 @@ +telnetlib3~=4.0.4 \ No newline at end of file From 8446f3cf1c0093135a72da637c4a630ddf5f1e7b Mon Sep 17 00:00:00 2001 From: Benjamin Ritter Date: Fri, 29 May 2026 15:30:54 +0200 Subject: [PATCH 24/24] fix: add more documentation Signed-off-by: Benjamin Ritter --- images/sonic/README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/images/sonic/README.md b/images/sonic/README.md index bdec0df1..ecc5d770 100644 --- a/images/sonic/README.md +++ b/images/sonic/README.md @@ -5,4 +5,8 @@ We use sonic-vpp to emulate SONiC switches. It is running in kvm inside a contai # Configuration knobs -You can edit the port_config.ini to add more ports. Keep the number as low as possible. It will put less strain on your system because it will spawn fewer VPP worker threads. You will have to set up the switch from scratch afterwards, since VPP will generate some configuration on first startup. \ No newline at end of file +You can edit the port_config.ini to add more ports. + + +# Boot process +The switch boots with a default first-boot configuration. This is necessary because the first boot generates configuration that VPP needs. After a short while, the configuration generated in launch.py is injected and SONiC is reloaded. Once the new configuration is loaded, the container is marked ready. Check the docker logs for errors if bootup takes more than a minute. \ No newline at end of file