diff --git a/CHANGELOG.md b/CHANGELOG.md index ea38262f2..da6682b59 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ All notable changes to this project will be documented in this file. - Add `tools/stress/device-observer/`, a per-device sampling tool for the GRE Tunnel Capacity Study. Each tick issues five eAPI `show` commands, scrapes the doublezero-agent Prometheus endpoint, polls EOS syslog via `show logging last` with cross-tick dedupe, tails the orchestrator's agent log for abort-trigger patterns, and tails the orchestrator's runlog to compute provision/deprovision durations. The abort decider is stubbed. - Telemetry - Drop the redundant `ip-msdp-sa-cache` kind from the state-ingest server's default state-collect command list. `show ip msdp sa-cache rejected` already returns the full SA cache (accepted SAs in the `acceptedSaMsg` array plus any rejected SAs in `rejectedSaMsg`), so the bare `show ip msdp sa-cache` collection is redundant — devices were running both commands per tick and uploading the same accepted-SA data twice. The `ip-msdp-sa-cache-rejected` kind is retained. + - Add `ip-route` (`show ip route vrf all`) and `ip-pim-neighbor` (`show ip pim neighbor`) to the state-ingest server's default state-collect command list. `ip-route` exposes admin distance, programmed flags (`hardwareProgrammed`, `kernelProgrammed`), and route action — fields the gNMI AFT path doesn't surface. `ip-pim-neighbor` unblocks the multicast operator dashboard's PIM neighbor health panel. - Telemetry (geoprobe) - Retry transient `bind: invalid argument` failures when allocating per-probe UDP sockets in `Publisher.AddProbe`, matching the existing retry-on-bind pattern in `Pinger`. The shared retry helper is lifted into `retry.go` so the publisher and pinger paths use the same exponential-backoff logic. Fixes intermittent `TestPublisher_RemoveProbe`/`TestPublisher_AddProbe` CI flakes caused by concurrent ephemeral-port allocation ([#3765](https://github.com/malbeclabs/doublezero/issues/3765)) diff --git a/telemetry/state-ingest/pkg/server/config.go b/telemetry/state-ingest/pkg/server/config.go index 6db35b2ad..e5deace74 100644 --- a/telemetry/state-ingest/pkg/server/config.go +++ b/telemetry/state-ingest/pkg/server/config.go @@ -23,8 +23,10 @@ var ( defaultStateToCollectShowCommands = map[string]string{ "snmp-mib-ifmib-ifindex": "show snmp mib ifmib ifindex", "isis-database-detail": "show isis database detail", + "ip-route": "show ip route vrf all", "ip-mroute": "show ip mroute", "ip-mroute-count": "show ip mroute count", + "ip-pim-neighbor": "show ip pim neighbor", "ip-msdp-summary": "show ip msdp summary", "ip-msdp-pim-sa-cache": "show ip msdp pim sa-cache", "ip-msdp-sa-cache-rejected": "show ip msdp sa-cache rejected", diff --git a/telemetry/state-ingest/pkg/server/handler_test.go b/telemetry/state-ingest/pkg/server/handler_test.go index f1e52f6d5..899c6c0db 100644 --- a/telemetry/state-ingest/pkg/server/handler_test.go +++ b/telemetry/state-ingest/pkg/server/handler_test.go @@ -860,7 +860,7 @@ func TestTelemetry_StateIngest_Handler_StateToCollect_UsesDefaultShowCommandsAnd var resp types.StateToCollectResponse require.NoError(t, json.Unmarshal(rr.Body.Bytes(), &resp)) - require.Len(t, resp.ShowCommands, 7) + require.Len(t, resp.ShowCommands, 9) require.Len(t, resp.Custom, 1) // Convert to map for order-independent comparison (map iteration is non-deterministic) @@ -871,8 +871,10 @@ func TestTelemetry_StateIngest_Handler_StateToCollect_UsesDefaultShowCommandsAnd require.Equal(t, map[string]string{ "snmp-mib-ifmib-ifindex": "show snmp mib ifmib ifindex", "isis-database-detail": "show isis database detail", + "ip-route": "show ip route vrf all", "ip-mroute": "show ip mroute", "ip-mroute-count": "show ip mroute count", + "ip-pim-neighbor": "show ip pim neighbor", "ip-msdp-summary": "show ip msdp summary", "ip-msdp-pim-sa-cache": "show ip msdp pim sa-cache", "ip-msdp-sa-cache-rejected": "show ip msdp sa-cache rejected",