From 45f25c13449a32d730fba2b1dadf3f9ccf3bac36 Mon Sep 17 00:00:00 2001 From: TaprootFreak <142087526+TaprootFreak@users.noreply.github.com> Date: Mon, 1 Jun 2026 11:34:08 +0200 Subject: [PATCH] fix: reclassify Apollo network-error log severity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop noise from the recoverable-retry path: - Use logger.warn (not logger.error) when the primary indexer fails and the fallback is about to be engaged; reserve logger.error for cases where no fallback is configured or the fallback itself failed. - Drop the {message, name, stack} metadata payload on network errors (and the equivalent {message, locations, path} payload on GraphQL errors) — the Winston formatter in api.main.ts uses only info.message, so it never reached Loki anyway. Inline the message into the log line for actual signal. - Collapse the redundant info-level breadcrumbs ('Network error detected' / '503 Service Unavailable') into the single warn line. - Refresh the fallback window after expiry instead of arming it once per process lifetime, so a sustained outage keeps the fallback active. Eliminates ~1500/day of error-level lines from dEURO PRD logs that were generated by transient indexer hiccups successfully absorbed by retry. --- api.apollo.config.ts | 55 +++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/api.apollo.config.ts b/api.apollo.config.ts index 9f7198b..cab2628 100644 --- a/api.apollo.config.ts +++ b/api.apollo.config.ts @@ -6,53 +6,47 @@ import { CONFIG } from './api.config'; const logger = new Logger('ApiApolloConfig'); +const FALLBACK_WINDOW_MS = 10 * 60 * 1000; let fallbackUntil: number | null = null; function getIndexerUrl(): string { - return fallbackUntil && Date.now() < fallbackUntil - ? CONFIG.indexerFallback - : CONFIG.indexer; + return fallbackUntil && Date.now() < fallbackUntil ? 
CONFIG.indexerFallback : CONFIG.indexer; } function activateFallback(): void { - if (!fallbackUntil) { - fallbackUntil = Date.now() + 10 * 60 * 1000; + // Re-arm when the previous window has expired so a sustained outage + // keeps the fallback engaged instead of silently flipping back to primary. + if ((!fallbackUntil || Date.now() >= fallbackUntil) && CONFIG.indexerFallback) { + fallbackUntil = Date.now() + FALLBACK_WINDOW_MS; logger.log(`[Ponder] Switching to fallback for 10min: ${CONFIG.indexerFallback}`); } } const errorLink = onError(({ graphQLErrors, networkError, operation, forward }) => { + const opName = operation?.operationName || 'unknown'; + if (graphQLErrors) { graphQLErrors.forEach((error) => { - logger.error(`[GraphQL error in operation: ${operation?.operationName || 'unknown'}]`, { - message: error.message, - locations: error.locations, - path: error.path, - }); + logger.error(`[GraphQL error in operation: ${opName}] ${error.message}`); }); } - - if (networkError) { - logger.error(`[Network error in operation: ${operation?.operationName || 'unknown'}]`, { - message: networkError.message, - name: networkError.name, - stack: networkError.stack, - }); - if (getIndexerUrl() === CONFIG.indexer) { - const is503 = - (networkError as any)?.response?.status === 503 || - (networkError as any)?.statusCode === 503 || - (networkError as any)?.result?.status === 503; + if (networkError) { + const hasFallback = !!CONFIG.indexerFallback; + const onFallback = getIndexerUrl() !== CONFIG.indexer; + const willRecover = hasFallback && !onFallback; + const msg = `[Network error in operation: ${opName}] ${networkError.message}`; - if (is503) { - logger.log('[Ponder] 503 Service Unavailable - Ponder is syncing, switching to fallback'); - } else { - logger.log('[Ponder] Network error detected, activating fallback'); - } + if (willRecover) { + // Primary failed, fallback hasn't been engaged this window — log + // at warn so transparent retries don't inflate error-rate 
panels. + logger.warn(msg); activateFallback(); return forward(operation); } + + // No fallback configured, or already on fallback — nothing more to try. + logger.error(msg); } }); @@ -67,10 +61,9 @@ const httpLink = createHttpLink({ return fetch(uri, { ...options, signal: controller.signal, - }) - .finally(() => { - clearTimeout(timeout); - }); + }).finally(() => { + clearTimeout(timeout); + }); }, });