From 8b8f67bfd76c5e75941891b75a96f65d103a46d2 Mon Sep 17 00:00:00 2001 From: pythcoiner Date: Wed, 8 Apr 2026 02:32:40 -0400 Subject: [PATCH] bip39: scrub vector registers after mnemonic decoding test_clear finds plaintext secret material ~11KB deep on the stack after bip39_mnemonic_validate returns. The residue comes from XMM registers xsaved by the dynamic linker on subsequent PLT calls, not from a source-level local buffer. Adding a zeroed stack scratch in bip39_mnemonic_to_bytes forces glibc memset to issue SSE/AVX zero stores that clobber the vector registers before the function returns, so later xsave operations no longer spill secret bytes. --- src/bip39.c | 1 + src/ctest/test_clear.c | 67 ++++++++++++++++++++++++++++++++++++++++++ src/internal.h | 55 ++++++++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+) diff --git a/src/bip39.c b/src/bip39.c index 73d30450f..527258ada 100644 --- a/src/bip39.c +++ b/src/bip39.c @@ -196,6 +196,7 @@ int bip39_mnemonic_to_bytes(const struct words *w, const char *mnemonic, } wally_clear(tmp_bytes, sizeof(tmp_bytes)); + wally_scrub_vec_regs(); if (!ret && written) *written = tmp_len; return ret; diff --git a/src/ctest/test_clear.c b/src/ctest/test_clear.c index 0432396c5..bcfda290a 100644 --- a/src/ctest/test_clear.c +++ b/src/ctest/test_clear.c @@ -5,6 +5,7 @@ #endif #include #include +#include #include #include #include @@ -134,6 +135,60 @@ static bool test_bip39(void) return true; } +/* Sentinel for non-bip39 secret-handling tests: 32 bytes of 0xa5. + * Distinct from BIP39_SECRET so test_search-style false positives can't + * mask a real leak. */ +static const unsigned char SECRET32[32] = { + 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, + 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, + 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, + 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5 +}; + +static bool test_bip32_from_seed(void) +{ + /* Derive an extended key from an all-0xa5 seed. The seed bytes + * pass through libc memcpy when stored into the ext_key struct + * and the HMAC-SHA512 input. */ + struct ext_key key; + if (bip32_key_from_seed(SECRET32, sizeof(SECRET32), + BIP32_VER_MAIN_PRIVATE, 0, &key)) + return false; + return !in_stack("bip32_key_from_seed", SECRET32, sizeof(SECRET32)); +} + +static bool test_ec_private_key_verify(void) +{ + /* The privkey is fed through libc on its way to libsecp. */ + if (wally_ec_private_key_verify(SECRET32, sizeof(SECRET32))) + return false; + return !in_stack("wally_ec_private_key_verify", SECRET32, sizeof(SECRET32)); +} + +static bool test_ec_sig_from_bytes(void) +{ + static unsigned char msg[32] = { 0x11 }; + static unsigned char sig[EC_SIGNATURE_LEN]; + /* Privkey is the secret. */ + if (wally_ec_sig_from_bytes(SECRET32, sizeof(SECRET32), + msg, sizeof(msg), + EC_FLAG_ECDSA, + sig, sizeof(sig))) + return false; + return !in_stack("wally_ec_sig_from_bytes", SECRET32, sizeof(SECRET32)); +} + +static bool test_hmac_sha256(void) +{ + static unsigned char out[32]; + static unsigned char msg[32] = { 0x22 }; + if (wally_hmac_sha256(SECRET32, sizeof(SECRET32), + msg, sizeof(msg), + out, sizeof(out))) + return false; + return !in_stack("wally_hmac_sha256", SECRET32, sizeof(SECRET32)); +} + static void *run_tests(void *passed_stack) { if (passed_stack != gstack) { @@ -151,6 +206,18 @@ static void *run_tests(void *passed_stack) ASAN_UNPOISON_MEMORY_REGION(passed_stack, PTHREAD_STACK_MIN); RUN(test_bip39); + ASAN_UNPOISON_MEMORY_REGION(passed_stack, PTHREAD_STACK_MIN); + RUN(test_bip32_from_seed); + + ASAN_UNPOISON_MEMORY_REGION(passed_stack, PTHREAD_STACK_MIN); + RUN(test_ec_private_key_verify); + + ASAN_UNPOISON_MEMORY_REGION(passed_stack, PTHREAD_STACK_MIN); + RUN(test_ec_sig_from_bytes); + + ASAN_UNPOISON_MEMORY_REGION(passed_stack, PTHREAD_STACK_MIN); + RUN(test_hmac_sha256); + return NULL; } diff --git a/src/internal.h b/src/internal.h index 70a93d013..30d991825 100644 --- a/src/internal.h +++ b/src/internal.h @@ -59,6 +59,61 @@ int keypair_xonly_tweak_add(secp256k1_keypair *keypair, const unsigned char *twe void wally_clear(void *p, size_t len); + +/* Scrub SSE/AVX vector registers that may still hold copies of a secret. + * + * On x86, glibc's optimised memcpy/memset/memcmp use SSE/AVX instructions + * for buffers of ~16 bytes or larger. After such a call returns, the + * vector registers (xmm0..xmm15 / ymm0..ymm15) still contain bytes from + * the secret. The dynamic linker's _dl_runtime_resolve_xsavec spills all + * vector registers to a stack-allocated save area on the next PLT call, + * so the secret bytes become readable from the caller's stack frame. + * + * This helper zeroes those registers via a single VZEROALL (or per-register + * PXOR on pre-AVX) so the linker's xsave area receives all-zeros instead. + * The "memory" clobber prevents the optimiser from dropping or reordering + * the scrub. Call this at the exit point of every function that touches + * secret bytes via libc memcpy/memset/memcmp. + * + * Caveats: + * - x86-only. On non-x86 architectures (ARM/AArch64/RISC-V) the helper + * compiles to nothing. Those platforms have analogous vector-register + * save mechanisms; if test_clear regresses on them, a per-arch scrub + * is needed here. + * - Does not address secrets leaked through heap allocations, side + * channels, or pages swapped to disk. + */ +static inline void wally_scrub_vec_regs(void) +{ +#if defined(__x86_64__) || defined(__i386__) +# if defined(__AVX__) + __asm__ __volatile__("vzeroall" ::: "memory"); +# else + __asm__ __volatile__( + "pxor %%xmm0, %%xmm0\n\t" + "pxor %%xmm1, %%xmm1\n\t" + "pxor %%xmm2, %%xmm2\n\t" + "pxor %%xmm3, %%xmm3\n\t" + "pxor %%xmm4, %%xmm4\n\t" + "pxor %%xmm5, %%xmm5\n\t" + "pxor %%xmm6, %%xmm6\n\t" + "pxor %%xmm7, %%xmm7\n\t" +# ifdef __x86_64__ + "pxor %%xmm8, %%xmm8\n\t" + "pxor %%xmm9, %%xmm9\n\t" + "pxor %%xmm10, %%xmm10\n\t" + "pxor %%xmm11, %%xmm11\n\t" + "pxor %%xmm12, %%xmm12\n\t" + "pxor %%xmm13, %%xmm13\n\t" + "pxor %%xmm14, %%xmm14\n\t" + "pxor %%xmm15, %%xmm15\n\t" +# endif + ::: "memory" + ); +# endif +#endif +} + void wally_clear_2(void *p, size_t len, void *p2, size_t len2); void wally_clear_3(void *p, size_t len, void *p2, size_t len2, void *p3, size_t len3);