From d2055564690757583317376473d44b00f2aaebf0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 20 May 2026 11:51:18 +0000 Subject: [PATCH 1/4] =?UTF-8?q?=F0=9F=94=A7=20update=20(core):=20inject=20?= =?UTF-8?q?sender=20userId=20into=20LLM=20messages=20for=20owner=20identit?= =?UTF-8?q?y?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agent-Logs-Url: https://github.com/warengonzaga/tinyclaw/sessions/86909675-1107-446e-8ee8-abb371b60b2c Co-authored-by: warengonzaga <15052701+warengonzaga@users.noreply.github.com> --- packages/core/src/loop.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/core/src/loop.ts b/packages/core/src/loop.ts index cb804c0..2e18bcf 100644 --- a/packages/core/src/loop.ts +++ b/packages/core/src/loop.ts @@ -873,9 +873,13 @@ export async function agentLoop( const sanitizedMessage = sanitizeMessage(message, userId, context.ownerId); // Build messages + // Inject sender identity before the user message so the LLM can correctly + // apply owner-vs-friend rules. Without this, the LLM has no way to know + // who is sending the current message and may misidentify the owner. 
const messages: Message[] = [ { role: 'system', content: systemPrompt }, ...history, + { role: 'system', content: `[Current message sender: userId = \`${userId}\`]` }, { role: 'user', content: sanitizedMessage }, ]; From 632a8534f679c4a5edada55e55506f5f1594ec71 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 20 May 2026 12:03:10 +0000 Subject: [PATCH 2/4] =?UTF-8?q?=E2=9A=99=EF=B8=8F=20setup:=20initial=20pla?= =?UTF-8?q?n=20for=20review=20feedback=20on=20loop.ts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agent-Logs-Url: https://github.com/warengonzaga/tinyclaw/sessions/0fae7cf1-eeeb-4e1c-8a9f-f108bd1776c1 Co-authored-by: warengonzaga <15052701+warengonzaga@users.noreply.github.com> --- bun.lock | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/bun.lock b/bun.lock index a52b426..1ffc2e3 100644 --- a/bun.lock +++ b/bun.lock @@ -34,7 +34,7 @@ "@tinyclaw/logger": "workspace:*", "@tinyclaw/types": "workspace:*", "@wgtechlabs/config-engine": "^0.1.0", - "zod": "^3.24.0", + "zod": "^4.4.3", }, }, "packages/core": { @@ -282,7 +282,7 @@ "@tinyclaw/logger": "workspace:*", "@tinyclaw/types": "workspace:*", "dompurify": "^3.2.6", - "marked": "^17.0.3", + "marked": "^18.0.0", "qrcode": "^1.5.4", "svelte": "^5.20.1", }, @@ -715,7 +715,7 @@ "magic-string": ["magic-string@0.30.21", "", { "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.5" } }, "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ=="], - "marked": ["marked@17.0.3", "", { "bin": { "marked": "bin/marked.js" } }, "sha512-jt1v2ObpyOKR8p4XaUJVk3YWRJ5n+i4+rjQopxvV32rSndTJXvIzuUdWWIy/1pFQMkQmvTXawzDNqOH/CUmx6A=="], + "marked": ["marked@18.0.4", "", { "bin": { "marked": "bin/marked.js" } }, "sha512-c/BTaKzg0G6ezQx97DAkYU7k0HM6ys0FqYeKBL6hlBByZwy+ycA1+f0vDdjMHKKeEjdgkx0GOv9Il6D+85cOqA=="], "nanoid": ["nanoid@3.3.11", "", { "bin": { 
"nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="], @@ -795,7 +795,7 @@ "zimmerframe": ["zimmerframe@1.1.4", "", {}, "sha512-B58NGBEoc8Y9MWWCQGl/gq9xBCe4IiKM0a2x7GZdQKOW5Exr8S1W24J6OgM1njK8xCRGvAJIL/MxXHf6SkmQKQ=="], - "zod": ["zod@3.25.76", "", {}, "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ=="], + "zod": ["zod@4.4.3", "", {}, "sha512-ytENFjIJFl2UwYglde2jchW2Hwm4GJFLDiSXWdTrJQBIN9Fcyp7n4DhxJEiWNAJMV1/BqWfW/kkg71UDcHJyTQ=="], "@discordjs/rest/@discordjs/collection": ["@discordjs/collection@2.1.1", "", {}, "sha512-LiSusze9Tc7qF03sLCujF5iZp7K+vRNEDBZ86FT9aQAv3vxMLihUvKvpsCWiQ2DJq1tVckopKm1rxomgNUc9hg=="], @@ -825,6 +825,8 @@ "@types/ws/@types/node": ["@types/node@22.19.8", "", { "dependencies": { "undici-types": "~6.21.0" } }, "sha512-ebO/Yl+EAvVe8DnMfi+iaAyIqYdK0q/q0y0rw82INWEKJOBe6b/P3YWE8NW7oOlF/nXFNrHwhARrN/hdgDkraA=="], + "@wgtechlabs/config-engine/zod": ["zod@3.25.76", "", {}, "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ=="], + "tinyglobby/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="], "@sveltejs/vite-plugin-svelte-inspector/vite/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="], From 0a7b06a8eff2b4a3989dc3bf8ba91aead39370b5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 20 May 2026 12:05:35 +0000 Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=94=A7=20update=20(loop):=20fix=20sen?= =?UTF-8?q?der-identity=20injection=20=E2=80=94=20security,=20placement,?= =?UTF-8?q?=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agent-Logs-Url: 
https://github.com/warengonzaga/tinyclaw/sessions/0fae7cf1-eeeb-4e1c-8a9f-f108bd1776c1 Co-authored-by: warengonzaga <15052701+warengonzaga@users.noreply.github.com> --- packages/core/src/loop.ts | 22 +++++++++++++++++----- packages/core/tests/loop.test.ts | 10 ++++++++++ 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/packages/core/src/loop.ts b/packages/core/src/loop.ts index 2e18bcf..10e5c74 100644 --- a/packages/core/src/loop.ts +++ b/packages/core/src/loop.ts @@ -130,6 +130,16 @@ function sanitizeMessage(text: string, userId: string, ownerId: string | undefin return text; } +/** + * Strip characters from a userId that could break the sender-identity marker + * format or serve as a prompt-injection vector when embedded in a system prompt. + * Backticks, square brackets, and newlines are removed so a crafted userId + * cannot escape the marker or inject additional instructions. + */ +function sanitizeUserIdForPrompt(userId: string): string { + return userId.replace(/[`\[\]\n\r]/g, ''); +} + // --------------------------------------------------------------------------- // Shield — in-memory pending approvals (conversational flow) // --------------------------------------------------------------------------- @@ -873,13 +883,15 @@ export async function agentLoop( const sanitizedMessage = sanitizeMessage(message, userId, context.ownerId); // Build messages - // Inject sender identity before the user message so the LLM can correctly - // apply owner-vs-friend rules. Without this, the LLM has no way to know - // who is sending the current message and may misidentify the owner. + // Embed sender identity directly in the system prompt so the LLM can + // correctly apply owner-vs-friend rules for this entire turn (including any + // follow-up tool-result messages). Placing it in a separate system message + // would leave subsequent tool-follow-up user messages without a sender marker + // and would shift message indices expected by tests. 
+ const senderIdentityPrompt = `\n\n[Current message sender: userId = \`${sanitizeUserIdForPrompt(userId)}\`]`; const messages: Message[] = [ - { role: 'system', content: systemPrompt }, + { role: 'system', content: systemPrompt + senderIdentityPrompt }, ...history, - { role: 'system', content: `[Current message sender: userId = \`${userId}\`]` }, { role: 'user', content: sanitizedMessage }, ]; diff --git a/packages/core/tests/loop.test.ts b/packages/core/tests/loop.test.ts index 60ccb48..9398900 100644 --- a/packages/core/tests/loop.test.ts +++ b/packages/core/tests/loop.test.ts @@ -52,6 +52,11 @@ describe('agentLoop', () => { expect(systemPrompt).toContain('## Plugin Setup Guidance'); expect(systemPrompt).toContain('For Discord, explain that they need to create an application'); expect(systemPrompt).toContain('do not pretend the plugin is configured'); + // Sender-identity must be embedded in the system prompt so the LLM always + // knows who sent the message — verify the marker is present and the very + // next message is the user turn (no separate system message in between). + expect(systemPrompt).toContain('[Current message sender: userId = `web:test`]'); + expect(firstPrompt.at(-1)?.role).toBe('user'); }); test('turns structured write tool calls into a natural final reply', async () => { @@ -113,6 +118,11 @@ describe('agentLoop', () => { expect(result).toBe('I refreshed the configuration. Please restart Tiny Claw when convenient.'); expect(prompts).toHaveLength(2); + // Sender-identity is embedded in the system prompt (prompts[0][0]) and + // immediately followed by the user message — no separate system entry. 
+ expect(prompts[0]?.[0]?.role).toBe('system'); + expect(prompts[0]?.[0]?.content).toContain('[Current message sender: userId = `web:test`]'); + expect(prompts[0]?.at(-1)?.role).toBe('user'); expect(prompts[1]?.at(-2)?.role).toBe('assistant'); expect(prompts[1]?.at(-2)?.content).toContain('I used these tools and the results were:'); expect(prompts[1]?.at(-2)?.content).toContain('Restart required: refresh config'); From 5ef6a09c32f32ea8e145cd0618e7d5dbd7daf651 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 26 May 2026 12:53:03 +0000 Subject: [PATCH 4/4] docs(core): improve sanitizeUserIdForPrompt documentation and add tests - add detailed comment explaining stripping of backticks, brackets, newlines - add comprehensive unit test suite covering edge cases --- packages/core/src/loop.ts | 8 +++++--- packages/core/tests/loop.test.ts | 16 ++++++++++++++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/packages/core/src/loop.ts b/packages/core/src/loop.ts index 10e5c74..85fef95 100644 --- a/packages/core/src/loop.ts +++ b/packages/core/src/loop.ts @@ -133,11 +133,13 @@ function sanitizeMessage(text: string, userId: string, ownerId: string | undefin /** * Strip characters from a userId that could break the sender-identity marker * format or serve as a prompt-injection vector when embedded in a system prompt. - * Backticks, square brackets, and newlines are removed so a crafted userId - * cannot escape the marker or inject additional instructions. + * + * Backticks (`) are stripped to prevent escaping the marker's backticks. + * Square brackets [] are stripped to prevent breaking the marker format. + * Newlines (\n\r) are stripped to prevent multi-line injection. 
*/ -function sanitizeUserIdForPrompt(userId: string): string { +export function sanitizeUserIdForPrompt(userId: string): string { return userId.replace(/[`\[\]\n\r]/g, ''); } // --------------------------------------------------------------------------- diff --git a/packages/core/tests/loop.test.ts b/packages/core/tests/loop.test.ts index 9398900..3c1d4e7 100644 --- a/packages/core/tests/loop.test.ts +++ b/packages/core/tests/loop.test.ts @@ -295,5 +295,16 @@ describe('agentLoop', () => { ); expect(prompts[1]?.at(-1)?.role).toBe('user'); expect(prompts[1]?.at(-1)?.content).toContain('respond naturally to my original message'); - }); -}); + }); + }); + + describe('sanitizeUserIdForPrompt', () => { + const { sanitizeUserIdForPrompt } = require('../src/loop.js'); + + test('strips backticks, brackets, and newlines', () => { + expect(sanitizeUserIdForPrompt('test`[id]\n')).toBe('testid'); + expect(sanitizeUserIdForPrompt('normal-id')).toBe('normal-id'); + expect(sanitizeUserIdForPrompt('')).toBe(''); + expect(sanitizeUserIdForPrompt('`[\n\r]')).toBe(''); + }); + });