From 55353c20d54ca5a3273cee69001146bfae7f511e Mon Sep 17 00:00:00 2001 From: 2002yy <15135142681@163.com> Date: Sun, 17 May 2026 17:32:26 +0800 Subject: [PATCH] docs: add professional engineering docs suite (ARCHITECTURE, CONTEXT_TIERS, MODEL_ROUTING, SECURITY, MEMORY_SYSTEM, NEWS_PIPELINE, PERFORMANCE, TESTING) - 8 new docs/ files covering every engineering dimension - README header rewritten with professional Highlights section listing multi-provider, routing, context tiers, security, safe writer, performance budget, CI - Repository now presents as a complete AI application engineering project Co-Authored-By: Claude Opus 4.7 --- README.md | 16 +++++++ docs/ARCHITECTURE.md | 73 +++++++++++++++++++++++++++++ docs/CONTEXT_TIERS.md | 44 ++++++++++++++++++ docs/MEMORY_SYSTEM.md | 67 +++++++++++++++++++++++++++ docs/MODEL_ROUTING.md | 57 +++++++++++++++++++++++ docs/NEWS_PIPELINE.md | 105 ++++++++++++++++++++++++++++++++++++++++++ docs/PERFORMANCE.md | 46 ++++++++++++++++++ docs/SECURITY.md | 40 ++++++++++++++++ docs/TESTING.md | 68 +++++++++++++++++++++++++++ 9 files changed, 516 insertions(+) create mode 100644 docs/ARCHITECTURE.md create mode 100644 docs/CONTEXT_TIERS.md create mode 100644 docs/MEMORY_SYSTEM.md create mode 100644 docs/MODEL_ROUTING.md create mode 100644 docs/NEWS_PIPELINE.md create mode 100644 docs/PERFORMANCE.md create mode 100644 docs/SECURITY.md create mode 100644 docs/TESTING.md diff --git a/README.md b/README.md index d5104b2..e4fbe96 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,22 @@ 140 tests

+A local AI learning assistant with long-term memory, role-based group chat,
+web search, model routing, and context-tier management.
+
+## Highlights
+
+- **Multi-provider LLM client**: OpenAI / DeepSeek / OpenRouter / SiliconFlow / local models
+- **Model routing** with fast / light / deep / archive context tiers
+- **Long-term memory** based on Markdown files and safe-writer persistence
+- **Web search pipeline**: RSS fetch → article extraction → LLM digest → source-traced discussion
+- **SSRF protection** for article fetching, **detect-secrets** in CI
+- **Batched session logging** and multi-layer caching for performance
+- **Performance budget**: `max_tokens` bounds on every LLM call, per mode
+- **140 tests**, Ruff clean, GitHub Actions CI
+
+---
+
 **A local AI study buddy for personal learning review**, with role-based group chat, web search, long-term memory, and after-session summaries.
 
 > Not yet another AI Q&A tool, but an AI learning companion that remembers what you study.
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
new file mode 100644
index 0000000..bd684dd
--- /dev/null
+++ b/docs/ARCHITECTURE.md
@@ -0,0 +1,73 @@
+# Architecture
+
+```
+┌───────────────────────────────────────────────────────────────┐
+│ Streamlit Runtime                                             │
+│   app.py — entry point, fragment orchestration                │
+├───────────────────────────────────────────────────────────────┤
+│ src/ui/                                                       │
+│   ├── sidebar.py             Settings, modes, export          │
+│   ├── status_bar.py          Status cards, stats, perf        │
+│   ├── chat_panel.py          Single-chat UI                   │
+│   ├── wechat_panel.py        Group-chat UI + news phases      │
+│   ├── after_session_panel.py Post-session review              │
+│   ├── session_state.py       init / refresh helpers           │
+│   └── theme.py               Catppuccin dark theme            │
+├───────────────────────────────────────────────────────────────┤
+│ src/                                                          │
+│   ├── llm_client.py          Chat / stream, auto-reconnect    │
+│   ├── llm_router.py          LLM-based routing (JSON mode)    │
+│   ├── context_builder.py     System prompt assembly           │
+│   ├── config.py              Multi-provider config            │
+│   ├── router.py              Route resolution                 │
+│   ├── mode_manager.py        Runtime modes, YAML truth        │
+│   ├── performance_budget.py  Max-tokens by mode               │
+│   ├── role_manager.py        Role loading                     │
+│   ├── model_stats.py         Usage tracking                   │
+│   │                                                           │
+│   ├── memory.py              File-based memory with LRU cache │
+│   ├── memory_writer.py       Structured memory updates        │
+│   ├── memory_tools.py        Read/write tool functions        │
+│   │                                                           │
+│   ├── wechat_format.py       Text formatting, role parsing    │
+│   ├── wechat_state.py        Group state I/O                  │
+│   ├── wechat_generator.py    LLM generation (opening/reply/   │
+│   │                          discussion)                      │
+│   ├── wechat_prompt.py       Prompt template loading          │
+│   ├── wechat_memory.py       Memory candidate extraction      │
+│   ├── wechat_service.py      High-level orchestration         │
+│   │                                                           │
+│   ├── session_logger.py      Session persistence, batch flush │
+│   ├── safe_writer.py         Atomic writes, retry, backup     │
+│   ├── health_check.py        Read-only health probes          │
+│   └── news/                  News pipeline (see NEWS_PIPELINE)│
+├───────────────────────────────────────────────────────────────┤
+│ config/runtime_state.yaml  — Single source of truth           │
+│ memory/                    — Markdown memory files            │
+│ chat/                      — Group chat transcripts           │
+│ roles/                     — Role definitions                 │
+│ templates/                 — Prompt templates                 │
+└───────────────────────────────────────────────────────────────┘
+```
+
+## Layers
+
+| Layer | Responsibility |
+|---|---|
+| **UI** | Streamlit fragments, user interaction, display |
+| **Orchestration** | `wechat_service.py` ties news + memory + generation together |
+| **LLM** | Client, routing, context assembly, budget control |
+| **Memory** | File-based, tiered context groups, safe writer |
+| **News** | RSS fetch → article extraction → digest → discussion |
+| **State** | YAML truth → Markdown views, synced at runtime |
+
+## Fragment Model
+
+`app.py` uses `@st.fragment` to isolate re-renders:
+
+- `render_sidebar_fragment` — settings, state toggles, actions
+- `render_status_fragment` — status cards, stats line
+- `render_single_main_fragment` — chat UI
+- `render_after_session_fragment` — post-session review
+
+Global-affecting sidebar actions use `st.rerun()` (full page) to refresh all fragments.
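+
+A minimal sketch of the fragment pattern (illustrative only; the real fragment functions live in `app.py` and `src/ui/`, and their bodies are far richer than shown here):
+
+```python
+import streamlit as st
+
+@st.fragment
+def render_status_fragment() -> None:
+    # Re-runs in isolation: interacting with widgets inside this
+    # fragment does not re-render the rest of the page.
+    st.metric("Model", st.session_state.get("selected_model", "flash"))
+
+@st.fragment
+def render_sidebar_fragment() -> None:
+    with st.sidebar:
+        mode = st.selectbox("Performance mode", ["fast", "standard", "deep"])
+        if mode != st.session_state.get("performance_mode"):
+            st.session_state.performance_mode = mode
+            # Global-affecting change: rerun the whole page so every
+            # fragment picks up the new mode.
+            st.rerun()
+
+render_sidebar_fragment()
+render_status_fragment()
+```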
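+
+A minimal sketch of that write-through pattern. The function name is real, but the body is a hypothetical reconstruction: the actual implementation routes writes through `safe_write_text()` and syncs several view files, and the field rendering below is invented for illustration:
+
+```python
+from pathlib import Path
+
+import yaml
+
+STATE_PATH = Path("config/runtime_state.yaml")
+VIEW_PATH = Path("memory/internal_state.md")
+
+def _write_runtime_state(updates: dict) -> dict:
+    # 1. Update the single source of truth (YAML).
+    state = yaml.safe_load(STATE_PATH.read_text(encoding="utf-8")) or {}
+    state.update(updates)
+    STATE_PATH.write_text(yaml.safe_dump(state, allow_unicode=True), encoding="utf-8")
+    # 2. Propagate to the human-readable Markdown view so it never
+    #    drifts from the YAML.
+    lines = [f"- **{key}**: {value}" for key, value in sorted(state.items())]
+    VIEW_PATH.write_text("# Internal State\n\n" + "\n".join(lines) + "\n", encoding="utf-8")
+    return state
+```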
+ +## File Layout + +``` +memory/ +├── index.md Learner identity, preferences +├── current_focus.md Active learning focus +├── summary.md Session summaries +├── learner_profile.md Learning style, strengths +├── progress.md Versioned progress +├── project_context.md Project description +├── task_board.md Task tracking +├── archive_summary.md Archived history +├── agent.md Agent notes +├── system_detail.md Technical context +├── internal_state.md Runtime state view (synced) +├── interaction_settings.md Interaction state view (synced) +└── pending_updates/ + ├── wechat_memory_candidates.md LLM-extracted candidates + └── wechat_memory_candidates.json Structured candidate data +``` + +## Memory Operations + +### Reading + +`memory.py:_read_text_file_cached(path, signature) → str` + +- LRU-cached (64 entries), invalidated on file signature change +- Context-mode selection via `CONTEXT_FILE_GROUPS` (see CONTEXT_TIERS.md) +- `extract_core_section()` strips frontmatter for lightweight reads + +### Writing + +All writes go through `memory_writer.py` → `safe_writer.py`: + +1. **Preview**: Generate update suggestions → user reviews +2. **Confirm**: User selects which updates to apply +3. **Write**: `safe_write_text()` with atomic temp-file + retry + backup +4. **Flush**: Updated context available on next memory bundle refresh + +### Group Chat Memory Extraction + +`wechat_memory.py` extracts memory candidates from group chat discussions: + +- Triggered by configurable `memory_capture_mode` (manual/auto) +- LLM extracts structured candidates from chat history +- Results stored as Markdown + JSON in `memory/pending_updates/` +- Candidates reviewed before committing to main memory files diff --git a/docs/MODEL_ROUTING.md b/docs/MODEL_ROUTING.md new file mode 100644 index 0000000..0d6c4b0 --- /dev/null +++ b/docs/MODEL_ROUTING.md @@ -0,0 +1,57 @@ +# Model Routing + +## Multi-Provider LLM Client + +`src/llm_client.py` provides a unified interface across 5 LLM providers: + +| Provider | Env Prefix | Default Base URL | +|---|---|---| +| DeepSeek | `DEEPSEEK_*` | `https://api.deepseek.com/v1` | +| OpenAI | `OPENAI_*` | — | +| OpenRouter | `OPENROUTER_*` | `https://openrouter.ai/api/v1` | +| SiliconFlow | `SILICONFLOW_*` | `https://api.siliconflow.cn/v1` | +| Local | `LOCAL_*` | `http://127.0.0.1:8000/v1` | + +Selection via `LLM_PROVIDER_PROFILE` env var. Client instances are cached by config signature and automatically rebuilt when settings change. + +## Model Profiles + +Two model tiers: + +- **flash**: Fast, low-cost model for daily chat and group replies +- **pro**: Higher-quality model for summaries, routing, and complex reasoning + +Resolution logic (`src/wechat_generator.py:_resolve_model_profile`): + +``` +performance_mode = deep → pro +performance_mode = fast → flash +selected_model = pro → pro +default → flash +``` + +## LLM Router + +`src/llm_router.py` performs LLM-based routing when `route_mode == "hybrid"` and `performance_mode != "fast"`. It calls the LLM with a JSON prompt to determine the best role, mode, and model for a user query. + +Valid outputs: + +- **role**: march7 (casual), keqing (project), nahida (concept), firefly (wrap-up) +- **mode**: 普通, 苏格拉底, 费曼, 项目, 论文, 概念地图 +- **model**: flash, pro +- **confidence**: high, medium, low + +Route caching via `st.session_state.current_route` — cleared when settings change. 
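+
+A sketch of what the JSON-mode routing call looks like, assuming an OpenAI-compatible client. The prompt text, model name, and fallback policy below are illustrative, not copied from `src/llm_router.py`:
+
+```python
+import json
+
+ROUTE_PROMPT = (
+    "Decide how to handle the user's message. Reply with JSON only: "
+    '{"role": "march7|keqing|nahida|firefly", "mode": "...", '
+    '"model": "flash|pro", "confidence": "high|medium|low"}'
+)
+
+def route(client, model: str, user_message: str) -> dict:
+    # response_format forces the model to emit a parseable JSON object.
+    response = client.chat.completions.create(
+        model=model,
+        response_format={"type": "json_object"},
+        messages=[
+            {"role": "system", "content": ROUTE_PROMPT},
+            {"role": "user", "content": user_message},
+        ],
+    )
+    decision = json.loads(response.choices[0].message.content)
+    # Illustrative fallback: low confidence degrades to the default route.
+    if decision.get("confidence") == "low":
+        return {"role": "march7", "mode": "普通", "model": "flash"}
+    return decision
+```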
+## Performance Budget
+
+All LLM calls are bounded by `src/performance_budget.py`:
+
+| Call Point | Fast | Standard | Deep |
+|---|---|---|---|
+| Single chat | 700 | 1100 | 1600 |
+| Group reply | 520 | 760 | 1050 |
+| Opening | 420 | 620 | 850 |
+| News digest | 650 | 950 | 1300 |
+| News discussion | 520 | 760 | 1000 |
+| History lines | 16 | 28 | 40 |
+
+All rows are `max_tokens` bounds except **History lines**, which caps how many transcript lines are replayed into the prompt.
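+
+A sketch of the lookup, with the table keyed by call point and mode. The dict and function names are illustrative; the documented values above are authoritative:
+
+```python
+_BUDGETS: dict[str, dict[str, int]] = {
+    "single_chat": {"fast": 700, "standard": 1100, "deep": 1600},
+    "group_reply": {"fast": 520, "standard": 760, "deep": 1050},
+    "news_digest": {"fast": 650, "standard": 950, "deep": 1300},
+}
+
+def max_tokens_for(call_point: str, performance_mode: str) -> int:
+    # Unknown modes degrade to "standard" instead of raising, so a bad
+    # runtime state can never leave an LLM call unbounded.
+    table = _BUDGETS[call_point]
+    return table.get(performance_mode, table["standard"])
+
+print(max_tokens_for("news_digest", "fast"))  # 650
+```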
diff --git a/docs/NEWS_PIPELINE.md b/docs/NEWS_PIPELINE.md
new file mode 100644
index 0000000..89355f6
--- /dev/null
+++ b/docs/NEWS_PIPELINE.md
@@ -0,0 +1,105 @@
+# News Pipeline
+
+Multi-source news aggregation pipeline: search → fetch → extract → digest → discuss → trace.
+
+## Pipeline Stages
+
+```
+User query
+    │
+    ▼
+1. Multi-source RSS fetch
+   ├── Google News RSS
+   ├── Bing News RSS
+   └── RSSHub (domestic Chinese sources)
+    │
+    ▼
+2. Dedup + sort + truncate (max 10 items)
+    │
+    ▼
+3. Link resolution (top N = resolve_top_n, bounded)
+    │
+    ▼
+4. Article text extraction (top 5 pages, max 5000 chars each)
+   ├── trafilatura (primary)
+   ├── readability-lxml (fallback)
+   └── raw text (last resort)
+    │
+    ▼
+5. Digest generation (LLM-summarized)
+    │
+    ▼
+6. Group discussion (4 roles discuss the news)
+    │
+    ▼
+7. Source block written to chat transcript
+```
+
+## Stage Detail
+
+### 1. RSS Fetch
+
+`src/news/rss_fetcher.py` — parallel multi-source fetch:
+
+- Google News: `https://news.google.com/rss/search?q={query}&hl=zh-CN`
+- Bing News: `https://www.bing.com/news/search?q={query}&format=rss`
+- RSSHub: configurable domestic sources
+- 600-second per-query result cache (the 1800-second article-text cache lives one stage later, in `article_fetcher.py`)
+
+### 2. Dedup
+
+Title normalization + set-based dedup. Per-query cache (10-minute TTL).
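+
+A sketch of the dedup step, assuming feed items arrive as dicts with a `title` key and already sorted newest-first (the real item structure lives in `src/news/`):
+
+```python
+import re
+
+def _normalize_title(title: str) -> str:
+    # Lowercase and strip punctuation/whitespace so near-identical
+    # headlines from different sources collapse to the same key.
+    return re.sub(r"[\W_]+", "", title.lower())
+
+def dedup_items(items: list[dict], limit: int = 10) -> list[dict]:
+    seen: set[str] = set()
+    unique: list[dict] = []
+    for item in items:
+        key = _normalize_title(item["title"])
+        if key and key not in seen:
+            seen.add(key)
+            unique.append(item)
+    return unique[:limit]  # truncate to the documented max of 10
+```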
+
+### 3. Link Resolution
+
+`src/news/link_resolver.py` — resolves Google News redirect URLs to the actual article URLs. Only the top N items are resolved (configured via `resolve_top_n`).
+
+### 4. Article Extraction
+
+`src/news/article_fetcher.py` — layered extraction with SSRF protection:
+
+- **Trafilatura**: Fast, accurate extraction for well-formed pages
+- **Readability**: Better for complex layouts
+- **Raw text**: `<p>` tag concatenation as last resort
+
+Method label tracked per article for quality monitoring.
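+
+A sketch of the layered fallback, assuming the `trafilatura` and `readability-lxml` packages. The real fetcher adds SSRF checks, timeouts, and caching (see SECURITY.md and PERFORMANCE.md):
+
+```python
+import re
+
+from readability import Document  # readability-lxml
+from trafilatura import extract as trafilatura_extract
+
+def _strip_tags(html: str) -> str:
+    return re.sub(r"\s+", " ", re.sub(r"<[^>]+>", " ", html)).strip()
+
+def extract_text(html: str, max_chars: int = 5000) -> tuple[str, str]:
+    """Return (text, method_label), trying extractors from best to crudest."""
+    text = trafilatura_extract(html)
+    if text:
+        return text[:max_chars], "trafilatura"
+    text = _strip_tags(Document(html).summary())
+    if text:
+        return text[:max_chars], "readability"
+    # Last resort: strip every tag from the raw page.
+    return _strip_tags(html)[:max_chars], "raw"
+```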
+
+### 5. Digest
+
+`src/news/digest.py` — the LLM generates a structured digest with:
+
+- Article coverage summary (which articles were used)
+- Key points from each source
+- Token bound from `news_digest_max_tokens(performance_mode)`
+
+### 6. Discussion
+
+`wechat_generator.py:generate_wechat_news_discussion()` — 4 characters discuss the digest:
+
+- Bounded by `news_discussion_max_tokens(performance_mode)`
+- Each character references specific news points
+- Group state synced after discussion
+
+### 7. Source Tracing
+
+After each news round, a source block is appended to the group chat transcript (the 【联网检索】 header means "web retrieval"; 查询 echoes the query):
+
+```
+【联网检索】
+查询:xxx
+1. Title | Source | Date | Body status
+   URL
+```
+
+This ensures all discussion claims are traceable to their sources.
+
+## UI Flow
+
+The entry page (`wechat_panel.py`) provides a 4-phase stepper:
+
+1. **Search** — Enter query, configure max articles
+2. **Fetch articles** — Read page text (optional)
+3. **Generate digest** — LLM summary
+4. **Discuss in group** — 4-role news discussion
+
+Each phase is a separate button, so progress stays visible step by step.
diff --git a/docs/PERFORMANCE.md b/docs/PERFORMANCE.md
new file mode 100644
index 0000000..9922466
--- /dev/null
+++ b/docs/PERFORMANCE.md
@@ -0,0 +1,46 @@
+# Performance
+
+## Caching
+
+| Cache | Location | TTL / size | Invalidation |
+|---|---|---|---|
+| Memory file reads | `memory.py:_read_text_file_cached` | LRU-64 | File signature change |
+| Runtime modes | `mode_manager.py:load_runtime_modes` | 30s `@st.cache_data` | Time + write |
+| Article text | `article_fetcher.py:_ARTICLE_CACHE` | 1800s, LRU-32 | Time + LRU eviction |
+| RSS results | `rss_fetcher.py:_CACHE` | 600s per query | Time |
+| LLM client | `llm_client.py:_client_signature` | Session | Config change |
+
+## Fragment Rerun Strategy
+
+`app.py` splits the UI into `@st.fragment` boundaries:
+
+- Sidebar settings changes → `st.rerun()` (full page) to refresh all panels
+- Status bar → isolated fragment, updates independently
+- Chat panel → isolated fragment; user messages rerun only this fragment
+- After-session panel → isolated fragment
+
+This keeps routine interactions from re-rendering the entire page.
+
+## Performance Budget
+
+All LLM calls have `max_tokens` bounds via `src/performance_budget.py`. Three tiers:
+
+- **fast**: Low token consumption, shorter conversations (700 chat tokens, 16 history lines)
+- **standard**: Balanced (1100 chat tokens, 28 history lines)
+- **deep**: Full context (1600 chat tokens, 40 history lines)
+
+## Batch Flush
+
+`src/session_logger.py` buffers session entries and flushes them in batches:
+
+| Mode | Flush interval (entries) |
+|---|---|
+| fast | 4 |
+| standard | 2 |
+| deep | 2 |
+
+Each flush uses `safe_write_text` for atomicity. A stale-session warning fires after 2 hours.
+
+## Diff Algorithm
+
+Memory update diffing uses set-based operations (O(n)) rather than line-by-line comparison (O(n×m)), keyed on `file_signature()` hashes.
diff --git a/docs/SECURITY.md b/docs/SECURITY.md
new file mode 100644
index 0000000..9270d98
--- /dev/null
+++ b/docs/SECURITY.md
@@ -0,0 +1,40 @@
+# Security
+
+## SSRF Protection
+
+`src/news/article_fetcher.py` implements defense-in-depth against Server-Side Request Forgery:
+
+1. **DNS resolution check**: Resolves the hostname at fetch time and rejects private/reserved IPs
+2. **Redirect validation**: A custom `_SafeHTTPRedirectHandler` validates every redirect hop (max 3 hops)
+3. **Blocked targets**:
+   - Private IP ranges (127.0.0.0/8, 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16)
+   - Link-local (169.254.0.0/16) and IPv6 loopback (::1)
+   - Internal hostnames (localhost, localhost.localdomain)
+4. **Protocol restriction**: HTTP(S) only
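+
+A minimal sketch of the host check using only the standard library (illustrative; the real checks, redirect handling, and error paths live in `article_fetcher.py`):
+
+```python
+import ipaddress
+import socket
+from urllib.parse import urlparse
+
+def is_url_safe(url: str) -> bool:
+    parsed = urlparse(url)
+    if parsed.scheme not in ("http", "https"):
+        return False  # protocol restriction
+    host = parsed.hostname or ""
+    if host in ("localhost", "localhost.localdomain"):
+        return False
+    try:
+        # Resolve at fetch time: an attacker-controlled DNS name can
+        # point anywhere, so the check must use the resolved addresses.
+        infos = socket.getaddrinfo(host, parsed.port or 80, proto=socket.IPPROTO_TCP)
+    except socket.gaierror:
+        return False
+    for info in infos:
+        ip = ipaddress.ip_address(info[4][0].split("%", 1)[0])  # drop IPv6 scope id
+        if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
+            return False
+    return True
+```
+
+A complete defense must also pin the resolved IP for the actual request; otherwise a DNS rebind between check and fetch can slip past the filter.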
+
+## Secret Scanning
+
+The CI pipeline runs `detect-secrets` as a hard gate (fails on detection). It scans for:
+
+- OpenAI / DeepSeek / OpenRouter / SiliconFlow API keys
+- GitHub personal access tokens (classic and fine-grained)
+- Generic `sk-` / `pk-` token patterns
+- Private key markers (`.pem`, `-----BEGIN`)
+
+## Configuration Safety
+
+- `.env` files are excluded from git via `.gitignore`
+- `config/runtime_state.yaml` is excluded from git (contains runtime paths)
+- `memory/` is excluded from git (contains learner data)
+- `.env.example` serves as the single canonical config template, with placeholder values only
+
+## Safe Writer
+
+`src/safe_writer.py` ensures file-write safety:
+
+| Mechanism | Detail |
+|---|---|
+| **Atomic writes** | Write to `.tmp` → replace target, with retry |
+| **Backup** | Automatic timestamped backup before overwrite |
+| **File locking** | Retry on `PermissionError` (up to 8 attempts) |
+| **Cleanup** | `try/finally` guarantees temp-file cleanup |
diff --git a/docs/TESTING.md b/docs/TESTING.md
new file mode 100644
index 0000000..03670cc
--- /dev/null
+++ b/docs/TESTING.md
@@ -0,0 +1,68 @@
+# Testing
+
+## Test Suite
+
+**140 tests**, Ruff clean, running on GitHub Actions CI.
+
+### Categories
+
+| Area | File | Tests |
+|---|---|---|
+| **Packaging guards** | `test_packaging_guards.py` | 18 |
+| **Performance budget** | `test_performance_budget.py` | 14 |
+| **News entry flow** | `test_wechat_news_entry_flow.py` | 8 |
+| **News service** | `test_wechat_service_news_flow.py` | 6 |
+| **Architecture flows** | `test_architecture_flows.py` | — |
+| **WeChat decoupling** | `test_wechat_decoupling.py` | 4 |
+| **Sidebar rerun** | `test_sidebar_global_rerun.py` | 12 |
+| Various unit tests | (spread across the test directory) | — |
+
+### Test Characteristics
+
+- **Self-contained**: Tests use `monkeypatch` to stub LLM calls and isolate file I/O (see the sketch after the CI table)
+- **Source-code checks**: Many tests verify source-code patterns (e.g., "no direct file open in the flush path")
+- **Pure-function tests**: Business logic is extracted into pure functions where Streamlit dependencies make direct testing infeasible
+- **State-machine tests**: News-phase rendering, group-state transitions
+- **Version sync guard**: The runtime version is asserted across 3 files (mode_manager, YAML, memory view)
+
+### Key Patterns
+
+**FakeSessionState** for testing Streamlit session-state logic:
+
+```python
+class _FakeSessionState(dict):
+    # Minimal stand-in for st.session_state: attribute access backed
+    # by plain dict storage.
+    def __getattr__(self, k): return self[k]
+    def __setattr__(self, k, v): self[k] = v
+    def __delattr__(self, k): self.pop(k, None)
+```
+
+**Source-code assertions** for behavioral invariants:
+
+```python
+def test_flush_uses_safe_writer():
+    # `text` holds the module source read earlier in the test; the
+    # slice isolates the flush function's body.
+    block = text[block_start:block_end]
+    assert "safe_write_text(current_file, existing + chunk)" in block
+    assert "with current_file.open(" not in block
+```
+
+## CI Pipeline
+
+`.github/workflows/ci.yml` runs on every push and pull request:
+
+| Step | Action | Gate |
+|---|---|---|
+| Install deps | `pip install -r requirements.txt -r requirements-dev.txt` | — |
+| Lint | `ruff check .` | Hard |
+| Type check | `mypy --explicit-package-bases src/` | Soft (continue-on-error) |
+| Test | `pytest` | Hard |
+| Package check | `python tools/package_project_helper.py` | Hard |
+| Secret scan | `detect-secrets` | Hard |
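+
+A sketch of the monkeypatch pattern for stubbing LLM calls. Everything here is illustrative (a self-contained toy, not code from the actual suite):
+
+```python
+import sys
+
+def llm_reply(prompt: str) -> str:
+    # Stands in for a network-bound LLM call.
+    raise RuntimeError("network call - stub me in tests")
+
+def generate_group_reply(prompt: str) -> str:
+    return llm_reply(prompt).strip()
+
+def test_group_reply_is_stripped(monkeypatch):
+    module = sys.modules[__name__]
+    # Swap the network call for a deterministic stub; pytest undoes the
+    # patch automatically when the test finishes.
+    monkeypatch.setattr(module, "llm_reply", lambda prompt: "  stubbed  ")
+    assert generate_group_reply("hi") == "stubbed"
+```
+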
+## Running Tests + +```bash +pytest tests/ # 140 tests +pytest tests/ -v # Verbose +pytest tests/ --cov=src # Coverage +ruff check src/ tests/ # Linting +```