From 49f2fc443887fba0aeddf216bc966d0d47fbc483 Mon Sep 17 00:00:00 2001
From: zhangshaozhi <zhangshaozhi@zhangshaozhideMacBook-Pro.local>
Date: Sun, 17 May 2026 20:26:16 +0800
Subject: [PATCH 01/36] fix: make application runnable in production

- Dockerfile: include README.md in COPY statement
- main.py: add static file serving for built frontend (SPA routing support)
- App.vue: wrap template with Naive UI message/dialog/notification providers

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 Dockerfile         |  2 +-
 protoforge/main.py | 10 ++++++++++
 web/src/App.vue    |  8 +++++++-
 3 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/Dockerfile b/Dockerfile
index babbf5f..2528ade 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,7 +10,7 @@ WORKDIR /app
 
 RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf /var/lib/apt/lists/*
 
-COPY pyproject.toml .
+COPY pyproject.toml README.md ./
 COPY protoforge/ protoforge/
 
 COPY --from=frontend-builder /app/web/dist /app/static
diff --git a/protoforge/main.py b/protoforge/main.py
index 894c5bc..0e43951 100644
--- a/protoforge/main.py
+++ b/protoforge/main.py
@@ -1,8 +1,11 @@
 import logging
 from contextlib import asynccontextmanager
+from pathlib import Path
 
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse
+from fastapi.staticfiles import StaticFiles
 
 from protoforge.api.v1.router import router
 from protoforge.core.engine import SimulationEngine
@@ -184,12 +187,19 @@ def create_app() -> FastAPI:
 
     @app.get("/")
     async def root():
+        index = Path("/app/static/index.html")
+        if index.exists():
+            return FileResponse(index)
         return {
             "name": "ProtoForge",
             "version": "0.1.0",
             "description": "物联网协议仿真与测试平台",
         }
 
+    static_dir = Path("/app/static")
+    if static_dir.exists():
+        app.mount("/assets", StaticFiles(directory=static_dir / "assets"), name="assets")
+
     @app.get("/health")
     async def health():
         return {"status": "ok"}
diff --git a/web/src/App.vue b/web/src/App.vue
index ae6d315..22eabe9 100644
--- a/web/src/App.vue
+++ b/web/src/App.vue
@@ -1,4 +1,7 @@
 <template>
+  <n-message-provider>
+  <n-dialog-provider>
+  <n-notification-provider>
   <div v-if="!loggedIn" class="login-wrapper">
     <Login @login-success="onLogin" />
   </div>
@@ -73,12 +76,15 @@
       </n-layout-content>
     </n-layout>
   </n-layout>
+  </n-notification-provider>
+  </n-dialog-provider>
+  </n-message-provider>
 </template>
 
 <script setup>
 import { ref, computed, onMounted, onUnmounted, h } from 'vue'
 import { useRouter, useRoute } from 'vue-router'
-import { NLayout, NLayoutSider, NLayoutHeader, NLayoutContent, NMenu, NSpace, NAutoComplete, NTag, NButton, NDropdown } from 'naive-ui'
+import { NLayout, NLayoutSider, NLayoutHeader, NLayoutContent, NMenu, NSpace, NAutoComplete, NTag, NButton, NDropdown, NMessageProvider, NDialogProvider, NNotificationProvider } from 'naive-ui'
 import api from './api.js'
 import Login from './views/Login.vue'
 import Welcome from './views/Welcome.vue'

From 8fcf6c17b7786b7350c6ff540f4e665ea481e1a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Mon, 18 May 2026 13:59:49 +0800
Subject: [PATCH 02/36] fix: support container and local env

---
 protoforge/main.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/protoforge/main.py b/protoforge/main.py
index 0e43951..73489ae 100644
--- a/protoforge/main.py
+++ b/protoforge/main.py
@@ -1,4 +1,5 @@
 import logging
+import os
 from contextlib import asynccontextmanager
 from pathlib import Path
 
@@ -142,7 +143,6 @@ async def lifespan(app: FastAPI):
     except Exception as e:
         logger.warning("Failed to start webhook manager: %s", e)
 
-    import os
     if os.environ.get("PROTOFORGE_DEMO_MODE"):
         try:
             from protoforge.core.demo import seed_demo_data
@@ -185,19 +185,26 @@ def create_app() -> FastAPI:
 
     app.include_router(router)
 
+    # 按优先级查找静态文件目录：环境变量 > 容器路径 > 本地构建产物
+    _repo_root = Path(__file__).parent.parent
+    _static_candidates = [
+        Path(os.environ["STATIC_DIR"]) if "STATIC_DIR" in os.environ else None,
+        Path("/app/static"),
+        _repo_root / "web" / "dist",
+    ]
+    static_dir = next((p for p in _static_candidates if p and p.is_dir()), None)
+
     @app.get("/")
     async def root():
-        index = Path("/app/static/index.html")
-        if index.exists():
-            return FileResponse(index)
+        if static_dir and (static_dir / "index.html").exists():
+            return FileResponse(static_dir / "index.html")
         return {
             "name": "ProtoForge",
             "version": "0.1.0",
             "description": "物联网协议仿真与测试平台",
         }
 
-    static_dir = Path("/app/static")
-    if static_dir.exists():
+    if static_dir and (static_dir / "assets").is_dir():
         app.mount("/assets", StaticFiles(directory=static_dir / "assets"), name="assets")
 
     @app.get("/health")

From 1fa9c0889b9351f9bc4d404f6615cafec1075f85 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Mon, 18 May 2026 14:30:59 +0800
Subject: [PATCH 03/36] fix(metrics): support prometheus metric

---
 protoforge/core/metrics.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/protoforge/core/metrics.py b/protoforge/core/metrics.py
index 9300372..0e01abb 100644
--- a/protoforge/core/metrics.py
+++ b/protoforge/core/metrics.py
@@ -46,6 +46,25 @@ def collect_from_engine(self, engine: Any) -> None:
                                 if p.status.value == "running")
         self.set_gauge("protoforge_protocols_running", protocols_running)
 
+        for device in engine._devices.values():
+            if device.status.value != "online":
+                continue
+            labels_base = {
+                "device_id": device.config.id,
+                "device_name": device.config.name,
+                "protocol": device.config.protocol,
+            }
+            for point in device.read_all_points():
+                if not isinstance(point.value, (int, float)):
+                    continue
+                labels = {**labels_base, "point": point.name}
+                point_config = next(
+                    (p for p in device.config.points if p.name == point.name), None
+                )
+                if point_config and point_config.unit:
+                    labels["unit"] = point_config.unit
+                self.set_gauge("protoforge_device_point", float(point.value), labels)
+
     def collect_from_test_runner(self, runner: Any) -> None:
         self.set_gauge("protoforge_test_cases_total", len(runner._test_cases))
         self.set_gauge("protoforge_test_suites_total", len(runner._test_suites))

From 382e939efc63779b60f0a6be768aa210f47c6940 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Mon, 18 May 2026 14:38:29 +0800
Subject: [PATCH 04/36] fix(metrics): collect point metrics for devices in any status

---
 protoforge/core/metrics.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/protoforge/core/metrics.py b/protoforge/core/metrics.py
index 0e01abb..8a5a2bb 100644
--- a/protoforge/core/metrics.py
+++ b/protoforge/core/metrics.py
@@ -47,8 +47,6 @@ def collect_from_engine(self, engine: Any) -> None:
         self.set_gauge("protoforge_protocols_running", protocols_running)
 
         for device in engine._devices.values():
-            if device.status.value != "online":
-                continue
             labels_base = {
                 "device_id": device.config.id,
                 "device_name": device.config.name,

From 80737ecd5a2099121e10ee624d6363f6e378b7d6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Mon, 18 May 2026 14:42:38 +0800
Subject: [PATCH 05/36] fix(metric): update metric name

---
 protoforge/core/metrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/protoforge/core/metrics.py b/protoforge/core/metrics.py
index 8a5a2bb..97f7bb9 100644
--- a/protoforge/core/metrics.py
+++ b/protoforge/core/metrics.py
@@ -61,7 +61,7 @@ def collect_from_engine(self, engine: Any) -> None:
                 )
                 if point_config and point_config.unit:
                     labels["unit"] = point_config.unit
-                self.set_gauge("protoforge_device_point", float(point.value), labels)
+                self.set_gauge(f"protoforge_{point.name}", float(point.value), labels)
 
     def collect_from_test_runner(self, runner: Any) -> None:
         self.set_gauge("protoforge_test_cases_total", len(runner._test_cases))

From 8f6ea66402ba0042edcdd4ba5dbcc7414cbfe240 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Mon, 18 May 2026 14:52:29 +0800
Subject: [PATCH 06/36] fix(metrics): use the bare point name as the gauge name

---
 protoforge/core/metrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/protoforge/core/metrics.py b/protoforge/core/metrics.py
index 97f7bb9..18fb327 100644
--- a/protoforge/core/metrics.py
+++ b/protoforge/core/metrics.py
@@ -61,7 +61,7 @@ def collect_from_engine(self, engine: Any) -> None:
                 )
                 if point_config and point_config.unit:
                     labels["unit"] = point_config.unit
-                self.set_gauge(f"protoforge_{point.name}", float(point.value), labels)
+                self.set_gauge(point.name, float(point.value), labels)
 
     def collect_from_test_runner(self, runner: Any) -> None:
         self.set_gauge("protoforge_test_cases_total", len(runner._test_cases))

From 94d779adee9ad861b6f66436b1ac156025670034 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 19 May 2026 10:22:34 +0800
Subject: [PATCH 07/36] fix(monitor): collect monitor data

---
 protoforge/core/device.py  | 3 +++
 protoforge/core/metrics.py | 8 +++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/protoforge/core/device.py b/protoforge/core/device.py
index a344c7c..efd21ef 100644
--- a/protoforge/core/device.py
+++ b/protoforge/core/device.py
@@ -59,9 +59,11 @@ def read_point(self, point_name: str) -> Optional[PointValue]:
             name=point_name,
             value=self._point_values[point_name],
             timestamp=time.time(),
+            quality="good" if self._status == DeviceStatus.ONLINE else "bad",
         )
 
     def read_all_points(self) -> list[PointValue]:
+        quality = "good" if self._status == DeviceStatus.ONLINE else "bad"
         result = []
         now = time.time()
         for name in self._point_values:
@@ -70,6 +72,7 @@ def read_all_points(self) -> list[PointValue]:
                     name=name,
                     value=self._point_values[name],
                     timestamp=now,
+                    quality=quality,
                 )
             )
         return result
diff --git a/protoforge/core/metrics.py b/protoforge/core/metrics.py
index 18fb327..9670525 100644
--- a/protoforge/core/metrics.py
+++ b/protoforge/core/metrics.py
@@ -53,15 +53,17 @@ def collect_from_engine(self, engine: Any) -> None:
                 "protocol": device.config.protocol,
             }
             for point in device.read_all_points():
-                if not isinstance(point.value, (int, float)):
-                    continue
                 labels = {**labels_base, "point": point.name}
                 point_config = next(
                     (p for p in device.config.points if p.name == point.name), None
                 )
                 if point_config and point_config.unit:
                     labels["unit"] = point_config.unit
-                self.set_gauge(point.name, float(point.value), labels)
+                key = self._make_key(point.name, labels)
+                if point.quality != "good":
+                    self._gauges.pop(key, None)
+                elif isinstance(point.value, (int, float)):
+                    self.set_gauge(point.name, float(point.value), labels)
 
     def collect_from_test_runner(self, runner: Any) -> None:
         self.set_gauge("protoforge_test_cases_total", len(runner._test_cases))

From e515c50708a4181d5fd26751f8853cec203c566f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 19 May 2026 17:54:26 +0800
Subject: [PATCH 08/36] fix(fanuc): update fanuc function

---
 protoforge/templates/fanuc/fanuc_0if_cnc.json    | 5 +++--
 protoforge/templates/modbus/fanuc_cnc.json       | 7 ++++---
 protoforge/templates/mtconnect/mill_machine.json | 5 +++--
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/protoforge/templates/fanuc/fanuc_0if_cnc.json b/protoforge/templates/fanuc/fanuc_0if_cnc.json
index 3231d9b..a10b74f 100644
--- a/protoforge/templates/fanuc/fanuc_0if_cnc.json
+++ b/protoforge/templates/fanuc/fanuc_0if_cnc.json
@@ -75,9 +75,10 @@
             "unit": "mm/min",
             "description": "进给速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "sine",
             "min_value": 100,
-            "max_value": 5000
+            "max_value": 5000,
+            "generator_config": {"period": 60, "phase": 0.0}
         },
         {
             "name": "alarm_status",
diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json
index 843151e..2970682 100644
--- a/protoforge/templates/modbus/fanuc_cnc.json
+++ b/protoforge/templates/modbus/fanuc_cnc.json
@@ -24,9 +24,10 @@
             "unit": "mm/min",
             "description": "实际进给速度",
             "access": "r",
-            "generator_type": "random",
-            "min_value": 0.0,
-            "max_value": 10000.0
+            "generator_type": "sine",
+            "min_value": 200.0,
+            "max_value": 3000.0,
+            "generator_config": {"period": 90, "phase": 1.0}
         },
         {
             "name": "spindle_override",
diff --git a/protoforge/templates/mtconnect/mill_machine.json b/protoforge/templates/mtconnect/mill_machine.json
index eaeef74..6b06daa 100644
--- a/protoforge/templates/mtconnect/mill_machine.json
+++ b/protoforge/templates/mtconnect/mill_machine.json
@@ -66,9 +66,10 @@
             "unit": "mm/min",
             "description": "进给速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "sine",
             "min_value": 200,
-            "max_value": 3000
+            "max_value": 3000,
+            "generator_config": {"period": 75, "phase": 2.1}
         },
         {
             "name": "part_count",

From d82cc9374d4001d87ed7d52f86534567aca5487d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 19 May 2026 18:14:27 +0800
Subject: [PATCH 09/36] fix(templates): update templates

---
 protoforge/templates/fanuc/fanuc_0if_cnc.json | 45 +++++++++++++++++++
 .../templates/fanuc/fanuc_31ib_cnc.json       | 45 +++++++++++++++++++
 protoforge/templates/modbus/fanuc_cnc.json    | 45 +++++++++++++++++++
 .../templates/mtconnect/mill_machine.json     | 45 +++++++++++++++++++
 4 files changed, 180 insertions(+)

diff --git a/protoforge/templates/fanuc/fanuc_0if_cnc.json b/protoforge/templates/fanuc/fanuc_0if_cnc.json
index a10b74f..476ad73 100644
--- a/protoforge/templates/fanuc/fanuc_0if_cnc.json
+++ b/protoforge/templates/fanuc/fanuc_0if_cnc.json
@@ -80,6 +80,51 @@
             "max_value": 5000,
             "generator_config": {"period": 60, "phase": 0.0}
         },
+        {
+            "name": "spindle_current",
+            "address": "spindle_current",
+            "data_type": "float32",
+            "unit": "A",
+            "description": "主轴电流",
+            "access": "r",
+            "generator_type": "sine",
+            "min_value": 8.0,
+            "max_value": 32.0,
+            "generator_config": {"period": 120, "phase": 0.5}
+        },
+        {
+            "name": "vibration_x",
+            "address": "vibration_x",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "X轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 2.5
+        },
+        {
+            "name": "vibration_y",
+            "address": "vibration_y",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Y轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 2.5
+        },
+        {
+            "name": "vibration_z",
+            "address": "vibration_z",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Z轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 3.0
+        },
         {
             "name": "alarm_status",
             "address": "alarm",
diff --git a/protoforge/templates/fanuc/fanuc_31ib_cnc.json b/protoforge/templates/fanuc/fanuc_31ib_cnc.json
index 89f6ef1..97d18fd 100644
--- a/protoforge/templates/fanuc/fanuc_31ib_cnc.json
+++ b/protoforge/templates/fanuc/fanuc_31ib_cnc.json
@@ -80,6 +80,51 @@
             "generator_type": "fixed",
             "fixed_value": 100
         },
+        {
+            "name": "spindle_current",
+            "address": "spindle_current",
+            "data_type": "float32",
+            "unit": "A",
+            "description": "主轴电流",
+            "access": "r",
+            "generator_type": "sine",
+            "min_value": 10.0,
+            "max_value": 45.0,
+            "generator_config": {"period": 120, "phase": 1.2}
+        },
+        {
+            "name": "vibration_x",
+            "address": "vibration_x",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "X轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 3.0
+        },
+        {
+            "name": "vibration_y",
+            "address": "vibration_y",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Y轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 3.0
+        },
+        {
+            "name": "vibration_z",
+            "address": "vibration_z",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Z轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 4.0
+        },
         {
             "name": "tool_number",
             "address": "tool_number",
diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json
index 2970682..b154318 100644
--- a/protoforge/templates/modbus/fanuc_cnc.json
+++ b/protoforge/templates/modbus/fanuc_cnc.json
@@ -29,6 +29,51 @@
             "max_value": 3000.0,
             "generator_config": {"period": 90, "phase": 1.0}
         },
+        {
+            "name": "spindle_current",
+            "address": "2",
+            "data_type": "float32",
+            "unit": "A",
+            "description": "主轴电流",
+            "access": "r",
+            "generator_type": "sine",
+            "min_value": 8.0,
+            "max_value": 35.0,
+            "generator_config": {"period": 120, "phase": 2.0}
+        },
+        {
+            "name": "vibration_x",
+            "address": "23",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "X轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 2.5
+        },
+        {
+            "name": "vibration_y",
+            "address": "25",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Y轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 2.5
+        },
+        {
+            "name": "vibration_z",
+            "address": "27",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Z轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 3.0
+        },
         {
             "name": "spindle_override",
             "address": "3",
diff --git a/protoforge/templates/mtconnect/mill_machine.json b/protoforge/templates/mtconnect/mill_machine.json
index 6b06daa..1262d0f 100644
--- a/protoforge/templates/mtconnect/mill_machine.json
+++ b/protoforge/templates/mtconnect/mill_machine.json
@@ -71,6 +71,51 @@
             "max_value": 3000,
             "generator_config": {"period": 75, "phase": 2.1}
         },
+        {
+            "name": "spindle_current",
+            "address": "SpindleCurrent",
+            "data_type": "float32",
+            "unit": "A",
+            "description": "主轴电流",
+            "access": "r",
+            "generator_type": "sine",
+            "min_value": 6.0,
+            "max_value": 28.0,
+            "generator_config": {"period": 120, "phase": 3.1}
+        },
+        {
+            "name": "vibration_x",
+            "address": "VibrationX",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "X轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 2.0
+        },
+        {
+            "name": "vibration_y",
+            "address": "VibrationY",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Y轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 2.0
+        },
+        {
+            "name": "vibration_z",
+            "address": "VibrationZ",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Z轴振动加速度",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": 0.1,
+            "max_value": 2.5
+        },
         {
             "name": "part_count",
             "address": "PartCount",

From 566910feb851276a5d542a30cff8b0bd9fb5f86c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 19 May 2026 19:46:40 +0800
Subject: [PATCH 10/36] fix(router): update router

---
 docs/curl.md                |  6 ++++++
 protoforge/api/v1/router.py | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 docs/curl.md

diff --git a/docs/curl.md b/docs/curl.md
new file mode 100644
index 0000000..ae3cbb1
--- /dev/null
+++ b/docs/curl.md
@@ -0,0 +1,6 @@
+# 更新设备测试点请求
+
+```bash
+# /api/v1/devices/{device_id}/sync-from-template
+curl -X POST http://localhost:8000/api/v1/devices/fanuc-cnc数控系统/sync-from-template
+```
diff --git a/protoforge/api/v1/router.py b/protoforge/api/v1/router.py
index e06b966..8eaab1b 100644
--- a/protoforge/api/v1/router.py
+++ b/protoforge/api/v1/router.py
@@ -202,6 +202,38 @@ async def batch_stop_devices(device_ids: list[str]):
     return {"status": "ok", "stopped": stopped, "errors": errors}
 
 
+@router.post("/devices/{device_id}/sync-from-template")
+async def sync_device_from_template(device_id: str):
+    engine = _get_engine()
+    db = _get_database()
+    tm = _get_template_manager()
+    try:
+        instance = engine._devices.get(device_id)
+        if not instance:
+            raise HTTPException(status_code=404, detail=f"Device not found: {device_id}")
+        template_id = instance.config.template_id
+        if not template_id:
+            raise HTTPException(status_code=400, detail="Device has no associated template")
+        template = tm.get_template(template_id)
+        if not template:
+            raise HTTPException(status_code=404, detail=f"Template not found: {template_id}")
+        new_config = DeviceConfig(
+            id=device_id,
+            name=instance.config.name,
+            protocol=instance.config.protocol,
+            template_id=template_id,
+            points=template.points,
+            protocol_config=instance.config.protocol_config,
+        )
+        result = await engine.update_device(device_id, new_config)
+        await db.save_device(new_config)
+        return {"status": "ok", "point_count": len(template.points), "device": result}
+    except HTTPException:
+        raise
+    except ValueError as e:
+        raise HTTPException(status_code=404, detail=str(e))
+
+
 @router.get("/devices/{device_id}", response_model=DeviceInfo)
 async def get_device(device_id: str):
     engine = _get_engine()

From 741bf518a3eccb6997f844063967b37d413878bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 19 May 2026 20:22:12 +0800
Subject: [PATCH 11/36] fix(template): update template

---
 protoforge/templates/fanuc/fanuc_0if_cnc.json    | 5 +++--
 protoforge/templates/fanuc/fanuc_31ib_cnc.json   | 5 +++--
 protoforge/templates/modbus/fanuc_cnc.json       | 5 +++--
 protoforge/templates/mtconnect/mill_machine.json | 5 +++--
 4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/protoforge/templates/fanuc/fanuc_0if_cnc.json b/protoforge/templates/fanuc/fanuc_0if_cnc.json
index 476ad73..3e79750 100644
--- a/protoforge/templates/fanuc/fanuc_0if_cnc.json
+++ b/protoforge/templates/fanuc/fanuc_0if_cnc.json
@@ -64,9 +64,10 @@
             "unit": "RPM",
             "description": "主轴转速",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "sawtooth",
             "min_value": 1000,
-            "max_value": 8000
+            "max_value": 8000,
+            "generator_config": {"period": 120}
         },
         {
             "name": "feed_rate",
diff --git a/protoforge/templates/fanuc/fanuc_31ib_cnc.json b/protoforge/templates/fanuc/fanuc_31ib_cnc.json
index 97d18fd..83fee77 100644
--- a/protoforge/templates/fanuc/fanuc_31ib_cnc.json
+++ b/protoforge/templates/fanuc/fanuc_31ib_cnc.json
@@ -66,9 +66,10 @@
             "unit": "RPM",
             "description": "主轴转速",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "sawtooth",
             "min_value": 2000,
-            "max_value": 15000
+            "max_value": 15000,
+            "generator_config": {"period": 150}
         },
         {
             "name": "feed_override",
diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json
index b154318..265ae56 100644
--- a/protoforge/templates/modbus/fanuc_cnc.json
+++ b/protoforge/templates/modbus/fanuc_cnc.json
@@ -13,9 +13,10 @@
             "unit": "RPM",
             "description": "主轴实际转速",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "sawtooth",
             "min_value": 0,
-            "max_value": 12000
+            "max_value": 12000,
+            "generator_config": {"period": 180}
         },
         {
             "name": "feed_rate",
diff --git a/protoforge/templates/mtconnect/mill_machine.json b/protoforge/templates/mtconnect/mill_machine.json
index 1262d0f..5ce7d63 100644
--- a/protoforge/templates/mtconnect/mill_machine.json
+++ b/protoforge/templates/mtconnect/mill_machine.json
@@ -55,9 +55,10 @@
             "unit": "RPM",
             "description": "主轴转速",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "sawtooth",
             "min_value": 3000,
-            "max_value": 12000
+            "max_value": 12000,
+            "generator_config": {"period": 135}
         },
         {
             "name": "feed_rate",

From 3cc3be3e59319f41bcddf4e8f642c4f9618d78e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 19 May 2026 20:35:05 +0800
Subject: [PATCH 12/36] fix(template): update template

---
 protoforge/templates/modbus/fanuc_cnc.json       | 7 +++++--
 protoforge/templates/mtconnect/mill_machine.json | 7 +++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json
index 265ae56..dc7a146 100644
--- a/protoforge/templates/modbus/fanuc_cnc.json
+++ b/protoforge/templates/modbus/fanuc_cnc.json
@@ -194,9 +194,12 @@
             "data_type": "uint16",
             "description": "加工计数",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0,
-            "max_value": 99999
+            "max_value": 99999,
+            "generator_config": {
+                "script": "key = 'part_count_modbus'; last = cache.get(key, 0); interval = 45; result = min(int(elapsed / interval), 99999); cache[key] = result"
+            }
         },
         {
             "name": "cycle_time",
diff --git a/protoforge/templates/mtconnect/mill_machine.json b/protoforge/templates/mtconnect/mill_machine.json
index 5ce7d63..5a11593 100644
--- a/protoforge/templates/mtconnect/mill_machine.json
+++ b/protoforge/templates/mtconnect/mill_machine.json
@@ -124,9 +124,12 @@
             "unit": "件",
             "description": "加工件数",
             "access": "r",
-            "generator_type": "sawtooth",
+            "generator_type": "script",
             "min_value": 0,
-            "max_value": 999
+            "max_value": 999,
+            "generator_config": {
+                "script": "key = 'part_count_mtconnect'; interval = 60; result = min(int(elapsed / interval), 999); cache[key] = result"
+            }
         }
     ],
     "protocol_config": {

From 5bdbcd4ecbbcea080e36741f34fa3662ce669345 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 19 May 2026 21:01:59 +0800
Subject: [PATCH 13/36] fix(template): update template

---
 protoforge/templates/modbus/fanuc_cnc.json       | 2 +-
 protoforge/templates/mtconnect/mill_machine.json | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json
index dc7a146..22db212 100644
--- a/protoforge/templates/modbus/fanuc_cnc.json
+++ b/protoforge/templates/modbus/fanuc_cnc.json
@@ -198,7 +198,7 @@
             "min_value": 0,
             "max_value": 99999,
             "generator_config": {
-                "script": "key = 'part_count_modbus'; last = cache.get(key, 0); interval = 45; result = min(int(elapsed / interval), 99999); cache[key] = result"
+                "script": "elapsed = context['elapsed']; result = min(int(elapsed / 45), 99999)"
             }
         },
         {
diff --git a/protoforge/templates/mtconnect/mill_machine.json b/protoforge/templates/mtconnect/mill_machine.json
index 5a11593..b701a48 100644
--- a/protoforge/templates/mtconnect/mill_machine.json
+++ b/protoforge/templates/mtconnect/mill_machine.json
@@ -128,7 +128,7 @@
             "min_value": 0,
             "max_value": 999,
             "generator_config": {
-                "script": "key = 'part_count_mtconnect'; interval = 60; result = min(int(elapsed / interval), 999); cache[key] = result"
+                "script": "elapsed = context['elapsed']; result = min(int(elapsed / 60), 999)"
             }
         }
     ],

From 5d2d9fe0634aa4bcba08552b09c5da3d84b76db7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 19 May 2026 21:41:04 +0800
Subject: [PATCH 14/36] fix(template): update template

---
 protoforge/templates/fanuc/fanuc_0if_cnc.json | 21 +++++++++++++------
 .../templates/fanuc/fanuc_31ib_cnc.json       | 21 +++++++++++++------
 protoforge/templates/modbus/fanuc_cnc.json    | 21 +++++++++++++------
 .../templates/mtconnect/mill_machine.json     | 21 +++++++++++++------
 4 files changed, 60 insertions(+), 24 deletions(-)

diff --git a/protoforge/templates/fanuc/fanuc_0if_cnc.json b/protoforge/templates/fanuc/fanuc_0if_cnc.json
index 3e79750..39437f3 100644
--- a/protoforge/templates/fanuc/fanuc_0if_cnc.json
+++ b/protoforge/templates/fanuc/fanuc_0if_cnc.json
@@ -100,9 +100,12 @@
             "unit": "m/s²",
             "description": "X轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 2.5
+            "max_value": 2.5,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 90); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "vibration_y",
@@ -111,9 +114,12 @@
             "unit": "m/s²",
             "description": "Y轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 2.5
+            "max_value": 2.5,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 75 + 1.0); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "vibration_z",
@@ -122,9 +128,12 @@
             "unit": "m/s²",
             "description": "Z轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 3.0
+            "max_value": 3.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.7 + 0.4 * math.sin(2 * math.pi * elapsed / 60 + 2.1); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "alarm_status",
diff --git a/protoforge/templates/fanuc/fanuc_31ib_cnc.json b/protoforge/templates/fanuc/fanuc_31ib_cnc.json
index 83fee77..808fbc6 100644
--- a/protoforge/templates/fanuc/fanuc_31ib_cnc.json
+++ b/protoforge/templates/fanuc/fanuc_31ib_cnc.json
@@ -100,9 +100,12 @@
             "unit": "m/s²",
             "description": "X轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 3.0
+            "max_value": 3.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.6 + 0.4 * math.sin(2 * math.pi * elapsed / 80 + 0.5); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "vibration_y",
@@ -111,9 +114,12 @@
             "unit": "m/s²",
             "description": "Y轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 3.0
+            "max_value": 3.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.6 + 0.4 * math.sin(2 * math.pi * elapsed / 65 + 1.5); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "vibration_z",
@@ -122,9 +128,12 @@
             "unit": "m/s²",
             "description": "Z轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 4.0
+            "max_value": 4.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.8 + 0.5 * math.sin(2 * math.pi * elapsed / 55 + 2.5); noise = random.uniform(-0.25, 0.25); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "tool_number",
diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json
index 22db212..43622cf 100644
--- a/protoforge/templates/modbus/fanuc_cnc.json
+++ b/protoforge/templates/modbus/fanuc_cnc.json
@@ -49,9 +49,12 @@
             "unit": "m/s²",
             "description": "X轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 2.5
+            "max_value": 2.5,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 85 + 0.8); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "vibration_y",
@@ -60,9 +63,12 @@
             "unit": "m/s²",
             "description": "Y轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 2.5
+            "max_value": 2.5,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 70 + 1.8); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "vibration_z",
@@ -71,9 +77,12 @@
             "unit": "m/s²",
             "description": "Z轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 3.0
+            "max_value": 3.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.7 + 0.4 * math.sin(2 * math.pi * elapsed / 58 + 2.8); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "spindle_override",
diff --git a/protoforge/templates/mtconnect/mill_machine.json b/protoforge/templates/mtconnect/mill_machine.json
index b701a48..fd08bdd 100644
--- a/protoforge/templates/mtconnect/mill_machine.json
+++ b/protoforge/templates/mtconnect/mill_machine.json
@@ -91,9 +91,12 @@
             "unit": "m/s²",
             "description": "X轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 2.0
+            "max_value": 2.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.4 + 0.25 * math.sin(2 * math.pi * elapsed / 95 + 0.3); noise = random.uniform(-0.12, 0.12); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "vibration_y",
@@ -102,9 +105,12 @@
             "unit": "m/s²",
             "description": "Y轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 2.0
+            "max_value": 2.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.4 + 0.25 * math.sin(2 * math.pi * elapsed / 78 + 1.3); noise = random.uniform(-0.12, 0.12); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "vibration_z",
@@ -113,9 +119,12 @@
             "unit": "m/s²",
             "description": "Z轴振动加速度",
             "access": "r",
-            "generator_type": "random",
+            "generator_type": "script",
             "min_value": 0.1,
-            "max_value": 2.5
+            "max_value": 2.5,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; base = 0.6 + 0.35 * math.sin(2 * math.pi * elapsed / 62 + 2.3); noise = random.uniform(-0.18, 0.18); result = round(max(0.1, base + noise), 3)"
+            }
         },
         {
             "name": "part_count",

From c57a366c70ceacb599786c08934cef60a69d0d25 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 20 May 2026 13:40:09 +0800
Subject: [PATCH 15/36] feat(fault): support fault

---
 FAULT_INJECTION.md          | 321 +++++++++++++++++++++++++++
 protoforge/api/v1/router.py |  57 +++++
 protoforge/core/device.py   |  18 +-
 protoforge/core/engine.py   |  32 +++
 protoforge/core/fault.py    | 419 ++++++++++++++++++++++++++++++++++++
 protoforge/models/fault.py  |  77 +++++++
 6 files changed, 923 insertions(+), 1 deletion(-)
 create mode 100644 FAULT_INJECTION.md
 create mode 100644 protoforge/core/fault.py
 create mode 100644 protoforge/models/fault.py

diff --git a/FAULT_INJECTION.md b/FAULT_INJECTION.md
new file mode 100644
index 0000000..951648d
--- /dev/null
+++ b/FAULT_INJECTION.md
@@ -0,0 +1,321 @@
+# 故障注入使用文档
+
+本文档描述 ProtoForge 故障注入模块的设计、使用方式及内置故障类型。
+
+---
+
+## 概述
+
+故障注入模块允许你在运行中的模拟设备上注入真实工业场景的异常，用于：
+
+- 验证监控系统的异常检测能力
+- 训练工业 AI 异常检测模型（提供异常样本）
+- 测试报警规则和联动逻辑
+
+支持四种异常场景：
+
+| 场景 | 说明 |
+|------|------|
+| 异常注入 | 立即将指定测点推入异常区间 |
+| 自动恢复 | 故障持续指定时间后自动恢复正常 |
+| 多指标联动 | 一次注入同时影响多个相关测点 |
+| 渐进式劣化 | 指标随时间线性恶化，模拟真实磨损过程 |
+
+---
+
+## 架构设计
+
+```
+FaultInjector（独立模块）
+    │
+    ├── inject(device, request)   注入故障
+    ├── apply(device)             每次 tick 后覆盖测点值（通过钩子机制）
+    ├── clear(device_id)          手动清除
+    └── 自动到期恢复
+
+DeviceInstance.tick()
+    ├── 执行正常生成器
+    └── 执行 post_tick_hooks（FaultInjector.apply 挂载于此）
+```
+
+故障模块通过 `register_post_tick_hook` 挂载到设备，不修改设备本身的生成逻辑，完全解耦。
+
+---
+
+## API 接口
+
+### 查询故障类型
+
+```
+GET /api/v1/faults/types
+```
+
+返回所有内置故障类型列表。
+
+```
+GET /api/v1/faults/types/{fault_type_id}
+```
+
+返回指定故障类型的详细定义，包含影响的测点和行为参数。
+
+### 查询活跃故障
+
+```
+GET /api/v1/faults/active
+```
+
+返回当前所有设备上正在运行的故障实例。
+
+### 注入故障
+
+```
+POST /api/v1/devices/{device_id}/fault
+```
+
+请求体：
+
+```json
+{
+    "fault_type_id": "tool_wear",
+    "duration": 300,
+    "intensity": 0.8
+}
+```
+
+| 字段 | 类型 | 必填 | 说明 |
+|------|------|------|------|
+| `fault_type_id` | string | 是 | 故障类型 ID，见下方故障类型列表 |
+| `duration` | float | 否 | 持续时间（秒），不填则使用类型默认值 |
+| `intensity` | float | 否 | 故障强度 0.0~1.0，默认 1.0，影响劣化幅度 |
+
+响应示例：
+
+```json
+{
+    "fault_id": "a3f2c1d4e5b6",
+    "device_id": "fanuc-cnc-01",
+    "fault_type_id": "tool_wear",
+    "fault_type_name": "刀具磨损",
+    "status": "active",
+    "intensity": 0.8,
+    "duration": 300.0,
+    "elapsed": 0.0,
+    "progress": 0.0,
+    "affected_points": ["spindle_current", "vibration_x", "vibration_y", "vibration_z", "feed_rate"],
+    "started_at": 1716192000.0
+}
+```
+
+### 查询设备当前故障
+
+```
+GET /api/v1/devices/{device_id}/fault
+```
+
+无故障时返回 `{"status": "none"}`，有故障时返回故障详情（含实时 `elapsed` 和 `progress`）。
+
+### 手动清除故障
+
+```
+DELETE /api/v1/devices/{device_id}/fault
+```
+
+立即清除故障，测点值由生成器在下一个 tick 自然恢复正常。
+
+---
+
+## 内置故障类型
+
+### tool_wear — 刀具磨损
+
+- **分类**：mechanical
+- **模式**：渐进式
+- **默认持续时间**：300 秒
+- **真实场景**：刀具切削刃逐渐磨损，切削阻力增大，系统自动压低进给速率
+
+| 测点 | 变化方向 | 峰值倍率 |
+|------|---------|---------|
+| `spindle_current` | 升高 | ×2.2 |
+| `vibration_x` | 升高 | ×3.0 |
+| `vibration_y` | 升高 | ×3.0 |
+| `vibration_z` | 升高 | ×3.5 |
+| `feed_rate` | 降低 | ×0.45 |
+
+---
+
+### tool_breakage — 刀具崩刃
+
+- **分类**：mechanical
+- **模式**：瞬间注入
+- **默认持续时间**：15 秒
+- **真实场景**：刀具突发性崩刃，机床通常会触发报警并停机
+
+| 测点 | 变化方向 | 峰值倍率 |
+|------|---------|---------|
+| `spindle_current` | 急升 | ×4.5 |
+| `vibration_x` | 急升 | ×8.0 |
+| `vibration_y` | 急升 | ×8.0 |
+| `vibration_z` | 急升 | ×10.0 |
+| `feed_rate` | 停止 | →0 |
+
+---
+
+### spindle_overheat — 主轴过热
+
+- **分类**：thermal
+- **模式**：渐进式
+- **默认持续时间**：240 秒
+- **真实场景**：长时间高负荷或冷却系统故障，热保护机制逐渐降低转速
+
+| 测点 | 变化方向 | 峰值倍率 |
+|------|---------|---------|
+| `spindle_current` | 升高 | ×1.8 |
+| `spindle_speed` | 降低 | ×0.6 |
+| `vibration_x` | 升高 | ×1.5 |
+| `vibration_z` | 升高 | ×1.5 |
+
+---
+
+### spindle_bearing_fault — 主轴轴承故障
+
+- **分类**：mechanical
+- **模式**：渐进式
+- **默认持续时间**：360 秒
+- **真实场景**：轴承磨损或润滑不足，振动持续升高
+
+| 测点 | 变化方向 | 峰值倍率 |
+|------|---------|---------|
+| `vibration_x` | 升高 | ×4.0 |
+| `vibration_y` | 升高 | ×4.0 |
+| `vibration_z` | 升高 | ×5.0 |
+| `spindle_current` | 轻微升高 | ×1.3 |
+
+---
+
+### feed_stall — 进给堵转
+
+- **分类**：process
+- **模式**：瞬间注入
+- **默认持续时间**：20 秒
+- **真实场景**：工件夹紧松动或切削量过大导致进给轴卡死
+
+| 测点 | 变化方向 | 峰值倍率 |
+|------|---------|---------|
+| `feed_rate` | 停止 | →0 |
+| `spindle_current` | 急升 | ×3.8 |
+| `vibration_z` | 急升 | ×5.0 |
+
+---
+
+### vibration_spike — 振动异常
+
+- **分类**：mechanical
+- **模式**：瞬间注入
+- **默认持续时间**：60 秒
+- **真实场景**：工件装夹松动或切削共振
+
+| 测点 | 变化方向 | 峰值倍率 |
+|------|---------|---------|
+| `vibration_x` | 急升 | ×6.0 |
+| `vibration_y` | 急升 | ×6.0 |
+| `vibration_z` | 急升 | ×7.0 |
+
+---
+
+### coolant_failure — 切削液不足
+
+- **分类**：process
+- **模式**：渐进式
+- **默认持续时间**：480 秒
+- **真实场景**：切削液供给不足，热量积累，劣化速度较慢
+
+| 测点 | 变化方向 | 峰值倍率 |
+|------|---------|---------|
+| `spindle_current` | 升高 | ×1.6 |
+| `vibration_x` | 升高 | ×2.0 |
+| `vibration_y` | 升高 | ×2.0 |
+| `vibration_z` | 升高 | ×2.5 |
+| `feed_rate` | 降低 | ×0.75 |
+
+---
+
+### power_fluctuation — 电源波动
+
+- **分类**：electrical
+- **模式**：瞬间注入（持续期间持续抖动）
+- **默认持续时间**：90 秒
+- **真实场景**：供电电压不稳定，各指标出现随机波动
+
+| 测点 | 变化方向 | 说明 |
+|------|---------|------|
+| `spindle_speed` | 随机抖动 | ±300 RPM 噪声 |
+| `spindle_current` | 随机抖动 | ±5 A 噪声 |
+| `feed_rate` | 随机抖动 | ±150 mm/min 噪声 |
+
+---
+
+## 使用示例
+
+### 模拟刀具磨损过程
+
+```bash
+# 注入刀具磨损，持续 5 分钟，强度 100%
+curl -X POST http://localhost:8000/api/v1/devices/fanuc-cnc-01/fault \
+  -H "Content-Type: application/json" \
+  -d '{"fault_type_id": "tool_wear", "duration": 300, "intensity": 1.0}'
+
+# 每隔 30 秒查看故障进度
+curl http://localhost:8000/api/v1/devices/fanuc-cnc-01/fault
+
+# 查看 Prometheus 指标变化
+curl http://localhost:8000/api/v1/metrics | grep -E "spindle_current|vibration|feed_rate"
+```
+
+### 模拟突发崩刃后手动恢复
+
+```bash
+# 注入崩刃故障
+curl -X POST http://localhost:8000/api/v1/devices/fanuc-cnc-01/fault \
+  -H "Content-Type: application/json" \
+  -d '{"fault_type_id": "tool_breakage", "duration": 60}'
+
+# 手动提前清除
+curl -X DELETE http://localhost:8000/api/v1/devices/fanuc-cnc-01/fault
+```
+
+### 低强度渐进劣化（用于 AI 模型训练）
+
+```bash
+# 用 50% 强度注入轴承故障，持续 10 分钟，产生轻微异常样本
+curl -X POST http://localhost:8000/api/v1/devices/fanuc-cnc-01/fault \
+  -H "Content-Type: application/json" \
+  -d '{"fault_type_id": "spindle_bearing_fault", "duration": 600, "intensity": 0.5}'
+```
+
+---
+
+## 与 Prometheus 集成
+
+故障注入后，测点值的变化会实时反映在 `/api/v1/metrics` 接口中。可以用 Grafana 观察故障期间各指标的时序变化：
+
+```
+# 主轴电流（故障期间会升高）
+fanuc_cnc_spindle_current
+
+# 三轴振动
+fanuc_cnc_vibration_x
+fanuc_cnc_vibration_y
+fanuc_cnc_vibration_z
+
+# 进给速率（刀具磨损/堵转时会降低）
+fanuc_cnc_feed_rate
+```
+
+---
+
+## 注意事项
+
+- 同一设备同时只能有一个活跃故障，新注入会覆盖旧故障
+- 故障到期后测点值由生成器在下一个 tick 自然恢复，不会瞬间跳回
+- 设备必须处于 `online` 状态才能注入故障
+- 删除设备时会自动清除其故障
diff --git a/protoforge/api/v1/router.py b/protoforge/api/v1/router.py
index 8eaab1b..7a6c050 100644
--- a/protoforge/api/v1/router.py
+++ b/protoforge/api/v1/router.py
@@ -8,6 +8,7 @@
 from fastapi.responses import PlainTextResponse
 
 from protoforge.models.device import DeviceConfig, DeviceInfo, PointValue
+from protoforge.models.fault import FaultInjectRequest
 from protoforge.models.scenario import ScenarioConfig, ScenarioInfo
 from protoforge.models.template import TemplateDetail, TemplateInfo
 
@@ -1207,6 +1208,62 @@ async def setup_demo():
         raise HTTPException(status_code=500, detail=get_friendly_error(str(e)))
 
 
+@router.get("/faults/types")
+async def list_fault_types():
+    engine = _get_engine()
+    types = engine.list_fault_types()
+    return [t.model_dump() for t in types]
+
+
+@router.get("/faults/types/{fault_type_id}")
+async def get_fault_type(fault_type_id: str):
+    from protoforge.core.fault import fault_injector
+    ft = fault_injector.get_fault_type(fault_type_id)
+    if not ft:
+        raise HTTPException(status_code=404, detail=f"Fault type not found: {fault_type_id}")
+    return ft.model_dump()
+
+
+@router.get("/faults/active")
+async def list_active_faults():
+    engine = _get_engine()
+    return [f.model_dump() for f in engine.list_active_faults()]
+
+
+@router.post("/devices/{device_id}/fault")
+async def inject_fault(device_id: str, request: FaultInjectRequest):
+    engine = _get_engine()
+    log_bus = _get_log_bus()
+    try:
+        info = engine.inject_fault(device_id, request)
+        log_bus.emit("", "system", device_id, "fault_injected",
+                     f"Fault {request.fault_type_id} injected into {device_id}",
+                     {"fault_type": request.fault_type_id, "duration": info.duration})
+        return info.model_dump()
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+
+
+@router.get("/devices/{device_id}/fault")
+async def get_device_fault(device_id: str):
+    engine = _get_engine()
+    info = engine.get_fault(device_id)
+    if not info:
+        return {"status": "none"}
+    return info.model_dump()
+
+
+@router.delete("/devices/{device_id}/fault")
+async def clear_device_fault(device_id: str):
+    engine = _get_engine()
+    log_bus = _get_log_bus()
+    cleared = engine.clear_fault(device_id)
+    if cleared:
+        log_bus.emit("", "system", device_id, "fault_cleared",
+                     f"Fault cleared on {device_id}")
+    return {"status": "ok", "cleared": cleared}
+
+
 @router.get("/setup/status")
 async def setup_status():
     engine = _get_engine()
diff --git a/protoforge/core/device.py b/protoforge/core/device.py
index efd21ef..f04414a 100644
--- a/protoforge/core/device.py
+++ b/protoforge/core/device.py
@@ -1,5 +1,5 @@
 import time
-from typing import Any, Optional
+from typing import Any, Callable, Optional
 
 from protoforge.core.generator import DataGenerator
 from protoforge.models.device import DeviceConfig, DeviceStatus, GeneratorType, PointConfig, PointValue
@@ -13,6 +13,8 @@ def __init__(self, config: DeviceConfig, generator: DataGenerator):
         self._point_values: dict[str, Any] = {}
         self._point_configs: dict[str, PointConfig] = {}
         self._start_time: Optional[float] = None
+        # 可选的 tick 后处理钩子，由外部模块（如 FaultInjector）注册
+        self._post_tick_hooks: list[Callable[["DeviceInstance"], None]] = []
 
         for point in config.points:
             self._point_configs[point.name] = point
@@ -21,6 +23,14 @@ def __init__(self, config: DeviceConfig, generator: DataGenerator):
             else:
                 self._point_values[point.name] = self._generator.generate(point)
 
+    def register_post_tick_hook(self, hook: Callable[["DeviceInstance"], None]) -> None:
+        """注册 tick 后处理钩子，外部模块通过此接口介入，不修改 tick 逻辑本身"""
+        if hook not in self._post_tick_hooks:
+            self._post_tick_hooks.append(hook)
+
+    def unregister_post_tick_hook(self, hook: Callable[["DeviceInstance"], None]) -> None:
+        self._post_tick_hooks = [h for h in self._post_tick_hooks if h != hook]
+
     @property
     def id(self) -> str:
         return self.config.id
@@ -51,6 +61,12 @@ def tick(self) -> None:
         for name, point in self._point_configs.items():
             if point.generator_type != GeneratorType.FIXED:
                 self._point_values[name] = self._generator.generate(point)
+        # 执行后处理钩子（故障注入等外部模块在此覆盖测点值）
+        for hook in self._post_tick_hooks:
+            try:
+                hook(self)
+            except Exception:
+                pass
 
     def read_point(self, point_name: str) -> Optional[PointValue]:
         if point_name not in self._point_values:
diff --git a/protoforge/core/engine.py b/protoforge/core/engine.py
index f289425..059f10e 100644
--- a/protoforge/core/engine.py
+++ b/protoforge/core/engine.py
@@ -4,9 +4,11 @@
 from typing import Any, Optional
 
 from protoforge.core.device import DeviceInstance
+from protoforge.core.fault import fault_injector
 from protoforge.core.generator import DataGenerator
 from protoforge.core.scenario import Scenario
 from protoforge.models.device import DeviceConfig, DeviceInfo, DeviceStatus, PointValue
+from protoforge.models.fault import FaultInfo, FaultInjectRequest, FaultTypeDefinition
 from protoforge.models.scenario import ScenarioConfig, ScenarioInfo, ScenarioStatus
 from protoforge.protocols.base import ProtocolServer, ProtocolStatus
 
@@ -56,6 +58,8 @@ async def stop_protocol(self, protocol_name: str) -> None:
     async def create_device(self, config: DeviceConfig) -> DeviceInfo:
         instance = DeviceInstance(config, self._generator)
         self._devices[config.id] = instance
+        # 注册故障注入钩子
+        instance.register_post_tick_hook(fault_injector.apply)
 
         server = self._protocol_servers.get(config.protocol)
         if server and server.status == ProtocolStatus.RUNNING:
@@ -70,6 +74,9 @@ async def remove_device(self, device_id: str) -> None:
         if not instance:
             raise ValueError(f"Device not found: {device_id}")
 
+        # 清除该设备的故障
+        fault_injector.clear(device_id)
+
         server = self._protocol_servers.get(instance.protocol)
         if server and server.status == ProtocolStatus.RUNNING:
             await server.remove_device(device_id)
@@ -299,3 +306,28 @@ def _get_device_info(self, instance: DeviceInstance) -> DeviceInfo:
             status=instance.status,
             points=instance.read_all_points(),
         )
+
+    # ------------------------------------------------------------------
+    # 故障管理
+    # ------------------------------------------------------------------
+
+    def inject_fault(self, device_id: str, request: FaultInjectRequest) -> FaultInfo:
+        instance = self._devices.get(device_id)
+        if not instance:
+            raise ValueError(f"Device not found: {device_id}")
+        if instance.status != DeviceStatus.ONLINE:
+            raise ValueError(f"Device {device_id} is not online")
+        return fault_injector.inject(instance, request)
+
+    def clear_fault(self, device_id: str) -> bool:
+        return fault_injector.clear(device_id)
+
+    def get_fault(self, device_id: str) -> Optional[FaultInfo]:
+        return fault_injector.get_fault(device_id)
+
+    def list_active_faults(self) -> list[FaultInfo]:
+        return fault_injector.list_active()
+
+    @staticmethod
+    def list_fault_types() -> list[FaultTypeDefinition]:
+        return fault_injector.list_fault_types()
diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py
new file mode 100644
index 0000000..e72842d
--- /dev/null
+++ b/protoforge/core/fault.py
@@ -0,0 +1,419 @@
+"""
+故障注入模块 (FaultInjector)
+
+设计原则：
+- 完全独立，不修改 device.py / engine.py 现有逻辑
+- 通过 apply(device) 在每次 tick 后覆盖测点值，device 本身无感知
+- 支持四种场景：异常注入、自动恢复、多指标联动、渐进式劣化
+"""
+import logging
+import random
+import time
+import uuid
+from typing import Any, Optional
+
+from protoforge.models.fault import (
+    ActiveFault,
+    FaultInfo,
+    FaultInjectRequest,
+    FaultMode,
+    FaultStatus,
+    FaultTypeDefinition,
+    PointFaultConfig,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# 内置故障类型定义（基于真实工业场景）
+# ---------------------------------------------------------------------------
+
+BUILTIN_FAULT_TYPES: list[FaultTypeDefinition] = [
+
+    # ------------------------------------------------------------------
+    # 刀具磨损 — 最常见的机加工故障
+    # 特征：切削阻力增大 → 主轴电流缓慢爬升，振动幅度增大，进给速率被系统压低
+    # 模式：渐进式，持续数分钟，模拟刀具从轻度磨损到需要换刀的过程
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="tool_wear",
+        name="刀具磨损",
+        description="刀具切削刃磨损，切削阻力增大，主轴电流升高，振动增大，进给速率下降",
+        category="mechanical",
+        default_duration=300.0,
+        tags=["刀具", "磨损", "渐进"],
+        point_faults=[
+            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
+                             multiplier=2.2, noise_scale=0.8),
+            PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL,
+                             multiplier=3.0, noise_scale=0.3),
+            PointFaultConfig(point="vibration_y", mode=FaultMode.GRADUAL,
+                             multiplier=3.0, noise_scale=0.3),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL,
+                             multiplier=3.5, noise_scale=0.4),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.GRADUAL,
+                             multiplier=0.45, noise_scale=20.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 刀具崩刃 — 突发性刀具失效
+    # 特征：瞬间冲击 → 振动突增，电流瞬间峰值，进给立即停止
+    # 模式：瞬间注入，持续时间短（机床通常会触发报警停机）
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="tool_breakage",
+        name="刀具崩刃",
+        description="刀具突发性崩刃，振动剧烈突增，主轴电流峰值，进给停止",
+        category="mechanical",
+        default_duration=15.0,
+        tags=["刀具", "崩刃", "突发"],
+        point_faults=[
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=4.5, noise_scale=2.0),
+            PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT,
+                             multiplier=8.0, noise_scale=1.5),
+            PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT,
+                             multiplier=8.0, noise_scale=1.5),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
+                             multiplier=10.0, noise_scale=2.0),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             target_value=0.0, noise_scale=0.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 主轴过热 — 长时间高负荷或冷却系统故障
+    # 特征：主轴电流持续偏高，转速因热保护逐渐降低
+    # 模式：渐进式，持续时间较长
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="spindle_overheat",
+        name="主轴过热",
+        description="主轴长时间高负荷运转或冷却不足，电流持续偏高，转速因热保护下降",
+        category="thermal",
+        default_duration=240.0,
+        tags=["主轴", "过热", "渐进"],
+        point_faults=[
+            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
+                             multiplier=1.8, noise_scale=1.2),
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL,
+                             multiplier=0.6, noise_scale=50.0),
+            PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL,
+                             multiplier=1.5, noise_scale=0.2),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL,
+                             multiplier=1.5, noise_scale=0.2),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 主轴轴承故障 — 轴承磨损或润滑不足
+    # 特征：振动频率特征变化，整体振动幅度升高，电流略升
+    # 模式：渐进式
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="spindle_bearing_fault",
+        name="主轴轴承故障",
+        description="主轴轴承磨损或润滑不足，振动幅度持续升高，伴随电流轻微上升",
+        category="mechanical",
+        default_duration=360.0,
+        tags=["主轴", "轴承", "渐进"],
+        point_faults=[
+            PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL,
+                             multiplier=4.0, noise_scale=0.5),
+            PointFaultConfig(point="vibration_y", mode=FaultMode.GRADUAL,
+                             multiplier=4.0, noise_scale=0.5),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL,
+                             multiplier=5.0, noise_scale=0.8),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
+                             multiplier=1.3, noise_scale=0.5),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 进给堵转 — 工件夹紧松动或切削量过大导致进给卡死
+    # 特征：进给速率瞬间降为 0，主轴电流急剧升高
+    # 模式：瞬间注入
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="feed_stall",
+        name="进给堵转",
+        description="进给轴卡死，进给速率降为零，主轴电流急剧升高",
+        category="process",
+        default_duration=20.0,
+        tags=["进给", "堵转", "突发"],
+        point_faults=[
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             target_value=0.0, noise_scale=0.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=3.8, noise_scale=1.5),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
+                             multiplier=5.0, noise_scale=1.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 振动异常 — 工件装夹松动或共振
+    # 特征：三轴振动突然大幅增加，其他指标基本正常
+    # 模式：瞬间注入
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="vibration_spike",
+        name="振动异常",
+        description="工件装夹松动或切削共振，三轴振动突然大幅增加",
+        category="mechanical",
+        default_duration=60.0,
+        tags=["振动", "装夹", "突发"],
+        point_faults=[
+            PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT,
+                             multiplier=6.0, noise_scale=1.0),
+            PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT,
+                             multiplier=6.0, noise_scale=1.0),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
+                             multiplier=7.0, noise_scale=1.2),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 切削液不足 — 冷却润滑失效
+    # 特征：热量积累 → 振动缓慢升高，电流缓慢升高，进给略降
+    # 模式：渐进式，速度较慢
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="coolant_failure",
+        name="切削液不足",
+        description="切削液供给不足，冷却润滑失效，热量积累导致振动和电流缓慢升高",
+        category="process",
+        default_duration=480.0,
+        tags=["切削液", "冷却", "渐进"],
+        point_faults=[
+            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
+                             multiplier=1.6, noise_scale=0.8),
+            PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL,
+                             multiplier=2.0, noise_scale=0.3),
+            PointFaultConfig(point="vibration_y", mode=FaultMode.GRADUAL,
+                             multiplier=2.0, noise_scale=0.3),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL,
+                             multiplier=2.5, noise_scale=0.4),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.GRADUAL,
+                             multiplier=0.75, noise_scale=15.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 电源波动 — 供电不稳定
+    # 特征：主轴转速和进给速率出现随机波动，电流不稳定
+    # 模式：瞬间注入（持续期间持续抖动）
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="power_fluctuation",
+        name="电源波动",
+        description="供电电压不稳定，主轴转速和进给速率出现随机波动",
+        category="electrical",
+        default_duration=90.0,
+        tags=["电源", "波动", "突发"],
+        point_faults=[
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=300.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=5.0),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=150.0),
+        ],
+    ),
+]
+
+# 按 id 索引
+_FAULT_TYPE_MAP: dict[str, FaultTypeDefinition] = {ft.id: ft for ft in BUILTIN_FAULT_TYPES}
+
+
+# ---------------------------------------------------------------------------
+# FaultInjector
+# ---------------------------------------------------------------------------
+
+class FaultInjector:
+    """
+    故障注入器，完全独立于 DeviceInstance。
+
+    使用方式：
+        injector = FaultInjector()
+        injector.inject(device, request)   # 注入故障
+        injector.apply(device)             # 每次 tick 后调用，覆盖测点值
+        injector.clear(device_id)          # 手动清除
+    """
+
+    def __init__(self):
+        # device_id -> ActiveFault
+        self._active: dict[str, ActiveFault] = {}
+
+    # ------------------------------------------------------------------
+    # 公开接口
+    # ------------------------------------------------------------------
+
+    def inject(self, device: Any, request: FaultInjectRequest) -> FaultInfo:
+        """向设备注入故障，返回故障信息"""
+        fault_type = _FAULT_TYPE_MAP.get(request.fault_type_id)
+        if not fault_type:
+            raise ValueError(f"Unknown fault type: {request.fault_type_id}")
+
+        duration = request.duration if request.duration is not None else fault_type.default_duration
+
+        # 记录注入时各测点的当前基线值
+        baseline: dict[str, float] = {}
+        for pf in fault_type.point_faults:
+            val = device._point_values.get(pf.point)
+            if val is not None:
+                try:
+                    baseline[pf.point] = float(val)
+                except (TypeError, ValueError):
+                    baseline[pf.point] = 0.0
+
+        fault = ActiveFault(
+            fault_id=uuid.uuid4().hex[:12],
+            device_id=device.id,
+            fault_type_id=fault_type.id,
+            fault_type_name=fault_type.name,
+            intensity=max(0.0, min(1.0, request.intensity)),
+            duration=duration,
+            started_at=time.time(),
+            baseline_values=baseline,
+        )
+        self._active[device.id] = fault
+        logger.info("Fault injected: device=%s type=%s duration=%.0fs",
+                    device.id, fault_type.id, duration)
+        return self._to_info(fault, fault_type)
+
+    def apply(self, device: Any) -> None:
+        """
+        在 device.tick() 之后调用，将故障效果覆盖到 _point_values。
+        故障超时后自动清除。
+        """
+        fault = self._active.get(device.id)
+        if not fault:
+            return
+
+        now = time.time()
+        elapsed = now - fault.started_at
+
+        if elapsed >= fault.duration:
+            self._expire(device, fault)
+            return
+
+        fault_type = _FAULT_TYPE_MAP.get(fault.fault_type_id)
+        if not fault_type:
+            return
+
+        # progress: 0.0（刚注入）→ 1.0（达到峰值）
+        progress = min(elapsed / fault.duration, 1.0)
+
+        for pf in fault_type.point_faults:
+            if pf.point not in device._point_values:
+                continue
+            baseline = fault.baseline_values.get(pf.point, 0.0)
+            if baseline == 0.0:
+                # 基线为 0 时用当前值兜底，避免乘法无效
+                try:
+                    baseline = float(device._point_values[pf.point]) or 1.0
+                except (TypeError, ValueError):
+                    continue
+
+            device._point_values[pf.point] = self._compute_value(
+                pf, baseline, progress, fault.intensity
+            )
+
+    def clear(self, device_id: str) -> bool:
+        """手动清除故障，不恢复基线（让生成器自然恢复）"""
+        if device_id not in self._active:
+            return False
+        fault = self._active.pop(device_id)
+        fault.status = FaultStatus.CLEARED
+        fault.cleared_at = time.time()
+        logger.info("Fault cleared manually: device=%s type=%s", device_id, fault.fault_type_id)
+        return True
+
+    def get_fault(self, device_id: str) -> Optional[FaultInfo]:
+        fault = self._active.get(device_id)
+        if not fault:
+            return None
+        fault_type = _FAULT_TYPE_MAP.get(fault.fault_type_id)
+        return self._to_info(fault, fault_type)
+
+    def list_active(self) -> list[FaultInfo]:
+        result = []
+        for fault in self._active.values():
+            fault_type = _FAULT_TYPE_MAP.get(fault.fault_type_id)
+            result.append(self._to_info(fault, fault_type))
+        return result
+
+    @staticmethod
+    def list_fault_types() -> list[FaultTypeDefinition]:
+        return BUILTIN_FAULT_TYPES
+
+    @staticmethod
+    def get_fault_type(fault_type_id: str) -> Optional[FaultTypeDefinition]:
+        return _FAULT_TYPE_MAP.get(fault_type_id)
+
+    # ------------------------------------------------------------------
+    # 内部逻辑
+    # ------------------------------------------------------------------
+
+    def _compute_value(
+        self,
+        pf: PointFaultConfig,
+        baseline: float,
+        progress: float,
+        intensity: float,
+    ) -> float:
+        """根据故障配置和当前进度计算覆盖值"""
+        if pf.mode == FaultMode.INSTANT:
+            # 瞬间模式：直接用目标值，不随时间变化
+            if pf.target_value is not None:
+                target = pf.target_value
+            elif pf.multiplier is not None:
+                target = baseline * (1.0 + (pf.multiplier - 1.0) * intensity)
+            else:
+                target = baseline
+        else:
+            # 渐进模式：随 progress 线性劣化
+            if pf.target_value is not None:
+                target = baseline + (pf.target_value - baseline) * progress * intensity
+            elif pf.multiplier is not None:
+                target = baseline * (1.0 + (pf.multiplier - 1.0) * progress * intensity)
+            else:
+                target = baseline
+
+        # 叠加随机噪声，模拟真实信号抖动
+        if pf.noise_scale > 0:
+            target += random.gauss(0, pf.noise_scale * intensity)
+
+        return round(max(0.0, target), 4)
+
+    def _expire(self, device: Any, fault: ActiveFault) -> None:
+        """故障到期，从 active 中移除，让生成器自然恢复正常值"""
+        self._active.pop(device.id, None)
+        logger.info("Fault expired: device=%s type=%s", device.id, fault.fault_type_id)
+
+    @staticmethod
+    def _to_info(fault: ActiveFault, fault_type: Optional[FaultTypeDefinition]) -> FaultInfo:
+        """Build the API-facing FaultInfo snapshot (elapsed time, progress, affected points) for a fault."""
+        elapsed = time.time() - fault.started_at
+        # A non-positive duration has no meaningful ratio; report it as complete instead of dividing by zero.
+        progress = min(elapsed / fault.duration, 1.0) if fault.duration > 0 else 1.0
+        return FaultInfo(
+            fault_id=fault.fault_id,
+            device_id=fault.device_id,
+            fault_type_id=fault.fault_type_id,
+            fault_type_name=fault.fault_type_name,
+            status=fault.status,
+            intensity=fault.intensity,
+            duration=fault.duration,
+            elapsed=round(elapsed, 1),
+            progress=round(progress, 3),
+            affected_points=[pf.point for pf in fault_type.point_faults] if fault_type else [],
+            started_at=fault.started_at,
+        )
+
+
+# 全局单例
+fault_injector = FaultInjector()
diff --git a/protoforge/models/fault.py b/protoforge/models/fault.py
new file mode 100644
index 0000000..cc038e0
--- /dev/null
+++ b/protoforge/models/fault.py
@@ -0,0 +1,77 @@
+from enum import Enum
+from typing import Any, Optional
+
+from pydantic import BaseModel, Field
+
+
+class FaultMode(str, Enum):
+    """故障注入模式"""
+    INSTANT = "instant"       # 瞬间跳变到异常值，持续 duration 后恢复
+    GRADUAL = "gradual"       # 渐进式劣化，随时间线性恶化，到 duration 时达到峰值后恢复
+
+
+class FaultStatus(str, Enum):
+    ACTIVE = "active"
+    RECOVERING = "recovering"
+    CLEARED = "cleared"
+
+
+class PointFaultConfig(BaseModel):
+    """单个测点的故障行为定义"""
+    point: str
+    mode: FaultMode = FaultMode.INSTANT
+
+    # INSTANT 模式：直接设置为 target_value（若为 None 则用 multiplier 乘以当前值）
+    target_value: Optional[float] = None
+    multiplier: Optional[float] = None     # 异常值 = 当前正常值 × multiplier
+
+    # GRADUAL 模式：从当前值线性劣化到 target_value 或 multiplier 倍
+    # 劣化程度 = progress(0~1) × (target - baseline)
+    noise_scale: float = 0.0               # 叠加随机噪声幅度，模拟真实抖动
+
+
+class FaultTypeDefinition(BaseModel):
+    """故障类型定义，描述一种真实故障场景"""
+    id: str
+    name: str
+    description: str
+    category: str                          # 故障分类：mechanical / electrical / thermal / process
+    default_duration: float = 120.0        # 默认持续时间（秒）
+    point_faults: list[PointFaultConfig] = Field(default_factory=list)
+    tags: list[str] = Field(default_factory=list)
+
+
+class FaultInjectRequest(BaseModel):
+    """Request body for injecting a fault; duration must be positive because progress is elapsed/duration."""
+    fault_type_id: str
+    duration: Optional[float] = Field(default=None, gt=0)  # seconds; None means use the fault type's default
+    intensity: float = 1.0                 # severity factor 0~1 that scales the degradation amplitude
+
+
+class ActiveFault(BaseModel):
+    """当前激活的故障实例"""
+    fault_id: str                          # 唯一实例 ID
+    device_id: str
+    fault_type_id: str
+    fault_type_name: str
+    status: FaultStatus = FaultStatus.ACTIVE
+    intensity: float = 1.0
+    duration: float = 120.0
+    started_at: float = 0.0
+    cleared_at: Optional[float] = None
+    baseline_values: dict[str, float] = Field(default_factory=dict)  # 注入时的正常基线值
+
+
+class FaultInfo(BaseModel):
+    """故障状态信息（API 响应用）"""
+    fault_id: str
+    device_id: str
+    fault_type_id: str
+    fault_type_name: str
+    status: FaultStatus
+    intensity: float
+    duration: float
+    elapsed: float
+    progress: float                        # 0~1，故障进度
+    affected_points: list[str]
+    started_at: float

From 05f993cae5374642bb5447e1aa260e87aec012f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 20 May 2026 14:18:21 +0800
Subject: [PATCH 16/36] feat(fault): support fault

---
 web/src/api.js            |   6 ++
 web/src/views/Devices.vue | 169 +++++++++++++++++++++++++++++++++++---
 2 files changed, 164 insertions(+), 11 deletions(-)

diff --git a/web/src/api.js b/web/src/api.js
index 2bc15b3..059d4bd 100644
--- a/web/src/api.js
+++ b/web/src/api.js
@@ -128,4 +128,10 @@ export default {
 
   getSettings: () => d(api.get('/settings')),
   updateSettings: (updates) => d(api.put('/settings', updates)),
+
+  getFaultTypes: () => d(api.get('/faults/types')),
+  getActiveFaults: () => d(api.get('/faults/active')),
+  injectFault: (deviceId, faultTypeId, duration, intensity) => d(api.post(`/devices/${deviceId}/fault`, { fault_type_id: faultTypeId, duration, intensity })),
+  getDeviceFault: (deviceId) => d(api.get(`/devices/${deviceId}/fault`)),
+  clearDeviceFault: (deviceId) => d(api.delete(`/devices/${deviceId}/fault`)),
 }
diff --git a/web/src/views/Devices.vue b/web/src/views/Devices.vue
index 022f3ac..65e0535 100644
--- a/web/src/views/Devices.vue
+++ b/web/src/views/Devices.vue
@@ -90,14 +90,62 @@
       <n-modal v-model:show="showPointsModal" preset="card" title="设备测点" style="width:700px">
         <n-data-table :columns="pointColumns" :data="currentPoints" :bordered="false" size="small" />
       </n-modal>
+
+      <!-- 故障注入 Modal -->
+      <n-modal v-model:show="showFaultModal" preset="card" title="故障注入" style="width:480px">
+        <n-space vertical size="medium">
+          <n-text depth="3" style="font-size:13px">设备：{{ faultTargetDevice?.name }}</n-text>
+          <n-form-item label="故障类型" label-placement="left" label-width="80">
+            <n-select
+              v-model:value="faultTypeId"
+              :options="faultTypeOptions"
+              placeholder="选择故障类型"
+              @update:value="onFaultTypeChange"
+            />
+          </n-form-item>
+          <n-alert v-if="selectedFaultType" type="warning" :bordered="false" style="font-size:12px">
+            <div style="font-weight:500;margin-bottom:4px">{{ selectedFaultType.name }} · {{ faultCategoryLabel(selectedFaultType.category) }}</div>
+            <div style="color:#94a3b8">{{ selectedFaultType.description }}</div>
+            <div style="margin-top:6px;color:#94a3b8">
+              影响测点：{{ selectedFaultType.point_faults.map(p => p.point).join('、') }}
+            </div>
+          </n-alert>
+          <n-form-item label="持续时间" label-placement="left" label-width="80">
+            <n-input-number
+              v-model:value="faultDuration"
+              :min="5"
+              :max="3600"
+              style="width:100%"
+            >
+              <template #suffix>秒</template>
+            </n-input-number>
+          </n-form-item>
+          <n-form-item label="故障强度" label-placement="left" label-width="80">
+            <n-space vertical style="width:100%">
+              <n-slider v-model:value="faultIntensity" :min="0.1" :max="1.0" :step="0.1" />
+              <n-text depth="3" style="font-size:12px">
+                {{ faultIntensityLabel }}（{{ faultIntensity }}）
+              </n-text>
+            </n-space>
+          </n-form-item>
+        </n-space>
+        <template #action>
+          <n-space justify="end">
+            <n-button @click="showFaultModal = false">取消</n-button>
+            <n-button type="error" :loading="faultLoading" :disabled="!faultTypeId" @click="doInjectFault">
+              注入故障
+            </n-button>
+          </n-space>
+        </template>
+      </n-modal>
     </n-space>
   </div>
 </template>
 
 <script setup>
-import { ref, computed, onMounted, h } from 'vue'
+import { ref, computed, onMounted, onUnmounted, h } from 'vue'
 import { NSpace, NSelect, NButton, NDataTable, NModal, NForm, NFormItem, NInput, NTag,
-  NSteps, NStep, NText, NAlert, useMessage, useDialog } from 'naive-ui'
+  NSteps, NStep, NText, NAlert, NInputNumber, NSlider, useMessage, useDialog } from 'naive-ui'
 import { useRouter } from 'vue-router'
 import api from '../api.js'
 
@@ -123,6 +171,17 @@ const qcTemplateId = ref(null)
 const qcDeviceName = ref('')
 const qcLoading = ref(false)
 
+// 故障注入状态
+const showFaultModal = ref(false)
+const faultTargetDevice = ref(null)
+const faultTypes = ref([])
+const faultTypeId = ref(null)
+const faultDuration = ref(120)
+const faultIntensity = ref(1.0)
+const faultLoading = ref(false)
+// device_id -> fault info，用于在列表中显示故障状态
+const activeFaults = ref({})
+
 const protocolLabels = {
   modbus_tcp: 'Modbus TCP', modbus_rtu: 'Modbus RTU', opcua: 'OPC-UA', mqtt: 'MQTT',
   http: 'HTTP', gb28181: 'GB28181', bacnet: 'BACnet', s7: 'S7',
@@ -184,13 +243,25 @@ const columns = [
   },
   { title: '测点', key: 'points', width: 70, render: (row) => (row.points || []).length },
   {
-    title: '操作', key: 'actions', width: 280,
+    title: '故障', key: 'fault', width: 90,
+    render: (row) => {
+      const fault = activeFaults.value[row.id]
+      if (!fault || fault.status === 'none') return h(NTag, { size: 'tiny', bordered: false }, () => '正常')
+      const pct = Math.round((fault.progress || 0) * 100)
+      return h(NTag, { size: 'tiny', type: 'error', bordered: false }, () => `${fault.fault_type_name} ${pct}%`)
+    }
+  },
+  {
+    title: '操作', key: 'actions', width: 320,
     render: (row) => h(NSpace, { size: 4 }, () => [
       h(NButton, { size: 'tiny', tertiary: true, onClick: () => viewPoints(row.id) }, () => '测点'),
       h(NButton, { size: 'tiny', tertiary: true, onClick: () => openEditDevice(row) }, () => '编辑'),
       row.status === 'online' || row.status === 'running'
         ? h(NButton, { size: 'tiny', type: 'warning', secondary: true, onClick: () => toggleDevice(row.id, 'stop') }, () => '停止')
         : h(NButton, { size: 'tiny', type: 'primary', secondary: true, onClick: () => toggleDevice(row.id, 'start') }, () => '启动'),
+      activeFaults.value[row.id] && activeFaults.value[row.id].status !== 'none'
+        ? h(NButton, { size: 'tiny', type: 'warning', secondary: true, onClick: () => stopFault(row.id) }, () => '停止故障')
+        : h(NButton, { size: 'tiny', type: 'error', ghost: true, disabled: row.status !== 'online', onClick: () => openFaultModal(row) }, () => '注入故障'),
       h(NButton, { size: 'tiny', type: 'error', secondary: true, onClick: () => confirmDeleteDevice(row) }, () => '删除'),
     ])
   },
@@ -220,13 +291,6 @@ async function doQuickCreate() {
   } finally { qcLoading.value = false }
 }
 
-async function loadData() {
-  try {
-    const [devRes, protoRes, tmplRes] = await Promise.all([api.getDevices(), api.getProtocols(), api.getTemplates()])
-    devices.value = devRes; protocols.value = protoRes; templates.value = tmplRes
-  } catch (e) { message.error('加载数据失败: ' + (e.response?.data?.detail || e.message)) }
-}
-
 async function createDevice() {
   creating.value = true
   try {
@@ -282,5 +346,88 @@ async function viewPoints(id) {
   catch (e) { message.error('读取测点失败: ' + (e.response?.data?.detail || e.message)) }
 }
 
-onMounted(loadData)
+// 故障注入相关
+const faultTypeOptions = computed(() =>
+  faultTypes.value.map(t => ({ label: `${t.name}（${faultCategoryLabel(t.category)}）`, value: t.id }))
+)
+
+const selectedFaultType = computed(() =>
+  faultTypes.value.find(t => t.id === faultTypeId.value) || null
+)
+
+const faultIntensityLabel = computed(() => {
+  const v = faultIntensity.value
+  if (v <= 0.3) return '轻微'
+  if (v <= 0.6) return '中等'
+  if (v <= 0.8) return '严重'
+  return '极严重'
+})
+
+function faultCategoryLabel(category) {
+  const map = { mechanical: '机械', thermal: '热', electrical: '电气', process: '工艺' }
+  return map[category] || category
+}
+
+function onFaultTypeChange(val) {
+  const t = faultTypes.value.find(f => f.id === val)
+  if (t && t.default_duration) faultDuration.value = t.default_duration
+}
+
+function openFaultModal(row) {
+  faultTargetDevice.value = row
+  faultTypeId.value = null
+  faultDuration.value = 120
+  faultIntensity.value = 1.0
+  showFaultModal.value = true
+}
+
+async function doInjectFault() {
+  if (!faultTypeId.value || !faultTargetDevice.value) return
+  faultLoading.value = true
+  try {
+    await api.injectFault(faultTargetDevice.value.id, faultTypeId.value, faultDuration.value, faultIntensity.value)
+    message.success(`已向设备 "${faultTargetDevice.value.name}" 注入故障`)
+    showFaultModal.value = false
+    await loadFaultStatus()
+  } catch (e) {
+    message.error('注入失败: ' + (e.response?.data?.detail || e.message))
+  } finally { faultLoading.value = false }
+}
+
+async function stopFault(deviceId) {
+  try {
+    await api.clearDeviceFault(deviceId)
+    message.success('故障已停止')
+    await loadFaultStatus()
+  } catch (e) {
+    message.error('停止故障失败: ' + (e.response?.data?.detail || e.message))
+  }
+}
+
+async function loadFaultStatus() {
+  try {
+    const list = await api.getActiveFaults()
+    const map = {}
+    for (const f of list) map[f.device_id] = f
+    activeFaults.value = map
+  } catch (e) { /* 静默失败 */ }
+}
+
+async function loadData() {
+  try {
+    const [devRes, protoRes, tmplRes, ftRes] = await Promise.all([
+      api.getDevices(), api.getProtocols(), api.getTemplates(), api.getFaultTypes()
+    ])
+    devices.value = devRes; protocols.value = protoRes; templates.value = tmplRes; faultTypes.value = ftRes
+    await loadFaultStatus()
+  } catch (e) { message.error('加载数据失败: ' + (e.response?.data?.detail || e.message)) }
+}
+
+let faultPollTimer = null
+onMounted(() => {
+  loadData()
+  faultPollTimer = setInterval(loadFaultStatus, 3000)
+})
+
+onUnmounted(() => { if (faultPollTimer) clearInterval(faultPollTimer) })
 </script>

From 550d8e20b98481a5390be716104142d6aec87770 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 20 May 2026 19:57:06 +0800
Subject: [PATCH 17/36] feat(ai): support ai

---
 ai/predict.py | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 97 insertions(+)
 create mode 100755 ai/predict.py

diff --git a/ai/predict.py b/ai/predict.py
new file mode 100755
index 0000000..b70f822
--- /dev/null
+++ b/ai/predict.py
@@ -0,0 +1,97 @@
+# -*- coding: utf-8 -*-
+
+import requests
+import numpy as np
+from datetime import datetime, timedelta
+
+VM_URL = "http://localhost:8428"
+DEVICE_ID = "fanuc-cnc"
+METRIC = f'feed_rate{{device_id="{DEVICE_ID}"}}'
+
+def fetch_history(minutes=30):
+    """Return (timestamps, values) for METRIC over the last `minutes`; ([], []) if the query fails."""
+    end = datetime.now()
+    params = {"query": METRIC, "start": (end - timedelta(minutes=minutes)).timestamp(),
+              "end": end.timestamp(), "step": "1s"}
+    try:
+        # The timeout keeps the polling loop from blocking forever on a stalled server.
+        resp = requests.get(f"{VM_URL}/api/v1/query_range", params=params, timeout=10)
+        resp.raise_for_status()
+        result = resp.json()["data"]["result"]
+    except (requests.RequestException, KeyError, ValueError) as e:
+        print(f"fetch_history failed: {e}")
+        return [], []
+    if not result:
+        return [], []
+    values = result[0]["values"]
+    return [float(v[0]) for v in values], [float(v[1]) for v in values]
+
+def predict_next(ts, ys, horizon=60):
+    """
+    用FFT检测主频，拟合正弦波，外推未来horizon秒
+    适合周期性信号
+    """
+    if len(ys) < 60:
+        return [], []
+
+    ys = np.array(ys)
+    n = len(ys)
+    dt = 1.0  # 1秒采样
+
+    # FFT找主频
+    fft = np.fft.rfft(ys - ys.mean())
+    freqs = np.fft.rfftfreq(n, d=dt)
+    dominant_idx = np.argmax(np.abs(fft[1:])) + 1
+    dominant_freq = freqs[dominant_idx]
+    period = 1.0 / dominant_freq if dominant_freq > 0 else 60
+
+    # 拟合：y = A*sin(2π/T * t + φ) + offset
+    from scipy.optimize import curve_fit
+    t_rel = np.arange(n, dtype=float)
+    offset = ys.mean()
+    amplitude = (ys.max() - ys.min()) / 2
+
+    def sine_model(t, A, T, phi, C):
+        return A * np.sin(2 * np.pi / T * t + phi) + C
+
+    try:
+        popt, _ = curve_fit(
+            sine_model, t_rel, ys,
+            p0=[amplitude, period, 0, offset],
+            maxfev=5000
+        )
+        # 外推
+        t_future = np.arange(n, n + horizon, dtype=float)
+        y_pred = sine_model(t_future, *popt)
+        ts_future = [ts[-1] + i + 1 for i in range(horizon)]
+        return ts_future, y_pred.tolist()
+    except Exception:
+        # 拟合失败降级为线性
+        slope = (ys[-1] - ys[-10]) / 10
+        ts_future = [ts[-1] + i + 1 for i in range(horizon)]
+        y_pred = [ys[-1] + slope * (i + 1) for i in range(horizon)]
+        return ts_future, y_pred
+
+def write_predictions(ts_future, y_pred, metric_name="protoforge_feed_rate_predicted"):
+    """Write predicted points to VictoriaMetrics as Prometheus text lines with millisecond timestamps."""
+    lines = [f'{metric_name}{{device_id="{DEVICE_ID}"}} {y:.2f} {int(t * 1000)}'
+             for t, y in zip(ts_future, y_pred)]
+    try:  # bound the call and surface HTTP errors instead of hanging or silently dropping the batch
+        requests.post(f"{VM_URL}/api/v1/import/prometheus", data="\n".join(lines), timeout=10).raise_for_status()
+    except requests.RequestException as e:
+        print(f"write_predictions failed: {e}")
+
+def run_once():
+    ts, ys = fetch_history(minutes=30)
+    if len(ys) < 60:
+        print("数据不足")
+        return
+    ts_future, y_pred = predict_next(ts, ys, horizon=120)
+    write_predictions(ts_future, y_pred)
+    print(f"写入 {len(y_pred)} 个预测点，预测到 +{len(y_pred)}s")
+
+if __name__ == "__main__":
+    import time
+    while True:
+        run_once()
+        time.sleep(30)  # 每30秒重新预测一次

From e8f70d09c27d53bff6c8f310b7a2064496632a33 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 20 May 2026 20:29:13 +0800
Subject: [PATCH 18/36] feat(predict_v2): add predict_v2 python file

---
 ai/predict_v2.py | 206 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 206 insertions(+)
 create mode 100755 ai/predict_v2.py

diff --git a/ai/predict_v2.py b/ai/predict_v2.py
new file mode 100755
index 0000000..df5dd97
--- /dev/null
+++ b/ai/predict_v2.py
@@ -0,0 +1,206 @@
+# -*- coding: utf-8 -*-
+"""
+ProtoForge 预测服务 v2
+从 VictoriaMetrics 拉取历史数据，用 FFT + 正弦拟合预测未来值，写回 VM。
+预测值时间戳为未来时间，Grafana 中预测线出现在实测线右侧延伸处。
+"""
+
+import logging
+import time
+from datetime import datetime, timedelta
+
+import numpy as np
+import requests
+from scipy.optimize import curve_fit
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+)
+logger = logging.getLogger(__name__)
+
+# ── 配置 ──────────────────────────────────────────────────────────────────────
+VM_URL = "http://localhost:8428"
+
+# 要预测的指标列表，每项：(查询表达式, 写回指标名)
+PREDICT_TARGETS = [
+    ('feed_rate{device_id="fanuc-cnc"}',       "feed_rate_predicted"),
+    ('spindle_speed{device_id="fanuc-cnc"}',    "spindle_speed_predicted"),
+    ('spindle_current{device_id="fanuc-cnc"}',  "spindle_current_predicted"),
+    ('vibration_x{device_id="fanuc-cnc"}',      "vibration_x_predicted"),
+    ('vibration_y{device_id="fanuc-cnc"}',      "vibration_y_predicted"),
+    ('vibration_z{device_id="fanuc-cnc"}',      "vibration_z_predicted"),
+]
+
+HISTORY_MINUTES = 30   # 拉取多少分钟历史数据用于拟合
+HORIZON_SECONDS = 120  # 预测未来多少秒
+POLL_INTERVAL   = 30   # 每隔多少秒重新预测一次
+MIN_POINTS      = 120  # 至少需要多少个历史点才开始预测
+# ─────────────────────────────────────────────────────────────────────────────
+
+
+def fetch_history(query: str, minutes: int = HISTORY_MINUTES):
+    """从 VictoriaMetrics 拉取历史时序数据，返回 (timestamps, values)。"""
+    now = datetime.now()
+    start = now - timedelta(minutes=minutes)
+    try:
+        resp = requests.get(
+            f"{VM_URL}/api/v1/query_range",
+            params={
+                "query": query,
+                "start": start.timestamp(),
+                "end":   now.timestamp(),
+                "step":  "1s",
+            },
+            timeout=10,
+        )
+        resp.raise_for_status()
+    except requests.RequestException as e:
+        logger.error("拉取数据失败 query=%s: %s", query, e)
+        return [], []
+
+    result = resp.json().get("data", {}).get("result", [])
+    if not result:
+        return [], []
+
+    values = result[0]["values"]
+    ts = [float(v[0]) for v in values]
+    ys = [float(v[1]) for v in values]
+    return ts, ys
+
+
+def _sine_model(t, A, T, phi, C):
+    return A * np.sin(2 * np.pi / T * t + phi) + C
+
+
+def predict_next(ts: list, ys: list, horizon: int = HORIZON_SECONDS):
+    """
+    用 FFT 检测主频，拟合正弦波，外推未来 horizon 秒。
+    返回 (future_timestamps, predicted_values)，时间戳均在最后一个真实点之后。
+    降级策略：拟合失败时用最近 10 点线性外推。
+    """
+    ys_arr = np.array(ys)
+    n = len(ys_arr)
+
+    # ── FFT 找主频 ────────────────────────────────────────────────────────────
+    fft_vals = np.fft.rfft(ys_arr - ys_arr.mean())
+    freqs = np.fft.rfftfreq(n, d=1.0)  # d=1 表示 1 秒采样间隔
+    # 跳过直流分量（index 0）
+    dominant_idx = int(np.argmax(np.abs(fft_vals[1:]))) + 1
+    dominant_freq = freqs[dominant_idx]
+    period = 1.0 / dominant_freq if dominant_freq > 0 else 60.0
+    period = float(np.clip(period, 5.0, 3600.0))  # 限制在合理范围
+
+    # ── 正弦拟合 ──────────────────────────────────────────────────────────────
+    t_rel = np.arange(n, dtype=float)
+    amplitude = (ys_arr.max() - ys_arr.min()) / 2.0
+    offset = float(ys_arr.mean())
+
+    # 最后一个真实数据点的 Unix 时间戳（秒）
+    last_ts = ts[-1]
+
+    try:
+        popt, _ = curve_fit(
+            _sine_model,
+            t_rel,
+            ys_arr,
+            p0=[amplitude, period, 0.0, offset],
+            bounds=(
+                [0,       5.0,    -np.pi, ys_arr.min()],
+                [np.inf,  3600.0,  np.pi, ys_arr.max()],
+            ),
+            maxfev=8000,
+        )
+        t_future = np.arange(n, n + horizon, dtype=float)
+        y_pred = _sine_model(t_future, *popt)
+        # 裁剪到历史数据值域，避免外推飞出合理范围
+        y_pred = np.clip(y_pred, ys_arr.min() * 0.5, ys_arr.max() * 1.5)
+
+        # 未来时间戳：last_ts + 1s, +2s, ..., +horizon s
+        ts_future = [last_ts + i + 1 for i in range(horizon)]
+        logger.debug("正弦拟合成功 period=%.1fs amplitude=%.2f", popt[1], popt[0])
+        return ts_future, y_pred.tolist()
+
+    except Exception as e:
+        logger.warning("正弦拟合失败，降级为线性外推: %s", e)
+        tail = min(10, n)
+        slope = (ys_arr[-1] - ys_arr[-tail]) / tail
+        ts_future = [last_ts + i + 1 for i in range(horizon)]
+        y_pred = [float(ys_arr[-1] + slope * (i + 1)) for i in range(horizon)]
+        return ts_future, y_pred
+
+
+def write_predictions(ts_future: list, y_pred: list, metric_name: str, extra_labels: dict = None):
+    """
+    将预测值以 Prometheus exposition 格式写入 VictoriaMetrics。
+    时间戳为毫秒级 Unix 时间戳，对应未来时间点。
+    """
+    label_str = ""
+    if extra_labels:
+        parts = [f'{k}="{v}"' for k, v in extra_labels.items()]
+        label_str = "{" + ",".join(parts) + "}"
+
+    lines = []
+    for t, y in zip(ts_future, y_pred):
+        ts_ms = int(t * 1000)
+        lines.append(f"{metric_name}{label_str} {y:.4f} {ts_ms}")
+
+    payload = "\n".join(lines)
+    try:
+        resp = requests.post(
+            f"{VM_URL}/api/v1/import/prometheus",
+            data=payload,
+            timeout=10,
+        )
+        resp.raise_for_status()
+    except requests.RequestException as e:
+        logger.error("写入预测数据失败 metric=%s: %s", metric_name, e)
+
+
+def _parse_labels(query: str) -> dict:
+    """从查询表达式中解析标签，如 feed_rate{device_id="fanuc-cnc"} → {"device_id": "fanuc-cnc"}"""
+    labels = {}
+    if "{" not in query:
+        return labels
+    label_part = query[query.index("{") + 1: query.index("}")]
+    for item in label_part.split(","):
+        if "=" in item:
+            k, v = item.split("=", 1)
+            labels[k.strip()] = v.strip().strip('"')
+    return labels
+
+
+def run_once():
+    now_str = datetime.now().strftime("%H:%M:%S")
+    for query, pred_metric in PREDICT_TARGETS:
+        ts, ys = fetch_history(query)
+        if len(ys) < MIN_POINTS:
+            logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
+            continue
+
+        ts_future, y_pred = predict_next(ts, ys, horizon=HORIZON_SECONDS)
+        if not ts_future:
+            continue
+
+        extra_labels = _parse_labels(query)
+        write_predictions(ts_future, y_pred, pred_metric, extra_labels)
+
+        future_time = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
+        logger.info(
+            "[%s] %-40s → %-35s 写入 %d 点，预测至 %s",
+            now_str, query, pred_metric, len(y_pred), future_time,
+        )
+
+
+def main():
+    logger.info(
+        "预测服务启动  VM=%s  预测窗口=%ds  轮询间隔=%ds",
+        VM_URL, HORIZON_SECONDS, POLL_INTERVAL,
+    )
+    while True:
+        run_once()
+        time.sleep(POLL_INTERVAL)
+
+
+if __name__ == "__main__":
+    main()

From 57df20284645347aba7de4e2399640e05b6d0b3c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 20 May 2026 21:13:52 +0800
Subject: [PATCH 19/36] fix

---
 ai/predict_v2.py | 536 +++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 450 insertions(+), 86 deletions(-)

diff --git a/ai/predict_v2.py b/ai/predict_v2.py
index df5dd97..bc425c8 100755
--- a/ai/predict_v2.py
+++ b/ai/predict_v2.py
@@ -1,56 +1,93 @@
 # -*- coding: utf-8 -*-
 """
-ProtoForge 预测服务 v2
-从 VictoriaMetrics 拉取历史数据，用 FFT + 正弦拟合预测未来值，写回 VM。
-预测值时间戳为未来时间，Grafana 中预测线出现在实测线右侧延伸处。
+ProtoForge 预测服务 v3
+
+修复点：
+1. 解决 HORIZON_SECONDS > POLL_INTERVAL 时，多轮预测窗口重叠导致 Grafana 出现毛刺/竖线问题。
+2. 每轮写入新预测前，删除同一个预测 metric 的旧预测序列，只保留最新一轮预测。
+3. 预测时间戳按整秒写入，避免毫秒时间戳和 Grafana step 不对齐。
+4. 拟合使用真实 timestamp 相对时间，不再假设历史数据严格 1 秒等间隔。
+5. 对历史数据做排序、去重、NaN/Inf 清洗。
 """
 
 import logging
+import math
+import re
 import time
 from datetime import datetime, timedelta
+from typing import Dict, List, Tuple
 
 import numpy as np
 import requests
 from scipy.optimize import curve_fit
 
+
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)s] %(message)s",
 )
+
 logger = logging.getLogger(__name__)
 
+
 # ── 配置 ──────────────────────────────────────────────────────────────────────
+
 VM_URL = "http://localhost:8428"
 
-# 要预测的指标列表，每项：(查询表达式, 写回指标名)
 PREDICT_TARGETS = [
-    ('feed_rate{device_id="fanuc-cnc"}',       "feed_rate_predicted"),
-    ('spindle_speed{device_id="fanuc-cnc"}',    "spindle_speed_predicted"),
-    ('spindle_current{device_id="fanuc-cnc"}',  "spindle_current_predicted"),
-    ('vibration_x{device_id="fanuc-cnc"}',      "vibration_x_predicted"),
-    ('vibration_y{device_id="fanuc-cnc"}',      "vibration_y_predicted"),
-    ('vibration_z{device_id="fanuc-cnc"}',      "vibration_z_predicted"),
+    ('feed_rate{device_id="fanuc-cnc"}', "feed_rate_predicted"),
+    ('spindle_speed{device_id="fanuc-cnc"}', "spindle_speed_predicted"),
+    ('spindle_current{device_id="fanuc-cnc"}', "spindle_current_predicted"),
+    ('vibration_x{device_id="fanuc-cnc"}', "vibration_x_predicted"),
+    ('vibration_y{device_id="fanuc-cnc"}', "vibration_y_predicted"),
+    ('vibration_z{device_id="fanuc-cnc"}', "vibration_z_predicted"),
 ]
 
-HISTORY_MINUTES = 30   # 拉取多少分钟历史数据用于拟合
-HORIZON_SECONDS = 120  # 预测未来多少秒
-POLL_INTERVAL   = 30   # 每隔多少秒重新预测一次
-MIN_POINTS      = 120  # 至少需要多少个历史点才开始预测
+HISTORY_MINUTES = 30
+HORIZON_SECONDS = 120
+POLL_INTERVAL = 30
+MIN_POINTS = 120
+QUERY_STEP = "1s"
+
+# 关键修复：每轮写入前删除旧预测，避免 120s 预测窗口和 30s 轮询周期重叠
+CLEAR_OLD_PREDICTIONS = True
+
+# 如果删除旧预测失败，是否跳过本轮写入。
+# 建议 True，避免继续叠加脏数据。
+SKIP_WRITE_IF_CLEAR_FAILED = True
+
+# 给新预测数据加一个稳定标签，方便 Grafana 查询过滤。
+# Grafana 可以查询：feed_rate_predicted{device_id="fanuc-cnc",forecast="latest"}
+EXTRA_PREDICT_LABELS = {
+    "forecast": "latest",
+    "source": "protoforge",
+}
+
+# 正弦周期限制
+MIN_PERIOD_SECONDS = 5.0
+MAX_PERIOD_SECONDS = 3600.0
+
 # ─────────────────────────────────────────────────────────────────────────────
 
 
-def fetch_history(query: str, minutes: int = HISTORY_MINUTES):
-    """从 VictoriaMetrics 拉取历史时序数据，返回 (timestamps, values)。"""
+def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]:
+    """
+    从 VictoriaMetrics 拉取历史时序数据。
+    返回：
+        timestamps: Unix 秒级时间戳
+        values: float 数值
+    """
     now = datetime.now()
     start = now - timedelta(minutes=minutes)
+
     try:
         resp = requests.get(
             f"{VM_URL}/api/v1/query_range",
             params={
                 "query": query,
                 "start": start.timestamp(),
-                "end":   now.timestamp(),
-                "step":  "1s",
+                "end": now.timestamp(),
+                "step": QUERY_STEP,
             },
             timeout=10,
         )
@@ -59,148 +96,475 @@ def fetch_history(query: str, minutes: int = HISTORY_MINUTES):
         logger.error("拉取数据失败 query=%s: %s", query, e)
         return [], []
 
-    result = resp.json().get("data", {}).get("result", [])
+    try:
+        result = resp.json().get("data", {}).get("result", [])
+    except Exception as e:
+        logger.error("解析 VM 返回失败 query=%s: %s", query, e)
+        return [], []
+
     if not result:
         return [], []
 
-    values = result[0]["values"]
-    ts = [float(v[0]) for v in values]
-    ys = [float(v[1]) for v in values]
+    values = result[0].get("values", [])
+    if not values:
+        return [], []
+
+    ts = []
+    ys = []
+
+    for item in values:
+        if len(item) < 2:
+            continue
+
+        try:
+            t = float(item[0])
+            y = float(item[1])
+        except Exception:
+            continue
+
+        if not math.isfinite(t) or not math.isfinite(y):
+            continue
+
+        ts.append(t)
+        ys.append(y)
+
     return ts, ys
 
 
-def _sine_model(t, A, T, phi, C):
-    return A * np.sin(2 * np.pi / T * t + phi) + C
+def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    清洗历史数据：
+    1. 转换为整秒时间戳
+    2. 排序
+    3. 同一秒多个值时保留最后一个
+    4. 插值补齐中间缺失秒
+    """
+    if not ts or not ys or len(ts) != len(ys):
+        return np.array([]), np.array([])
+
+    data = {}
+
+    for t, y in zip(ts, ys):
+        try:
+            sec = int(round(float(t)))
+            val = float(y)
+        except Exception:
+            continue
+
+        if not math.isfinite(sec) or not math.isfinite(val):
+            continue
+
+        data[sec] = val
+
+    if not data:
+        return np.array([]), np.array([])
+
+    sorted_items = sorted(data.items(), key=lambda x: x[0])
 
+    ts_clean = np.array([x[0] for x in sorted_items], dtype=float)
+    ys_clean = np.array([x[1] for x in sorted_items], dtype=float)
 
-def predict_next(ts: list, ys: list, horizon: int = HORIZON_SECONDS):
+    if len(ts_clean) < 2:
+        return ts_clean, ys_clean
+
+    start_sec = int(ts_clean[0])
+    end_sec = int(ts_clean[-1])
+
+    if end_sec <= start_sec:
+        return ts_clean, ys_clean
+
+    # 统一为 1 秒网格，减少 query_range 缺点、抖动、缺失点对 FFT 的影响
+    ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float)
+    ys_grid = np.interp(ts_grid, ts_clean, ys_clean)
+
+    return ts_grid, ys_grid
+
+
+def _sine_model(t: np.ndarray, A: float, T: float, phi: float, C: float) -> np.ndarray:
+    return A * np.sin(2.0 * np.pi / T * t + phi) + C
+
+
+def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
     """
-    用 FFT 检测主频，拟合正弦波，外推未来 horizon 秒。
-    返回 (future_timestamps, predicted_values)，时间戳均在最后一个真实点之后。
-    降级策略：拟合失败时用最近 10 点线性外推。
+    使用 FFT 估算主周期。
+    ys_arr 默认是 1 秒间隔。
     """
-    ys_arr = np.array(ys)
     n = len(ys_arr)
 
-    # ── FFT 找主频 ────────────────────────────────────────────────────────────
-    fft_vals = np.fft.rfft(ys_arr - ys_arr.mean())
-    freqs = np.fft.rfftfreq(n, d=1.0)  # d=1 表示 1 秒采样间隔
-    # 跳过直流分量（index 0）
-    dominant_idx = int(np.argmax(np.abs(fft_vals[1:]))) + 1
-    dominant_freq = freqs[dominant_idx]
-    period = 1.0 / dominant_freq if dominant_freq > 0 else 60.0
-    period = float(np.clip(period, 5.0, 3600.0))  # 限制在合理范围
+    if n < 4:
+        return 60.0
+
+    centered = ys_arr - np.mean(ys_arr)
+
+    if np.allclose(centered, 0):
+        return 60.0
+
+    fft_vals = np.fft.rfft(centered)
+    freqs = np.fft.rfftfreq(n, d=1.0)
+
+    if len(freqs) <= 1:
+        return 60.0
+
+    # 跳过直流分量 index 0
+    power = np.abs(fft_vals[1:])
+    if len(power) == 0 or np.max(power) <= 0:
+        return 60.0
+
+    dominant_idx = int(np.argmax(power)) + 1
+    dominant_freq = float(freqs[dominant_idx])
+
+    if dominant_freq <= 0:
+        return 60.0
+
+    period = 1.0 / dominant_freq
+    period = float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    return period
 
-    # ── 正弦拟合 ──────────────────────────────────────────────────────────────
-    t_rel = np.arange(n, dtype=float)
-    amplitude = (ys_arr.max() - ys_arr.min()) / 2.0
-    offset = float(ys_arr.mean())
 
-    # 最后一个真实数据点的 Unix 时间戳（秒）
-    last_ts = ts[-1]
+def predict_next(
+    ts: List[float],
+    ys: List[float],
+    horizon: int = HORIZON_SECONDS,
+    start_from_now: bool = True,
+) -> Tuple[List[float], List[float]]:
+    """
+    用 FFT 检测主频，拟合正弦波，外推未来 horizon 秒。
+    返回：
+        future_timestamps: 未来整秒时间戳
+        predicted_values: 预测值
+    """
+    ts_grid, ys_grid = normalize_history(ts, ys)
+
+    if len(ys_grid) < MIN_POINTS:
+        return [], []
+
+    n = len(ys_grid)
+
+    y_min = float(np.min(ys_grid))
+    y_max = float(np.max(ys_grid))
+    y_mean = float(np.mean(ys_grid))
+    y_range = y_max - y_min
+
+    # 数据几乎不波动时，直接使用最后一个值保持
+    if y_range <= 1e-9:
+        base_ts = int(time.time()) if start_from_now else int(ts_grid[-1])
+        base_ts = max(base_ts, int(ts_grid[-1]))
+
+        ts_future = [base_ts + i + 1 for i in range(horizon)]
+        y_pred = [float(ys_grid[-1])] * horizon
+        return ts_future, y_pred
+
+    period = estimate_period_by_fft(ys_grid)
+
+    # 用真实时间戳做相对时间，而不是 np.arange(n)
+    t_fit = ts_grid - ts_grid[0]
+
+    amplitude = y_range / 2.0
+    offset = y_mean
+
+    # 预测起点统一对齐到整秒
+    if start_from_now:
+        base_ts = int(time.time())
+    else:
+        base_ts = int(ts_grid[-1])
+
+    # 避免因为 VM 查询延迟导致预测点落在最后一个真实点之前
+    base_ts = max(base_ts, int(ts_grid[-1]))
+
+    ts_future_arr = np.arange(base_ts + 1, base_ts + 1 + horizon, 1, dtype=float)
+    t_future = ts_future_arr - ts_grid[0]
 
     try:
         popt, _ = curve_fit(
             _sine_model,
-            t_rel,
-            ys_arr,
+            t_fit,
+            ys_grid,
             p0=[amplitude, period, 0.0, offset],
             bounds=(
-                [0,       5.0,    -np.pi, ys_arr.min()],
-                [np.inf,  3600.0,  np.pi, ys_arr.max()],
+                [0.0, MIN_PERIOD_SECONDS, -2.0 * np.pi, y_min - y_range],
+                [np.inf, MAX_PERIOD_SECONDS, 2.0 * np.pi, y_max + y_range],
             ),
-            maxfev=8000,
+            maxfev=12000,
+        )
+
+        y_pred_arr = _sine_model(t_future, *popt)
+
+        # 裁剪到合理范围，避免拟合异常时飞出去
+        margin = y_range * 0.2
+        lower = y_min - margin
+        upper = y_max + margin
+        y_pred_arr = np.clip(y_pred_arr, lower, upper)
+
+        if not np.all(np.isfinite(y_pred_arr)):
+            raise ValueError("预测结果包含 NaN/Inf")
+
+        logger.debug(
+            "正弦拟合成功 period=%.2fs amplitude=%.4f offset=%.4f",
+            popt[1],
+            popt[0],
+            popt[3],
         )
-        t_future = np.arange(n, n + horizon, dtype=float)
-        y_pred = _sine_model(t_future, *popt)
-        # 裁剪到历史数据值域，避免外推飞出合理范围
-        y_pred = np.clip(y_pred, ys_arr.min() * 0.5, ys_arr.max() * 1.5)
 
-        # 未来时间戳：last_ts + 1s, +2s, ..., +horizon s
-        ts_future = [last_ts + i + 1 for i in range(horizon)]
-        logger.debug("正弦拟合成功 period=%.1fs amplitude=%.2f", popt[1], popt[0])
-        return ts_future, y_pred.tolist()
+        return ts_future_arr.tolist(), y_pred_arr.astype(float).tolist()
 
     except Exception as e:
-        logger.warning("正弦拟合失败，降级为线性外推: %s", e)
+        logger.warning("正弦拟合失败，降级为最近值平滑外推: %s", e)
+
+        # 降级策略：用最近 10 个点的均值保持，避免线性外推越走越偏
         tail = min(10, n)
-        slope = (ys_arr[-1] - ys_arr[-tail]) / tail
-        ts_future = [last_ts + i + 1 for i in range(horizon)]
-        y_pred = [float(ys_arr[-1] + slope * (i + 1)) for i in range(horizon)]
+        last_value = float(np.mean(ys_grid[-tail:]))
+
+        ts_future = ts_future_arr.tolist()
+        y_pred = [last_value] * horizon
+
         return ts_future, y_pred
 
 
-def write_predictions(ts_future: list, y_pred: list, metric_name: str, extra_labels: dict = None):
+def prom_escape_label_value(value: str) -> str:
+    """
+    Prometheus exposition label value 转义。
+    """
+    return (
+        str(value)
+        .replace("\\", "\\\\")
+        .replace("\n", "\\n")
+        .replace('"', '\\"')
+    )
+
+
+def build_selector(metric_name: str, labels: Dict[str, str]) -> str:
+    """
+    构造 PromQL selector，用于 delete_series。
+
+    示例：
+        feed_rate_predicted{device_id="fanuc-cnc"}
+    """
+    if not labels:
+        return metric_name
+
+    parts = []
+    for k in sorted(labels.keys()):
+        v = prom_escape_label_value(labels[k])
+        parts.append(f'{k}="{v}"')
+
+    return f'{metric_name}' + "{" + ",".join(parts) + "}"
+
+
+def delete_old_predictions(metric_name: str, base_labels: Dict[str, str]) -> bool:
+    """
+    删除旧预测序列，避免多轮预测窗口重叠。
+
+    注意：
+    这里故意只用 base_labels，比如 device_id。
+    不带 forecast/source 标签，是为了兼容旧版本脚本写入的无 forecast 标签数据。
+    """
+    selector = build_selector(metric_name, base_labels)
+
+    try:
+        resp = requests.post(
+            f"{VM_URL}/api/v1/admin/tsdb/delete_series",
+            params=[("match[]", selector)],
+            timeout=10,
+        )
+
+        if resp.status_code not in (200, 204):
+            logger.error(
+                "删除旧预测数据失败 metric=%s selector=%s status=%s body=%s",
+                metric_name,
+                selector,
+                resp.status_code,
+                resp.text[:500],
+            )
+            return False
+
+        logger.debug("已删除旧预测数据 selector=%s", selector)
+        return True
+
+    except requests.RequestException as e:
+        logger.error("删除旧预测数据异常 metric=%s selector=%s: %s", metric_name, selector, e)
+        return False
+
+
+def write_predictions(
+    ts_future: List[float],
+    y_pred: List[float],
+    metric_name: str,
+    labels: Dict[str, str] = None,
+) -> bool:
     """
     将预测值以 Prometheus exposition 格式写入 VictoriaMetrics。
-    时间戳为毫秒级 Unix 时间戳，对应未来时间点。
+    时间戳为毫秒级 Unix timestamp。
     """
+    if labels is None:
+        labels = {}
+
+    if not ts_future or not y_pred or len(ts_future) != len(y_pred):
+        logger.warning("预测数据为空或长度不一致 metric=%s", metric_name)
+        return False
+
     label_str = ""
-    if extra_labels:
-        parts = [f'{k}="{v}"' for k, v in extra_labels.items()]
+    if labels:
+        parts = []
+        for k in sorted(labels.keys()):
+            v = prom_escape_label_value(labels[k])
+            parts.append(f'{k}="{v}"')
         label_str = "{" + ",".join(parts) + "}"
 
     lines = []
+
     for t, y in zip(ts_future, y_pred):
-        ts_ms = int(t * 1000)
-        lines.append(f"{metric_name}{label_str} {y:.4f} {ts_ms}")
+        try:
+            ts_sec = int(round(float(t)))
+            val = float(y)
+        except Exception:
+            continue
+
+        if not math.isfinite(ts_sec) or not math.isfinite(val):
+            continue
+
+        ts_ms = ts_sec * 1000
+        lines.append(f"{metric_name}{label_str} {val:.6f} {ts_ms}")
+
+    if not lines:
+        logger.warning("没有可写入的预测点 metric=%s", metric_name)
+        return False
+
+    payload = "\n".join(lines) + "\n"
 
-    payload = "\n".join(lines)
     try:
         resp = requests.post(
             f"{VM_URL}/api/v1/import/prometheus",
-            data=payload,
+            data=payload.encode("utf-8"),
+            headers={
+                "Content-Type": "text/plain; version=0.0.4; charset=utf-8",
+            },
             timeout=10,
         )
         resp.raise_for_status()
+        return True
+
     except requests.RequestException as e:
         logger.error("写入预测数据失败 metric=%s: %s", metric_name, e)
+        return False
+
+
+_LABEL_PATTERN = re.compile(r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*')
 
 
-def _parse_labels(query: str) -> dict:
-    """从查询表达式中解析标签，如 feed_rate{device_id="fanuc-cnc"} → {"device_id": "fanuc-cnc"}"""
+def _parse_labels(query: str) -> Dict[str, str]:
+    """
+    从查询表达式中解析标签。
+
+    示例：
+        feed_rate{device_id="fanuc-cnc"} -> {"device_id": "fanuc-cnc"}
+    """
     labels = {}
-    if "{" not in query:
+
+    if "{" not in query or "}" not in query:
         return labels
-    label_part = query[query.index("{") + 1: query.index("}")]
-    for item in label_part.split(","):
-        if "=" in item:
-            k, v = item.split("=", 1)
-            labels[k.strip()] = v.strip().strip('"')
+
+    try:
+        label_part = query[query.index("{") + 1: query.rindex("}")]
+    except Exception:
+        return labels
+
+    for match in _LABEL_PATTERN.finditer(label_part):
+        key = match.group(1)
+        value = match.group(2)
+        value = value.replace('\\"', '"').replace("\\n", "\n").replace("\\\\", "\\")
+        labels[key] = value
+
     return labels
 
 
+def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]:
+    result = {}
+
+    for d in dicts:
+        if not d:
+            continue
+        result.update(d)
+
+    return result
+
+
 def run_once():
     now_str = datetime.now().strftime("%H:%M:%S")
+
     for query, pred_metric in PREDICT_TARGETS:
         ts, ys = fetch_history(query)
+
         if len(ys) < MIN_POINTS:
             logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
             continue
 
-        ts_future, y_pred = predict_next(ts, ys, horizon=HORIZON_SECONDS)
-        if not ts_future:
+        ts_future, y_pred = predict_next(
+            ts,
+            ys,
+            horizon=HORIZON_SECONDS,
+            start_from_now=True,
+        )
+
+        if not ts_future or not y_pred:
+            logger.warning("[%s] %s 预测结果为空，跳过", now_str, query)
+            continue
+
+        base_labels = _parse_labels(query)
+
+        # 先删除旧预测，再写入新预测。
+        # 删除条件只带 base_labels，兼容老版本无 forecast/source 标签的脏数据。
+        if CLEAR_OLD_PREDICTIONS:
+            clear_ok = delete_old_predictions(pred_metric, base_labels)
+
+            if not clear_ok and SKIP_WRITE_IF_CLEAR_FAILED:
+                logger.error(
+                    "[%s] %s 删除旧预测失败，为避免继续制造重叠数据，本轮跳过写入",
+                    now_str,
+                    pred_metric,
+                )
+                continue
+
+        write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
+
+        ok = write_predictions(
+            ts_future=ts_future,
+            y_pred=y_pred,
+            metric_name=pred_metric,
+            labels=write_labels,
+        )
+
+        if not ok:
             continue
 
-        extra_labels = _parse_labels(query)
-        write_predictions(ts_future, y_pred, pred_metric, extra_labels)
+        future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S")
+        future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
 
-        future_time = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
         logger.info(
-            "[%s] %-40s → %-35s 写入 %d 点，预测至 %s",
-            now_str, query, pred_metric, len(y_pred), future_time,
+            "[%s] %-40s → %-35s 写入 %d 点，预测区间 %s ~ %s",
+            now_str,
+            query,
+            pred_metric,
+            len(y_pred),
+            future_start,
+            future_end,
         )
 
 
 def main():
     logger.info(
-        "预测服务启动  VM=%s  预测窗口=%ds  轮询间隔=%ds",
-        VM_URL, HORIZON_SECONDS, POLL_INTERVAL,
+        "预测服务启动 VM=%s 历史窗口=%dmin 预测窗口=%ds 轮询间隔=%ds 清理旧预测=%s",
+        VM_URL,
+        HISTORY_MINUTES,
+        HORIZON_SECONDS,
+        POLL_INTERVAL,
+        CLEAR_OLD_PREDICTIONS,
     )
+
     while True:
         run_once()
         time.sleep(POLL_INTERVAL)
 
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file

From 88aec295671ca112fd422a28acd4d76d43a82f20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 20 May 2026 21:21:11 +0800
Subject: [PATCH 20/36] fix: stop deleting forecast series in predict_v2 and write only one poll interval ahead

---
 ai/predict_v2.py | 263 +++++++++++++++++++++++------------------------
 1 file changed, 128 insertions(+), 135 deletions(-)

diff --git a/ai/predict_v2.py b/ai/predict_v2.py
index bc425c8..f631e12 100755
--- a/ai/predict_v2.py
+++ b/ai/predict_v2.py
@@ -1,13 +1,14 @@
 # -*- coding: utf-8 -*-
 """
-ProtoForge 预测服务 v3
+ProtoForge 预测服务 v4
 
 修复点：
-1. 解决 HORIZON_SECONDS > POLL_INTERVAL 时，多轮预测窗口重叠导致 Grafana 出现毛刺/竖线问题。
-2. 每轮写入新预测前，删除同一个预测 metric 的旧预测序列，只保留最新一轮预测。
-3. 预测时间戳按整秒写入，避免毫秒时间戳和 Grafana step 不对齐。
-4. 拟合使用真实 timestamp 相对时间，不再假设历史数据严格 1 秒等间隔。
-5. 对历史数据做排序、去重、NaN/Inf 清洗。
+1. 不再使用 VictoriaMetrics delete_series，避免预测历史被整条删除。
+2. 不再每 30 秒写未来 120 秒，避免多轮预测窗口重叠导致 Grafana 出现竖线/毛刺。
+3. 每轮只写未来 min(HORIZON_SECONDS, POLL_INTERVAL) 秒的数据。
+4. 使用 forecast="rolling_v2" 新标签，避免和上一版 forecast="latest" 的旧预测数据混在一起。
+5. 使用真实 timestamp 做拟合，不假设采样严格等间隔。
+6. 拟合失败时不再简单写平直线，而是尽量重复最近一个周期的波形。
 """
 
 import logging
@@ -44,29 +45,35 @@
 ]
 
 HISTORY_MINUTES = 30
+
+# 理论预测窗口
 HORIZON_SECONDS = 120
+
+# 轮询间隔
 POLL_INTERVAL = 30
+
+# 实际写入窗口。
+# 关键点：实际写入窗口不要大于轮询间隔，否则不同批次预测会重叠。
+WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL)
+
 MIN_POINTS = 120
 QUERY_STEP = "1s"
 
-# 关键修复：每轮写入前删除旧预测，避免 120s 预测窗口和 30s 轮询周期重叠
-CLEAR_OLD_PREDICTIONS = True
-
-# 如果删除旧预测失败，是否跳过本轮写入。
-# 建议 True，避免继续叠加脏数据。
-SKIP_WRITE_IF_CLEAR_FAILED = True
+# 不要再清理旧预测，否则历史预测会被整条删除。
+CLEAR_OLD_PREDICTIONS = False
 
-# 给新预测数据加一个稳定标签，方便 Grafana 查询过滤。
-# Grafana 可以查询：feed_rate_predicted{device_id="fanuc-cnc",forecast="latest"}
+# 使用新标签，避免和上一版 forecast="latest" 数据混在一起。
 EXTRA_PREDICT_LABELS = {
-    "forecast": "latest",
+    "forecast": "rolling_v2",
     "source": "protoforge",
 }
 
-# 正弦周期限制
 MIN_PERIOD_SECONDS = 5.0
 MAX_PERIOD_SECONDS = 3600.0
 
+# 进程内记录每条预测序列上次写到哪里，避免本进程运行期间重复写同一时间段
+LAST_WRITTEN_UNTIL: Dict[str, int] = {}
+
 # ─────────────────────────────────────────────────────────────────────────────
 
 
@@ -134,10 +141,10 @@ def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[floa
 def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]:
     """
     清洗历史数据：
-    1. 转换为整秒时间戳
+    1. 时间戳转为整秒
     2. 排序
     3. 同一秒多个值时保留最后一个
-    4. 插值补齐中间缺失秒
+    4. 插值补齐缺失秒
     """
     if not ts or not ys or len(ts) != len(ys):
         return np.array([]), np.array([])
@@ -173,7 +180,6 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.
     if end_sec <= start_sec:
         return ts_clean, ys_clean
 
-    # 统一为 1 秒网格，减少 query_range 缺点、抖动、缺失点对 FFT 的影响
     ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float)
     ys_grid = np.interp(ts_grid, ts_clean, ys_clean)
 
@@ -187,7 +193,7 @@ def _sine_model(t: np.ndarray, A: float, T: float, phi: float, C: float) -> np.n
 def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
     """
     使用 FFT 估算主周期。
-    ys_arr 默认是 1 秒间隔。
+    ys_arr 默认已经是 1 秒间隔。
     """
     n = len(ys_arr)
 
@@ -205,8 +211,8 @@ def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
     if len(freqs) <= 1:
         return 60.0
 
-    # 跳过直流分量 index 0
     power = np.abs(fft_vals[1:])
+
     if len(power) == 0 or np.max(power) <= 0:
         return 60.0
 
@@ -222,59 +228,84 @@ def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
     return period
 
 
+def repeat_last_period(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    ts_future_arr: np.ndarray,
+    period_seconds: float,
+) -> np.ndarray:
+    """
+    拟合失败时的降级策略：
+    不直接写平直线，而是把未来时间映射回最近一个周期的历史波形。
+    """
+    if len(ts_grid) < 2:
+        return np.full_like(ts_future_arr, float(ys_grid[-1]), dtype=float)
+
+    period = max(int(round(period_seconds)), 1)
+
+    y_pred = []
+
+    hist_start = float(ts_grid[0])
+    hist_end = float(ts_grid[-1])
+
+    for future_ts in ts_future_arr:
+        mapped_ts = float(future_ts)
+
+        while mapped_ts > hist_end:
+            mapped_ts -= period
+
+        while mapped_ts < hist_start:
+            mapped_ts += period
+
+        val = float(np.interp(mapped_ts, ts_grid, ys_grid))
+        y_pred.append(val)
+
+    return np.array(y_pred, dtype=float)
+
+
 def predict_next(
     ts: List[float],
     ys: List[float],
-    horizon: int = HORIZON_SECONDS,
-    start_from_now: bool = True,
+    horizon: int,
+    base_ts: int,
 ) -> Tuple[List[float], List[float]]:
     """
     用 FFT 检测主频，拟合正弦波，外推未来 horizon 秒。
-    返回：
-        future_timestamps: 未来整秒时间戳
-        predicted_values: 预测值
+
+    base_ts:
+        从 base_ts + 1 开始写预测。
     """
     ts_grid, ys_grid = normalize_history(ts, ys)
 
     if len(ys_grid) < MIN_POINTS:
         return [], []
 
-    n = len(ys_grid)
-
     y_min = float(np.min(ys_grid))
     y_max = float(np.max(ys_grid))
     y_mean = float(np.mean(ys_grid))
     y_range = y_max - y_min
 
-    # 数据几乎不波动时，直接使用最后一个值保持
-    if y_range <= 1e-9:
-        base_ts = int(time.time()) if start_from_now else int(ts_grid[-1])
-        base_ts = max(base_ts, int(ts_grid[-1]))
+    base_ts = max(int(base_ts), int(ts_grid[-1]))
+
+    ts_future_arr = np.arange(
+        base_ts + 1,
+        base_ts + 1 + horizon,
+        1,
+        dtype=float,
+    )
 
-        ts_future = [base_ts + i + 1 for i in range(horizon)]
-        y_pred = [float(ys_grid[-1])] * horizon
-        return ts_future, y_pred
+    if y_range <= 1e-9:
+        y_pred_arr = np.full_like(ts_future_arr, float(ys_grid[-1]), dtype=float)
+        return ts_future_arr.tolist(), y_pred_arr.tolist()
 
     period = estimate_period_by_fft(ys_grid)
 
-    # 用真实时间戳做相对时间，而不是 np.arange(n)
     t_fit = ts_grid - ts_grid[0]
+    t_future = ts_future_arr - ts_grid[0]
 
     amplitude = y_range / 2.0
     offset = y_mean
 
-    # 预测起点统一对齐到整秒
-    if start_from_now:
-        base_ts = int(time.time())
-    else:
-        base_ts = int(ts_grid[-1])
-
-    # 避免因为 VM 查询延迟导致预测点落在最后一个真实点之前
-    base_ts = max(base_ts, int(ts_grid[-1]))
-
-    ts_future_arr = np.arange(base_ts + 1, base_ts + 1 + horizon, 1, dtype=float)
-    t_future = ts_future_arr - ts_grid[0]
-
     try:
         popt, _ = curve_fit(
             _sine_model,
@@ -290,7 +321,6 @@ def predict_next(
 
         y_pred_arr = _sine_model(t_future, *popt)
 
-        # 裁剪到合理范围，避免拟合异常时飞出去
         margin = y_range * 0.2
         lower = y_min - margin
         upper = y_max + margin
@@ -309,16 +339,21 @@ def predict_next(
         return ts_future_arr.tolist(), y_pred_arr.astype(float).tolist()
 
     except Exception as e:
-        logger.warning("正弦拟合失败，降级为最近值平滑外推: %s", e)
+        logger.warning("正弦拟合失败，降级为最近周期波形复制: %s", e)
 
-        # 降级策略：用最近 10 个点的均值保持，避免线性外推越走越偏
-        tail = min(10, n)
-        last_value = float(np.mean(ys_grid[-tail:]))
+        y_pred_arr = repeat_last_period(
+            ts_grid=ts_grid,
+            ys_grid=ys_grid,
+            ts_future_arr=ts_future_arr,
+            period_seconds=period,
+        )
 
-        ts_future = ts_future_arr.tolist()
-        y_pred = [last_value] * horizon
+        margin = y_range * 0.2
+        lower = y_min - margin
+        upper = y_max + margin
+        y_pred_arr = np.clip(y_pred_arr, lower, upper)
 
-        return ts_future, y_pred
+        return ts_future_arr.tolist(), y_pred_arr.astype(float).tolist()
 
 
 def prom_escape_label_value(value: str) -> str:
@@ -333,83 +368,34 @@ def prom_escape_label_value(value: str) -> str:
     )
 
 
-def build_selector(metric_name: str, labels: Dict[str, str]) -> str:
-    """
-    构造 PromQL selector，用于 delete_series。
-
-    示例：
-        feed_rate_predicted{device_id="fanuc-cnc"}
-    """
+def labels_to_str(labels: Dict[str, str]) -> str:
     if not labels:
-        return metric_name
+        return ""
 
     parts = []
+
     for k in sorted(labels.keys()):
         v = prom_escape_label_value(labels[k])
         parts.append(f'{k}="{v}"')
 
-    return f'{metric_name}' + "{" + ",".join(parts) + "}"
-
-
-def delete_old_predictions(metric_name: str, base_labels: Dict[str, str]) -> bool:
-    """
-    删除旧预测序列，避免多轮预测窗口重叠。
-
-    注意：
-    这里故意只用 base_labels，比如 device_id。
-    不带 forecast/source 标签，是为了兼容旧版本脚本写入的无 forecast 标签数据。
-    """
-    selector = build_selector(metric_name, base_labels)
-
-    try:
-        resp = requests.post(
-            f"{VM_URL}/api/v1/admin/tsdb/delete_series",
-            params=[("match[]", selector)],
-            timeout=10,
-        )
-
-        if resp.status_code not in (200, 204):
-            logger.error(
-                "删除旧预测数据失败 metric=%s selector=%s status=%s body=%s",
-                metric_name,
-                selector,
-                resp.status_code,
-                resp.text[:500],
-            )
-            return False
-
-        logger.debug("已删除旧预测数据 selector=%s", selector)
-        return True
-
-    except requests.RequestException as e:
-        logger.error("删除旧预测数据异常 metric=%s selector=%s: %s", metric_name, selector, e)
-        return False
+    return "{" + ",".join(parts) + "}"
 
 
 def write_predictions(
     ts_future: List[float],
     y_pred: List[float],
     metric_name: str,
-    labels: Dict[str, str] = None,
+    labels: Dict[str, str],
 ) -> bool:
     """
     将预测值以 Prometheus exposition 格式写入 VictoriaMetrics。
     时间戳为毫秒级 Unix timestamp。
     """
-    if labels is None:
-        labels = {}
-
     if not ts_future or not y_pred or len(ts_future) != len(y_pred):
         logger.warning("预测数据为空或长度不一致 metric=%s", metric_name)
         return False
 
-    label_str = ""
-    if labels:
-        parts = []
-        for k in sorted(labels.keys()):
-            v = prom_escape_label_value(labels[k])
-            parts.append(f'{k}="{v}"')
-        label_str = "{" + ",".join(parts) + "}"
+    label_str = labels_to_str(labels)
 
     lines = []
 
@@ -449,7 +435,9 @@ def write_predictions(
         return False
 
 
-_LABEL_PATTERN = re.compile(r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*')
+_LABEL_PATTERN = re.compile(
+    r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
+)
 
 
 def _parse_labels(query: str) -> Dict[str, str]:
@@ -489,6 +477,13 @@ def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]:
     return result
 
 
+def series_key(metric_name: str, labels: Dict[str, str]) -> str:
+    """
+    构造进程内唯一 key，用于记录上次写到哪个时间点。
+    """
+    return metric_name + labels_to_str(labels)
+
+
 def run_once():
     now_str = datetime.now().strftime("%H:%M:%S")
 
@@ -499,34 +494,28 @@ def run_once():
             logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
             continue
 
+        base_labels = _parse_labels(query)
+        write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
+
+        key = series_key(pred_metric, write_labels)
+
+        now_sec = int(time.time())
+        last_until = LAST_WRITTEN_UNTIL.get(key, 0)
+
+        # 防止同一进程内重复写入已经预测过的时间段
+        base_ts = max(now_sec, last_until)
+
         ts_future, y_pred = predict_next(
-            ts,
-            ys,
-            horizon=HORIZON_SECONDS,
-            start_from_now=True,
+            ts=ts,
+            ys=ys,
+            horizon=WRITE_HORIZON_SECONDS,
+            base_ts=base_ts,
         )
 
         if not ts_future or not y_pred:
             logger.warning("[%s] %s 预测结果为空，跳过", now_str, query)
             continue
 
-        base_labels = _parse_labels(query)
-
-        # 先删除旧预测，再写入新预测。
-        # 删除条件只带 base_labels，兼容老版本无 forecast/source 标签的脏数据。
-        if CLEAR_OLD_PREDICTIONS:
-            clear_ok = delete_old_predictions(pred_metric, base_labels)
-
-            if not clear_ok and SKIP_WRITE_IF_CLEAR_FAILED:
-                logger.error(
-                    "[%s] %s 删除旧预测失败，为避免继续制造重叠数据，本轮跳过写入",
-                    now_str,
-                    pred_metric,
-                )
-                continue
-
-        write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
-
         ok = write_predictions(
             ts_future=ts_future,
             y_pred=y_pred,
@@ -537,26 +526,30 @@ def run_once():
         if not ok:
             continue
 
+        LAST_WRITTEN_UNTIL[key] = int(max(ts_future))
+
         future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S")
         future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
 
         logger.info(
-            "[%s] %-40s → %-35s 写入 %d 点，预测区间 %s ~ %s",
+            "[%s] %-40s → %-35s 写入 %d 点，预测区间 %s ~ %s，标签=%s",
             now_str,
             query,
             pred_metric,
             len(y_pred),
             future_start,
             future_end,
+            labels_to_str(write_labels),
         )
 
 
 def main():
     logger.info(
-        "预测服务启动 VM=%s 历史窗口=%dmin 预测窗口=%ds 轮询间隔=%ds 清理旧预测=%s",
+        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds 清理旧预测=%s",
         VM_URL,
         HISTORY_MINUTES,
         HORIZON_SECONDS,
+        WRITE_HORIZON_SECONDS,
         POLL_INTERVAL,
         CLEAR_OLD_PREDICTIONS,
     )

From 72d5c092018c5caf59f1a9f6ae556e6eff24ecca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 20 May 2026 21:35:15 +0800
Subject: [PATCH 21/36] fix: replace single-sine fit in predict_v2 with periodic-template forecasting

---
 ai/predict_v2.py | 368 ++++++++++++++++++++++++-----------------------
 1 file changed, 188 insertions(+), 180 deletions(-)

diff --git a/ai/predict_v2.py b/ai/predict_v2.py
index f631e12..933a34f 100755
--- a/ai/predict_v2.py
+++ b/ai/predict_v2.py
@@ -1,14 +1,14 @@
 # -*- coding: utf-8 -*-
 """
-ProtoForge 预测服务 v4
+ProtoForge 预测服务 v5
 
 修复点：
-1. 不再使用 VictoriaMetrics delete_series，避免预测历史被整条删除。
-2. 不再每 30 秒写未来 120 秒，避免多轮预测窗口重叠导致 Grafana 出现竖线/毛刺。
-3. 每轮只写未来 min(HORIZON_SECONDS, POLL_INTERVAL) 秒的数据。
-4. 使用 forecast="rolling_v2" 新标签，避免和上一版 forecast="latest" 的旧预测数据混在一起。
-5. 使用真实 timestamp 做拟合，不假设采样严格等间隔。
-6. 拟合失败时不再简单写平直线，而是尽量重复最近一个周期的波形。
+1. 不再使用“单正弦拟合”作为主预测算法。
+2. 主算法改为：周期模板预测（同相位历史值加权平均）。
+3. 周期估计使用 FFT 粗估 + 自相关细化，比单纯 FFT 更稳。
+4. 若可用完整周期不足，则降级为多谐波回归（而不是单正弦）。
+5. 每轮只写入未来 min(HORIZON_SECONDS, POLL_INTERVAL) 秒，避免预测窗口重叠。
+6. 不删除旧预测历史，避免历史预测消失。
 """
 
 import logging
@@ -20,17 +20,13 @@
 
 import numpy as np
 import requests
-from scipy.optimize import curve_fit
-
 
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)s] %(message)s",
 )
-
 logger = logging.getLogger(__name__)
 
-
 # ── 配置 ──────────────────────────────────────────────────────────────────────
 
 VM_URL = "http://localhost:8428"
@@ -45,45 +41,36 @@
 ]
 
 HISTORY_MINUTES = 30
-
-# 理论预测窗口
 HORIZON_SECONDS = 120
-
-# 轮询间隔
 POLL_INTERVAL = 30
-
-# 实际写入窗口。
-# 关键点：实际写入窗口不要大于轮询间隔，否则不同批次预测会重叠。
 WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL)
-
 MIN_POINTS = 120
 QUERY_STEP = "1s"
 
-# 不要再清理旧预测，否则历史预测会被整条删除。
-CLEAR_OLD_PREDICTIONS = False
+# 至少要有多少个完整周期，才使用“周期模板预测”
+MIN_FULL_CYCLES_FOR_TEMPLATE = 3
+MAX_CYCLES_FOR_TEMPLATE = 6
+
+# 周期范围
+MIN_PERIOD_SECONDS = 5
+MAX_PERIOD_SECONDS = 3600
+
+# 多谐波回归最高阶数（降级模式）
+MAX_HARMONICS = 4
 
-# 使用新标签，避免和上一版 forecast="latest" 数据混在一起。
 EXTRA_PREDICT_LABELS = {
-    "forecast": "rolling_v2",
+    "forecast": "seasonal_v1",
     "source": "protoforge",
 }
 
-MIN_PERIOD_SECONDS = 5.0
-MAX_PERIOD_SECONDS = 3600.0
-
-# 进程内记录每条预测序列上次写到哪里，避免本进程运行期间重复写同一时间段
+# 进程内记录每条预测序列上次写到哪里，避免本进程运行时重复写
 LAST_WRITTEN_UNTIL: Dict[str, int] = {}
 
 # ─────────────────────────────────────────────────────────────────────────────
 
 
 def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]:
-    """
-    从 VictoriaMetrics 拉取历史时序数据。
-    返回：
-        timestamps: Unix 秒级时间戳
-        values: float 数值
-    """
+    """从 VictoriaMetrics 拉取历史时序数据。"""
     now = datetime.now()
     start = now - timedelta(minutes=minutes)
 
@@ -118,20 +105,16 @@ def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[floa
 
     ts = []
     ys = []
-
     for item in values:
         if len(item) < 2:
             continue
-
         try:
             t = float(item[0])
             y = float(item[1])
         except Exception:
             continue
-
         if not math.isfinite(t) or not math.isfinite(y):
             continue
-
         ts.append(t)
         ys.append(y)
 
@@ -141,33 +124,29 @@ def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[floa
 def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]:
     """
     清洗历史数据：
-    1. 时间戳转为整秒
+    1. 时间戳整秒化
     2. 排序
-    3. 同一秒多个值时保留最后一个
-    4. 插值补齐缺失秒
+    3. 同一秒多个点保留最后一个
+    4. 按 1 秒插值补齐
     """
     if not ts or not ys or len(ts) != len(ys):
         return np.array([]), np.array([])
 
     data = {}
-
     for t, y in zip(ts, ys):
         try:
             sec = int(round(float(t)))
             val = float(y)
         except Exception:
             continue
-
         if not math.isfinite(sec) or not math.isfinite(val):
             continue
-
         data[sec] = val
 
     if not data:
         return np.array([]), np.array([])
 
     sorted_items = sorted(data.items(), key=lambda x: x[0])
-
     ts_clean = np.array([x[0] for x in sorted_items], dtype=float)
     ys_clean = np.array([x[1] for x in sorted_items], dtype=float)
 
@@ -186,22 +165,13 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.
     return ts_grid, ys_grid
 
 
-def _sine_model(t: np.ndarray, A: float, T: float, phi: float, C: float) -> np.ndarray:
-    return A * np.sin(2.0 * np.pi / T * t + phi) + C
-
-
 def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
-    """
-    使用 FFT 估算主周期。
-    ys_arr 默认已经是 1 秒间隔。
-    """
+    """FFT 粗估周期。"""
     n = len(ys_arr)
-
-    if n < 4:
+    if n < 8:
         return 60.0
 
     centered = ys_arr - np.mean(ys_arr)
-
     if np.allclose(centered, 0):
         return 60.0
 
@@ -212,55 +182,139 @@ def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
         return 60.0
 
     power = np.abs(fft_vals[1:])
-
     if len(power) == 0 or np.max(power) <= 0:
         return 60.0
 
     dominant_idx = int(np.argmax(power)) + 1
     dominant_freq = float(freqs[dominant_idx])
-
     if dominant_freq <= 0:
         return 60.0
 
     period = 1.0 / dominant_freq
-    period = float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+    return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+
+def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float:
+    """
+    用自相关在 init_period 附近细化周期估计。
+    """
+    n = len(ys_arr)
+    if n < 20:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    centered = ys_arr - np.mean(ys_arr)
+    if np.allclose(centered, 0):
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
 
-    return period
+    corr = np.correlate(centered, centered, mode="full")[n - 1:]
 
+    p0 = int(round(init_period))
+    left = max(MIN_PERIOD_SECONDS, int(max(2, p0 * 0.7)))
+    right = min(n // 2, int(max(left + 1, p0 * 1.3)))
 
-def repeat_last_period(
-    ts_grid: np.ndarray,
-    ys_grid: np.ndarray,
-    ts_future_arr: np.ndarray,
-    period_seconds: float,
-) -> np.ndarray:
+    if right <= left:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    search = corr[left:right + 1]
+    if len(search) == 0:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    best_lag = left + int(np.argmax(search))
+    return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+
+def estimate_period(ys_arr: np.ndarray) -> float:
+    """FFT + 自相关 的组合周期估计。"""
+    p_fft = estimate_period_by_fft(ys_arr)
+    p_refined = refine_period_by_autocorr(ys_arr, p_fft)
+    return p_refined
+
+
+def seasonal_template_predict(
+    ys_arr: np.ndarray,
+    horizon: int,
+    period: int,
+    gap: int = 0,
+    max_cycles: int = MAX_CYCLES_FOR_TEMPLATE,
+) -> List[float]:
     """
-    拟合失败时的降级策略：
-    不直接写平直线，而是把未来时间映射回最近一个周期的历史波形。
+    同相位历史值加权平均预测。
+    对未来第 k 个点，取过去多个周期同相位点做加权平均：
+        y[n-1+gap+k] ≈ avg(y[n-1+gap+k-p], y[n-1+gap+k-2p], ...)
     """
-    if len(ts_grid) < 2:
-        return np.full_like(ts_future_arr, float(ys_grid[-1]), dtype=float)
+    n = len(ys_arr)
+    preds = []
 
-    period = max(int(round(period_seconds)), 1)
+    for k in range(1, horizon + 1):
+        target_idx = (n - 1) + gap + k
 
-    y_pred = []
+        values = []
+        weights = []
 
-    hist_start = float(ts_grid[0])
-    hist_end = float(ts_grid[-1])
+        # m=1 表示最近一个周期；m 越大越久远
+        for m in range(1, max_cycles + 1):
+            hist_idx = target_idx - m * period
+            if 0 <= hist_idx < n:
+                # 越近权重越大
+                w = 1.0 / m
+                values.append(float(ys_arr[hist_idx]))
+                weights.append(w)
 
-    for future_ts in ts_future_arr:
-        mapped_ts = float(future_ts)
+        if not values:
+            # 万一拿不到，退化为最后一个值
+            preds.append(float(ys_arr[-1]))
+        else:
+            preds.append(float(np.average(values, weights=weights)))
 
-        while mapped_ts > hist_end:
-            mapped_ts -= period
+    return preds
 
-        while mapped_ts < hist_start:
-            mapped_ts += period
 
-        val = float(np.interp(mapped_ts, ts_grid, ys_grid))
-        y_pred.append(val)
+def harmonic_regression_predict(
+    ys_arr: np.ndarray,
+    horizon: int,
+    period: int,
+    gap: int = 0,
+    max_harmonics: int = MAX_HARMONICS,
+) -> List[float]:
+    """
+    多谐波回归（降级模式）：
+    y = c + Σ [a_k sin(2πkt/P) + b_k cos(2πkt/P)]
+    相比单正弦，更能表达非标准正弦波形。
+    """
+    n = len(ys_arr)
+    if n < 10 or period <= 1:
+        return [float(ys_arr[-1])] * horizon
+
+    # 周期太短时，谐波数不能太大
+    K = min(max_harmonics, max(1, period // 4))
+
+    t = np.arange(n, dtype=float)
+    cols = [np.ones(n, dtype=float)]
+
+    for k in range(1, K + 1):
+        angle = 2.0 * np.pi * k * t / period
+        cols.append(np.sin(angle))
+        cols.append(np.cos(angle))
+
+    X = np.column_stack(cols)
+
+    try:
+        coef, _, _, _ = np.linalg.lstsq(X, ys_arr, rcond=None)
+    except Exception:
+        return [float(ys_arr[-1])] * horizon
+
+    t_future = np.arange(n + gap, n + gap + horizon, dtype=float)
+    cols_future = [np.ones(horizon, dtype=float)]
 
-    return np.array(y_pred, dtype=float)
+    for k in range(1, K + 1):
+        angle = 2.0 * np.pi * k * t_future / period
+        cols_future.append(np.sin(angle))
+        cols_future.append(np.cos(angle))
+
+    X_future = np.column_stack(cols_future)
+    y_pred = X_future @ coef
+
+    return y_pred.astype(float).tolist()
 
 
 def predict_next(
@@ -270,96 +324,74 @@ def predict_next(
     base_ts: int,
 ) -> Tuple[List[float], List[float]]:
     """
-    用 FFT 检测主频，拟合正弦波，外推未来 horizon 秒。
-
-    base_ts:
-        从 base_ts + 1 开始写预测。
+    主预测函数：
+    1. 周期估计
+    2. 优先使用周期模板预测
+    3. 周期不够时降级为多谐波回归
     """
     ts_grid, ys_grid = normalize_history(ts, ys)
-
     if len(ys_grid) < MIN_POINTS:
         return [], []
 
     y_min = float(np.min(ys_grid))
     y_max = float(np.max(ys_grid))
-    y_mean = float(np.mean(ys_grid))
     y_range = y_max - y_min
 
-    base_ts = max(int(base_ts), int(ts_grid[-1]))
-
-    ts_future_arr = np.arange(
-        base_ts + 1,
-        base_ts + 1 + horizon,
-        1,
-        dtype=float,
-    )
-
     if y_range <= 1e-9:
-        y_pred_arr = np.full_like(ts_future_arr, float(ys_grid[-1]), dtype=float)
-        return ts_future_arr.tolist(), y_pred_arr.tolist()
+        base_ts = max(int(base_ts), int(ts_grid[-1]))
+        ts_future = [base_ts + i + 1 for i in range(horizon)]
+        y_pred = [float(ys_grid[-1])] * horizon
+        return ts_future, y_pred
 
-    period = estimate_period_by_fft(ys_grid)
+    period_est = estimate_period(ys_grid)
+    period = int(round(period_est))
+    period = max(MIN_PERIOD_SECONDS, min(MAX_PERIOD_SECONDS, period))
 
-    t_fit = ts_grid - ts_grid[0]
-    t_future = ts_future_arr - ts_grid[0]
+    last_real_ts = int(ts_grid[-1])
+    base_ts = max(int(base_ts), last_real_ts)
 
-    amplitude = y_range / 2.0
-    offset = y_mean
+    # 如果当前时间已经超过最后一个真实点，gap 表示中间“空过去”的秒数
+    gap = max(0, base_ts - last_real_ts)
 
-    try:
-        popt, _ = curve_fit(
-            _sine_model,
-            t_fit,
-            ys_grid,
-            p0=[amplitude, period, 0.0, offset],
-            bounds=(
-                [0.0, MIN_PERIOD_SECONDS, -2.0 * np.pi, y_min - y_range],
-                [np.inf, MAX_PERIOD_SECONDS, 2.0 * np.pi, y_max + y_range],
-            ),
-            maxfev=12000,
-        )
+    ts_future = [base_ts + i + 1 for i in range(horizon)]
 
-        y_pred_arr = _sine_model(t_future, *popt)
+    full_cycles = len(ys_grid) // period if period > 0 else 0
 
-        margin = y_range * 0.2
-        lower = y_min - margin
-        upper = y_max + margin
-        y_pred_arr = np.clip(y_pred_arr, lower, upper)
-
-        if not np.all(np.isfinite(y_pred_arr)):
-            raise ValueError("预测结果包含 NaN/Inf")
-
-        logger.debug(
-            "正弦拟合成功 period=%.2fs amplitude=%.4f offset=%.4f",
-            popt[1],
-            popt[0],
-            popt[3],
+    if full_cycles >= MIN_FULL_CYCLES_FOR_TEMPLATE:
+        y_pred = seasonal_template_predict(
+            ys_arr=ys_grid,
+            horizon=horizon,
+            period=period,
+            gap=gap,
+            max_cycles=min(MAX_CYCLES_FOR_TEMPLATE, full_cycles),
         )
-
-        return ts_future_arr.tolist(), y_pred_arr.astype(float).tolist()
-
-    except Exception as e:
-        logger.warning("正弦拟合失败，降级为最近周期波形复制: %s", e)
-
-        y_pred_arr = repeat_last_period(
-            ts_grid=ts_grid,
-            ys_grid=ys_grid,
-            ts_future_arr=ts_future_arr,
-            period_seconds=period,
+        model_name = "seasonal_template"
+    else:
+        y_pred = harmonic_regression_predict(
+            ys_arr=ys_grid,
+            horizon=horizon,
+            period=period,
+            gap=gap,
+            max_harmonics=MAX_HARMONICS,
         )
+        model_name = "harmonic_regression"
+
+    # 合理裁剪，避免偶然外推过大
+    margin = y_range * 0.15
+    lower = y_min - margin
+    upper = y_max + margin
+    y_pred = np.clip(np.array(y_pred, dtype=float), lower, upper).astype(float).tolist()
 
-        margin = y_range * 0.2
-        lower = y_min - margin
-        upper = y_max + margin
-        y_pred_arr = np.clip(y_pred_arr, lower, upper)
+    logger.debug(
+        "predict_next model=%s period=%ss full_cycles=%s gap=%s",
+        model_name, period, full_cycles, gap
+    )
 
-        return ts_future_arr.tolist(), y_pred_arr.astype(float).tolist()
+    return ts_future, y_pred
 
 
 def prom_escape_label_value(value: str) -> str:
-    """
-    Prometheus exposition label value 转义。
-    """
+    """Prometheus label value 转义。"""
     return (
         str(value)
         .replace("\\", "\\\\")
@@ -371,13 +403,10 @@ def prom_escape_label_value(value: str) -> str:
 def labels_to_str(labels: Dict[str, str]) -> str:
     if not labels:
         return ""
-
     parts = []
-
     for k in sorted(labels.keys()):
         v = prom_escape_label_value(labels[k])
         parts.append(f'{k}="{v}"')
-
     return "{" + ",".join(parts) + "}"
 
 
@@ -387,16 +416,12 @@ def write_predictions(
     metric_name: str,
     labels: Dict[str, str],
 ) -> bool:
-    """
-    将预测值以 Prometheus exposition 格式写入 VictoriaMetrics。
-    时间戳为毫秒级 Unix timestamp。
-    """
+    """将预测值以 Prometheus exposition 格式写入 VictoriaMetrics。"""
     if not ts_future or not y_pred or len(ts_future) != len(y_pred):
         logger.warning("预测数据为空或长度不一致 metric=%s", metric_name)
         return False
 
     label_str = labels_to_str(labels)
-
     lines = []
 
     for t, y in zip(ts_future, y_pred):
@@ -422,14 +447,11 @@ def write_predictions(
         resp = requests.post(
             f"{VM_URL}/api/v1/import/prometheus",
             data=payload.encode("utf-8"),
-            headers={
-                "Content-Type": "text/plain; version=0.0.4; charset=utf-8",
-            },
+            headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
             timeout=10,
         )
         resp.raise_for_status()
         return True
-
     except requests.RequestException as e:
         logger.error("写入预测数据失败 metric=%s: %s", metric_name, e)
         return False
@@ -441,12 +463,7 @@ def write_predictions(
 
 
 def _parse_labels(query: str) -> Dict[str, str]:
-    """
-    从查询表达式中解析标签。
-
-    示例：
-        feed_rate{device_id="fanuc-cnc"} -> {"device_id": "fanuc-cnc"}
-    """
+    """从查询表达式中解析标签。"""
     labels = {}
 
     if "{" not in query or "}" not in query:
@@ -468,19 +485,13 @@ def _parse_labels(query: str) -> Dict[str, str]:
 
 def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]:
     result = {}
-
     for d in dicts:
-        if not d:
-            continue
-        result.update(d)
-
+        if d:
+            result.update(d)
     return result
 
 
 def series_key(metric_name: str, labels: Dict[str, str]) -> str:
-    """
-    构造进程内唯一 key，用于记录上次写到哪个时间点。
-    """
     return metric_name + labels_to_str(labels)
 
 
@@ -489,7 +500,6 @@ def run_once():
 
     for query, pred_metric in PREDICT_TARGETS:
         ts, ys = fetch_history(query)
-
         if len(ys) < MIN_POINTS:
             logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
             continue
@@ -502,7 +512,7 @@ def run_once():
         now_sec = int(time.time())
         last_until = LAST_WRITTEN_UNTIL.get(key, 0)
 
-        # 防止同一进程内重复写入已经预测过的时间段
+        # 避免同一进程内写重叠时间段
         base_ts = max(now_sec, last_until)
 
         ts_future, y_pred = predict_next(
@@ -522,7 +532,6 @@ def run_once():
             metric_name=pred_metric,
             labels=write_labels,
         )
-
         if not ok:
             continue
 
@@ -545,13 +554,12 @@ def run_once():
 
 def main():
     logger.info(
-        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds 清理旧预测=%s",
+        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds",
         VM_URL,
         HISTORY_MINUTES,
         HORIZON_SECONDS,
         WRITE_HORIZON_SECONDS,
         POLL_INTERVAL,
-        CLEAR_OLD_PREDICTIONS,
     )
 
     while True:

From c26b9991d4cfac374d56829b47023b043d270aba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 21 May 2026 09:03:00 +0800
Subject: [PATCH 22/36] feat(ai): support single scene predict

---
 ai/predict_v3_single_scene.py | 1058 +++++++++++++++++++++++++++++++++
 1 file changed, 1058 insertions(+)
 create mode 100644 ai/predict_v3_single_scene.py

diff --git a/ai/predict_v3_single_scene.py b/ai/predict_v3_single_scene.py
new file mode 100644
index 0000000..23af8c5
--- /dev/null
+++ b/ai/predict_v3_single_scene.py
@@ -0,0 +1,1058 @@
+# -*- coding: utf-8 -*-
+"""
+ProtoForge 预测服务 v6
+
+核心能力：
+1. 周期模板预测：适合 CNC 这类强周期、非标准正弦波形。
+2. 健康基线冻结：检测到异常后，不再用故障数据更新预测模板。
+3. 恢复冷却机制：故障恢复后，需要连续稳定多个周期，才恢复学习。
+4. 预测上下界：写入 predicted_upper / predicted_lower，方便 Grafana 展示预测带。
+5. 异常标记：写入 xxx_anomaly，1 表示异常，0 表示正常。
+6. 不删除历史预测，不使用 delete_series。
+"""
+
+"""
+场景：不考虑物料、不考虑跨程序场景算法预测
+"""
+
+import json
+import logging
+import math
+import os
+import re
+import time
+from dataclasses import asdict, dataclass
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Tuple
+
+import numpy as np
+import requests
+
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+)
+
+logger = logging.getLogger(__name__)
+
+
+# ── 基础配置 ──────────────────────────────────────────────────────────────────
+
+VM_URL = "http://localhost:8428"
+
+STATE_FILE = "/tmp/protoforge_predictor_state.json"
+
+HISTORY_MINUTES = 30
+HORIZON_SECONDS = 120
+POLL_INTERVAL = 30
+
+# 实际每轮写入未来多少秒。
+# 不要大于 POLL_INTERVAL，否则多轮预测会重叠。
+WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL)
+
+QUERY_STEP = "1s"
+MIN_POINTS = 120
+
+MIN_PERIOD_SECONDS = 5
+MAX_PERIOD_SECONDS = 3600
+
+# 至少多少个完整周期才允许构建健康模板
+MIN_FULL_CYCLES_FOR_TEMPLATE = 3
+
+# 构建模板最多使用最近多少个周期
+MAX_CYCLES_FOR_TEMPLATE = 6
+
+# 检测异常使用最近多少秒实际数据
+DETECT_WINDOW_SECONDS = 15
+
+# 恢复后，至少连续正常多少秒才考虑恢复学习
+RECOVERY_MIN_SECONDS = 60
+
+# 健康状态下模板更新速度，越小越保守
+HEALTHY_EMA_ALPHA = 0.15
+
+# 故障恢复后第一次重新学习时的更新速度
+RECOVERY_EMA_ALPHA = 0.35
+
+# 最近窗口里有多少比例的点超过阈值，才认为异常
+OUTSIDE_RATIO_THRESHOLD = 0.60
+
+# 最近窗口里有多少比例的点回到阈值内，才认为恢复正常
+RECOVERY_INSIDE_RATIO_THRESHOLD = 0.80
+
+
+# ── 指标配置 ──────────────────────────────────────────────────────────────────
+# abs_threshold / rel_threshold 需要按指标单位调。
+# feed_rate 单位 mm/min，这里先给 400 和 25%。
+
+PREDICT_TARGETS = [
+    {
+        "query": 'feed_rate{device_id="fanuc-cnc"}',
+        "pred_metric": "feed_rate_predicted",
+        "anomaly_metric": "feed_rate_anomaly",
+        "abs_threshold": 400.0,
+        "rel_threshold": 0.25,
+    },
+    {
+        "query": 'spindle_speed{device_id="fanuc-cnc"}',
+        "pred_metric": "spindle_speed_predicted",
+        "anomaly_metric": "spindle_speed_anomaly",
+        "abs_threshold": 500.0,
+        "rel_threshold": 0.25,
+    },
+    {
+        "query": 'spindle_current{device_id="fanuc-cnc"}',
+        "pred_metric": "spindle_current_predicted",
+        "anomaly_metric": "spindle_current_anomaly",
+        "abs_threshold": 5.0,
+        "rel_threshold": 0.25,
+    },
+    {
+        "query": 'vibration_x{device_id="fanuc-cnc"}',
+        "pred_metric": "vibration_x_predicted",
+        "anomaly_metric": "vibration_x_anomaly",
+        "abs_threshold": 1.0,
+        "rel_threshold": 0.30,
+    },
+    {
+        "query": 'vibration_y{device_id="fanuc-cnc"}',
+        "pred_metric": "vibration_y_predicted",
+        "anomaly_metric": "vibration_y_anomaly",
+        "abs_threshold": 1.0,
+        "rel_threshold": 0.30,
+    },
+    {
+        "query": 'vibration_z{device_id="fanuc-cnc"}',
+        "pred_metric": "vibration_z_predicted",
+        "anomaly_metric": "vibration_z_anomaly",
+        "abs_threshold": 1.0,
+        "rel_threshold": 0.30,
+    },
+]
+
+EXTRA_PREDICT_LABELS = {
+    "forecast": "health_gated_v1",
+    "source": "protoforge",
+}
+
+BASELINE_STATUS_HEALTHY = "healthy"
+BASELINE_STATUS_ANOMALY = "anomaly"
+BASELINE_STATUS_RECOVERING = "recovering"
+BASELINE_STATUS_LEARNING = "learning"
+
+
+# ── 状态结构 ──────────────────────────────────────────────────────────────────
+
+@dataclass
+class BaselineState:
+    period: int
+    template: List[float]
+    status: str
+    clean_seconds: int
+    last_update_ts: int
+    last_seen_ts: int
+    y_min: float
+    y_max: float
+
+
+BASELINE_STATES: Dict[str, BaselineState] = {}
+LAST_WRITTEN_UNTIL: Dict[str, int] = {}
+
+
+# ── VM 读取 ───────────────────────────────────────────────────────────────────
+
+def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]:
+    now = datetime.now()
+    start = now - timedelta(minutes=minutes)
+
+    try:
+        resp = requests.get(
+            f"{VM_URL}/api/v1/query_range",
+            params={
+                "query": query,
+                "start": start.timestamp(),
+                "end": now.timestamp(),
+                "step": QUERY_STEP,
+            },
+            timeout=10,
+        )
+        resp.raise_for_status()
+    except requests.RequestException as e:
+        logger.error("拉取数据失败 query=%s: %s", query, e)
+        return [], []
+
+    try:
+        result = resp.json().get("data", {}).get("result", [])
+    except Exception as e:
+        logger.error("解析 VM 返回失败 query=%s: %s", query, e)
+        return [], []
+
+    if not result:
+        return [], []
+
+    values = result[0].get("values", [])
+    if not values:
+        return [], []
+
+    ts = []
+    ys = []
+
+    for item in values:
+        if len(item) < 2:
+            continue
+
+        try:
+            t = float(item[0])
+            y = float(item[1])
+        except Exception:
+            continue
+
+        if not math.isfinite(t) or not math.isfinite(y):
+            continue
+
+        ts.append(t)
+        ys.append(y)
+
+    return ts, ys
+
+
+def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]:
+    if not ts or not ys or len(ts) != len(ys):
+        return np.array([]), np.array([])
+
+    data = {}
+
+    for t, y in zip(ts, ys):
+        try:
+            sec = int(round(float(t)))
+            val = float(y)
+        except Exception:
+            continue
+
+        if not math.isfinite(sec) or not math.isfinite(val):
+            continue
+
+        data[sec] = val
+
+    if not data:
+        return np.array([]), np.array([])
+
+    sorted_items = sorted(data.items(), key=lambda x: x[0])
+
+    ts_clean = np.array([x[0] for x in sorted_items], dtype=float)
+    ys_clean = np.array([x[1] for x in sorted_items], dtype=float)
+
+    if len(ts_clean) < 2:
+        return ts_clean, ys_clean
+
+    start_sec = int(ts_clean[0])
+    end_sec = int(ts_clean[-1])
+
+    if end_sec <= start_sec:
+        return ts_clean, ys_clean
+
+    ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float)
+    ys_grid = np.interp(ts_grid, ts_clean, ys_clean)
+
+    return ts_grid, ys_grid
+
+
+# ── 周期估计 ──────────────────────────────────────────────────────────────────
+
+def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
+    n = len(ys_arr)
+
+    if n < 8:
+        return 60.0
+
+    centered = ys_arr - np.mean(ys_arr)
+
+    if np.allclose(centered, 0):
+        return 60.0
+
+    fft_vals = np.fft.rfft(centered)
+    freqs = np.fft.rfftfreq(n, d=1.0)
+
+    if len(freqs) <= 1:
+        return 60.0
+
+    power = np.abs(fft_vals[1:])
+
+    if len(power) == 0 or np.max(power) <= 0:
+        return 60.0
+
+    dominant_idx = int(np.argmax(power)) + 1
+    dominant_freq = float(freqs[dominant_idx])
+
+    if dominant_freq <= 0:
+        return 60.0
+
+    period = 1.0 / dominant_freq
+
+    return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+
+def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float:
+    n = len(ys_arr)
+
+    if n < 20:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    centered = ys_arr - np.mean(ys_arr)
+
+    if np.allclose(centered, 0):
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    corr = np.correlate(centered, centered, mode="full")[n - 1:]
+
+    p0 = int(round(init_period))
+    left = max(MIN_PERIOD_SECONDS, int(max(2, p0 * 0.7)))
+    right = min(n // 2, int(max(left + 1, p0 * 1.3)))
+
+    if right <= left:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    search = corr[left:right + 1]
+
+    if len(search) == 0:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    best_lag = left + int(np.argmax(search))
+
+    return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+
+def estimate_period(ys_arr: np.ndarray) -> int:
+    p_fft = estimate_period_by_fft(ys_arr)
+    p_refined = refine_period_by_autocorr(ys_arr, p_fft)
+
+    period = int(round(p_refined))
+    period = max(MIN_PERIOD_SECONDS, min(MAX_PERIOD_SECONDS, period))
+
+    return int(period)
+
+
+# ── 模板构建与预测 ─────────────────────────────────────────────────────────────
+
+def fill_template_nan(template: np.ndarray) -> np.ndarray:
+    period = len(template)
+
+    if period == 0:
+        return template
+
+    idx = np.arange(period)
+    valid = np.isfinite(template)
+
+    if not np.any(valid):
+        return np.zeros(period, dtype=float)
+
+    if np.all(valid):
+        return template
+
+    x_valid = idx[valid]
+    y_valid = template[valid]
+
+    # 环形插值，处理 phase 0 附近缺口
+    x_ext = np.concatenate([x_valid - period, x_valid, x_valid + period])
+    y_ext = np.concatenate([y_valid, y_valid, y_valid])
+
+    filled = np.interp(idx, x_ext, y_ext)
+
+    return filled.astype(float)
+
+
+def build_phase_template(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    period: int,
+    max_cycles: int = MAX_CYCLES_FOR_TEMPLATE,
+    tail_seconds: Optional[int] = None,
+) -> Optional[np.ndarray]:
+    if period <= 1 or len(ys_grid) < period * MIN_FULL_CYCLES_FOR_TEMPLATE:
+        return None
+
+    max_seconds = period * max_cycles
+
+    if tail_seconds is not None:
+        max_seconds = min(max_seconds, int(tail_seconds))
+
+    max_seconds = max(period * MIN_FULL_CYCLES_FOR_TEMPLATE, max_seconds)
+
+    if len(ys_grid) < max_seconds:
+        start_idx = 0
+    else:
+        start_idx = len(ys_grid) - max_seconds
+
+    ts_tail = ts_grid[start_idx:]
+    ys_tail = ys_grid[start_idx:]
+
+    if len(ys_tail) < period * MIN_FULL_CYCLES_FOR_TEMPLATE:
+        return None
+
+    sums = np.zeros(period, dtype=float)
+    weights = np.zeros(period, dtype=float)
+
+    total = len(ys_tail)
+
+    for i, (t, y) in enumerate(zip(ts_tail, ys_tail)):
+        phase = int(t) % period
+
+        # 越近的数据权重越高
+        recency = (i + 1) / total
+        weight = 0.3 + 0.7 * recency
+
+        sums[phase] += float(y) * weight
+        weights[phase] += weight
+
+    template = np.full(period, np.nan, dtype=float)
+
+    valid = weights > 0
+    template[valid] = sums[valid] / weights[valid]
+
+    template = fill_template_nan(template)
+
+    return template
+
+
+def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
+    old_period = len(old_template)
+
+    if old_period == new_period:
+        return old_template.astype(float)
+
+    if old_period <= 1 or new_period <= 1:
+        return np.full(new_period, float(np.mean(old_template)), dtype=float)
+
+    old_x = np.linspace(0.0, 1.0, old_period, endpoint=False)
+    new_x = np.linspace(0.0, 1.0, new_period, endpoint=False)
+
+    old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0])
+    old_y_ext = np.concatenate([old_template, old_template, old_template])
+
+    return np.interp(new_x, old_x_ext, old_y_ext).astype(float)
+
+
+def merge_template(
+    old_template: np.ndarray,
+    new_template: np.ndarray,
+    alpha: float,
+) -> np.ndarray:
+    alpha = float(np.clip(alpha, 0.0, 1.0))
+
+    if len(old_template) != len(new_template):
+        old_template = resample_template(old_template, len(new_template))
+
+    return ((1.0 - alpha) * old_template + alpha * new_template).astype(float)
+
+
+def predict_by_state(state: BaselineState, ts_list: List[int]) -> np.ndarray:
+    template = np.array(state.template, dtype=float)
+    period = int(state.period)
+
+    if period <= 1 or len(template) != period:
+        return np.zeros(len(ts_list), dtype=float)
+
+    values = []
+
+    for ts in ts_list:
+        phase = int(ts) % period
+        values.append(float(template[phase]))
+
+    return np.array(values, dtype=float)
+
+
+def calc_threshold(pred: np.ndarray, abs_threshold: float, rel_threshold: float) -> np.ndarray:
+    return np.maximum(abs_threshold, np.abs(pred) * rel_threshold)
+
+
+def calc_bounds(pred: np.ndarray, abs_threshold: float, rel_threshold: float) -> Tuple[np.ndarray, np.ndarray]:
+    threshold = calc_threshold(pred, abs_threshold, rel_threshold)
+    lower = pred - threshold
+    upper = pred + threshold
+    return lower, upper
+
+
+# ── 异常检测与状态更新 ────────────────────────────────────────────────────────
+
+def detect_anomaly(
+    state: BaselineState,
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    abs_threshold: float,
+    rel_threshold: float,
+) -> Tuple[bool, float, float, float]:
+    if len(ys_grid) < DETECT_WINDOW_SECONDS:
+        return False, 0.0, 0.0, 0.0
+
+    ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()
+    actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float)
+
+    pred = predict_by_state(state, ts_recent)
+    threshold = calc_threshold(pred, abs_threshold, rel_threshold)
+
+    abs_err = np.abs(actual - pred)
+    outside = abs_err > threshold
+
+    outside_ratio = float(np.mean(outside))
+    mean_abs_err = float(np.mean(abs_err))
+    mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1.0)))
+
+    is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD
+
+    return is_anomaly, outside_ratio, mean_abs_err, mean_rel_err
+
+
+def is_recovered(
+    state: BaselineState,
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    abs_threshold: float,
+    rel_threshold: float,
+) -> Tuple[bool, float]:
+    if len(ys_grid) < DETECT_WINDOW_SECONDS:
+        return False, 0.0
+
+    ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()
+    actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float)
+
+    pred = predict_by_state(state, ts_recent)
+    threshold = calc_threshold(pred, abs_threshold, rel_threshold)
+
+    abs_err = np.abs(actual - pred)
+    inside = abs_err <= threshold
+
+    inside_ratio = float(np.mean(inside))
+
+    return inside_ratio >= RECOVERY_INSIDE_RATIO_THRESHOLD, inside_ratio
+
+
+def create_initial_state(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    now_sec: int,
+) -> Optional[BaselineState]:
+    if len(ys_grid) < MIN_POINTS:
+        return None
+
+    period = estimate_period(ys_grid)
+
+    template = build_phase_template(
+        ts_grid=ts_grid,
+        ys_grid=ys_grid,
+        period=period,
+        max_cycles=MAX_CYCLES_FOR_TEMPLATE,
+        tail_seconds=period * MAX_CYCLES_FOR_TEMPLATE,
+    )
+
+    if template is None:
+        return None
+
+    return BaselineState(
+        period=int(period),
+        template=template.astype(float).tolist(),
+        status=BASELINE_STATUS_HEALTHY,
+        clean_seconds=int(period * MAX_CYCLES_FOR_TEMPLATE),
+        last_update_ts=now_sec,
+        last_seen_ts=now_sec,
+        y_min=float(np.min(ys_grid)),
+        y_max=float(np.max(ys_grid)),
+    )
+
+
+def maybe_update_state(
+    key: str,
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    abs_threshold: float,
+    rel_threshold: float,
+) -> Tuple[Optional[BaselineState], bool, float, float, float]:
+    now_sec = int(time.time())
+
+    state = BASELINE_STATES.get(key)
+
+    if state is None:
+        state = create_initial_state(ts_grid, ys_grid, now_sec)
+
+        if state is None:
+            return None, False, 0.0, 0.0, 0.0
+
+        BASELINE_STATES[key] = state
+        logger.info(
+            "初始化健康模板 key=%s period=%ss clean_seconds=%ss",
+            key,
+            state.period,
+            state.clean_seconds,
+        )
+        return state, False, 0.0, 0.0, 0.0
+
+    elapsed = max(1, now_sec - int(state.last_seen_ts))
+    elapsed = min(elapsed, POLL_INTERVAL * 2)
+    state.last_seen_ts = now_sec
+
+    is_anom, outside_ratio, mean_abs_err, mean_rel_err = detect_anomaly(
+        state=state,
+        ts_grid=ts_grid,
+        ys_grid=ys_grid,
+        abs_threshold=abs_threshold,
+        rel_threshold=rel_threshold,
+    )
+
+    if is_anom:
+        state.status = BASELINE_STATUS_ANOMALY
+        state.clean_seconds = 0
+
+        logger.warning(
+            "检测到异常，冻结模板 key=%s outside_ratio=%.2f mean_abs_err=%.2f mean_rel_err=%.2f",
+            key,
+            outside_ratio,
+            mean_abs_err,
+            mean_rel_err,
+        )
+
+        BASELINE_STATES[key] = state
+        return state, True, outside_ratio, mean_abs_err, mean_rel_err
+
+    recovered, inside_ratio = is_recovered(
+        state=state,
+        ts_grid=ts_grid,
+        ys_grid=ys_grid,
+        abs_threshold=abs_threshold,
+        rel_threshold=rel_threshold,
+    )
+
+    if state.status == BASELINE_STATUS_ANOMALY:
+        if recovered:
+            state.status = BASELINE_STATUS_RECOVERING
+            state.clean_seconds = elapsed
+            logger.info(
+                "异常开始恢复 key=%s inside_ratio=%.2f clean_seconds=%ss",
+                key,
+                inside_ratio,
+                state.clean_seconds,
+            )
+        else:
+            state.clean_seconds = 0
+            BASELINE_STATES[key] = state
+            return state, True, outside_ratio, mean_abs_err, mean_rel_err
+
+    elif state.status == BASELINE_STATUS_RECOVERING:
+        if recovered:
+            state.clean_seconds += elapsed
+        else:
+            state.status = BASELINE_STATUS_ANOMALY
+            state.clean_seconds = 0
+            BASELINE_STATES[key] = state
+            return state, True, outside_ratio, mean_abs_err, mean_rel_err
+
+    else:
+        state.status = BASELINE_STATUS_HEALTHY
+        state.clean_seconds += elapsed
+
+    # 故障恢复后，不要立刻学习。
+    # 必须至少连续正常：max(RECOVERY_MIN_SECONDS, 3 个周期)
+    min_clean_for_update = max(
+        RECOVERY_MIN_SECONDS,
+        int(state.period) * MIN_FULL_CYCLES_FOR_TEMPLATE,
+    )
+
+    if state.clean_seconds < min_clean_for_update:
+        BASELINE_STATES[key] = state
+        return state, False, outside_ratio, mean_abs_err, mean_rel_err
+
+    # 只使用最近 clean_seconds 这段连续正常数据来更新模板，避免历史故障污染。
+    new_period = estimate_period(ys_grid)
+    tail_seconds = min(
+        int(state.clean_seconds),
+        int(new_period) * MAX_CYCLES_FOR_TEMPLATE,
+    )
+
+    new_template = build_phase_template(
+        ts_grid=ts_grid,
+        ys_grid=ys_grid,
+        period=new_period,
+        max_cycles=MAX_CYCLES_FOR_TEMPLATE,
+        tail_seconds=tail_seconds,
+    )
+
+    if new_template is None:
+        BASELINE_STATES[key] = state
+        return state, False, outside_ratio, mean_abs_err, mean_rel_err
+
+    old_template = np.array(state.template, dtype=float)
+
+    if state.status == BASELINE_STATUS_RECOVERING:
+        alpha = RECOVERY_EMA_ALPHA
+        state.status = BASELINE_STATUS_HEALTHY
+    else:
+        alpha = HEALTHY_EMA_ALPHA
+
+    merged = merge_template(
+        old_template=old_template,
+        new_template=new_template,
+        alpha=alpha,
+    )
+
+    state.period = int(new_period)
+    state.template = merged.astype(float).tolist()
+    state.last_update_ts = now_sec
+    state.y_min = float(np.min(ys_grid[-tail_seconds:]))
+    state.y_max = float(np.max(ys_grid[-tail_seconds:]))
+
+    BASELINE_STATES[key] = state
+
+    logger.info(
+        "更新健康模板 key=%s period=%ss status=%s clean_seconds=%ss alpha=%.2f",
+        key,
+        state.period,
+        state.status,
+        state.clean_seconds,
+        alpha,
+    )
+
+    return state, False, outside_ratio, mean_abs_err, mean_rel_err
+
+
+# ── Prometheus 格式写入 ───────────────────────────────────────────────────────
+
+def prom_escape_label_value(value: str) -> str:
+    return (
+        str(value)
+        .replace("\\", "\\\\")
+        .replace("\n", "\\n")
+        .replace('"', '\\"')
+    )
+
+
+def labels_to_str(labels: Dict[str, str]) -> str:
+    if not labels:
+        return ""
+
+    parts = []
+
+    for k in sorted(labels.keys()):
+        v = prom_escape_label_value(labels[k])
+        parts.append(f'{k}="{v}"')
+
+    return "{" + ",".join(parts) + "}"
+
+
+def write_series(
+    metric_name: str,
+    labels: Dict[str, str],
+    ts_list: List[int],
+    values: List[float],
+) -> bool:
+    if not ts_list or not values or len(ts_list) != len(values):
+        return False
+
+    label_str = labels_to_str(labels)
+    lines = []
+
+    for t, y in zip(ts_list, values):
+        try:
+            ts_sec = int(round(float(t)))
+            val = float(y)
+        except Exception:
+            continue
+
+        if not math.isfinite(ts_sec) or not math.isfinite(val):
+            continue
+
+        ts_ms = ts_sec * 1000
+        lines.append(f"{metric_name}{label_str} {val:.6f} {ts_ms}")
+
+    if not lines:
+        return False
+
+    payload = "\n".join(lines) + "\n"
+
+    try:
+        resp = requests.post(
+            f"{VM_URL}/api/v1/import/prometheus",
+            data=payload.encode("utf-8"),
+            headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
+            timeout=10,
+        )
+        resp.raise_for_status()
+        return True
+    except requests.RequestException as e:
+        logger.error("写入数据失败 metric=%s: %s", metric_name, e)
+        return False
+
+
+def write_prediction_bundle(
+    pred_metric: str,
+    anomaly_metric: str,
+    labels: Dict[str, str],
+    ts_future: List[int],
+    pred_values: np.ndarray,
+    lower_values: np.ndarray,
+    upper_values: np.ndarray,
+    is_anomaly: bool,
+    outside_ratio: float,
+    mean_abs_err: float,
+    mean_rel_err: float,
+) -> bool:
+    ok1 = write_series(
+        metric_name=pred_metric,
+        labels=labels,
+        ts_list=ts_future,
+        values=pred_values.astype(float).tolist(),
+    )
+
+    ok2 = write_series(
+        metric_name=f"{pred_metric}_lower",
+        labels=labels,
+        ts_list=ts_future,
+        values=lower_values.astype(float).tolist(),
+    )
+
+    ok3 = write_series(
+        metric_name=f"{pred_metric}_upper",
+        labels=labels,
+        ts_list=ts_future,
+        values=upper_values.astype(float).tolist(),
+    )
+
+    now_sec = int(time.time())
+
+    anomaly_labels = dict(labels)
+    anomaly_labels["type"] = "prediction_deviation"
+
+    ok4 = write_series(
+        metric_name=anomaly_metric,
+        labels=anomaly_labels,
+        ts_list=[now_sec],
+        values=[1.0 if is_anomaly else 0.0],
+    )
+
+    ok5 = write_series(
+        metric_name=f"{anomaly_metric}_outside_ratio",
+        labels=anomaly_labels,
+        ts_list=[now_sec],
+        values=[outside_ratio],
+    )
+
+    ok6 = write_series(
+        metric_name=f"{anomaly_metric}_mean_abs_error",
+        labels=anomaly_labels,
+        ts_list=[now_sec],
+        values=[mean_abs_err],
+    )
+
+    ok7 = write_series(
+        metric_name=f"{anomaly_metric}_mean_rel_error",
+        labels=anomaly_labels,
+        ts_list=[now_sec],
+        values=[mean_rel_err],
+    )
+
+    return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7
+
+
+# ── 标签解析 ──────────────────────────────────────────────────────────────────
+
+_LABEL_PATTERN = re.compile(
+    r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
+)
+
+
+def _parse_labels(query: str) -> Dict[str, str]:
+    labels = {}
+
+    if "{" not in query or "}" not in query:
+        return labels
+
+    try:
+        label_part = query[query.index("{") + 1: query.rindex("}")]
+    except Exception:
+        return labels
+
+    for match in _LABEL_PATTERN.finditer(label_part):
+        key = match.group(1)
+        value = match.group(2)
+        value = value.replace('\\"', '"').replace("\\n", "\n").replace("\\\\", "\\")
+        labels[key] = value
+
+    return labels
+
+
+def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]:
+    result = {}
+
+    for d in dicts:
+        if d:
+            result.update(d)
+
+    return result
+
+
+def series_key(metric_name: str, labels: Dict[str, str]) -> str:
+    return metric_name + labels_to_str(labels)
+
+
+# ── 状态持久化 ────────────────────────────────────────────────────────────────
+
+def load_state():
+    global BASELINE_STATES
+
+    if not os.path.exists(STATE_FILE):
+        return
+
+    try:
+        with open(STATE_FILE, "r", encoding="utf-8") as f:
+            raw = json.load(f)
+
+        states = {}
+
+        for key, value in raw.get("baseline_states", {}).items():
+            states[key] = BaselineState(**value)
+
+        BASELINE_STATES = states
+
+        logger.info("已加载预测状态文件 %s，状态数量=%d", STATE_FILE, len(BASELINE_STATES))
+
+    except Exception as e:
+        logger.warning("加载预测状态文件失败，将重新学习: %s", e)
+
+
+def save_state():
+    try:
+        raw = {
+            "baseline_states": {
+                key: asdict(value)
+                for key, value in BASELINE_STATES.items()
+            }
+        }
+
+        tmp_file = STATE_FILE + ".tmp"
+
+        with open(tmp_file, "w", encoding="utf-8") as f:
+            json.dump(raw, f, ensure_ascii=False, indent=2)
+
+        os.replace(tmp_file, STATE_FILE)
+
+    except Exception as e:
+        logger.warning("保存预测状态文件失败: %s", e)
+
+
+# ── 主逻辑 ────────────────────────────────────────────────────────────────────
+
+def run_once():
+    now_str = datetime.now().strftime("%H:%M:%S")
+
+    for target in PREDICT_TARGETS:
+        query = target["query"]
+        pred_metric = target["pred_metric"]
+        anomaly_metric = target["anomaly_metric"]
+        abs_threshold = float(target["abs_threshold"])
+        rel_threshold = float(target["rel_threshold"])
+
+        ts, ys = fetch_history(query)
+
+        if len(ys) < MIN_POINTS:
+            logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
+            continue
+
+        ts_grid, ys_grid = normalize_history(ts, ys)
+
+        if len(ys_grid) < MIN_POINTS:
+            logger.info("[%s] %s 清洗后数据不足（%d 点），跳过", now_str, query, len(ys_grid))
+            continue
+
+        base_labels = _parse_labels(query)
+        write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
+
+        key = series_key(pred_metric, write_labels)
+
+        state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err = maybe_update_state(
+            key=key,
+            ts_grid=ts_grid,
+            ys_grid=ys_grid,
+            abs_threshold=abs_threshold,
+            rel_threshold=rel_threshold,
+        )
+
+        if state is None:
+            logger.info("[%s] %s 暂无可用健康模板，等待学习", now_str, query)
+            continue
+
+        now_sec = int(time.time())
+        last_until = LAST_WRITTEN_UNTIL.get(key, 0)
+        last_real_ts = int(ts_grid[-1])
+
+        base_ts = max(now_sec, last_until, last_real_ts)
+
+        ts_future = [
+            base_ts + i + 1
+            for i in range(WRITE_HORIZON_SECONDS)
+        ]
+
+        pred_values = predict_by_state(state, ts_future)
+
+        lower_values, upper_values = calc_bounds(
+            pred=pred_values,
+            abs_threshold=abs_threshold,
+            rel_threshold=rel_threshold,
+        )
+
+        ok = write_prediction_bundle(
+            pred_metric=pred_metric,
+            anomaly_metric=anomaly_metric,
+            labels=write_labels,
+            ts_future=ts_future,
+            pred_values=pred_values,
+            lower_values=lower_values,
+            upper_values=upper_values,
+            is_anomaly=is_anomaly,
+            outside_ratio=outside_ratio,
+            mean_abs_err=mean_abs_err,
+            mean_rel_err=mean_rel_err,
+        )
+
+        if not ok:
+            continue
+
+        LAST_WRITTEN_UNTIL[key] = int(max(ts_future))
+
+        future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S")
+        future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
+
+        logger.info(
+            "[%s] %-40s → %-35s status=%s anomaly=%s period=%ss clean=%ss 写入 %d 点，预测区间 %s ~ %s",
+            now_str,
+            query,
+            pred_metric,
+            state.status,
+            is_anomaly,
+            state.period,
+            state.clean_seconds,
+            len(ts_future),
+            future_start,
+            future_end,
+        )
+
+    save_state()
+
+
+def main():
+    load_state()
+
+    logger.info(
+        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s",
+        VM_URL,
+        HISTORY_MINUTES,
+        HORIZON_SECONDS,
+        WRITE_HORIZON_SECONDS,
+        POLL_INTERVAL,
+        STATE_FILE,
+    )
+
+    while True:
+        run_once()
+        time.sleep(POLL_INTERVAL)
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file

From 54c4b851a004567078cfec337933aafcbd676b44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 21 May 2026 13:39:33 +0800
Subject: [PATCH 23/36] fix

---
 ai/predict_v3_single_scene.py | 701 +++++++++++++++++++++++-----------
 1 file changed, 488 insertions(+), 213 deletions(-)
 mode change 100644 => 100755 ai/predict_v3_single_scene.py

diff --git a/ai/predict_v3_single_scene.py b/ai/predict_v3_single_scene.py
old mode 100644
new mode 100755
index 23af8c5..fc07f4f
--- a/ai/predict_v3_single_scene.py
+++ b/ai/predict_v3_single_scene.py
@@ -1,18 +1,22 @@
 # -*- coding: utf-8 -*-
 """
-ProtoForge 预测服务 v6
-
-核心能力：
-1. 周期模板预测：适合 CNC 这类强周期、非标准正弦波形。
-2. 健康基线冻结：检测到异常后，不再用故障数据更新预测模板。
-3. 恢复冷却机制：故障恢复后，需要连续稳定多个周期，才恢复学习。
-4. 预测上下界：写入 predicted_upper / predicted_lower，方便 Grafana 展示预测带。
-5. 异常标记：写入 xxx_anomaly，1 表示异常，0 表示正常。
-6. 不删除历史预测，不使用 delete_series。
-"""
-
-"""
-场景：不考虑物料、不考虑跨程序场景算法预测
+ProtoForge Predictor v8
+
+功能：
+1. 从 VictoriaMetrics 拉取历史数据。
+2. 对 CNC 周期型指标进行相位对齐预测。
+3. 使用“谷底锚点”对齐周期，减少上升沿/下降沿相位偏差。
+4. 每轮只写入未来 min(HORIZON_SECONDS, POLL_INTERVAL) 秒，避免预测窗口重叠。
+5. 检测异常后冻结健康模板，不把故障数据学进去。
+6. 故障恢复后等待稳定一段时间，再恢复模板更新。
+7. 写入：
+   - xxx_predicted
+   - xxx_predicted_upper
+   - xxx_predicted_lower
+   - xxx_anomaly
+   - xxx_anomaly_outside_ratio
+   - xxx_anomaly_mean_abs_error
+   - xxx_anomaly_mean_rel_error
 """
 
 import json
@@ -29,6 +33,10 @@
 import requests
 
 
+# =============================================================================
+# 日志配置
+# =============================================================================
+
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s [%(levelname)s] %(message)s",
@@ -37,18 +45,19 @@
 logger = logging.getLogger(__name__)
 
 
-# ── 基础配置 ──────────────────────────────────────────────────────────────────
+# =============================================================================
+# 基础配置
+# =============================================================================
 
 VM_URL = "http://localhost:8428"
 
-STATE_FILE = "/tmp/protoforge_predictor_state.json"
+STATE_FILE = "/tmp/protoforge_predictor_state_v8.json"
 
 HISTORY_MINUTES = 30
 HORIZON_SECONDS = 120
 POLL_INTERVAL = 30
 
-# 实际每轮写入未来多少秒。
-# 不要大于 POLL_INTERVAL，否则多轮预测会重叠。
+# 实际写入窗口不要大于轮询间隔，否则多轮预测会重叠。
 WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL)
 
 QUERY_STEP = "1s"
@@ -57,34 +66,25 @@
 MIN_PERIOD_SECONDS = 5
 MAX_PERIOD_SECONDS = 3600
 
-# 至少多少个完整周期才允许构建健康模板
 MIN_FULL_CYCLES_FOR_TEMPLATE = 3
-
-# 构建模板最多使用最近多少个周期
 MAX_CYCLES_FOR_TEMPLATE = 6
 
-# 检测异常使用最近多少秒实际数据
 DETECT_WINDOW_SECONDS = 15
-
-# 恢复后，至少连续正常多少秒才考虑恢复学习
 RECOVERY_MIN_SECONDS = 60
 
-# 健康状态下模板更新速度，越小越保守
-HEALTHY_EMA_ALPHA = 0.15
-
-# 故障恢复后第一次重新学习时的更新速度
-RECOVERY_EMA_ALPHA = 0.35
+HEALTHY_EMA_ALPHA = 0.12
+RECOVERY_EMA_ALPHA = 0.30
 
-# 最近窗口里有多少比例的点超过阈值，才认为异常
 OUTSIDE_RATIO_THRESHOLD = 0.60
-
-# 最近窗口里有多少比例的点回到阈值内，才认为恢复正常
 RECOVERY_INSIDE_RATIO_THRESHOLD = 0.80
 
+PHASE_SEARCH_RATIO = 0.15
+VALLEY_QUANTILE = 45
+
 
-# ── 指标配置 ──────────────────────────────────────────────────────────────────
-# abs_threshold / rel_threshold 需要按指标单位调。
-# feed_rate 单位 mm/min，这里先给 400 和 25%。
+# =============================================================================
+# 预测指标配置
+# =============================================================================
 
 PREDICT_TARGETS = [
     {
@@ -132,21 +132,23 @@
 ]
 
 EXTRA_PREDICT_LABELS = {
-    "forecast": "health_gated_v1",
+    "forecast": "phase_aligned_health_v8",
     "source": "protoforge",
 }
 
 BASELINE_STATUS_HEALTHY = "healthy"
 BASELINE_STATUS_ANOMALY = "anomaly"
 BASELINE_STATUS_RECOVERING = "recovering"
-BASELINE_STATUS_LEARNING = "learning"
 
 
-# ── 状态结构 ──────────────────────────────────────────────────────────────────
+# =============================================================================
+# 状态结构
+# =============================================================================
 
 @dataclass
 class BaselineState:
     period: int
+    phase_origin_ts: int
     template: List[float]
     status: str
     clean_seconds: int
@@ -160,7 +162,9 @@ class BaselineState:
 LAST_WRITTEN_UNTIL: Dict[str, int] = {}
 
 
-# ── VM 读取 ───────────────────────────────────────────────────────────────────
+# =============================================================================
+# VictoriaMetrics 读取
+# =============================================================================
 
 def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]:
     now = datetime.now()
@@ -258,7 +262,25 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.
     return ts_grid, ys_grid
 
 
-# ── 周期估计 ──────────────────────────────────────────────────────────────────
+# =============================================================================
+# 周期估计
+# =============================================================================
+
+def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
+    if window <= 1 or len(arr) < window:
+        return arr.astype(float)
+
+    window = int(window)
+
+    if window % 2 == 0:
+        window += 1
+
+    kernel = np.ones(window, dtype=float) / window
+    pad = window // 2
+    padded = np.pad(arr.astype(float), (pad, pad), mode="edge")
+
+    return np.convolve(padded, kernel, mode="valid")
+
 
 def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
     n = len(ys_arr)
@@ -307,7 +329,7 @@ def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float:
     corr = np.correlate(centered, centered, mode="full")[n - 1:]
 
     p0 = int(round(init_period))
-    left = max(MIN_PERIOD_SECONDS, int(max(2, p0 * 0.7)))
+    left = max(int(MIN_PERIOD_SECONDS), int(max(2, p0 * 0.7)))
     right = min(n // 2, int(max(left + 1, p0 * 1.3)))
 
     if right <= left:
@@ -323,96 +345,252 @@ def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float:
     return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
 
 
-def estimate_period(ys_arr: np.ndarray) -> int:
+def estimate_period_rough(ys_arr: np.ndarray) -> int:
     p_fft = estimate_period_by_fft(ys_arr)
     p_refined = refine_period_by_autocorr(ys_arr, p_fft)
 
     period = int(round(p_refined))
-    period = max(MIN_PERIOD_SECONDS, min(MAX_PERIOD_SECONDS, period))
+    period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period))
 
     return int(period)
 
 
-# ── 模板构建与预测 ─────────────────────────────────────────────────────────────
+# =============================================================================
+# 谷底锚点检测
+# =============================================================================
 
-def fill_template_nan(template: np.ndarray) -> np.ndarray:
-    period = len(template)
+def find_valley_indices(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    expected_period: int,
+) -> List[int]:
+    n = len(ys_grid)
 
-    if period == 0:
-        return template
+    if n < max(10, expected_period * 2):
+        return []
+
+    period = max(3, int(expected_period))
+
+    smooth_window = max(3, int(round(period * 0.08)))
+    smooth_window = min(smooth_window, 21)
+
+    ys_smooth = moving_average(ys_grid, smooth_window)
+    threshold = float(np.percentile(ys_smooth, VALLEY_QUANTILE))
+
+    candidates = []
+
+    for i in range(1, n - 1):
+        if (
+            ys_smooth[i] <= ys_smooth[i - 1]
+            and ys_smooth[i] < ys_smooth[i + 1]
+            and ys_smooth[i] <= threshold
+        ):
+            candidates.append(i)
+
+    if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE:
+        candidates = []
+
+        for i in range(1, n - 1):
+            if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]:
+                candidates.append(i)
+
+    if not candidates:
+        return []
 
-    idx = np.arange(period)
-    valid = np.isfinite(template)
+    min_distance = max(2, int(round(period * 0.55)))
+    selected = []
 
-    if not np.any(valid):
-        return np.zeros(period, dtype=float)
+    for idx in candidates:
+        if not selected:
+            selected.append(idx)
+            continue
+
+        if idx - selected[-1] >= min_distance:
+            selected.append(idx)
+            continue
+
+        if ys_smooth[idx] < ys_smooth[selected[-1]]:
+            selected[-1] = idx
 
-    if np.all(valid):
-        return template
+    if len(selected) < 2:
+        return selected
 
-    x_valid = idx[valid]
-    y_valid = template[valid]
+    cleaned = [selected[0]]
+
+    for idx in selected[1:]:
+        diff = int(ts_grid[idx] - ts_grid[cleaned[-1]])
+
+        if int(period * 0.55) <= diff <= int(period * 1.60):
+            cleaned.append(idx)
+            continue
 
-    # 环形插值，处理 phase 0 附近缺口
-    x_ext = np.concatenate([x_valid - period, x_valid, x_valid + period])
-    y_ext = np.concatenate([y_valid, y_valid, y_valid])
+        if diff < int(period * 0.55):
+            if ys_smooth[idx] < ys_smooth[cleaned[-1]]:
+                cleaned[-1] = idx
+            continue
 
-    filled = np.interp(idx, x_ext, y_ext)
+        cleaned.append(idx)
 
-    return filled.astype(float)
+    return cleaned
 
 
-def build_phase_template(
+def detect_period_and_valleys(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+) -> Tuple[int, List[int]]:
+    rough = estimate_period_rough(ys_grid)
+    valleys = find_valley_indices(ts_grid, ys_grid, rough)
+
+    if len(valleys) >= 3:
+        diffs = np.diff(ts_grid[valleys])
+        good = diffs[(diffs >= rough * 0.55) & (diffs <= rough * 1.60)]
+
+        if len(good) > 0:
+            period = int(round(float(np.median(good))))
+        else:
+            period = rough
+    else:
+        period = rough
+
+    period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period))
+
+    return int(period), valleys
+
+
+# =============================================================================
+# 相位对齐模板构建
+# =============================================================================
+
+def build_template_from_valleys(
     ts_grid: np.ndarray,
     ys_grid: np.ndarray,
     period: int,
+    valleys: List[int],
     max_cycles: int = MAX_CYCLES_FOR_TEMPLATE,
-    tail_seconds: Optional[int] = None,
 ) -> Optional[np.ndarray]:
-    if period <= 1 or len(ys_grid) < period * MIN_FULL_CYCLES_FOR_TEMPLATE:
+    if period <= 1 or len(valleys) < MIN_FULL_CYCLES_FOR_TEMPLATE + 1:
         return None
 
-    max_seconds = period * max_cycles
+    pairs = []
+
+    for a, b in zip(valleys[:-1], valleys[1:]):
+        cycle_len = float(ts_grid[b] - ts_grid[a])
 
-    if tail_seconds is not None:
-        max_seconds = min(max_seconds, int(tail_seconds))
+        if period * 0.55 <= cycle_len <= period * 1.60:
+            pairs.append((a, b, cycle_len))
 
-    max_seconds = max(period * MIN_FULL_CYCLES_FOR_TEMPLATE, max_seconds)
+    if len(pairs) < MIN_FULL_CYCLES_FOR_TEMPLATE:
+        return None
+
+    pairs = pairs[-max_cycles:]
+
+    phase_grid = np.arange(period, dtype=float)
+    segments = []
+    weights = []
+
+    for idx, (a, b, cycle_len) in enumerate(pairs):
+        seg_ts = ts_grid[a:b + 1]
+        seg_y = ys_grid[a:b + 1]
+
+        if len(seg_y) < 3:
+            continue
 
-    if len(ys_grid) < max_seconds:
-        start_idx = 0
+        x_old = (seg_ts - seg_ts[0]) / cycle_len * period
+        seg = np.interp(phase_grid, x_old, seg_y)
+
+        segments.append(seg.astype(float))
+
+        weight = 0.5 + 0.5 * ((idx + 1) / len(pairs))
+        weights.append(weight)
+
+    if len(segments) < MIN_FULL_CYCLES_FOR_TEMPLATE:
+        return None
+
+    arr = np.vstack(segments)
+    w_arr = np.array(weights, dtype=float)
+
+    template = np.average(arr, axis=0, weights=w_arr)
+
+    return template.astype(float)
+
+
+def build_current_baseline(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    tail_seconds: Optional[int] = None,
+) -> Optional[Tuple[int, int, np.ndarray]]:
+    if len(ys_grid) < MIN_POINTS:
+        return None
+
+    if tail_seconds is not None and tail_seconds > 0:
+        cutoff = ts_grid[-1] - int(tail_seconds)
+        mask = ts_grid >= cutoff
+        ts_use = ts_grid[mask]
+        ys_use = ys_grid[mask]
     else:
-        start_idx = len(ys_grid) - max_seconds
+        ts_use = ts_grid
+        ys_use = ys_grid
+
+    if len(ys_use) < MIN_POINTS:
+        return None
 
-    ts_tail = ts_grid[start_idx:]
-    ys_tail = ys_grid[start_idx:]
+    period, valleys = detect_period_and_valleys(ts_use, ys_use)
 
-    if len(ys_tail) < period * MIN_FULL_CYCLES_FOR_TEMPLATE:
+    template = build_template_from_valleys(
+        ts_grid=ts_use,
+        ys_grid=ys_use,
+        period=period,
+        valleys=valleys,
+    )
+
+    if template is None or len(valleys) == 0:
         return None
 
-    sums = np.zeros(period, dtype=float)
-    weights = np.zeros(period, dtype=float)
+    phase_origin_ts = int(round(float(ts_use[valleys[-1]])))
 
-    total = len(ys_tail)
+    return int(period), phase_origin_ts, template
 
-    for i, (t, y) in enumerate(zip(ts_tail, ys_tail)):
-        phase = int(t) % period
 
-        # 越近的数据权重越高
-        recency = (i + 1) / total
-        weight = 0.3 + 0.7 * recency
+# =============================================================================
+# 模板预测
+# =============================================================================
 
-        sums[phase] += float(y) * weight
-        weights[phase] += weight
+def circular_template_value(template: np.ndarray, phase: float) -> float:
+    period = len(template)
 
-    template = np.full(period, np.nan, dtype=float)
+    if period == 0:
+        return 0.0
+
+    phase = float(phase) % period
 
-    valid = weights > 0
-    template[valid] = sums[valid] / weights[valid]
+    i0 = int(math.floor(phase)) % period
+    i1 = (i0 + 1) % period
 
-    template = fill_template_nan(template)
+    frac = phase - math.floor(phase)
 
-    return template
+    return float((1.0 - frac) * template[i0] + frac * template[i1])
+
+
+def predict_with_origin(
+    state: BaselineState,
+    ts_list: List[int],
+    phase_origin_ts: Optional[int] = None,
+) -> np.ndarray:
+    template = np.array(state.template, dtype=float)
+    period = int(state.period)
+
+    if period <= 1 or len(template) != period:
+        return np.zeros(len(ts_list), dtype=float)
+
+    origin = int(state.phase_origin_ts if phase_origin_ts is None else phase_origin_ts)
+
+    values = []
+
+    for ts in ts_list:
+        phase = (int(ts) - origin) % period
+        values.append(circular_template_value(template, phase))
+
+    return np.array(values, dtype=float)
 
 
 def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
@@ -433,6 +611,38 @@ def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
     return np.interp(new_x, old_x_ext, old_y_ext).astype(float)
 
 
+def align_new_template_to_old(
+    old_template: np.ndarray,
+    new_template: np.ndarray,
+) -> np.ndarray:
+    if len(old_template) != len(new_template):
+        old_template = resample_template(old_template, len(new_template))
+
+    period = len(new_template)
+
+    if period <= 2:
+        return new_template.astype(float)
+
+    max_shift = max(1, int(round(period * 0.10)))
+
+    old_norm = old_template - np.mean(old_template)
+
+    best_score = None
+    best_template = new_template
+
+    for shift in range(-max_shift, max_shift + 1):
+        shifted = np.roll(new_template, shift)
+        shifted_norm = shifted - np.mean(shifted)
+
+        score = float(np.dot(old_norm, shifted_norm))
+
+        if best_score is None or score > best_score:
+            best_score = score
+            best_template = shifted
+
+    return best_template.astype(float)
+
+
 def merge_template(
     old_template: np.ndarray,
     new_template: np.ndarray,
@@ -443,37 +653,64 @@ def merge_template(
     if len(old_template) != len(new_template):
         old_template = resample_template(old_template, len(new_template))
 
-    return ((1.0 - alpha) * old_template + alpha * new_template).astype(float)
-
-
-def predict_by_state(state: BaselineState, ts_list: List[int]) -> np.ndarray:
-    template = np.array(state.template, dtype=float)
-    period = int(state.period)
-
-    if period <= 1 or len(template) != period:
-        return np.zeros(len(ts_list), dtype=float)
+    new_template = align_new_template_to_old(old_template, new_template)
 
-    values = []
+    merged = (1.0 - alpha) * old_template + alpha * new_template
 
-    for ts in ts_list:
-        phase = int(ts) % period
-        values.append(float(template[phase]))
+    return merged.astype(float)
 
-    return np.array(values, dtype=float)
 
+# =============================================================================
+# 异常检测
+# =============================================================================
 
-def calc_threshold(pred: np.ndarray, abs_threshold: float, rel_threshold: float) -> np.ndarray:
+def calc_threshold(
+    pred: np.ndarray,
+    abs_threshold: float,
+    rel_threshold: float,
+) -> np.ndarray:
     return np.maximum(abs_threshold, np.abs(pred) * rel_threshold)
 
 
-def calc_bounds(pred: np.ndarray, abs_threshold: float, rel_threshold: float) -> Tuple[np.ndarray, np.ndarray]:
+def calc_bounds(
+    pred: np.ndarray,
+    abs_threshold: float,
+    rel_threshold: float,
+) -> Tuple[np.ndarray, np.ndarray]:
     threshold = calc_threshold(pred, abs_threshold, rel_threshold)
+
     lower = pred - threshold
     upper = pred + threshold
+
     return lower, upper
 
 
-# ── 异常检测与状态更新 ────────────────────────────────────────────────────────
+def find_best_phase_origin_for_recent(
+    state: BaselineState,
+    ts_recent: List[int],
+    actual: np.ndarray,
+) -> Tuple[int, np.ndarray, float]:
+    period = int(state.period)
+    base_origin = int(state.phase_origin_ts)
+
+    max_shift = max(1, int(round(period * PHASE_SEARCH_RATIO)))
+
+    best_origin = base_origin
+    best_pred = predict_with_origin(state, ts_recent, base_origin)
+    best_mae = float(np.mean(np.abs(actual - best_pred)))
+
+    for shift in range(-max_shift, max_shift + 1):
+        origin = base_origin + shift
+        pred = predict_with_origin(state, ts_recent, origin)
+        mae = float(np.mean(np.abs(actual - pred)))
+
+        if mae < best_mae:
+            best_mae = mae
+            best_origin = origin
+            best_pred = pred
+
+    return best_origin, best_pred, best_mae
+
 
 def detect_anomaly(
     state: BaselineState,
@@ -481,14 +718,19 @@ def detect_anomaly(
     ys_grid: np.ndarray,
     abs_threshold: float,
     rel_threshold: float,
-) -> Tuple[bool, float, float, float]:
+) -> Tuple[bool, float, float, float, int]:
     if len(ys_grid) < DETECT_WINDOW_SECONDS:
-        return False, 0.0, 0.0, 0.0
+        return False, 0.0, 0.0, 0.0, int(state.phase_origin_ts)
 
     ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()
     actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float)
 
-    pred = predict_by_state(state, ts_recent)
+    best_origin, pred, _ = find_best_phase_origin_for_recent(
+        state=state,
+        ts_recent=ts_recent,
+        actual=actual,
+    )
+
     threshold = calc_threshold(pred, abs_threshold, rel_threshold)
 
     abs_err = np.abs(actual - pred)
@@ -500,56 +742,28 @@ def detect_anomaly(
 
     is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD
 
-    return is_anomaly, outside_ratio, mean_abs_err, mean_rel_err
-
-
-def is_recovered(
-    state: BaselineState,
-    ts_grid: np.ndarray,
-    ys_grid: np.ndarray,
-    abs_threshold: float,
-    rel_threshold: float,
-) -> Tuple[bool, float]:
-    if len(ys_grid) < DETECT_WINDOW_SECONDS:
-        return False, 0.0
-
-    ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()
-    actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float)
-
-    pred = predict_by_state(state, ts_recent)
-    threshold = calc_threshold(pred, abs_threshold, rel_threshold)
-
-    abs_err = np.abs(actual - pred)
-    inside = abs_err <= threshold
-
-    inside_ratio = float(np.mean(inside))
+    return is_anomaly, outside_ratio, mean_abs_err, mean_rel_err, int(best_origin)
 
-    return inside_ratio >= RECOVERY_INSIDE_RATIO_THRESHOLD, inside_ratio
 
+# =============================================================================
+# 健康基线状态管理
+# =============================================================================
 
 def create_initial_state(
     ts_grid: np.ndarray,
     ys_grid: np.ndarray,
     now_sec: int,
 ) -> Optional[BaselineState]:
-    if len(ys_grid) < MIN_POINTS:
-        return None
-
-    period = estimate_period(ys_grid)
-
-    template = build_phase_template(
-        ts_grid=ts_grid,
-        ys_grid=ys_grid,
-        period=period,
-        max_cycles=MAX_CYCLES_FOR_TEMPLATE,
-        tail_seconds=period * MAX_CYCLES_FOR_TEMPLATE,
-    )
+    baseline = build_current_baseline(ts_grid, ys_grid)
 
-    if template is None:
+    if baseline is None:
         return None
 
+    period, phase_origin_ts, template = baseline
+
     return BaselineState(
         period=int(period),
+        phase_origin_ts=int(phase_origin_ts),
         template=template.astype(float).tolist(),
         status=BASELINE_STATUS_HEALTHY,
         clean_seconds=int(period * MAX_CYCLES_FOR_TEMPLATE),
@@ -578,19 +792,23 @@ def maybe_update_state(
             return None, False, 0.0, 0.0, 0.0
 
         BASELINE_STATES[key] = state
+
         logger.info(
-            "初始化健康模板 key=%s period=%ss clean_seconds=%ss",
+            "初始化健康模板 key=%s period=%ss origin=%s clean=%ss",
             key,
             state.period,
+            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
             state.clean_seconds,
         )
+
         return state, False, 0.0, 0.0, 0.0
 
     elapsed = max(1, now_sec - int(state.last_seen_ts))
     elapsed = min(elapsed, POLL_INTERVAL * 2)
+
     state.last_seen_ts = now_sec
 
-    is_anom, outside_ratio, mean_abs_err, mean_rel_err = detect_anomaly(
+    is_anom, outside_ratio, mean_abs_err, mean_rel_err, best_origin = detect_anomaly(
         state=state,
         ts_grid=ts_grid,
         ys_grid=ys_grid,
@@ -602,6 +820,8 @@ def maybe_update_state(
         state.status = BASELINE_STATUS_ANOMALY
         state.clean_seconds = 0
 
+        BASELINE_STATES[key] = state
+
         logger.warning(
             "检测到异常，冻结模板 key=%s outside_ratio=%.2f mean_abs_err=%.2f mean_rel_err=%.2f",
             key,
@@ -610,47 +830,39 @@ def maybe_update_state(
             mean_rel_err,
         )
 
-        BASELINE_STATES[key] = state
         return state, True, outside_ratio, mean_abs_err, mean_rel_err
 
-    recovered, inside_ratio = is_recovered(
-        state=state,
-        ts_grid=ts_grid,
-        ys_grid=ys_grid,
-        abs_threshold=abs_threshold,
-        rel_threshold=rel_threshold,
-    )
+    old_origin = int(state.phase_origin_ts)
+    state.phase_origin_ts = int(best_origin)
+
+    if abs(state.phase_origin_ts - old_origin) >= 1:
+        logger.debug(
+            "相位校正 key=%s origin %s -> %s",
+            key,
+            datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"),
+            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+        )
 
     if state.status == BASELINE_STATUS_ANOMALY:
-        if recovered:
-            state.status = BASELINE_STATUS_RECOVERING
-            state.clean_seconds = elapsed
-            logger.info(
-                "异常开始恢复 key=%s inside_ratio=%.2f clean_seconds=%ss",
-                key,
-                inside_ratio,
-                state.clean_seconds,
-            )
-        else:
-            state.clean_seconds = 0
-            BASELINE_STATES[key] = state
-            return state, True, outside_ratio, mean_abs_err, mean_rel_err
+        state.status = BASELINE_STATUS_RECOVERING
+        state.clean_seconds = elapsed
 
-    elif state.status == BASELINE_STATUS_RECOVERING:
-        if recovered:
-            state.clean_seconds += elapsed
-        else:
-            state.status = BASELINE_STATUS_ANOMALY
-            state.clean_seconds = 0
-            BASELINE_STATES[key] = state
-            return state, True, outside_ratio, mean_abs_err, mean_rel_err
+        BASELINE_STATES[key] = state
 
+        logger.info(
+            "异常开始恢复 key=%s clean_seconds=%ss",
+            key,
+            state.clean_seconds,
+        )
+
+        return state, False, outside_ratio, mean_abs_err, mean_rel_err
+
+    if state.status == BASELINE_STATUS_RECOVERING:
+        state.clean_seconds += elapsed
     else:
         state.status = BASELINE_STATUS_HEALTHY
         state.clean_seconds += elapsed
 
-    # 故障恢复后，不要立刻学习。
-    # 必须至少连续正常：max(RECOVERY_MIN_SECONDS, 3 个周期)
     min_clean_for_update = max(
         RECOVERY_MIN_SECONDS,
         int(state.period) * MIN_FULL_CYCLES_FOR_TEMPLATE,
@@ -660,30 +872,26 @@ def maybe_update_state(
         BASELINE_STATES[key] = state
         return state, False, outside_ratio, mean_abs_err, mean_rel_err
 
-    # 只使用最近 clean_seconds 这段连续正常数据来更新模板，避免历史故障污染。
-    new_period = estimate_period(ys_grid)
     tail_seconds = min(
         int(state.clean_seconds),
-        int(new_period) * MAX_CYCLES_FOR_TEMPLATE,
+        int(state.period) * MAX_CYCLES_FOR_TEMPLATE,
     )
 
-    new_template = build_phase_template(
+    baseline = build_current_baseline(
         ts_grid=ts_grid,
         ys_grid=ys_grid,
-        period=new_period,
-        max_cycles=MAX_CYCLES_FOR_TEMPLATE,
         tail_seconds=tail_seconds,
     )
 
-    if new_template is None:
+    if baseline is None:
         BASELINE_STATES[key] = state
         return state, False, outside_ratio, mean_abs_err, mean_rel_err
 
+    new_period, new_origin, new_template = baseline
     old_template = np.array(state.template, dtype=float)
 
     if state.status == BASELINE_STATUS_RECOVERING:
         alpha = RECOVERY_EMA_ALPHA
-        state.status = BASELINE_STATUS_HEALTHY
     else:
         alpha = HEALTHY_EMA_ALPHA
 
@@ -694,18 +902,25 @@ def maybe_update_state(
     )
 
     state.period = int(new_period)
+    state.phase_origin_ts = int(new_origin)
     state.template = merged.astype(float).tolist()
+    state.status = BASELINE_STATUS_HEALTHY
     state.last_update_ts = now_sec
-    state.y_min = float(np.min(ys_grid[-tail_seconds:]))
-    state.y_max = float(np.max(ys_grid[-tail_seconds:]))
+
+    if tail_seconds > 0 and len(ys_grid) >= tail_seconds:
+        state.y_min = float(np.min(ys_grid[-tail_seconds:]))
+        state.y_max = float(np.max(ys_grid[-tail_seconds:]))
+    else:
+        state.y_min = float(np.min(ys_grid))
+        state.y_max = float(np.max(ys_grid))
 
     BASELINE_STATES[key] = state
 
     logger.info(
-        "更新健康模板 key=%s period=%ss status=%s clean_seconds=%ss alpha=%.2f",
+        "更新健康模板 key=%s period=%ss origin=%s clean=%ss alpha=%.2f",
         key,
         state.period,
-        state.status,
+        datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
         state.clean_seconds,
         alpha,
     )
@@ -713,7 +928,9 @@ def maybe_update_state(
     return state, False, outside_ratio, mean_abs_err, mean_rel_err
 
 
-# ── Prometheus 格式写入 ───────────────────────────────────────────────────────
+# =============================================================================
+# Prometheus Exposition 写入
+# =============================================================================
 
 def prom_escape_label_value(value: str) -> str:
     return (
@@ -731,8 +948,7 @@ def labels_to_str(labels: Dict[str, str]) -> str:
     parts = []
 
     for k in sorted(labels.keys()):
-        v = prom_escape_label_value(labels[k])
-        parts.append(f'{k}="{v}"')
+        parts.append(f'{k}="{prom_escape_label_value(labels[k])}"')
 
     return "{" + ",".join(parts) + "}"
 
@@ -771,11 +987,14 @@ def write_series(
         resp = requests.post(
             f"{VM_URL}/api/v1/import/prometheus",
             data=payload.encode("utf-8"),
-            headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
+            headers={
+                "Content-Type": "text/plain; version=0.0.4; charset=utf-8",
+            },
             timeout=10,
         )
         resp.raise_for_status()
         return True
+
     except requests.RequestException as e:
         logger.error("写入数据失败 metric=%s: %s", metric_name, e)
         return False
@@ -851,28 +1070,37 @@ def write_prediction_bundle(
     return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7
 
 
-# ── 标签解析 ──────────────────────────────────────────────────────────────────
+# =============================================================================
+# 标签解析
+# =============================================================================
 
 _LABEL_PATTERN = re.compile(
     r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
 )
 
 
-def _parse_labels(query: str) -> Dict[str, str]:
+def parse_labels_from_query(query: str) -> Dict[str, str]:
     labels = {}
 
     if "{" not in query or "}" not in query:
         return labels
 
     try:
-        label_part = query[query.index("{") + 1: query.rindex("}")]
+        label_part = query[query.index("{") + 1:query.rindex("}")]
     except Exception:
         return labels
 
     for match in _LABEL_PATTERN.finditer(label_part):
         key = match.group(1)
         value = match.group(2)
-        value = value.replace('\\"', '"').replace("\\n", "\n").replace("\\\\", "\\")
+
+        value = (
+            value
+            .replace('\\"', '"')
+            .replace("\\n", "\n")
+            .replace("\\\\", "\\")
+        )
+
         labels[key] = value
 
     return labels
@@ -892,9 +1120,11 @@ def series_key(metric_name: str, labels: Dict[str, str]) -> str:
     return metric_name + labels_to_str(labels)
 
 
-# ── 状态持久化 ────────────────────────────────────────────────────────────────
+# =============================================================================
+# 状态持久化
+# =============================================================================
 
-def load_state():
+def load_state() -> None:
     global BASELINE_STATES
 
     if not os.path.exists(STATE_FILE):
@@ -907,17 +1137,36 @@ def load_state():
         states = {}
 
         for key, value in raw.get("baseline_states", {}).items():
+            required_fields = {
+                "period",
+                "phase_origin_ts",
+                "template",
+                "status",
+                "clean_seconds",
+                "last_update_ts",
+                "last_seen_ts",
+                "y_min",
+                "y_max",
+            }
+
+            if not required_fields.issubset(set(value.keys())):
+                continue
+
             states[key] = BaselineState(**value)
 
         BASELINE_STATES = states
 
-        logger.info("已加载预测状态文件 %s，状态数量=%d", STATE_FILE, len(BASELINE_STATES))
+        logger.info(
+            "已加载预测状态文件 %s，状态数量=%d",
+            STATE_FILE,
+            len(BASELINE_STATES),
+        )
 
     except Exception as e:
         logger.warning("加载预测状态文件失败，将重新学习: %s", e)
 
 
-def save_state():
+def save_state() -> None:
     try:
         raw = {
             "baseline_states": {
@@ -937,9 +1186,11 @@ def save_state():
         logger.warning("保存预测状态文件失败: %s", e)
 
 
-# ── 主逻辑 ────────────────────────────────────────────────────────────────────
+# =============================================================================
+# 主流程
+# =============================================================================
 
-def run_once():
+def run_once() -> None:
     now_str = datetime.now().strftime("%H:%M:%S")
 
     for target in PREDICT_TARGETS:
@@ -952,16 +1203,26 @@ def run_once():
         ts, ys = fetch_history(query)
 
         if len(ys) < MIN_POINTS:
-            logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
+            logger.info(
+                "[%s] %s 数据不足（%d 点），跳过",
+                now_str,
+                query,
+                len(ys),
+            )
             continue
 
         ts_grid, ys_grid = normalize_history(ts, ys)
 
         if len(ys_grid) < MIN_POINTS:
-            logger.info("[%s] %s 清洗后数据不足（%d 点），跳过", now_str, query, len(ys_grid))
+            logger.info(
+                "[%s] %s 清洗后数据不足（%d 点），跳过",
+                now_str,
+                query,
+                len(ys_grid),
+            )
             continue
 
-        base_labels = _parse_labels(query)
+        base_labels = parse_labels_from_query(query)
         write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
 
         key = series_key(pred_metric, write_labels)
@@ -975,7 +1236,11 @@ def run_once():
         )
 
         if state is None:
-            logger.info("[%s] %s 暂无可用健康模板，等待学习", now_str, query)
+            logger.info(
+                "[%s] %s 暂无可用健康模板，等待学习",
+                now_str,
+                query,
+            )
             continue
 
         now_sec = int(time.time())
@@ -989,7 +1254,7 @@ def run_once():
             for i in range(WRITE_HORIZON_SECONDS)
         ]
 
-        pred_values = predict_by_state(state, ts_future)
+        pred_values = predict_with_origin(state, ts_future)
 
         lower_values, upper_values = calc_bounds(
             pred=pred_values,
@@ -1012,21 +1277,28 @@ def run_once():
         )
 
         if not ok:
+            logger.error(
+                "[%s] %s 写入预测数据失败",
+                now_str,
+                query,
+            )
             continue
 
         LAST_WRITTEN_UNTIL[key] = int(max(ts_future))
 
         future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S")
         future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
+        origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S")
 
         logger.info(
-            "[%s] %-40s → %-35s status=%s anomaly=%s period=%ss clean=%ss 写入 %d 点，预测区间 %s ~ %s",
+            "[%s] %-40s → %-35s status=%s anomaly=%s period=%ss origin=%s clean=%ss 写入 %d 点，预测区间 %s ~ %s",
             now_str,
             query,
             pred_metric,
             state.status,
             is_anomaly,
             state.period,
+            origin_str,
             state.clean_seconds,
             len(ts_future),
             future_start,
@@ -1036,17 +1308,18 @@ def run_once():
     save_state()
 
 
-def main():
+def main() -> None:
     load_state()
 
     logger.info(
-        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s",
+        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s",
         VM_URL,
         HISTORY_MINUTES,
         HORIZON_SECONDS,
         WRITE_HORIZON_SECONDS,
         POLL_INTERVAL,
         STATE_FILE,
+        EXTRA_PREDICT_LABELS["forecast"],
     )
 
     while True:
@@ -1055,4 +1328,6 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
+
+    
\ No newline at end of file

From 76e536eff9beb9e3f9db7453bbeb6f1c7844c198 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 21 May 2026 13:55:08 +0800
Subject: [PATCH 24/36] fix

---
 ai/predict_v3_single_scene.py | 316 ++++++++++++----------------------
 1 file changed, 110 insertions(+), 206 deletions(-)

diff --git a/ai/predict_v3_single_scene.py b/ai/predict_v3_single_scene.py
index fc07f4f..2cde8b8 100755
--- a/ai/predict_v3_single_scene.py
+++ b/ai/predict_v3_single_scene.py
@@ -1,22 +1,12 @@
 # -*- coding: utf-8 -*-
 """
-ProtoForge Predictor v8
-
-功能：
-1. 从 VictoriaMetrics 拉取历史数据。
-2. 对 CNC 周期型指标进行相位对齐预测。
-3. 使用“谷底锚点”对齐周期，减少上升沿/下降沿相位偏差。
-4. 每轮只写入未来 min(HORIZON_SECONDS, POLL_INTERVAL) 秒，避免预测窗口重叠。
-5. 检测异常后冻结健康模板，不把故障数据学进去。
-6. 故障恢复后等待稳定一段时间，再恢复模板更新。
-7. 写入：
-   - xxx_predicted
-   - xxx_predicted_upper
-   - xxx_predicted_lower
-   - xxx_anomaly
-   - xxx_anomaly_outside_ratio
-   - xxx_anomaly_mean_abs_error
-   - xxx_anomaly_mean_rel_error
+ProtoForge Predictor v9
+
+修复重点：
+1. 预测时间轴改为锚定最后一个真实数据点 last_real_ts，而不是锚定 time.time()。
+2. 不再使用 LAST_WRITTEN_UNTIL 把预测不断推向更远未来，避免 Grafana 里预测线相对真实线出现延迟/错位。
+3. 如果真实数据时间戳没有推进，则跳过本轮预测写入，避免重复写同一段未来时间造成毛刺。
+4. 保留：相位对齐、健康模板冻结、故障期不学习、恢复后再学习、预测上下界、异常指标。
 """
 
 import json
@@ -50,14 +40,13 @@
 # =============================================================================
 
 VM_URL = "http://localhost:8428"
-
-STATE_FILE = "/tmp/protoforge_predictor_state_v8.json"
+STATE_FILE = "/tmp/protoforge_predictor_state_v9.json"
 
 HISTORY_MINUTES = 30
 HORIZON_SECONDS = 120
 POLL_INTERVAL = 30
 
-# 实际写入窗口不要大于轮询间隔，否则多轮预测会重叠。
+# 实际每轮写入的预测长度。不要大于 POLL_INTERVAL，否则容易出现预测窗口重叠。
 WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL)
 
 QUERY_STEP = "1s"
@@ -76,14 +65,23 @@
 RECOVERY_EMA_ALPHA = 0.30
 
 OUTSIDE_RATIO_THRESHOLD = 0.60
-RECOVERY_INSIDE_RATIO_THRESHOLD = 0.80
-
 PHASE_SEARCH_RATIO = 0.15
 VALLEY_QUANTILE = 45
 
+# 关键修复：预测时间轴锚定真实数据最后一个点。
+# True：预测从 last_real_ts + 1 开始，适合 Grafana 与真实曲线对齐展示。
+# False：预测从当前系统时间 + 1 开始，适合只看纯未来预测，但容易与有采集延迟的真实数据错位。
+ALIGN_PREDICTION_TO_LAST_REAL_TS = True
+
+# 如果 last_real_ts 距离当前系统时间太久，说明采集链路可能断了，跳过预测，避免用陈旧数据继续画未来线。
+MAX_DATA_LAG_SECONDS = 180
+
+# 真实数据至少推进多少秒，才写入新预测，避免同一段未来时间被反复写入。
+MIN_REAL_ADVANCE_SECONDS = 1
+
 
 # =============================================================================
-# 预测指标配置
+# 指标配置
 # =============================================================================
 
 PREDICT_TARGETS = [
@@ -132,7 +130,7 @@
 ]
 
 EXTRA_PREDICT_LABELS = {
-    "forecast": "phase_aligned_health_v8",
+    "forecast": "phase_aligned_health_v9",
     "source": "protoforge",
 }
 
@@ -159,7 +157,10 @@ class BaselineState:
 
 
 BASELINE_STATES: Dict[str, BaselineState] = {}
-LAST_WRITTEN_UNTIL: Dict[str, int] = {}
+
+# 记录每条序列最后一次使用的真实数据时间戳，而不是预测写到哪里。
+# 这样不会把预测不断推向更远的未来。
+LAST_REAL_TS_WRITTEN: Dict[str, int] = {}
 
 
 # =============================================================================
@@ -243,7 +244,6 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.
         return np.array([]), np.array([])
 
     sorted_items = sorted(data.items(), key=lambda x: x[0])
-
     ts_clean = np.array([x[0] for x in sorted_items], dtype=float)
     ys_clean = np.array([x[1] for x in sorted_items], dtype=float)
 
@@ -263,7 +263,7 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.
 
 
 # =============================================================================
-# 周期估计
+# 周期估计与谷底检测
 # =============================================================================
 
 def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
@@ -355,10 +355,6 @@ def estimate_period_rough(ys_arr: np.ndarray) -> int:
     return int(period)
 
 
-# =============================================================================
-# 谷底锚点检测
-# =============================================================================
-
 def find_valley_indices(
     ts_grid: np.ndarray,
     ys_grid: np.ndarray,
@@ -370,7 +366,6 @@ def find_valley_indices(
         return []
 
     period = max(3, int(expected_period))
-
     smooth_window = max(3, int(round(period * 0.08)))
     smooth_window = min(smooth_window, 21)
 
@@ -389,7 +384,6 @@ def find_valley_indices(
 
     if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE:
         candidates = []
-
         for i in range(1, n - 1):
             if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]:
                 candidates.append(i)
@@ -458,7 +452,7 @@ def detect_period_and_valleys(
 
 
 # =============================================================================
-# 相位对齐模板构建
+# 相位对齐模板
 # =============================================================================
 
 def build_template_from_valleys(
@@ -552,7 +546,7 @@ def build_current_baseline(
 
 
 # =============================================================================
-# 模板预测
+# 预测与模板合并
 # =============================================================================
 
 def circular_template_value(template: np.ndarray, phase: float) -> float:
@@ -562,10 +556,8 @@ def circular_template_value(template: np.ndarray, phase: float) -> float:
         return 0.0
 
     phase = float(phase) % period
-
     i0 = int(math.floor(phase)) % period
     i1 = (i0 + 1) % period
-
     frac = phase - math.floor(phase)
 
     return float((1.0 - frac) * template[i0] + frac * template[i1])
@@ -583,7 +575,6 @@ def predict_with_origin(
         return np.zeros(len(ts_list), dtype=float)
 
     origin = int(state.phase_origin_ts if phase_origin_ts is None else phase_origin_ts)
-
     values = []
 
     for ts in ts_list:
@@ -611,10 +602,7 @@ def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
     return np.interp(new_x, old_x_ext, old_y_ext).astype(float)
 
 
-def align_new_template_to_old(
-    old_template: np.ndarray,
-    new_template: np.ndarray,
-) -> np.ndarray:
+def align_new_template_to_old(old_template: np.ndarray, new_template: np.ndarray) -> np.ndarray:
     if len(old_template) != len(new_template):
         old_template = resample_template(old_template, len(new_template))
 
@@ -624,7 +612,6 @@ def align_new_template_to_old(
         return new_template.astype(float)
 
     max_shift = max(1, int(round(period * 0.10)))
-
     old_norm = old_template - np.mean(old_template)
 
     best_score = None
@@ -633,7 +620,6 @@ def align_new_template_to_old(
     for shift in range(-max_shift, max_shift + 1):
         shifted = np.roll(new_template, shift)
         shifted_norm = shifted - np.mean(shifted)
-
         score = float(np.dot(old_norm, shifted_norm))
 
         if best_score is None or score > best_score:
@@ -643,18 +629,13 @@ def align_new_template_to_old(
     return best_template.astype(float)
 
 
-def merge_template(
-    old_template: np.ndarray,
-    new_template: np.ndarray,
-    alpha: float,
-) -> np.ndarray:
+def merge_template(old_template: np.ndarray, new_template: np.ndarray, alpha: float) -> np.ndarray:
     alpha = float(np.clip(alpha, 0.0, 1.0))
 
     if len(old_template) != len(new_template):
         old_template = resample_template(old_template, len(new_template))
 
     new_template = align_new_template_to_old(old_template, new_template)
-
     merged = (1.0 - alpha) * old_template + alpha * new_template
 
     return merged.astype(float)
@@ -664,11 +645,7 @@ def merge_template(
 # 异常检测
 # =============================================================================
 
-def calc_threshold(
-    pred: np.ndarray,
-    abs_threshold: float,
-    rel_threshold: float,
-) -> np.ndarray:
+def calc_threshold(pred: np.ndarray, abs_threshold: float, rel_threshold: float) -> np.ndarray:
     return np.maximum(abs_threshold, np.abs(pred) * rel_threshold)
 
 
@@ -678,11 +655,7 @@ def calc_bounds(
     rel_threshold: float,
 ) -> Tuple[np.ndarray, np.ndarray]:
     threshold = calc_threshold(pred, abs_threshold, rel_threshold)
-
-    lower = pred - threshold
-    upper = pred + threshold
-
-    return lower, upper
+    return pred - threshold, pred + threshold
 
 
 def find_best_phase_origin_for_recent(
@@ -692,7 +665,6 @@ def find_best_phase_origin_for_recent(
 ) -> Tuple[int, np.ndarray, float]:
     period = int(state.period)
     base_origin = int(state.phase_origin_ts)
-
     max_shift = max(1, int(round(period * PHASE_SEARCH_RATIO)))
 
     best_origin = base_origin
@@ -732,14 +704,12 @@ def detect_anomaly(
     )
 
     threshold = calc_threshold(pred, abs_threshold, rel_threshold)
-
     abs_err = np.abs(actual - pred)
     outside = abs_err > threshold
 
     outside_ratio = float(np.mean(outside))
     mean_abs_err = float(np.mean(abs_err))
     mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1.0)))
-
     is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD
 
     return is_anomaly, outside_ratio, mean_abs_err, mean_rel_err, int(best_origin)
@@ -749,11 +719,7 @@ def detect_anomaly(
 # 健康基线状态管理
 # =============================================================================
 
-def create_initial_state(
-    ts_grid: np.ndarray,
-    ys_grid: np.ndarray,
-    now_sec: int,
-) -> Optional[BaselineState]:
+def create_initial_state(ts_grid: np.ndarray, ys_grid: np.ndarray, now_sec: int) -> Optional[BaselineState]:
     baseline = build_current_baseline(ts_grid, ys_grid)
 
     if baseline is None:
@@ -782,7 +748,6 @@ def maybe_update_state(
     rel_threshold: float,
 ) -> Tuple[Optional[BaselineState], bool, float, float, float]:
     now_sec = int(time.time())
-
     state = BASELINE_STATES.get(key)
 
     if state is None:
@@ -805,7 +770,6 @@ def maybe_update_state(
 
     elapsed = max(1, now_sec - int(state.last_seen_ts))
     elapsed = min(elapsed, POLL_INTERVAL * 2)
-
     state.last_seen_ts = now_sec
 
     is_anom, outside_ratio, mean_abs_err, mean_rel_err, best_origin = detect_anomaly(
@@ -819,7 +783,6 @@ def maybe_update_state(
     if is_anom:
         state.status = BASELINE_STATUS_ANOMALY
         state.clean_seconds = 0
-
         BASELINE_STATES[key] = state
 
         logger.warning(
@@ -846,15 +809,9 @@ def maybe_update_state(
     if state.status == BASELINE_STATUS_ANOMALY:
         state.status = BASELINE_STATUS_RECOVERING
         state.clean_seconds = elapsed
-
         BASELINE_STATES[key] = state
 
-        logger.info(
-            "异常开始恢复 key=%s clean_seconds=%ss",
-            key,
-            state.clean_seconds,
-        )
-
+        logger.info("异常开始恢复 key=%s clean_seconds=%ss", key, state.clean_seconds)
         return state, False, outside_ratio, mean_abs_err, mean_rel_err
 
     if state.status == BASELINE_STATUS_RECOVERING:
@@ -877,11 +834,7 @@ def maybe_update_state(
         int(state.period) * MAX_CYCLES_FOR_TEMPLATE,
     )
 
-    baseline = build_current_baseline(
-        ts_grid=ts_grid,
-        ys_grid=ys_grid,
-        tail_seconds=tail_seconds,
-    )
+    baseline = build_current_baseline(ts_grid=ts_grid, ys_grid=ys_grid, tail_seconds=tail_seconds)
 
     if baseline is None:
         BASELINE_STATES[key] = state
@@ -889,17 +842,9 @@ def maybe_update_state(
 
     new_period, new_origin, new_template = baseline
     old_template = np.array(state.template, dtype=float)
+    alpha = RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else HEALTHY_EMA_ALPHA
 
-    if state.status == BASELINE_STATUS_RECOVERING:
-        alpha = RECOVERY_EMA_ALPHA
-    else:
-        alpha = HEALTHY_EMA_ALPHA
-
-    merged = merge_template(
-        old_template=old_template,
-        new_template=new_template,
-        alpha=alpha,
-    )
+    merged = merge_template(old_template=old_template, new_template=new_template, alpha=alpha)
 
     state.period = int(new_period)
     state.phase_origin_ts = int(new_origin)
@@ -933,12 +878,7 @@ def maybe_update_state(
 # =============================================================================
 
 def prom_escape_label_value(value: str) -> str:
-    return (
-        str(value)
-        .replace("\\", "\\\\")
-        .replace("\n", "\\n")
-        .replace('"', '\\"')
-    )
+    return str(value).replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"')
 
 
 def labels_to_str(labels: Dict[str, str]) -> str:
@@ -975,8 +915,7 @@ def write_series(
         if not math.isfinite(ts_sec) or not math.isfinite(val):
             continue
 
-        ts_ms = ts_sec * 1000
-        lines.append(f"{metric_name}{label_str} {val:.6f} {ts_ms}")
+        lines.append(f"{metric_name}{label_str} {val:.6f} {ts_sec * 1000}")
 
     if not lines:
         return False
@@ -987,9 +926,7 @@ def write_series(
         resp = requests.post(
             f"{VM_URL}/api/v1/import/prometheus",
             data=payload.encode("utf-8"),
-            headers={
-                "Content-Type": "text/plain; version=0.0.4; charset=utf-8",
-            },
+            headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
             timeout=10,
         )
         resp.raise_for_status()
@@ -1012,60 +949,19 @@ def write_prediction_bundle(
     outside_ratio: float,
     mean_abs_err: float,
     mean_rel_err: float,
+    event_ts: int,
 ) -> bool:
-    ok1 = write_series(
-        metric_name=pred_metric,
-        labels=labels,
-        ts_list=ts_future,
-        values=pred_values.astype(float).tolist(),
-    )
-
-    ok2 = write_series(
-        metric_name=f"{pred_metric}_lower",
-        labels=labels,
-        ts_list=ts_future,
-        values=lower_values.astype(float).tolist(),
-    )
-
-    ok3 = write_series(
-        metric_name=f"{pred_metric}_upper",
-        labels=labels,
-        ts_list=ts_future,
-        values=upper_values.astype(float).tolist(),
-    )
-
-    now_sec = int(time.time())
+    ok1 = write_series(pred_metric, labels, ts_future, pred_values.astype(float).tolist())
+    ok2 = write_series(f"{pred_metric}_lower", labels, ts_future, lower_values.astype(float).tolist())
+    ok3 = write_series(f"{pred_metric}_upper", labels, ts_future, upper_values.astype(float).tolist())
 
     anomaly_labels = dict(labels)
     anomaly_labels["type"] = "prediction_deviation"
 
-    ok4 = write_series(
-        metric_name=anomaly_metric,
-        labels=anomaly_labels,
-        ts_list=[now_sec],
-        values=[1.0 if is_anomaly else 0.0],
-    )
-
-    ok5 = write_series(
-        metric_name=f"{anomaly_metric}_outside_ratio",
-        labels=anomaly_labels,
-        ts_list=[now_sec],
-        values=[outside_ratio],
-    )
-
-    ok6 = write_series(
-        metric_name=f"{anomaly_metric}_mean_abs_error",
-        labels=anomaly_labels,
-        ts_list=[now_sec],
-        values=[mean_abs_err],
-    )
-
-    ok7 = write_series(
-        metric_name=f"{anomaly_metric}_mean_rel_error",
-        labels=anomaly_labels,
-        ts_list=[now_sec],
-        values=[mean_rel_err],
-    )
+    ok4 = write_series(anomaly_metric, anomaly_labels, [event_ts], [1.0 if is_anomaly else 0.0])
+    ok5 = write_series(f"{anomaly_metric}_outside_ratio", anomaly_labels, [event_ts], [outside_ratio])
+    ok6 = write_series(f"{anomaly_metric}_mean_abs_error", anomaly_labels, [event_ts], [mean_abs_err])
+    ok7 = write_series(f"{anomaly_metric}_mean_rel_error", anomaly_labels, [event_ts], [mean_rel_err])
 
     return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7
 
@@ -1074,9 +970,7 @@ def write_prediction_bundle(
 # 标签解析
 # =============================================================================
 
-_LABEL_PATTERN = re.compile(
-    r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
-)
+_LABEL_PATTERN = re.compile(r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*')
 
 
 def parse_labels_from_query(query: str) -> Dict[str, str]:
@@ -1093,14 +987,7 @@ def parse_labels_from_query(query: str) -> Dict[str, str]:
     for match in _LABEL_PATTERN.finditer(label_part):
         key = match.group(1)
         value = match.group(2)
-
-        value = (
-            value
-            .replace('\\"', '"')
-            .replace("\\n", "\n")
-            .replace("\\\\", "\\")
-        )
-
+        value = value.replace('\\"', '"').replace("\\n", "\n").replace("\\\\", "\\")
         labels[key] = value
 
     return labels
@@ -1155,12 +1042,7 @@ def load_state() -> None:
             states[key] = BaselineState(**value)
 
         BASELINE_STATES = states
-
-        logger.info(
-            "已加载预测状态文件 %s，状态数量=%d",
-            STATE_FILE,
-            len(BASELINE_STATES),
-        )
+        logger.info("已加载预测状态文件 %s，状态数量=%d", STATE_FILE, len(BASELINE_STATES))
 
     except Exception as e:
         logger.warning("加载预测状态文件失败，将重新学习: %s", e)
@@ -1186,6 +1068,44 @@ def save_state() -> None:
         logger.warning("保存预测状态文件失败: %s", e)
 
 
+# =============================================================================
+# 时间轴选择
+# =============================================================================
+
+def build_prediction_timestamps(key: str, last_real_ts: int, now_sec: int) -> Optional[List[int]]:
+    data_lag = now_sec - last_real_ts
+
+    if data_lag > MAX_DATA_LAG_SECONDS:
+        logger.warning(
+            "真实数据延迟过大，跳过预测 key=%s data_lag=%ss max=%ss",
+            key,
+            data_lag,
+            MAX_DATA_LAG_SECONDS,
+        )
+        return None
+
+    last_written_real_ts = LAST_REAL_TS_WRITTEN.get(key)
+
+    if last_written_real_ts is not None:
+        advance = last_real_ts - int(last_written_real_ts)
+
+        if advance < MIN_REAL_ADVANCE_SECONDS:
+            logger.info(
+                "真实数据时间戳未推进，跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s",
+                key,
+                last_real_ts,
+                last_written_real_ts,
+            )
+            return None
+
+    if ALIGN_PREDICTION_TO_LAST_REAL_TS:
+        base_ts = last_real_ts
+    else:
+        base_ts = now_sec
+
+    return [base_ts + i + 1 for i in range(WRITE_HORIZON_SECONDS)]
+
+
 # =============================================================================
 # 主流程
 # =============================================================================
@@ -1203,28 +1123,17 @@ def run_once() -> None:
         ts, ys = fetch_history(query)
 
         if len(ys) < MIN_POINTS:
-            logger.info(
-                "[%s] %s 数据不足（%d 点），跳过",
-                now_str,
-                query,
-                len(ys),
-            )
+            logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
             continue
 
         ts_grid, ys_grid = normalize_history(ts, ys)
 
         if len(ys_grid) < MIN_POINTS:
-            logger.info(
-                "[%s] %s 清洗后数据不足（%d 点），跳过",
-                now_str,
-                query,
-                len(ys_grid),
-            )
+            logger.info("[%s] %s 清洗后数据不足（%d 点），跳过", now_str, query, len(ys_grid))
             continue
 
         base_labels = parse_labels_from_query(query)
         write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
-
         key = series_key(pred_metric, write_labels)
 
         state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err = maybe_update_state(
@@ -1236,26 +1145,23 @@ def run_once() -> None:
         )
 
         if state is None:
-            logger.info(
-                "[%s] %s 暂无可用健康模板，等待学习",
-                now_str,
-                query,
-            )
+            logger.info("[%s] %s 暂无可用健康模板，等待学习", now_str, query)
             continue
 
         now_sec = int(time.time())
-        last_until = LAST_WRITTEN_UNTIL.get(key, 0)
         last_real_ts = int(ts_grid[-1])
+        data_lag = now_sec - last_real_ts
 
-        base_ts = max(now_sec, last_until, last_real_ts)
+        ts_future = build_prediction_timestamps(
+            key=key,
+            last_real_ts=last_real_ts,
+            now_sec=now_sec,
+        )
 
-        ts_future = [
-            base_ts + i + 1
-            for i in range(WRITE_HORIZON_SECONDS)
-        ]
+        if not ts_future:
+            continue
 
         pred_values = predict_with_origin(state, ts_future)
-
         lower_values, upper_values = calc_bounds(
             pred=pred_values,
             abs_threshold=abs_threshold,
@@ -1274,24 +1180,22 @@ def run_once() -> None:
             outside_ratio=outside_ratio,
             mean_abs_err=mean_abs_err,
             mean_rel_err=mean_rel_err,
+            event_ts=last_real_ts,
         )
 
         if not ok:
-            logger.error(
-                "[%s] %s 写入预测数据失败",
-                now_str,
-                query,
-            )
+            logger.error("[%s] %s 写入预测数据失败", now_str, query)
             continue
 
-        LAST_WRITTEN_UNTIL[key] = int(max(ts_future))
+        LAST_REAL_TS_WRITTEN[key] = last_real_ts
 
         future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S")
         future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
+        last_real_str = datetime.fromtimestamp(last_real_ts).strftime("%H:%M:%S")
         origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S")
 
         logger.info(
-            "[%s] %-40s → %-35s status=%s anomaly=%s period=%ss origin=%s clean=%ss 写入 %d 点，预测区间 %s ~ %s",
+            "[%s] %-40s → %-35s status=%s anomaly=%s period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点，预测区间 %s ~ %s",
             now_str,
             query,
             pred_metric,
@@ -1299,7 +1203,8 @@ def run_once() -> None:
             is_anomaly,
             state.period,
             origin_str,
-            state.clean_seconds,
+            last_real_str,
+            data_lag,
             len(ts_future),
             future_start,
             future_end,
@@ -1312,7 +1217,7 @@ def main() -> None:
     load_state()
 
     logger.info(
-        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s",
+        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s align_to_last_real=%s",
         VM_URL,
         HISTORY_MINUTES,
         HORIZON_SECONDS,
@@ -1320,6 +1225,7 @@ def main() -> None:
         POLL_INTERVAL,
         STATE_FILE,
         EXTRA_PREDICT_LABELS["forecast"],
+        ALIGN_PREDICTION_TO_LAST_REAL_TS,
     )
 
     while True:
@@ -1329,5 +1235,3 @@ def main() -> None:
 
 if __name__ == "__main__":
     main()
-
-    
\ No newline at end of file

From f5e7b2a27320066b5eb9157f297d6651adcd36c8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 21 May 2026 14:05:54 +0800
Subject: [PATCH 25/36] fix

---
 ai/predict_v3_single_scene.py | 520 +++++++++++++++++++++++++---------
 1 file changed, 385 insertions(+), 135 deletions(-)

diff --git a/ai/predict_v3_single_scene.py b/ai/predict_v3_single_scene.py
index 2cde8b8..d212d2d 100755
--- a/ai/predict_v3_single_scene.py
+++ b/ai/predict_v3_single_scene.py
@@ -1,12 +1,14 @@
 # -*- coding: utf-8 -*-
 """
-ProtoForge Predictor v9
+ProtoForge Predictor v10
 
 修复重点：
-1. 预测时间轴改为锚定最后一个真实数据点 last_real_ts，而不是锚定 time.time()。
-2. 不再使用 LAST_WRITTEN_UNTIL 把预测不断推向更远未来，避免 Grafana 里预测线相对真实线出现延迟/错位。
-3. 如果真实数据时间戳没有推进，则跳过本轮预测写入，避免重复写同一段未来时间造成毛刺。
-4. 保留：相位对齐、健康模板冻结、故障期不学习、恢复后再学习、预测上下界、异常指标。
+1. 修复 lag=0 但预测线仍然相位漂移的问题。
+2. 在谷底相位对齐基础上，增加 phase-lock 相位锁定。
+3. 每轮使用最近 1~2 个周期真实数据，搜索最佳 period + phase_origin。
+4. 预测起点仍然锚定最后一个真实点 last_real_ts，避免写入延迟。
+5. 保留健康模板冻结逻辑：异常期间不学习故障数据。
+6. 保留预测上下界和异常指标。
 """
 
 import json
@@ -40,13 +42,12 @@
 # =============================================================================
 
 VM_URL = "http://localhost:8428"
-STATE_FILE = "/tmp/protoforge_predictor_state_v9.json"
+STATE_FILE = "/tmp/protoforge_predictor_state_v10.json"
 
 HISTORY_MINUTES = 30
 HORIZON_SECONDS = 120
 POLL_INTERVAL = 30
 
-# 实际每轮写入的预测长度。不要大于 POLL_INTERVAL，否则容易出现预测窗口重叠。
 WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL)
 
 QUERY_STEP = "1s"
@@ -58,26 +59,29 @@
 MIN_FULL_CYCLES_FOR_TEMPLATE = 3
 MAX_CYCLES_FOR_TEMPLATE = 6
 
-DETECT_WINDOW_SECONDS = 15
+DETECT_WINDOW_SECONDS = 20
 RECOVERY_MIN_SECONDS = 60
 
-HEALTHY_EMA_ALPHA = 0.12
-RECOVERY_EMA_ALPHA = 0.30
+HEALTHY_EMA_ALPHA = 0.10
+RECOVERY_EMA_ALPHA = 0.25
 
 OUTSIDE_RATIO_THRESHOLD = 0.60
-PHASE_SEARCH_RATIO = 0.15
+
 VALLEY_QUANTILE = 45
 
-# 关键修复：预测时间轴锚定真实数据最后一个点。
-# True：预测从 last_real_ts + 1 开始，适合 Grafana 与真实曲线对齐展示。
-# False：预测从当前系统时间 + 1 开始，适合只看纯未来预测，但容易与有采集延迟的真实数据错位。
-ALIGN_PREDICTION_TO_LAST_REAL_TS = True
+# phase-lock 配置
+PHASE_LOCK_MIN_WINDOW_SECONDS = 45
+PHASE_LOCK_MAX_WINDOW_SECONDS = 180
+PHASE_LOCK_PERIOD_SEARCH_RATIO = 0.12
+PHASE_LOCK_ORIGIN_SEARCH_RATIO = 0.35
+PHASE_LOCK_PERIOD_STEP = 1
+PHASE_LOCK_ORIGIN_STEP = 1
 
-# 如果 last_real_ts 距离当前系统时间太久，说明采集链路可能断了，跳过预测，避免用陈旧数据继续画未来线。
+# 真实数据延迟超过这个值，就不继续预测
 MAX_DATA_LAG_SECONDS = 180
 
-# 真实数据至少推进多少秒，才写入新预测，避免同一段未来时间被反复写入。
-MIN_REAL_ADVANCE_SECONDS = 1
+# 预测锚定最后一个真实点
+ALIGN_PREDICTION_TO_LAST_REAL_TS = True
 
 
 # =============================================================================
@@ -130,7 +134,7 @@
 ]
 
 EXTRA_PREDICT_LABELS = {
-    "forecast": "phase_aligned_health_v9",
+    "forecast": "phase_locked_health_v10",
     "source": "protoforge",
 }
 
@@ -157,9 +161,6 @@ class BaselineState:
 
 
 BASELINE_STATES: Dict[str, BaselineState] = {}
-
-# 记录每条序列最后一次使用的真实数据时间戳，而不是预测写到哪里。
-# 这样不会把预测不断推向更远的未来。
 LAST_REAL_TS_WRITTEN: Dict[str, int] = {}
 
 
@@ -197,8 +198,6 @@ def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[floa
         return [], []
 
     values = result[0].get("values", [])
-    if not values:
-        return [], []
 
     ts = []
     ys = []
@@ -244,6 +243,7 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.
         return np.array([]), np.array([])
 
     sorted_items = sorted(data.items(), key=lambda x: x[0])
+
     ts_clean = np.array([x[0] for x in sorted_items], dtype=float)
     ys_clean = np.array([x[1] for x in sorted_items], dtype=float)
 
@@ -263,7 +263,7 @@ def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.
 
 
 # =============================================================================
-# 周期估计与谷底检测
+# 周期估计
 # =============================================================================
 
 def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
@@ -355,6 +355,10 @@ def estimate_period_rough(ys_arr: np.ndarray) -> int:
     return int(period)
 
 
+# =============================================================================
+# 谷底检测与模板构建
+# =============================================================================
+
 def find_valley_indices(
     ts_grid: np.ndarray,
     ys_grid: np.ndarray,
@@ -366,6 +370,7 @@ def find_valley_indices(
         return []
 
     period = max(3, int(expected_period))
+
     smooth_window = max(3, int(round(period * 0.08)))
     smooth_window = min(smooth_window, 21)
 
@@ -384,6 +389,7 @@ def find_valley_indices(
 
     if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE:
         candidates = []
+
         for i in range(1, n - 1):
             if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]:
                 candidates.append(i)
@@ -451,10 +457,6 @@ def detect_period_and_valleys(
     return int(period), valleys
 
 
-# =============================================================================
-# 相位对齐模板
-# =============================================================================
-
 def build_template_from_valleys(
     ts_grid: np.ndarray,
     ys_grid: np.ndarray,
@@ -546,7 +548,7 @@ def build_current_baseline(
 
 
 # =============================================================================
-# 预测与模板合并
+# 模板预测与重采样
 # =============================================================================
 
 def circular_template_value(template: np.ndarray, phase: float) -> float:
@@ -556,6 +558,7 @@ def circular_template_value(template: np.ndarray, phase: float) -> float:
         return 0.0
 
     phase = float(phase) % period
+
     i0 = int(math.floor(phase)) % period
     i1 = (i0 + 1) % period
     frac = phase - math.floor(phase)
@@ -563,46 +566,77 @@ def circular_template_value(template: np.ndarray, phase: float) -> float:
     return float((1.0 - frac) * template[i0] + frac * template[i1])
 
 
-def predict_with_origin(
-    state: BaselineState,
+def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
+    old_period = len(old_template)
+
+    if old_period == new_period:
+        return old_template.astype(float)
+
+    if old_period <= 1 or new_period <= 1:
+        return np.full(new_period, float(np.mean(old_template)), dtype=float)
+
+    old_x = np.linspace(0.0, 1.0, old_period, endpoint=False)
+    new_x = np.linspace(0.0, 1.0, new_period, endpoint=False)
+
+    old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0])
+    old_y_ext = np.concatenate([old_template, old_template, old_template])
+
+    return np.interp(new_x, old_x_ext, old_y_ext).astype(float)
+
+
+def predict_template_values(
+    template: np.ndarray,
+    period: int,
+    phase_origin_ts: int,
     ts_list: List[int],
-    phase_origin_ts: Optional[int] = None,
 ) -> np.ndarray:
-    template = np.array(state.template, dtype=float)
-    period = int(state.period)
-
-    if period <= 1 or len(template) != period:
+    if period <= 1:
         return np.zeros(len(ts_list), dtype=float)
 
-    origin = int(state.phase_origin_ts if phase_origin_ts is None else phase_origin_ts)
+    if len(template) != period:
+        template = resample_template(template, period)
+
     values = []
 
     for ts in ts_list:
-        phase = (int(ts) - origin) % period
+        phase = (int(ts) - int(phase_origin_ts)) % period
         values.append(circular_template_value(template, phase))
 
     return np.array(values, dtype=float)
 
 
-def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
-    old_period = len(old_template)
+def predict_with_state(state: BaselineState, ts_list: List[int]) -> np.ndarray:
+    template = np.array(state.template, dtype=float)
 
-    if old_period == new_period:
-        return old_template.astype(float)
+    return predict_template_values(
+        template=template,
+        period=int(state.period),
+        phase_origin_ts=int(state.phase_origin_ts),
+        ts_list=ts_list,
+    )
 
-    if old_period <= 1 or new_period <= 1:
-        return np.full(new_period, float(np.mean(old_template)), dtype=float)
 
-    old_x = np.linspace(0.0, 1.0, old_period, endpoint=False)
-    new_x = np.linspace(0.0, 1.0, new_period, endpoint=False)
+def normalize_origin_near(origin: int, period: int, near_ts: int) -> int:
+    if period <= 1:
+        return origin
 
-    old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0])
-    old_y_ext = np.concatenate([old_template, old_template, old_template])
+    origin = int(origin)
+    period = int(period)
+    near_ts = int(near_ts)
 
-    return np.interp(new_x, old_x_ext, old_y_ext).astype(float)
+    while origin + period <= near_ts:
+        origin += period
 
+    while origin > near_ts:
+        origin -= period
 
-def align_new_template_to_old(old_template: np.ndarray, new_template: np.ndarray) -> np.ndarray:
+    return origin
+
+
+def align_new_template_to_old(
+    old_template: np.ndarray,
+    new_template: np.ndarray,
+) -> np.ndarray:
     if len(old_template) != len(new_template):
         old_template = resample_template(old_template, len(new_template))
 
@@ -629,23 +663,117 @@ def align_new_template_to_old(old_template: np.ndarray, new_template: np.ndarray
     return best_template.astype(float)
 
 
-def merge_template(old_template: np.ndarray, new_template: np.ndarray, alpha: float) -> np.ndarray:
+def merge_template(
+    old_template: np.ndarray,
+    new_template: np.ndarray,
+    alpha: float,
+) -> np.ndarray:
     alpha = float(np.clip(alpha, 0.0, 1.0))
 
     if len(old_template) != len(new_template):
         old_template = resample_template(old_template, len(new_template))
 
     new_template = align_new_template_to_old(old_template, new_template)
+
     merged = (1.0 - alpha) * old_template + alpha * new_template
 
     return merged.astype(float)
 
 
+# =============================================================================
+# Phase Lock
+# =============================================================================
+
+def phase_lock_recent(
+    state: BaselineState,
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+) -> Tuple[int, int, np.ndarray, float]:
+    base_period = int(state.period)
+    base_origin = int(state.phase_origin_ts)
+    base_template = np.array(state.template, dtype=float)
+
+    if base_period <= 1 or len(base_template) <= 1:
+        ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()
+        pred = predict_with_state(state, ts_recent)
+        actual = ys_grid[-len(ts_recent):].astype(float)
+        mae = float(np.mean(np.abs(actual - pred))) if len(actual) else 0.0
+        return base_period, base_origin, pred, mae
+
+    window_seconds = max(
+        PHASE_LOCK_MIN_WINDOW_SECONDS,
+        min(PHASE_LOCK_MAX_WINDOW_SECONDS, int(base_period * 2)),
+    )
+
+    cutoff = ts_grid[-1] - window_seconds
+    mask = ts_grid >= cutoff
+
+    ts_recent_arr = ts_grid[mask].astype(int)
+    actual = ys_grid[mask].astype(float)
+
+    if len(ts_recent_arr) < max(10, DETECT_WINDOW_SECONDS):
+        ts_recent_arr = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int)
+        actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float)
+
+    ts_recent = ts_recent_arr.tolist()
+    last_ts = int(ts_recent[-1])
+
+    p_min = max(int(MIN_PERIOD_SECONDS), int(round(base_period * (1.0 - PHASE_LOCK_PERIOD_SEARCH_RATIO))))
+    p_max = min(int(MAX_PERIOD_SECONDS), int(round(base_period * (1.0 + PHASE_LOCK_PERIOD_SEARCH_RATIO))))
+
+    if p_max < p_min:
+        p_min = p_max = base_period
+
+    best_period = base_period
+    best_origin = normalize_origin_near(base_origin, base_period, last_ts)
+    best_template = resample_template(base_template, best_period)
+    best_pred = predict_template_values(best_template, best_period, best_origin, ts_recent)
+    best_mae = float(np.mean(np.abs(actual - best_pred)))
+
+    for period in range(p_min, p_max + 1, PHASE_LOCK_PERIOD_STEP):
+        template = resample_template(base_template, period)
+        center_origin = normalize_origin_near(base_origin, period, last_ts)
+
+        origin_shift = max(2, int(round(period * PHASE_LOCK_ORIGIN_SEARCH_RATIO)))
+
+        for shift in range(-origin_shift, origin_shift + 1, PHASE_LOCK_ORIGIN_STEP):
+            origin = center_origin + shift
+
+            pred = predict_template_values(
+                template=template,
+                period=period,
+                phase_origin_ts=origin,
+                ts_list=ts_recent,
+            )
+
+            mae = float(np.mean(np.abs(actual - pred)))
+
+            # 轻微惩罚周期变化，避免过拟合抖动
+            penalty = abs(period - base_period) * 0.5
+            score = mae + penalty
+
+            best_score = best_mae + abs(best_period - base_period) * 0.5
+
+            if score < best_score:
+                best_period = period
+                best_origin = origin
+                best_pred = pred
+                best_mae = mae
+
+    best_origin = normalize_origin_near(best_origin, best_period, last_ts)
+
+    return int(best_period), int(best_origin), best_pred, float(best_mae)
+
+
 # =============================================================================
 # 异常检测
 # =============================================================================
 
-def calc_threshold(pred: np.ndarray, abs_threshold: float, rel_threshold: float) -> np.ndarray:
+def calc_threshold(
+    pred: np.ndarray,
+    abs_threshold: float,
+    rel_threshold: float,
+) -> np.ndarray:
     return np.maximum(abs_threshold, np.abs(pred) * rel_threshold)
 
 
@@ -655,33 +783,8 @@ def calc_bounds(
     rel_threshold: float,
 ) -> Tuple[np.ndarray, np.ndarray]:
     threshold = calc_threshold(pred, abs_threshold, rel_threshold)
-    return pred - threshold, pred + threshold
-
-
-def find_best_phase_origin_for_recent(
-    state: BaselineState,
-    ts_recent: List[int],
-    actual: np.ndarray,
-) -> Tuple[int, np.ndarray, float]:
-    period = int(state.period)
-    base_origin = int(state.phase_origin_ts)
-    max_shift = max(1, int(round(period * PHASE_SEARCH_RATIO)))
-
-    best_origin = base_origin
-    best_pred = predict_with_origin(state, ts_recent, base_origin)
-    best_mae = float(np.mean(np.abs(actual - best_pred)))
-
-    for shift in range(-max_shift, max_shift + 1):
-        origin = base_origin + shift
-        pred = predict_with_origin(state, ts_recent, origin)
-        mae = float(np.mean(np.abs(actual - pred)))
 
-        if mae < best_mae:
-            best_mae = mae
-            best_origin = origin
-            best_pred = pred
-
-    return best_origin, best_pred, best_mae
+    return pred - threshold, pred + threshold
 
 
 def detect_anomaly(
@@ -690,36 +793,50 @@ def detect_anomaly(
     ys_grid: np.ndarray,
     abs_threshold: float,
     rel_threshold: float,
-) -> Tuple[bool, float, float, float, int]:
-    if len(ys_grid) < DETECT_WINDOW_SECONDS:
-        return False, 0.0, 0.0, 0.0, int(state.phase_origin_ts)
-
-    ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()
-    actual = ys_grid[-DETECT_WINDOW_SECONDS:].astype(float)
-
-    best_origin, pred, _ = find_best_phase_origin_for_recent(
+) -> Tuple[bool, float, float, float, int, int]:
+    best_period, best_origin, pred_recent, _ = phase_lock_recent(
         state=state,
-        ts_recent=ts_recent,
-        actual=actual,
+        ts_grid=ts_grid,
+        ys_grid=ys_grid,
     )
 
-    threshold = calc_threshold(pred, abs_threshold, rel_threshold)
-    abs_err = np.abs(actual - pred)
+    recent_len = len(pred_recent)
+
+    if recent_len <= 0:
+        return False, 0.0, 0.0, 0.0, best_period, best_origin
+
+    actual = ys_grid[-recent_len:].astype(float)
+
+    threshold = calc_threshold(pred_recent, abs_threshold, rel_threshold)
+
+    abs_err = np.abs(actual - pred_recent)
     outside = abs_err > threshold
 
     outside_ratio = float(np.mean(outside))
     mean_abs_err = float(np.mean(abs_err))
-    mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1.0)))
+    mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred_recent), 1.0)))
+
     is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD
 
-    return is_anomaly, outside_ratio, mean_abs_err, mean_rel_err, int(best_origin)
+    return (
+        is_anomaly,
+        outside_ratio,
+        mean_abs_err,
+        mean_rel_err,
+        int(best_period),
+        int(best_origin),
+    )
 
 
 # =============================================================================
 # 健康基线状态管理
 # =============================================================================
 
-def create_initial_state(ts_grid: np.ndarray, ys_grid: np.ndarray, now_sec: int) -> Optional[BaselineState]:
+def create_initial_state(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    now_sec: int,
+) -> Optional[BaselineState]:
     baseline = build_current_baseline(ts_grid, ys_grid)
 
     if baseline is None:
@@ -740,6 +857,26 @@ def create_initial_state(ts_grid: np.ndarray, ys_grid: np.ndarray, now_sec: int)
     )
 
 
+def apply_phase_lock_to_state(
+    state: BaselineState,
+    best_period: int,
+    best_origin: int,
+) -> None:
+    best_period = int(best_period)
+
+    if best_period <= 1:
+        return
+
+    template = np.array(state.template, dtype=float)
+
+    if len(template) != best_period:
+        template = resample_template(template, best_period)
+
+    state.period = best_period
+    state.phase_origin_ts = int(best_origin)
+    state.template = template.astype(float).tolist()
+
+
 def maybe_update_state(
     key: str,
     ts_grid: np.ndarray,
@@ -772,7 +909,14 @@ def maybe_update_state(
     elapsed = min(elapsed, POLL_INTERVAL * 2)
     state.last_seen_ts = now_sec
 
-    is_anom, outside_ratio, mean_abs_err, mean_rel_err, best_origin = detect_anomaly(
+    (
+        is_anomaly,
+        outside_ratio,
+        mean_abs_err,
+        mean_rel_err,
+        best_period,
+        best_origin,
+    ) = detect_anomaly(
         state=state,
         ts_grid=ts_grid,
         ys_grid=ys_grid,
@@ -780,9 +924,10 @@ def maybe_update_state(
         rel_threshold=rel_threshold,
     )
 
-    if is_anom:
+    if is_anomaly:
         state.status = BASELINE_STATUS_ANOMALY
         state.clean_seconds = 0
+
         BASELINE_STATES[key] = state
 
         logger.warning(
@@ -795,13 +940,17 @@ def maybe_update_state(
 
         return state, True, outside_ratio, mean_abs_err, mean_rel_err
 
+    old_period = int(state.period)
     old_origin = int(state.phase_origin_ts)
-    state.phase_origin_ts = int(best_origin)
 
-    if abs(state.phase_origin_ts - old_origin) >= 1:
-        logger.debug(
-            "相位校正 key=%s origin %s -> %s",
+    apply_phase_lock_to_state(state, best_period, best_origin)
+
+    if old_period != state.period or old_origin != state.phase_origin_ts:
+        logger.info(
+            "phase-lock key=%s period %s -> %s origin %s -> %s",
             key,
+            old_period,
+            state.period,
             datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"),
             datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
         )
@@ -809,9 +958,15 @@ def maybe_update_state(
     if state.status == BASELINE_STATUS_ANOMALY:
         state.status = BASELINE_STATUS_RECOVERING
         state.clean_seconds = elapsed
+
         BASELINE_STATES[key] = state
 
-        logger.info("异常开始恢复 key=%s clean_seconds=%ss", key, state.clean_seconds)
+        logger.info(
+            "异常开始恢复 key=%s clean_seconds=%ss",
+            key,
+            state.clean_seconds,
+        )
+
         return state, False, outside_ratio, mean_abs_err, mean_rel_err
 
     if state.status == BASELINE_STATUS_RECOVERING:
@@ -834,17 +989,27 @@ def maybe_update_state(
         int(state.period) * MAX_CYCLES_FOR_TEMPLATE,
     )
 
-    baseline = build_current_baseline(ts_grid=ts_grid, ys_grid=ys_grid, tail_seconds=tail_seconds)
+    baseline = build_current_baseline(
+        ts_grid=ts_grid,
+        ys_grid=ys_grid,
+        tail_seconds=tail_seconds,
+    )
 
     if baseline is None:
         BASELINE_STATES[key] = state
         return state, False, outside_ratio, mean_abs_err, mean_rel_err
 
     new_period, new_origin, new_template = baseline
+
     old_template = np.array(state.template, dtype=float)
+
     alpha = RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else HEALTHY_EMA_ALPHA
 
-    merged = merge_template(old_template=old_template, new_template=new_template, alpha=alpha)
+    merged = merge_template(
+        old_template=old_template,
+        new_template=new_template,
+        alpha=alpha,
+    )
 
     state.period = int(new_period)
     state.phase_origin_ts = int(new_origin)
@@ -878,7 +1043,12 @@ def maybe_update_state(
 # =============================================================================
 
 def prom_escape_label_value(value: str) -> str:
-    return str(value).replace("\\", "\\\\").replace("\n", "\\n").replace('"', '\\"')
+    return (
+        str(value)
+        .replace("\\", "\\\\")
+        .replace("\n", "\\n")
+        .replace('"', '\\"')
+    )
 
 
 def labels_to_str(labels: Dict[str, str]) -> str:
@@ -926,7 +1096,9 @@ def write_series(
         resp = requests.post(
             f"{VM_URL}/api/v1/import/prometheus",
             data=payload.encode("utf-8"),
-            headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
+            headers={
+                "Content-Type": "text/plain; version=0.0.4; charset=utf-8",
+            },
             timeout=10,
         )
         resp.raise_for_status()
@@ -951,17 +1123,57 @@ def write_prediction_bundle(
     mean_rel_err: float,
     event_ts: int,
 ) -> bool:
-    ok1 = write_series(pred_metric, labels, ts_future, pred_values.astype(float).tolist())
-    ok2 = write_series(f"{pred_metric}_lower", labels, ts_future, lower_values.astype(float).tolist())
-    ok3 = write_series(f"{pred_metric}_upper", labels, ts_future, upper_values.astype(float).tolist())
+    ok1 = write_series(
+        metric_name=pred_metric,
+        labels=labels,
+        ts_list=ts_future,
+        values=pred_values.astype(float).tolist(),
+    )
+
+    ok2 = write_series(
+        metric_name=f"{pred_metric}_lower",
+        labels=labels,
+        ts_list=ts_future,
+        values=lower_values.astype(float).tolist(),
+    )
+
+    ok3 = write_series(
+        metric_name=f"{pred_metric}_upper",
+        labels=labels,
+        ts_list=ts_future,
+        values=upper_values.astype(float).tolist(),
+    )
 
     anomaly_labels = dict(labels)
     anomaly_labels["type"] = "prediction_deviation"
 
-    ok4 = write_series(anomaly_metric, anomaly_labels, [event_ts], [1.0 if is_anomaly else 0.0])
-    ok5 = write_series(f"{anomaly_metric}_outside_ratio", anomaly_labels, [event_ts], [outside_ratio])
-    ok6 = write_series(f"{anomaly_metric}_mean_abs_error", anomaly_labels, [event_ts], [mean_abs_err])
-    ok7 = write_series(f"{anomaly_metric}_mean_rel_error", anomaly_labels, [event_ts], [mean_rel_err])
+    ok4 = write_series(
+        metric_name=anomaly_metric,
+        labels=anomaly_labels,
+        ts_list=[event_ts],
+        values=[1.0 if is_anomaly else 0.0],
+    )
+
+    ok5 = write_series(
+        metric_name=f"{anomaly_metric}_outside_ratio",
+        labels=anomaly_labels,
+        ts_list=[event_ts],
+        values=[outside_ratio],
+    )
+
+    ok6 = write_series(
+        metric_name=f"{anomaly_metric}_mean_abs_error",
+        labels=anomaly_labels,
+        ts_list=[event_ts],
+        values=[mean_abs_err],
+    )
+
+    ok7 = write_series(
+        metric_name=f"{anomaly_metric}_mean_rel_error",
+        labels=anomaly_labels,
+        ts_list=[event_ts],
+        values=[mean_rel_err],
+    )
 
     return ok1 and ok2 and ok3 and ok4 and ok5 and ok6 and ok7
 
@@ -970,7 +1182,9 @@ def write_prediction_bundle(
 # 标签解析
 # =============================================================================
 
-_LABEL_PATTERN = re.compile(r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*')
+_LABEL_PATTERN = re.compile(
+    r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
+)
 
 
 def parse_labels_from_query(query: str) -> Dict[str, str]:
@@ -987,7 +1201,14 @@ def parse_labels_from_query(query: str) -> Dict[str, str]:
     for match in _LABEL_PATTERN.finditer(label_part):
         key = match.group(1)
         value = match.group(2)
-        value = value.replace('\\"', '"').replace("\\n", "\n").replace("\\\\", "\\")
+
+        value = (
+            value
+            .replace('\\"', '"')
+            .replace("\\n", "\n")
+            .replace("\\\\", "\\")
+        )
+
         labels[key] = value
 
     return labels
@@ -1042,7 +1263,12 @@ def load_state() -> None:
             states[key] = BaselineState(**value)
 
         BASELINE_STATES = states
-        logger.info("已加载预测状态文件 %s，状态数量=%d", STATE_FILE, len(BASELINE_STATES))
+
+        logger.info(
+            "已加载预测状态文件 %s，状态数量=%d",
+            STATE_FILE,
+            len(BASELINE_STATES),
+        )
 
     except Exception as e:
         logger.warning("加载预测状态文件失败，将重新学习: %s", e)
@@ -1069,10 +1295,14 @@ def save_state() -> None:
 
 
 # =============================================================================
-# 时间轴选择
+# 时间轴
 # =============================================================================
 
-def build_prediction_timestamps(key: str, last_real_ts: int, now_sec: int) -> Optional[List[int]]:
+def build_prediction_timestamps(
+    key: str,
+    last_real_ts: int,
+    now_sec: int,
+) -> Optional[List[int]]:
     data_lag = now_sec - last_real_ts
 
     if data_lag > MAX_DATA_LAG_SECONDS:
@@ -1086,24 +1316,24 @@ def build_prediction_timestamps(key: str, last_real_ts: int, now_sec: int) -> Op
 
     last_written_real_ts = LAST_REAL_TS_WRITTEN.get(key)
 
-    if last_written_real_ts is not None:
-        advance = last_real_ts - int(last_written_real_ts)
-
-        if advance < MIN_REAL_ADVANCE_SECONDS:
-            logger.info(
-                "真实数据时间戳未推进，跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s",
-                key,
-                last_real_ts,
-                last_written_real_ts,
-            )
-            return None
+    if last_written_real_ts is not None and last_real_ts <= int(last_written_real_ts):
+        logger.info(
+            "真实数据时间戳未推进，跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s",
+            key,
+            last_real_ts,
+            last_written_real_ts,
+        )
+        return None
 
     if ALIGN_PREDICTION_TO_LAST_REAL_TS:
         base_ts = last_real_ts
     else:
         base_ts = now_sec
 
-    return [base_ts + i + 1 for i in range(WRITE_HORIZON_SECONDS)]
+    return [
+        base_ts + i + 1
+        for i in range(WRITE_HORIZON_SECONDS)
+    ]
 
 
 # =============================================================================
@@ -1123,17 +1353,28 @@ def run_once() -> None:
         ts, ys = fetch_history(query)
 
         if len(ys) < MIN_POINTS:
-            logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
+            logger.info(
+                "[%s] %s 数据不足（%d 点），跳过",
+                now_str,
+                query,
+                len(ys),
+            )
             continue
 
         ts_grid, ys_grid = normalize_history(ts, ys)
 
         if len(ys_grid) < MIN_POINTS:
-            logger.info("[%s] %s 清洗后数据不足（%d 点），跳过", now_str, query, len(ys_grid))
+            logger.info(
+                "[%s] %s 清洗后数据不足（%d 点），跳过",
+                now_str,
+                query,
+                len(ys_grid),
+            )
             continue
 
         base_labels = parse_labels_from_query(query)
         write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
+
         key = series_key(pred_metric, write_labels)
 
         state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err = maybe_update_state(
@@ -1145,7 +1386,11 @@ def run_once() -> None:
         )
 
         if state is None:
-            logger.info("[%s] %s 暂无可用健康模板，等待学习", now_str, query)
+            logger.info(
+                "[%s] %s 暂无可用健康模板，等待学习",
+                now_str,
+                query,
+            )
             continue
 
         now_sec = int(time.time())
@@ -1161,7 +1406,8 @@ def run_once() -> None:
         if not ts_future:
             continue
 
-        pred_values = predict_with_origin(state, ts_future)
+        pred_values = predict_with_state(state, ts_future)
+
         lower_values, upper_values = calc_bounds(
             pred=pred_values,
             abs_threshold=abs_threshold,
@@ -1184,7 +1430,11 @@ def run_once() -> None:
         )
 
         if not ok:
-            logger.error("[%s] %s 写入预测数据失败", now_str, query)
+            logger.error(
+                "[%s] %s 写入预测数据失败",
+                now_str,
+                query,
+            )
             continue
 
         LAST_REAL_TS_WRITTEN[key] = last_real_ts
@@ -1234,4 +1484,4 @@ def main() -> None:
 
 
 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file

From 79e9f9b080e3f5fc4a284ab1422d3a54aacaff0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 21 May 2026 21:23:40 +0800
Subject: [PATCH 26/36] feat(pridict_v4): update pridict v4 version

---
 ai/pridict_v4.py | 1604 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1604 insertions(+)
 create mode 100644 ai/pridict_v4.py

diff --git a/ai/pridict_v4.py b/ai/pridict_v4.py
new file mode 100644
index 0000000..774ad3a
--- /dev/null
+++ b/ai/pridict_v4.py
@@ -0,0 +1,1604 @@
+# -*- coding: utf-8 -*-
+"""
+ProtoForge Predictor v11
+
+核心能力：
+1. feed_rate / spindle_speed / spindle_current 使用 phase-lock 点预测。
+2. vibration_x / vibration_y / vibration_z 使用 phase-band 预测带。
+3. vibration 类指标不再追求单点完全贴合，而是输出：
+   - xxx_predicted        中位数预测线
+   - xxx_predicted_upper  正常上边界
+   - xxx_predicted_lower  正常下边界
+4. 预测起点锚定最后一个真实点 last_real_ts，避免时间错位。
+5. 异常期间冻结健康模板，不学习故障数据。
+6. 故障恢复后等待稳定，再恢复模板学习。
+"""
+
+import json
+import logging
+import math
+import os
+import re
+import time
+from dataclasses import asdict, dataclass
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Tuple
+
+import numpy as np
+import requests
+
+
+# =============================================================================
+# 日志配置
+# =============================================================================
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+)
+
+logger = logging.getLogger(__name__)
+
+
+# =============================================================================
+# 基础配置
+# =============================================================================
+
+VM_URL = "http://localhost:8428"
+STATE_FILE = "/tmp/protoforge_predictor_state_v11.json"
+
+HISTORY_MINUTES = 30
+HORIZON_SECONDS = 120
+POLL_INTERVAL = 30
+
+WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL)
+
+QUERY_STEP = "1s"
+MIN_POINTS = 120
+
+MIN_PERIOD_SECONDS = 5
+MAX_PERIOD_SECONDS = 3600
+
+MIN_FULL_CYCLES_FOR_TEMPLATE = 3
+MAX_CYCLES_FOR_TEMPLATE = 8
+
+DETECT_WINDOW_SECONDS = 20
+RECOVERY_MIN_SECONDS = 60
+
+HEALTHY_EMA_ALPHA = 0.10
+RECOVERY_EMA_ALPHA = 0.25
+
+OUTSIDE_RATIO_THRESHOLD = 0.60
+
+VALLEY_QUANTILE = 45
+
+MAX_DATA_LAG_SECONDS = 180
+
+PHASE_LOCK_MIN_WINDOW_SECONDS = 45
+PHASE_LOCK_MAX_WINDOW_SECONDS = 180
+PHASE_LOCK_PERIOD_SEARCH_RATIO = 0.12
+PHASE_LOCK_ORIGIN_SEARCH_RATIO = 0.35
+PHASE_LOCK_PERIOD_STEP = 1
+PHASE_LOCK_ORIGIN_STEP = 1
+
+
+# =============================================================================
+# 指标配置
+# =============================================================================
+
+PREDICT_TARGETS = [
+    {
+        "query": 'feed_rate{device_id="fanuc-cnc"}',
+        "pred_metric": "feed_rate_predicted",
+        "anomaly_metric": "feed_rate_anomaly",
+        "strategy": "phase_point",
+        "abs_threshold": 400.0,
+        "rel_threshold": 0.25,
+        "smooth_window": 1,
+    },
+    {
+        "query": 'spindle_speed{device_id="fanuc-cnc"}',
+        "pred_metric": "spindle_speed_predicted",
+        "anomaly_metric": "spindle_speed_anomaly",
+        "strategy": "phase_point",
+        "abs_threshold": 500.0,
+        "rel_threshold": 0.25,
+        "smooth_window": 1,
+    },
+    {
+        "query": 'spindle_current{device_id="fanuc-cnc"}',
+        "pred_metric": "spindle_current_predicted",
+        "anomaly_metric": "spindle_current_anomaly",
+        "strategy": "phase_point",
+        "abs_threshold": 5.0,
+        "rel_threshold": 0.25,
+        "smooth_window": 1,
+    },
+    {
+        "query": 'vibration_x{device_id="fanuc-cnc"}',
+        "pred_metric": "vibration_x_predicted",
+        "anomaly_metric": "vibration_x_anomaly",
+        "strategy": "phase_band",
+        "abs_threshold": 0.12,
+        "rel_threshold": 0.35,
+        "smooth_window": 5,
+        "band_low_q": 10,
+        "band_high_q": 90,
+        "band_pad_abs": 0.06,
+    },
+    {
+        "query": 'vibration_y{device_id="fanuc-cnc"}',
+        "pred_metric": "vibration_y_predicted",
+        "anomaly_metric": "vibration_y_anomaly",
+        "strategy": "phase_band",
+        "abs_threshold": 0.12,
+        "rel_threshold": 0.35,
+        "smooth_window": 5,
+        "band_low_q": 10,
+        "band_high_q": 90,
+        "band_pad_abs": 0.06,
+    },
+    {
+        "query": 'vibration_z{device_id="fanuc-cnc"}',
+        "pred_metric": "vibration_z_predicted",
+        "anomaly_metric": "vibration_z_anomaly",
+        "strategy": "phase_band",
+        "abs_threshold": 0.12,
+        "rel_threshold": 0.35,
+        "smooth_window": 5,
+        "band_low_q": 10,
+        "band_high_q": 90,
+        "band_pad_abs": 0.06,
+    },
+]
+
+EXTRA_PREDICT_LABELS = {
+    "forecast": "phase_band_health_v11",
+    "source": "protoforge",
+}
+
+BASELINE_STATUS_HEALTHY = "healthy"
+BASELINE_STATUS_ANOMALY = "anomaly"
+BASELINE_STATUS_RECOVERING = "recovering"
+
+
+# =============================================================================
+# 状态结构
+# =============================================================================
+
+@dataclass
+class BaselineState:
+    period: int
+    phase_origin_ts: int
+    template: List[float]
+    lower_template: List[float]
+    upper_template: List[float]
+    strategy: str
+    status: str
+    clean_seconds: int
+    last_update_ts: int
+    last_seen_ts: int
+    y_min: float
+    y_max: float
+
+
+BASELINE_STATES: Dict[str, BaselineState] = {}
+LAST_REAL_TS_WRITTEN: Dict[str, int] = {}
+
+
+# =============================================================================
+# VictoriaMetrics 读取
+# =============================================================================
+
+def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]:
+    now = datetime.now()
+    start = now - timedelta(minutes=minutes)
+
+    try:
+        resp = requests.get(
+            f"{VM_URL}/api/v1/query_range",
+            params={
+                "query": query,
+                "start": start.timestamp(),
+                "end": now.timestamp(),
+                "step": QUERY_STEP,
+            },
+            timeout=10,
+        )
+        resp.raise_for_status()
+    except requests.RequestException as e:
+        logger.error("拉取数据失败 query=%s: %s", query, e)
+        return [], []
+
+    try:
+        result = resp.json().get("data", {}).get("result", [])
+    except Exception as e:
+        logger.error("解析 VM 返回失败 query=%s: %s", query, e)
+        return [], []
+
+    if not result:
+        return [], []
+
+    values = result[0].get("values", [])
+
+    ts = []
+    ys = []
+
+    for item in values:
+        if len(item) < 2:
+            continue
+
+        try:
+            t = float(item[0])
+            y = float(item[1])
+        except Exception:
+            continue
+
+        if not math.isfinite(t) or not math.isfinite(y):
+            continue
+
+        ts.append(t)
+        ys.append(y)
+
+    return ts, ys
+
+
+def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]:
+    if not ts or not ys or len(ts) != len(ys):
+        return np.array([]), np.array([])
+
+    data = {}
+
+    for t, y in zip(ts, ys):
+        try:
+            sec = int(round(float(t)))
+            val = float(y)
+        except Exception:
+            continue
+
+        if not math.isfinite(sec) or not math.isfinite(val):
+            continue
+
+        data[sec] = val
+
+    if not data:
+        return np.array([]), np.array([])
+
+    sorted_items = sorted(data.items(), key=lambda x: x[0])
+
+    ts_clean = np.array([x[0] for x in sorted_items], dtype=float)
+    ys_clean = np.array([x[1] for x in sorted_items], dtype=float)
+
+    if len(ts_clean) < 2:
+        return ts_clean, ys_clean
+
+    start_sec = int(ts_clean[0])
+    end_sec = int(ts_clean[-1])
+
+    if end_sec <= start_sec:
+        return ts_clean, ys_clean
+
+    ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float)
+    ys_grid = np.interp(ts_grid, ts_clean, ys_clean)
+
+    return ts_grid, ys_grid
+
+
+# =============================================================================
+# 平滑与预处理
+# =============================================================================
+
+def rolling_median(arr: np.ndarray, window: int) -> np.ndarray:
+    if window <= 1 or len(arr) < window:
+        return arr.astype(float)
+
+    if window % 2 == 0:
+        window += 1
+
+    pad = window // 2
+    padded = np.pad(arr.astype(float), (pad, pad), mode="edge")
+
+    result = []
+
+    for i in range(len(arr)):
+        result.append(float(np.median(padded[i:i + window])))
+
+    return np.array(result, dtype=float)
+
+
+def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
+    if window <= 1 or len(arr) < window:
+        return arr.astype(float)
+
+    if window % 2 == 0:
+        window += 1
+
+    kernel = np.ones(window, dtype=float) / window
+    pad = window // 2
+    padded = np.pad(arr.astype(float), (pad, pad), mode="edge")
+
+    return np.convolve(padded, kernel, mode="valid")
+
+
+def preprocess_values(ys_grid: np.ndarray, target: Dict) -> np.ndarray:
+    strategy = target.get("strategy", "phase_point")
+    smooth_window = int(target.get("smooth_window", 1))
+
+    if strategy == "phase_band":
+        return rolling_median(ys_grid, smooth_window)
+
+    if smooth_window > 1:
+        return moving_average(ys_grid, smooth_window)
+
+    return ys_grid.astype(float)
+
+
+# =============================================================================
+# 周期估计
+# =============================================================================
+
+def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
+    n = len(ys_arr)
+
+    if n < 8:
+        return 60.0
+
+    centered = ys_arr - np.mean(ys_arr)
+
+    if np.allclose(centered, 0):
+        return 60.0
+
+    fft_vals = np.fft.rfft(centered)
+    freqs = np.fft.rfftfreq(n, d=1.0)
+
+    if len(freqs) <= 1:
+        return 60.0
+
+    power = np.abs(fft_vals[1:])
+
+    if len(power) == 0 or np.max(power) <= 0:
+        return 60.0
+
+    dominant_idx = int(np.argmax(power)) + 1
+    dominant_freq = float(freqs[dominant_idx])
+
+    if dominant_freq <= 0:
+        return 60.0
+
+    period = 1.0 / dominant_freq
+
+    return float(np.clip(period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+
+def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float:
+    n = len(ys_arr)
+
+    if n < 20:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    centered = ys_arr - np.mean(ys_arr)
+
+    if np.allclose(centered, 0):
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    corr = np.correlate(centered, centered, mode="full")[n - 1:]
+
+    p0 = int(round(init_period))
+    left = max(int(MIN_PERIOD_SECONDS), int(max(2, p0 * 0.7)))
+    right = min(n // 2, int(max(left + 1, p0 * 1.3)))
+
+    if right <= left:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    search = corr[left:right + 1]
+
+    if len(search) == 0:
+        return float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+    best_lag = left + int(np.argmax(search))
+
+    return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+
+def estimate_period_rough(ys_arr: np.ndarray) -> int:
+    p_fft = estimate_period_by_fft(ys_arr)
+    p_refined = refine_period_by_autocorr(ys_arr, p_fft)
+
+    period = int(round(p_refined))
+    period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period))
+
+    return int(period)
+
+
+# =============================================================================
+# 谷底检测
+# =============================================================================
+
+def find_valley_indices(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    expected_period: int,
+) -> List[int]:
+    n = len(ys_grid)
+
+    if n < max(10, expected_period * 2):
+        return []
+
+    period = max(3, int(expected_period))
+    smooth_window = max(3, int(round(period * 0.08)))
+    smooth_window = min(smooth_window, 21)
+
+    ys_smooth = moving_average(ys_grid, smooth_window)
+    threshold = float(np.percentile(ys_smooth, VALLEY_QUANTILE))
+
+    candidates = []
+
+    for i in range(1, n - 1):
+        if (
+            ys_smooth[i] <= ys_smooth[i - 1]
+            and ys_smooth[i] < ys_smooth[i + 1]
+            and ys_smooth[i] <= threshold
+        ):
+            candidates.append(i)
+
+    if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE:
+        candidates = []
+
+        for i in range(1, n - 1):
+            if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]:
+                candidates.append(i)
+
+    if not candidates:
+        return []
+
+    min_distance = max(2, int(round(period * 0.55)))
+    selected = []
+
+    for idx in candidates:
+        if not selected:
+            selected.append(idx)
+            continue
+
+        if idx - selected[-1] >= min_distance:
+            selected.append(idx)
+            continue
+
+        if ys_smooth[idx] < ys_smooth[selected[-1]]:
+            selected[-1] = idx
+
+    if len(selected) < 2:
+        return selected
+
+    cleaned = [selected[0]]
+
+    for idx in selected[1:]:
+        diff = int(ts_grid[idx] - ts_grid[cleaned[-1]])
+
+        if int(period * 0.55) <= diff <= int(period * 1.60):
+            cleaned.append(idx)
+            continue
+
+        if diff < int(period * 0.55):
+            if ys_smooth[idx] < ys_smooth[cleaned[-1]]:
+                cleaned[-1] = idx
+            continue
+
+        cleaned.append(idx)
+
+    return cleaned
+
+
+def detect_period_and_valleys(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+) -> Tuple[int, List[int]]:
+    rough = estimate_period_rough(ys_grid)
+    valleys = find_valley_indices(ts_grid, ys_grid, rough)
+
+    if len(valleys) >= 3:
+        diffs = np.diff(ts_grid[valleys])
+        good = diffs[(diffs >= rough * 0.55) & (diffs <= rough * 1.60)]
+
+        if len(good) > 0:
+            period = int(round(float(np.median(good))))
+        else:
+            period = rough
+    else:
+        period = rough
+
+    period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period))
+
+    return int(period), valleys
+
+
+# =============================================================================
+# 模板构建
+# =============================================================================
+
+def build_templates_from_valleys(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    period: int,
+    valleys: List[int],
+    target: Dict,
+) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
+    if period <= 1 or len(valleys) < MIN_FULL_CYCLES_FOR_TEMPLATE + 1:
+        return None
+
+    strategy = target.get("strategy", "phase_point")
+    low_q = float(target.get("band_low_q", 10))
+    high_q = float(target.get("band_high_q", 90))
+
+    pairs = []
+
+    for a, b in zip(valleys[:-1], valleys[1:]):
+        cycle_len = float(ts_grid[b] - ts_grid[a])
+
+        if period * 0.55 <= cycle_len <= period * 1.60:
+            pairs.append((a, b, cycle_len))
+
+    if len(pairs) < MIN_FULL_CYCLES_FOR_TEMPLATE:
+        return None
+
+    pairs = pairs[-MAX_CYCLES_FOR_TEMPLATE:]
+
+    phase_grid = np.arange(period, dtype=float)
+    segments = []
+    weights = []
+
+    for idx, (a, b, cycle_len) in enumerate(pairs):
+        seg_ts = ts_grid[a:b + 1]
+        seg_y = ys_grid[a:b + 1]
+
+        if len(seg_y) < 3:
+            continue
+
+        x_old = (seg_ts - seg_ts[0]) / cycle_len * period
+        seg = np.interp(phase_grid, x_old, seg_y)
+
+        segments.append(seg.astype(float))
+        weights.append(0.5 + 0.5 * ((idx + 1) / len(pairs)))
+
+    if len(segments) < MIN_FULL_CYCLES_FOR_TEMPLATE:
+        return None
+
+    arr = np.vstack(segments)
+    w_arr = np.array(weights, dtype=float)
+
+    if strategy == "phase_band":
+        mid_template = np.percentile(arr, 50, axis=0)
+        lower_template = np.percentile(arr, low_q, axis=0)
+        upper_template = np.percentile(arr, high_q, axis=0)
+    else:
+        mid_template = np.average(arr, axis=0, weights=w_arr)
+        lower_template = mid_template.copy()
+        upper_template = mid_template.copy()
+
+    return (
+        mid_template.astype(float),
+        lower_template.astype(float),
+        upper_template.astype(float),
+    )
+
+
+def build_current_baseline(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    target: Dict,
+    tail_seconds: Optional[int] = None,
+) -> Optional[Tuple[int, int, np.ndarray, np.ndarray, np.ndarray]]:
+    if len(ys_grid) < MIN_POINTS:
+        return None
+
+    if tail_seconds is not None and tail_seconds > 0:
+        cutoff = ts_grid[-1] - int(tail_seconds)
+        mask = ts_grid >= cutoff
+        ts_use = ts_grid[mask]
+        ys_use = ys_grid[mask]
+    else:
+        ts_use = ts_grid
+        ys_use = ys_grid
+
+    if len(ys_use) < MIN_POINTS:
+        return None
+
+    period, valleys = detect_period_and_valleys(ts_use, ys_use)
+
+    templates = build_templates_from_valleys(
+        ts_grid=ts_use,
+        ys_grid=ys_use,
+        period=period,
+        valleys=valleys,
+        target=target,
+    )
+
+    if templates is None or len(valleys) == 0:
+        return None
+
+    template, lower_template, upper_template = templates
+    phase_origin_ts = int(round(float(ts_use[valleys[-1]])))
+
+    return int(period), phase_origin_ts, template, lower_template, upper_template
+
+
+# =============================================================================
+# 模板预测
+# =============================================================================
+
+def circular_template_value(template: np.ndarray, phase: float) -> float:
+    period = len(template)
+
+    if period == 0:
+        return 0.0
+
+    phase = float(phase) % period
+    i0 = int(math.floor(phase)) % period
+    i1 = (i0 + 1) % period
+    frac = phase - math.floor(phase)
+
+    return float((1.0 - frac) * template[i0] + frac * template[i1])
+
+
+def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
+    old_period = len(old_template)
+
+    if old_period == new_period:
+        return old_template.astype(float)
+
+    if old_period <= 1 or new_period <= 1:
+        return np.full(new_period, float(np.mean(old_template)), dtype=float)
+
+    old_x = np.linspace(0.0, 1.0, old_period, endpoint=False)
+    new_x = np.linspace(0.0, 1.0, new_period, endpoint=False)
+
+    old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0])
+    old_y_ext = np.concatenate([old_template, old_template, old_template])
+
+    return np.interp(new_x, old_x_ext, old_y_ext).astype(float)
+
+
+def predict_template_values(
+    template: np.ndarray,
+    period: int,
+    phase_origin_ts: int,
+    ts_list: List[int],
+) -> np.ndarray:
+    if period <= 1:
+        return np.zeros(len(ts_list), dtype=float)
+
+    if len(template) != period:
+        template = resample_template(template, period)
+
+    values = []
+
+    for ts in ts_list:
+        phase = (int(ts) - int(phase_origin_ts)) % period
+        values.append(circular_template_value(template, phase))
+
+    return np.array(values, dtype=float)
+
+
+def predict_state_bundle(
+    state: BaselineState,
+    ts_list: List[int],
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    period = int(state.period)
+    origin = int(state.phase_origin_ts)
+
+    mid = predict_template_values(
+        template=np.array(state.template, dtype=float),
+        period=period,
+        phase_origin_ts=origin,
+        ts_list=ts_list,
+    )
+
+    lower = predict_template_values(
+        template=np.array(state.lower_template, dtype=float),
+        period=period,
+        phase_origin_ts=origin,
+        ts_list=ts_list,
+    )
+
+    upper = predict_template_values(
+        template=np.array(state.upper_template, dtype=float),
+        period=period,
+        phase_origin_ts=origin,
+        ts_list=ts_list,
+    )
+
+    return mid, lower, upper
+
+
+def normalize_origin_near(origin: int, period: int, near_ts: int) -> int:
+    if period <= 1:
+        return origin
+
+    origin = int(origin)
+    period = int(period)
+    near_ts = int(near_ts)
+
+    while origin + period <= near_ts:
+        origin += period
+
+    while origin > near_ts:
+        origin -= period
+
+    return origin
+
+
+def merge_template(
+    old_template: np.ndarray,
+    new_template: np.ndarray,
+    alpha: float,
+) -> np.ndarray:
+    alpha = float(np.clip(alpha, 0.0, 1.0))
+
+    if len(old_template) != len(new_template):
+        old_template = resample_template(old_template, len(new_template))
+
+    merged = (1.0 - alpha) * old_template + alpha * new_template
+
+    return merged.astype(float)
+
+
+# =============================================================================
+# Phase Lock
+# =============================================================================
+
+def phase_lock_recent(
+    state: BaselineState,
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+) -> Tuple[int, int, np.ndarray, float]:
+    base_period = int(state.period)
+    base_origin = int(state.phase_origin_ts)
+    base_template = np.array(state.template, dtype=float)
+
+    if base_period <= 1 or len(base_template) <= 1:
+        ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()
+        pred = predict_template_values(base_template, base_period, base_origin, ts_recent)
+        actual = ys_model[-len(ts_recent):].astype(float)
+        mae = float(np.mean(np.abs(actual - pred))) if len(actual) else 0.0
+        return base_period, base_origin, pred, mae
+
+    window_seconds = max(
+        PHASE_LOCK_MIN_WINDOW_SECONDS,
+        min(PHASE_LOCK_MAX_WINDOW_SECONDS, int(base_period * 2)),
+    )
+
+    cutoff = ts_grid[-1] - window_seconds
+    mask = ts_grid >= cutoff
+
+    ts_recent_arr = ts_grid[mask].astype(int)
+    actual = ys_model[mask].astype(float)
+
+    if len(ts_recent_arr) < max(10, DETECT_WINDOW_SECONDS):
+        ts_recent_arr = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int)
+        actual = ys_model[-DETECT_WINDOW_SECONDS:].astype(float)
+
+    ts_recent = ts_recent_arr.tolist()
+    last_ts = int(ts_recent[-1])
+
+    p_min = max(
+        int(MIN_PERIOD_SECONDS),
+        int(round(base_period * (1.0 - PHASE_LOCK_PERIOD_SEARCH_RATIO))),
+    )
+    p_max = min(
+        int(MAX_PERIOD_SECONDS),
+        int(round(base_period * (1.0 + PHASE_LOCK_PERIOD_SEARCH_RATIO))),
+    )
+
+    best_period = base_period
+    best_origin = normalize_origin_near(base_origin, base_period, last_ts)
+    best_template = resample_template(base_template, best_period)
+
+    best_pred = predict_template_values(
+        template=best_template,
+        period=best_period,
+        phase_origin_ts=best_origin,
+        ts_list=ts_recent,
+    )
+
+    best_mae = float(np.mean(np.abs(actual - best_pred)))
+
+    for period in range(p_min, p_max + 1, PHASE_LOCK_PERIOD_STEP):
+        template = resample_template(base_template, period)
+        center_origin = normalize_origin_near(base_origin, period, last_ts)
+        origin_shift = max(2, int(round(period * PHASE_LOCK_ORIGIN_SEARCH_RATIO)))
+
+        for shift in range(-origin_shift, origin_shift + 1, PHASE_LOCK_ORIGIN_STEP):
+            origin = center_origin + shift
+
+            pred = predict_template_values(
+                template=template,
+                period=period,
+                phase_origin_ts=origin,
+                ts_list=ts_recent,
+            )
+
+            mae = float(np.mean(np.abs(actual - pred)))
+            penalty = abs(period - base_period) * 0.5
+            score = mae + penalty
+
+            best_score = best_mae + abs(best_period - base_period) * 0.5
+
+            if score < best_score:
+                best_period = period
+                best_origin = origin
+                best_pred = pred
+                best_mae = mae
+
+    best_origin = normalize_origin_near(best_origin, best_period, last_ts)
+
+    return int(best_period), int(best_origin), best_pred, float(best_mae)
+
+
+# =============================================================================
+# 异常检测
+# =============================================================================
+
+def calc_point_bounds(
+    pred: np.ndarray,
+    abs_threshold: float,
+    rel_threshold: float,
+) -> Tuple[np.ndarray, np.ndarray]:
+    threshold = np.maximum(abs_threshold, np.abs(pred) * rel_threshold)
+    return pred - threshold, pred + threshold
+
+
+def calc_final_bounds(
+    state: BaselineState,
+    pred: np.ndarray,
+    lower_raw: np.ndarray,
+    upper_raw: np.ndarray,
+    target: Dict,
+) -> Tuple[np.ndarray, np.ndarray]:
+    strategy = target.get("strategy", "phase_point")
+    abs_threshold = float(target.get("abs_threshold", 1.0))
+    rel_threshold = float(target.get("rel_threshold", 0.25))
+
+    if strategy == "phase_band":
+        pad_abs = float(target.get("band_pad_abs", abs_threshold))
+        dynamic_pad = np.maximum(pad_abs, np.abs(pred) * rel_threshold * 0.20)
+        lower = lower_raw - dynamic_pad
+        upper = upper_raw + dynamic_pad
+        return lower, upper
+
+    return calc_point_bounds(pred, abs_threshold, rel_threshold)
+
+
+def detect_anomaly(
+    state: BaselineState,
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    target: Dict,
+) -> Tuple[bool, float, float, float, int, int]:
+    best_period, best_origin, pred_recent, _ = phase_lock_recent(
+        state=state,
+        ts_grid=ts_grid,
+        ys_model=ys_model,
+    )
+
+    recent_len = len(pred_recent)
+
+    if recent_len <= 0:
+        return False, 0.0, 0.0, 0.0, best_period, best_origin
+
+    actual = ys_model[-recent_len:].astype(float)
+
+    tmp_state = BaselineState(
+        period=best_period,
+        phase_origin_ts=best_origin,
+        template=state.template,
+        lower_template=state.lower_template,
+        upper_template=state.upper_template,
+        strategy=state.strategy,
+        status=state.status,
+        clean_seconds=state.clean_seconds,
+        last_update_ts=state.last_update_ts,
+        last_seen_ts=state.last_seen_ts,
+        y_min=state.y_min,
+        y_max=state.y_max,
+    )
+
+    recent_ts = ts_grid[-recent_len:].astype(int).tolist()
+    pred, lower_raw, upper_raw = predict_state_bundle(tmp_state, recent_ts)
+
+    lower, upper = calc_final_bounds(
+        state=tmp_state,
+        pred=pred,
+        lower_raw=lower_raw,
+        upper_raw=upper_raw,
+        target=target,
+    )
+
+    outside = (actual < lower) | (actual > upper)
+    abs_err = np.abs(actual - pred)
+
+    outside_ratio = float(np.mean(outside))
+    mean_abs_err = float(np.mean(abs_err))
+    mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1e-6)))
+
+    is_anomaly = outside_ratio >= OUTSIDE_RATIO_THRESHOLD
+
+    return (
+        is_anomaly,
+        outside_ratio,
+        mean_abs_err,
+        mean_rel_err,
+        int(best_period),
+        int(best_origin),
+    )
+
+
+# =============================================================================
+# 状态管理
+# =============================================================================
+
+def create_initial_state(
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    target: Dict,
+    now_sec: int,
+) -> Optional[BaselineState]:
+    """Build the first BaselineState for a series, marked healthy.
+
+    Returns None when build_current_baseline() yields no baseline. The new
+    state is stamped with now_sec and records the min/max of ys_model.
+    """
+    built = build_current_baseline(ts_grid=ts_grid, ys_grid=ys_model, target=target)
+    if built is None:
+        return None
+
+    cycle, origin, mid_tpl, low_tpl, high_tpl = built
+
+    return BaselineState(
+        period=int(cycle),
+        phase_origin_ts=int(origin),
+        template=mid_tpl.astype(float).tolist(),
+        lower_template=low_tpl.astype(float).tolist(),
+        upper_template=high_tpl.astype(float).tolist(),
+        strategy=str(target.get("strategy", "phase_point")),
+        status=BASELINE_STATUS_HEALTHY,
+        clean_seconds=int(cycle * MAX_CYCLES_FOR_TEMPLATE),
+        last_update_ts=now_sec,
+        last_seen_ts=now_sec,
+        y_min=float(np.min(ys_model)),
+        y_max=float(np.max(ys_model)),
+    )
+
+
+def apply_phase_lock_to_state(
+    state: BaselineState,
+    best_period: int,
+    best_origin: int,
+) -> None:
+    """Re-align state in place to a phase-locked period and origin.
+
+    Does nothing when best_period <= 1. Each of the three templates whose
+    length differs from the new period is resampled to that length before
+    the period and phase origin are overwritten.
+    """
+    locked_period = int(best_period)
+
+    if locked_period <= 1:
+        return
+
+    for attr in ("template", "lower_template", "upper_template"):
+        current = getattr(state, attr)
+
+        if len(current) == locked_period:
+            continue
+
+        resized = resample_template(
+            np.array(current, dtype=float),
+            locked_period,
+        )
+        setattr(state, attr, resized.astype(float).tolist())
+
+    state.period = locked_period
+    state.phase_origin_ts = int(best_origin)
+
+
+def maybe_update_state(
+    key: str,
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    target: Dict,
+) -> Tuple[Optional[BaselineState], bool, float, float, float]:
+    """Advance the per-series baseline state and return this round's anomaly stats.
+
+    Returns (state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err).
+    state is None when no initial baseline could be built; the three metrics
+    are 0.0 on the round that creates the state. While an anomaly is detected
+    the templates are left untouched and clean_seconds is reset to 0.
+    """
+    now_sec = int(time.time())
+    state = BASELINE_STATES.get(key)
+
+    if state is None:
+        state = create_initial_state(
+            ts_grid=ts_grid,
+            ys_model=ys_model,
+            target=target,
+            now_sec=now_sec,
+        )
+        if state is None:
+            return None, False, 0.0, 0.0, 0.0
+        BASELINE_STATES[key] = state
+        logger.info(
+            "初始化健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss",
+            key,
+            state.strategy,
+            state.period,
+            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+            state.clean_seconds,
+        )
+        return state, False, 0.0, 0.0, 0.0
+    # Clean time credited per round is at least 1s and at most two poll intervals.
+    elapsed = max(1, now_sec - int(state.last_seen_ts))
+    elapsed = min(elapsed, POLL_INTERVAL * 2)
+    state.last_seen_ts = now_sec
+
+    (
+        is_anomaly,
+        outside_ratio,
+        mean_abs_err,
+        mean_rel_err,
+        best_period,
+        best_origin,
+    ) = detect_anomaly(
+        state=state,
+        ts_grid=ts_grid,
+        ys_model=ys_model,
+        target=target,
+    )
+
+    if is_anomaly:
+        state.status = BASELINE_STATUS_ANOMALY
+        state.clean_seconds = 0
+        BASELINE_STATES[key] = state
+        logger.warning(
+            "检测到异常，冻结模板 key=%s outside_ratio=%.2f mean_abs_err=%.4f mean_rel_err=%.4f",
+            key,
+            outside_ratio,
+            mean_abs_err,
+            mean_rel_err,
+        )
+        return state, True, outside_ratio, mean_abs_err, mean_rel_err
+    # Not anomalous: adopt the period/origin that detect_anomaly() returned.
+    old_period = int(state.period)
+    old_origin = int(state.phase_origin_ts)
+
+    apply_phase_lock_to_state(state, best_period, best_origin)
+
+    if old_period != state.period or old_origin != state.phase_origin_ts:
+        logger.info(
+            "phase-lock key=%s period %s -> %s origin %s -> %s",
+            key,
+            old_period,
+            state.period,
+            datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"),
+            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+        )
+    # First clean round after an anomaly: switch to recovering and restart the clean clock.
+    if state.status == BASELINE_STATUS_ANOMALY:
+        state.status = BASELINE_STATUS_RECOVERING
+        state.clean_seconds = elapsed
+        BASELINE_STATES[key] = state
+        logger.info(
+            "异常开始恢复 key=%s clean_seconds=%ss",
+            key,
+            state.clean_seconds,
+        )
+
+        return state, False, outside_ratio, mean_abs_err, mean_rel_err
+
+    if state.status == BASELINE_STATUS_RECOVERING:
+        state.clean_seconds += elapsed
+    else:
+        state.status = BASELINE_STATUS_HEALTHY
+        state.clean_seconds += elapsed
+    # Templates are refreshed only after enough accumulated clean time.
+    min_clean_for_update = max(
+        RECOVERY_MIN_SECONDS,
+        int(state.period) * MIN_FULL_CYCLES_FOR_TEMPLATE,
+    )
+
+    if state.clean_seconds < min_clean_for_update:
+        BASELINE_STATES[key] = state
+        return state, False, outside_ratio, mean_abs_err, mean_rel_err
+
+    tail_seconds = min(
+        int(state.clean_seconds),
+        int(state.period) * MAX_CYCLES_FOR_TEMPLATE,
+    )
+
+    baseline = build_current_baseline(
+        ts_grid=ts_grid,
+        ys_grid=ys_model,
+        target=target,
+        tail_seconds=tail_seconds,
+    )
+
+    if baseline is None:
+        BASELINE_STATES[key] = state
+        return state, False, outside_ratio, mean_abs_err, mean_rel_err
+
+    new_period, new_origin, new_template, new_lower_template, new_upper_template = baseline
+    # Weight handed to merge_template(): a different constant is used while recovering.
+    alpha = RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else HEALTHY_EMA_ALPHA
+
+    state.template = merge_template(
+        np.array(state.template, dtype=float),
+        new_template,
+        alpha,
+    ).astype(float).tolist()
+
+    state.lower_template = merge_template(
+        np.array(state.lower_template, dtype=float),
+        new_lower_template,
+        alpha,
+    ).astype(float).tolist()
+
+    state.upper_template = merge_template(
+        np.array(state.upper_template, dtype=float),
+        new_upper_template,
+        alpha,
+    ).astype(float).tolist()
+
+    state.period = int(new_period)
+    state.phase_origin_ts = int(new_origin)
+    state.status = BASELINE_STATUS_HEALTHY
+    state.last_update_ts = now_sec
+    # tail_seconds is used as a sample count here (ys_model[-tail_seconds:]).
+    if tail_seconds > 0 and len(ys_model) >= tail_seconds:
+        state.y_min = float(np.min(ys_model[-tail_seconds:]))
+        state.y_max = float(np.max(ys_model[-tail_seconds:]))
+    else:
+        state.y_min = float(np.min(ys_model))
+        state.y_max = float(np.max(ys_model))
+
+    BASELINE_STATES[key] = state
+
+    logger.info(
+        "更新健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss alpha=%.2f",
+        key,
+        state.strategy,
+        state.period,
+        datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+        state.clean_seconds,
+        alpha,
+    )
+
+    return state, False, outside_ratio, mean_abs_err, mean_rel_err
+
+
+# =============================================================================
+# Prometheus 写入
+# =============================================================================
+
+def prom_escape_label_value(value: str) -> str:
+    """Escape backslash, newline and double quote in a label value."""
+    escapes = str.maketrans(
+        {"\\": "\\\\", "\n": "\\n", '"': '\\"'}
+    )
+
+    return str(value).translate(escapes)
+
+
+def labels_to_str(labels: Dict[str, str]) -> str:
+    """Render labels as a Prometheus label set such as {a="1",b="2"}.
+
+    Keys are emitted in sorted order; an empty mapping renders as "".
+    """
+    if not labels:
+        return ""
+
+    pairs = (f'{name}="{prom_escape_label_value(labels[name])}"' for name in sorted(labels))
+    return "{" + ",".join(pairs) + "}"
+
+
+def write_series(
+    metric_name: str,
+    labels: Dict[str, str],
+    ts_list: List[int],
+    values: List[float],
+) -> bool:
+    """POST one series to the Prometheus import endpoint under VM_URL.
+
+    Pairs that cannot be converted or are not finite are skipped. Returns False
+    when no line remains or the request fails, True otherwise.
+    """
+    if not ts_list or not values or len(ts_list) != len(values):
+        return False
+
+    series = f"{metric_name}{labels_to_str(labels)}"
+    rows = []
+
+    for raw_ts, raw_val in zip(ts_list, values):
+        try:
+            sec = int(round(float(raw_ts)))
+            num = float(raw_val)
+        except Exception:
+            continue
+
+        if math.isfinite(sec) and math.isfinite(num):
+            rows.append(f"{series} {num:.6f} {sec * 1000}")
+
+    if not rows:
+        return False
+
+    try:
+        resp = requests.post(
+            f"{VM_URL}/api/v1/import/prometheus",
+            data=("\n".join(rows) + "\n").encode("utf-8"),
+            headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
+            timeout=10,
+        )
+        resp.raise_for_status()
+    except requests.RequestException as e:
+        logger.error("写入数据失败 metric=%s: %s", metric_name, e)
+        return False
+    return True
+
+
+def write_prediction_bundle(
+    pred_metric: str,
+    anomaly_metric: str,
+    labels: Dict[str, str],
+    ts_future: List[int],
+    pred_values: np.ndarray,
+    lower_values: np.ndarray,
+    upper_values: np.ndarray,
+    is_anomaly: bool,
+    outside_ratio: float,
+    mean_abs_err: float,
+    mean_rel_err: float,
+    event_ts: int,
+) -> bool:
+    """Write the forecast, its band and the anomaly indicators for one series.
+
+    Seven series go through write_series(): pred_metric plus its _lower and
+    _upper variants over ts_future, then anomaly_metric and its
+    _outside_ratio, _mean_abs_error and _mean_rel_error variants as single
+    points at event_ts with an extra type="prediction_deviation" label.
+
+    Every write is attempted even after an earlier one has failed.
+
+    Returns True only when all seven writes succeed.
+    """
+    # (metric name, values) pairs written over ts_future.
+    band_jobs = (
+        (pred_metric, pred_values),
+        (f"{pred_metric}_lower", lower_values),
+        (f"{pred_metric}_upper", upper_values),
+    )
+
+    outcomes = []
+
+    for name, series in band_jobs:
+        outcomes.append(
+            write_series(
+                metric_name=name,
+                labels=labels,
+                ts_list=ts_future,
+                values=series.astype(float).tolist(),
+            )
+        )
+
+    anomaly_labels = dict(labels)
+    anomaly_labels["type"] = "prediction_deviation"
+
+    # (metric name, value) pairs written as single points at event_ts.
+    point_jobs = (
+        (anomaly_metric, 1.0 if is_anomaly else 0.0),
+        (f"{anomaly_metric}_outside_ratio", outside_ratio),
+        (f"{anomaly_metric}_mean_abs_error", mean_abs_err),
+        (f"{anomaly_metric}_mean_rel_error", mean_rel_err),
+    )
+
+    for name, reading in point_jobs:
+        outcomes.append(
+            write_series(
+                metric_name=name,
+                labels=anomaly_labels,
+                ts_list=[event_ts],
+                values=[reading],
+            )
+        )
+
+    # The list is complete before all() runs, so no write is short-circuited.
+    return all(outcomes)
+
+
+# =============================================================================
+# 标签解析
+# =============================================================================
+
+_LABEL_PATTERN = re.compile(
+    r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
+)
+
+
+def parse_labels_from_query(query: str) -> Dict[str, str]:
+    """Extract name="value" equality matchers from a selector like m{a="b"}.
+
+    Returns an empty dict when the query has no {...} part. Escapes written by
+    prom_escape_label_value() are decoded in one left-to-right pass, so an
+    escaped backslash followed by the letter n is not mistaken for an escaped
+    newline (chained str.replace() calls got that case wrong). Unknown escape
+    sequences are kept verbatim.
+    """
+    if "{" not in query or "}" not in query:
+        return {}
+
+    label_part = query[query.index("{") + 1:query.rindex("}")]
+    decoded = {'"': '"', "n": "\n", "\\": "\\"}
+
+    def unescape(match):
+        return decoded.get(match.group(1), match.group(0))
+
+    labels = {}
+
+    for found in _LABEL_PATTERN.finditer(label_part):
+        raw_value = found.group(2)
+        labels[found.group(1)] = re.sub(r"\\(.)", unescape, raw_value)
+
+    return labels
+
+
+def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]:
+    """Merge label mappings left to right; later mappings win on key clashes.
+
+    Falsy arguments (None or empty dicts) are skipped.
+    """
+    return {
+        name: value for mapping in dicts if mapping for name, value in mapping.items()
+    }
+
+
+def series_key(metric_name: str, labels: Dict[str, str]) -> str:
+    return "".join((metric_name, labels_to_str(labels)))  # metric name + rendered label set
+
+
+# =============================================================================
+# 状态持久化
+# =============================================================================
+
+def load_state() -> None:
+    """Restore BASELINE_STATES from STATE_FILE, if the file exists.
+
+    Entries that are not dicts or that lack a required field are skipped, and
+    keys BaselineState does not declare are dropped before construction, so a
+    single unexpected entry no longer throws away every other saved state.
+    A file that cannot be read or parsed is logged and BASELINE_STATES is
+    left untouched.
+    """
+    global BASELINE_STATES
+
+    from dataclasses import fields
+
+    if not os.path.exists(STATE_FILE):
+        return
+
+    required_fields = {
+        "period", "phase_origin_ts", "template", "lower_template",
+        "upper_template", "strategy", "status", "clean_seconds",
+        "last_update_ts", "last_seen_ts", "y_min", "y_max",
+    }
+    known_fields = {field.name for field in fields(BaselineState)}
+
+    try:
+        with open(STATE_FILE, "r", encoding="utf-8") as f:
+            raw = json.load(f)
+
+        states = {}
+
+        for key, value in raw.get("baseline_states", {}).items():
+            if not isinstance(value, dict) or not required_fields.issubset(value):
+                continue
+
+            kwargs = {k: v for k, v in value.items() if k in known_fields}
+            states[key] = BaselineState(**kwargs)
+
+    except Exception as e:
+        logger.warning("加载预测状态文件失败，将重新学习: %s", e)
+        return
+
+    BASELINE_STATES = states
+
+    logger.info("已加载预测状态文件 %s，状态数量=%d", STATE_FILE, len(BASELINE_STATES))
+
+
+def save_state() -> None:
+    """Persist BASELINE_STATES to STATE_FILE as JSON.
+
+    The document is written to a ".tmp" sibling first and then moved over
+    STATE_FILE with os.replace(). Any failure is logged as a warning and
+    not raised.
+    """
+    try:
+        snapshot = {key: asdict(value) for key, value in BASELINE_STATES.items()}
+        tmp_file = STATE_FILE + ".tmp"
+
+        with open(tmp_file, "w", encoding="utf-8") as f:
+            json.dump({"baseline_states": snapshot}, f, ensure_ascii=False, indent=2)
+
+        os.replace(tmp_file, STATE_FILE)
+
+    except Exception as e:
+        logger.warning("保存预测状态文件失败: %s", e)
+
+
+# =============================================================================
+# 时间轴
+# =============================================================================
+
+def build_prediction_timestamps(
+    key: str,
+    last_real_ts: int,
+    now_sec: int,
+) -> Optional[List[int]]:
+    """Return the timestamps to forecast, or None when this round is skipped.
+
+    The round is skipped when the newest real sample is more than
+    MAX_DATA_LAG_SECONDS behind now_sec, or is not newer than the sample
+    recorded for key in LAST_REAL_TS_WRITTEN. Otherwise the result is
+    WRITE_HORIZON_SECONDS consecutive seconds starting at last_real_ts + 1.
+    """
+    data_lag = now_sec - last_real_ts
+
+    if data_lag > MAX_DATA_LAG_SECONDS:
+        logger.warning(
+            "真实数据延迟过大，跳过预测 key=%s data_lag=%ss max=%ss",
+            key, data_lag, MAX_DATA_LAG_SECONDS,
+        )
+        return None
+
+    previous_ts = LAST_REAL_TS_WRITTEN.get(key)
+    stale = previous_ts is not None and last_real_ts <= int(previous_ts)
+
+    if stale:
+        logger.info(
+            "真实数据时间戳未推进，跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s",
+            key, last_real_ts, previous_ts,
+        )
+        return None
+
+    first_ts = last_real_ts + 1
+    return [first_ts + offset for offset in range(WRITE_HORIZON_SECONDS)]
+
+
+# =============================================================================
+# 主流程
+# =============================================================================
+
+def run_once() -> None:
+    """Run one polling round over every entry in PREDICT_TARGETS.
+
+    Targets with too little data, no baseline yet, stale data or a failed
+    write are skipped for this round; all states are saved once at the end.
+    """
+    now_str = datetime.now().strftime("%H:%M:%S")
+    # now_str is captured once, so every log line of this round shows the same time.
+    for target in PREDICT_TARGETS:
+        query = target["query"]
+        pred_metric = target["pred_metric"]
+        anomaly_metric = target["anomaly_metric"]
+
+        ts, ys = fetch_history(query)
+        if len(ys) < MIN_POINTS:
+            logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
+            continue
+
+        ts_grid, ys_grid_raw = normalize_history(ts, ys)
+        if len(ys_grid_raw) < MIN_POINTS:
+            logger.info("[%s] %s 清洗后数据不足（%d 点），跳过", now_str, query, len(ys_grid_raw))
+            continue
+
+        ys_grid_model = preprocess_values(ys_grid_raw, target)
+        # Labels parsed from the query plus EXTRA_PREDICT_LABELS name the written series and the state key.
+        base_labels = parse_labels_from_query(query)
+        write_labels = merge_labels(base_labels, EXTRA_PREDICT_LABELS)
+        key = series_key(pred_metric, write_labels)
+
+        state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err = maybe_update_state(
+            key=key,
+            ts_grid=ts_grid,
+            ys_model=ys_grid_model,
+            target=target,
+        )
+
+        if state is None:
+            logger.info("[%s] %s 暂无可用健康模板，等待学习", now_str, query)
+            continue
+        # Forecast timestamps are anchored on the last real sample, not on wall-clock time.
+        now_sec = int(time.time())
+        last_real_ts = int(ts_grid[-1])
+        data_lag = now_sec - last_real_ts
+
+        ts_future = build_prediction_timestamps(
+            key=key,
+            last_real_ts=last_real_ts,
+            now_sec=now_sec,
+        )
+        if not ts_future:
+            continue
+
+        pred_values, lower_raw, upper_raw = predict_state_bundle(state, ts_future)
+
+        lower_values, upper_values = calc_final_bounds(
+            state=state,
+            pred=pred_values,
+            lower_raw=lower_raw,
+            upper_raw=upper_raw,
+            target=target,
+        )
+
+        ok = write_prediction_bundle(
+            pred_metric=pred_metric,
+            anomaly_metric=anomaly_metric,
+            labels=write_labels,
+            ts_future=ts_future,
+            pred_values=pred_values,
+            lower_values=lower_values,
+            upper_values=upper_values,
+            is_anomaly=is_anomaly,
+            outside_ratio=outside_ratio,
+            mean_abs_err=mean_abs_err,
+            mean_rel_err=mean_rel_err,
+            event_ts=last_real_ts,
+        )
+        if not ok:
+            logger.error("[%s] %s 写入预测数据失败", now_str, query)
+            continue
+        # Remember the sample we forecast from so the same data is not written twice.
+        LAST_REAL_TS_WRITTEN[key] = last_real_ts
+
+        future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S")
+        future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
+        last_real_str = datetime.fromtimestamp(last_real_ts).strftime("%H:%M:%S")
+        origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S")
+
+        logger.info(
+            "[%s] %-40s → %-35s strategy=%s status=%s anomaly=%s period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点，预测区间 %s ~ %s",
+            now_str,
+            query,
+            pred_metric,
+            state.strategy,
+            state.status,
+            is_anomaly,
+            state.period,
+            origin_str,
+            last_real_str,
+            data_lag,
+            len(ts_future),
+            future_start,
+            future_end,
+        )
+
+    save_state()
+
+
+def main() -> None:
+    """Load saved state, log the settings, then call run_once() forever."""
+    load_state()
+
+    settings = (
+        VM_URL, HISTORY_MINUTES, HORIZON_SECONDS, WRITE_HORIZON_SECONDS,
+        POLL_INTERVAL, STATE_FILE, EXTRA_PREDICT_LABELS["forecast"],
+    )
+    logger.info(
+        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s",
+        *settings,
+    )
+
+    # Runs until the process is stopped; sleeps POLL_INTERVAL between rounds.
+    while True:
+        run_once()
+        time.sleep(POLL_INTERVAL)
+
+
+if __name__ == "__main__":  # start the polling loop only when run as a script
+    main()
\ No newline at end of file

From 3609fbae4e2fbb33700de79ee1f7730ac81e366c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 21 May 2026 21:39:55 +0800
Subject: [PATCH 27/36] fix

---
 ai/pridict_v4.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/ai/pridict_v4.py b/ai/pridict_v4.py
index 774ad3a..c55f21a 100644
--- a/ai/pridict_v4.py
+++ b/ai/pridict_v4.py
@@ -120,11 +120,11 @@
         "anomaly_metric": "vibration_x_anomaly",
         "strategy": "phase_band",
         "abs_threshold": 0.12,
-        "rel_threshold": 0.35,
+        "rel_threshold": 0.40,
         "smooth_window": 5,
         "band_low_q": 10,
         "band_high_q": 90,
-        "band_pad_abs": 0.06,
+        "band_pad_abs": 0.08,
     },
     {
         "query": 'vibration_y{device_id="fanuc-cnc"}',
@@ -132,11 +132,11 @@
         "anomaly_metric": "vibration_y_anomaly",
         "strategy": "phase_band",
         "abs_threshold": 0.12,
-        "rel_threshold": 0.35,
+        "rel_threshold": 0.40,
         "smooth_window": 5,
         "band_low_q": 10,
         "band_high_q": 90,
-        "band_pad_abs": 0.06,
+        "band_pad_abs": 0.08,
     },
     {
         "query": 'vibration_z{device_id="fanuc-cnc"}',
@@ -144,11 +144,11 @@
         "anomaly_metric": "vibration_z_anomaly",
         "strategy": "phase_band",
         "abs_threshold": 0.12,
-        "rel_threshold": 0.35,
+        "rel_threshold": 0.40,
         "smooth_window": 5,
         "band_low_q": 10,
         "band_high_q": 90,
-        "band_pad_abs": 0.06,
+        "band_pad_abs": 0.08,
     },
 ]
 

From 1c4217b31d9304cddeb6be16949cc54eaa8d31ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 21 May 2026 21:48:41 +0800
Subject: [PATCH 28/36] fix

---
 ai/pridict_v4.py | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/ai/pridict_v4.py b/ai/pridict_v4.py
index c55f21a..8657944 100644
--- a/ai/pridict_v4.py
+++ b/ai/pridict_v4.py
@@ -119,37 +119,37 @@
         "pred_metric": "vibration_x_predicted",
         "anomaly_metric": "vibration_x_anomaly",
         "strategy": "phase_band",
-        "abs_threshold": 0.12,
-        "rel_threshold": 0.40,
+        "abs_threshold": 0.18,
+        "rel_threshold": 0.50,
         "smooth_window": 5,
-        "band_low_q": 10,
-        "band_high_q": 90,
-        "band_pad_abs": 0.08,
+        "band_low_q": 2,
+        "band_high_q": 98,
+        "band_pad_abs": 0.12,
     },
     {
         "query": 'vibration_y{device_id="fanuc-cnc"}',
         "pred_metric": "vibration_y_predicted",
         "anomaly_metric": "vibration_y_anomaly",
         "strategy": "phase_band",
-        "abs_threshold": 0.12,
-        "rel_threshold": 0.40,
+        "abs_threshold": 0.18,
+        "rel_threshold": 0.50,
         "smooth_window": 5,
-        "band_low_q": 10,
-        "band_high_q": 90,
-        "band_pad_abs": 0.08,
+        "band_low_q": 2,
+        "band_high_q": 98,
+        "band_pad_abs": 0.12,
     },
     {
         "query": 'vibration_z{device_id="fanuc-cnc"}',
         "pred_metric": "vibration_z_predicted",
         "anomaly_metric": "vibration_z_anomaly",
         "strategy": "phase_band",
-        "abs_threshold": 0.12,
-        "rel_threshold": 0.40,
+        "abs_threshold": 0.18,
+        "rel_threshold": 0.50,
         "smooth_window": 5,
-        "band_low_q": 10,
-        "band_high_q": 90,
-        "band_pad_abs": 0.08,
-    },
+        "band_low_q": 2,
+        "band_high_q": 98,
+        "band_pad_abs": 0.12,
+    }
 ]
 
 EXTRA_PREDICT_LABELS = {

From 4077e8f416d9d2f9d60df50d551fc817ddacd982 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Fri, 22 May 2026 09:40:02 +0800
Subject: [PATCH 29/36] feat(predict_v5): add predict v5

---
 ai/pridict_v5.py | 1794 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1794 insertions(+)
 create mode 100644 ai/pridict_v5.py

diff --git a/ai/pridict_v5.py b/ai/pridict_v5.py
new file mode 100644
index 0000000..6894a66
--- /dev/null
+++ b/ai/pridict_v5.py
@@ -0,0 +1,1794 @@
+# -*- coding: utf-8 -*-
+"""
+ProtoForge Predictor v12
+
+核心能力：
+1. feed_rate / spindle_speed / spindle_current 使用 phase-lock 点预测。
+2. vibration_x / vibration_y / vibration_z 使用 phase-band 预测带。
+3. vibration 类指标：
+   - predicted 使用平滑后的中位数模板，用于趋势参考。
+   - upper/lower 使用原始波动分位数模板 + padding，用于正常波动容忍带。
+   - 偶发越界不直接报警，只有持续越界 / 高比例越界 / 严重越界才报警。
+4. 预测起点锚定最后一个真实点 last_real_ts，避免时间错位。
+5. 异常期间冻结健康模板，不学习故障数据。
+6. 故障恢复后等待稳定，再恢复模板学习。
+7. 写入：
+   - xxx_predicted
+   - xxx_predicted_upper
+   - xxx_predicted_lower
+   - xxx_anomaly
+   - xxx_anomaly_outside_ratio
+   - xxx_anomaly_mean_abs_error
+   - xxx_anomaly_mean_rel_error
+   - xxx_anomaly_max_consecutive_outside
+   - xxx_anomaly_max_exceed_ratio
+"""
+
+import json
+import logging
+import math
+import os
+import re
+import time
+from dataclasses import asdict, dataclass
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Tuple
+
+import numpy as np
+import requests
+
+
+# =============================================================================
+# 日志配置
+# =============================================================================
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+)
+
+logger = logging.getLogger(__name__)
+
+
+# =============================================================================
+# 基础配置
+# =============================================================================
+
+VM_URL = "http://localhost:8428"  # base URL for the /api/v1/query_range calls in fetch_history()
+STATE_FILE = "/tmp/protoforge_predictor_state_v12.json"
+
+HISTORY_MINUTES = 30  # default look-back window of fetch_history()
+HORIZON_SECONDS = 120
+POLL_INTERVAL = 30
+
+WRITE_HORIZON_SECONDS = min(HORIZON_SECONDS, POLL_INTERVAL)  # evaluates to 30 with the values above
+
+QUERY_STEP = "1s"  # sent as the "step" parameter of every range query
+MIN_POINTS = 120
+
+MIN_PERIOD_SECONDS = 5  # lower clip bound for estimated periods
+MAX_PERIOD_SECONDS = 3600  # upper clip bound for estimated periods
+
+MIN_FULL_CYCLES_FOR_TEMPLATE = 3
+MAX_CYCLES_FOR_TEMPLATE = 8
+
+DETECT_WINDOW_SECONDS = 30
+RECOVERY_MIN_SECONDS = 60
+
+HEALTHY_EMA_ALPHA = 0.10
+RECOVERY_EMA_ALPHA = 0.25
+# NOTE(review): PREDICT_TARGETS entries carry lower-case keys with the same names; presumably these are the fallbacks — confirm.
+OUTSIDE_RATIO_THRESHOLD = 0.60
+MIN_CONSECUTIVE_OUTSIDE = 5
+SEVERE_EXCEED_RATIO = 1.8
+
+VALLEY_QUANTILE = 45
+
+MAX_DATA_LAG_SECONDS = 180
+
+PHASE_LOCK_MIN_WINDOW_SECONDS = 45
+PHASE_LOCK_MAX_WINDOW_SECONDS = 180
+PHASE_LOCK_PERIOD_SEARCH_RATIO = 0.12
+PHASE_LOCK_ORIGIN_SEARCH_RATIO = 0.35
+PHASE_LOCK_PERIOD_STEP = 1
+PHASE_LOCK_ORIGIN_STEP = 1
+
+
+# =============================================================================
+# 指标配置
+# =============================================================================
+
+PREDICT_TARGETS = [
+    {
+        "query": 'feed_rate{device_id="fanuc-cnc"}',
+        "pred_metric": "feed_rate_predicted",
+        "anomaly_metric": "feed_rate_anomaly",
+        "strategy": "phase_point",
+        "abs_threshold": 400.0,
+        "rel_threshold": 0.25,
+        "smooth_window": 1,
+        "outside_ratio_threshold": 0.60,
+        "min_consecutive_outside": 5,
+        "severe_exceed_ratio": 1.8,
+    },
+    {
+        "query": 'spindle_speed{device_id="fanuc-cnc"}',
+        "pred_metric": "spindle_speed_predicted",
+        "anomaly_metric": "spindle_speed_anomaly",
+        "strategy": "phase_point",
+        "abs_threshold": 500.0,
+        "rel_threshold": 0.25,
+        "smooth_window": 1,
+        "outside_ratio_threshold": 0.60,
+        "min_consecutive_outside": 5,
+        "severe_exceed_ratio": 1.8,
+    },
+    {
+        "query": 'spindle_current{device_id="fanuc-cnc"}',
+        "pred_metric": "spindle_current_predicted",
+        "anomaly_metric": "spindle_current_anomaly",
+        "strategy": "phase_point",
+        "abs_threshold": 5.0,
+        "rel_threshold": 0.25,
+        "smooth_window": 1,
+        "outside_ratio_threshold": 0.60,
+        "min_consecutive_outside": 5,
+        "severe_exceed_ratio": 1.8,
+    },
+    {
+        "query": 'vibration_x{device_id="fanuc-cnc"}',
+        "pred_metric": "vibration_x_predicted",
+        "anomaly_metric": "vibration_x_anomaly",
+        "strategy": "phase_band",
+
+        # vibration 类指标噪声、尖峰较多，不建议用很窄的阈值。
+        "abs_threshold": 0.18,
+        "rel_threshold": 0.55,
+
+        # 平滑只用于相位锁定和 predicted 中位趋势。
+        "smooth_window": 5,
+
+        # upper/lower 用原始值分位数，范围放宽，覆盖正常尖峰。
+        "band_low_q": 1,
+        "band_high_q": 99,
+        "band_pad_abs": 0.15,
+
+        # 偶发越界容忍。
+        "outside_ratio_threshold": 0.70,
+        "min_consecutive_outside": 5,
+        "severe_exceed_ratio": 2.0,
+    },
+    {
+        "query": 'vibration_y{device_id="fanuc-cnc"}',
+        "pred_metric": "vibration_y_predicted",
+        "anomaly_metric": "vibration_y_anomaly",
+        "strategy": "phase_band",
+        "abs_threshold": 0.18,
+        "rel_threshold": 0.55,
+        "smooth_window": 5,
+        "band_low_q": 1,
+        "band_high_q": 99,
+        "band_pad_abs": 0.15,
+        "outside_ratio_threshold": 0.70,
+        "min_consecutive_outside": 5,
+        "severe_exceed_ratio": 2.0,
+    },
+    {
+        "query": 'vibration_z{device_id="fanuc-cnc"}',
+        "pred_metric": "vibration_z_predicted",
+        "anomaly_metric": "vibration_z_anomaly",
+        "strategy": "phase_band",
+        "abs_threshold": 0.18,
+        "rel_threshold": 0.55,
+        "smooth_window": 5,
+        "band_low_q": 1,
+        "band_high_q": 99,
+        "band_pad_abs": 0.15,
+        "outside_ratio_threshold": 0.70,
+        "min_consecutive_outside": 5,
+        "severe_exceed_ratio": 2.0,
+    },
+]
+
+EXTRA_PREDICT_LABELS = {
+    "forecast": "phase_band_health_v12",
+    "source": "protoforge",
+}
+
+BASELINE_STATUS_HEALTHY = "healthy"
+BASELINE_STATUS_ANOMALY = "anomaly"
+BASELINE_STATUS_RECOVERING = "recovering"
+
+
+# =============================================================================
+# 状态结构
+# =============================================================================
+
+@dataclass
+class BaselineState:  # NOTE(review): field notes follow the v4 predictor earlier in this patch series — confirm for v5
+    period: int  # presumably the cycle length in seconds
+    phase_origin_ts: int  # presumably the epoch second at which phase 0 of the templates starts
+    template: List[float]
+    lower_template: List[float]
+    upper_template: List[float]
+    strategy: str  # presumably the target's "strategy" value ("phase_point" or "phase_band")
+    status: str  # presumably one of the BASELINE_STATUS_* strings
+    clean_seconds: int
+    last_update_ts: int
+    last_seen_ts: int
+    y_min: float
+    y_max: float
+
+
+BASELINE_STATES: Dict[str, BaselineState] = {}
+LAST_REAL_TS_WRITTEN: Dict[str, int] = {}
+
+
+# =============================================================================
+# VictoriaMetrics 读取
+# =============================================================================
+
+def fetch_history(query: str, minutes: int = HISTORY_MINUTES) -> Tuple[List[float], List[float]]:
+    """Fetch the last `minutes` of samples for query from VictoriaMetrics.
+
+    Calls /api/v1/query_range at QUERY_STEP resolution and reads only the
+    first series of the result. Returns (timestamps, values) as parallel
+    lists with unparsable or non-finite samples dropped, or two empty lists
+    when the request fails, the body cannot be parsed, or nothing matched.
+    """
+    end_dt = datetime.now()
+    start_dt = end_dt - timedelta(minutes=minutes)
+    params = {
+        "query": query,
+        "start": start_dt.timestamp(),
+        "end": end_dt.timestamp(),
+        "step": QUERY_STEP,
+    }
+
+    try:
+        resp = requests.get(f"{VM_URL}/api/v1/query_range", params=params, timeout=10)
+        resp.raise_for_status()
+    except requests.RequestException as e:
+        logger.error("拉取数据失败 query=%s: %s", query, e)
+        return [], []
+
+    try:
+        result = resp.json().get("data", {}).get("result", [])
+    except Exception as e:
+        logger.error("解析 VM 返回失败 query=%s: %s", query, e)
+        return [], []
+
+    if not result:
+        return [], []
+
+    ts = []
+    ys = []
+
+    for sample in result[0].get("values", []):
+        if len(sample) < 2:
+            continue
+
+        try:
+            stamp = float(sample[0])
+            reading = float(sample[1])
+        except Exception:
+            continue
+
+        if math.isfinite(stamp) and math.isfinite(reading):
+            ts.append(stamp)
+            ys.append(reading)
+
+    return ts, ys
+
+
+def normalize_history(ts: List[float], ys: List[float]) -> Tuple[np.ndarray, np.ndarray]:
+    """Resample raw samples onto a gap-free one-second grid.
+
+    Timestamps are rounded to whole seconds (the last sample wins when several
+    share a second), sorted, and missing seconds are linearly interpolated.
+    Empty or mismatched input yields two empty arrays.
+    """
+    if not ts or not ys or len(ts) != len(ys):
+        return np.array([]), np.array([])
+
+    by_second = {}
+
+    for raw_t, raw_y in zip(ts, ys):
+        try:
+            second = int(round(float(raw_t)))
+            reading = float(raw_y)
+        except Exception:
+            continue
+
+        if math.isfinite(second) and math.isfinite(reading):
+            by_second[second] = reading
+
+    if not by_second:
+        return np.array([]), np.array([])
+
+    seconds = sorted(by_second)
+    ts_clean = np.array(seconds, dtype=float)
+    ys_clean = np.array([by_second[s] for s in seconds], dtype=float)
+
+    if len(ts_clean) < 2:
+        return ts_clean, ys_clean
+
+    first, last = int(ts_clean[0]), int(ts_clean[-1])
+
+    if last <= first:
+        return ts_clean, ys_clean
+
+    ts_grid = np.arange(first, last + 1, 1, dtype=float)
+    return ts_grid, np.interp(ts_grid, ts_clean, ys_clean)
+
+
+# =============================================================================
+# 平滑与预处理
+# =============================================================================
+
+def rolling_median(arr: np.ndarray, window: int) -> np.ndarray:
+    """Centered rolling median with edge padding; output length equals input.
+
+    The input is returned as floats when window <= 1 or window > len(arr).
+    An even window is widened by one so that it has a centre sample.
+    """
+    values = arr.astype(float)
+
+    if window <= 1 or len(arr) < window:
+        return values
+
+    width = window + 1 if window % 2 == 0 else window
+    padded = np.pad(values, (width // 2, width // 2), mode="edge")
+    medians = [float(np.median(padded[i:i + width])) for i in range(len(arr))]
+
+    return np.array(medians, dtype=float)
+
+
+def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
+    """Centered moving average with edge padding; output length equals input.
+
+    Input comes back as floats if window <= 1 or > len(arr); even windows grow by 1.
+    """
+    values = arr.astype(float)
+    if window <= 1 or len(arr) < window:
+        return values
+
+    width = window + 1 if window % 2 == 0 else window
+    padded = np.pad(values, (width // 2, width // 2), mode="edge")
+    return np.convolve(padded, np.ones(width, dtype=float) / width, mode="valid")
+
+
+def preprocess_values(ys_grid: np.ndarray, target: Dict) -> np.ndarray:
+    """Smooth the gridded values according to the target's configuration.
+
+    Reads ``target["strategy"]`` (default ``"phase_point"``) and
+    ``target["smooth_window"]`` (default ``1``). The ``"phase_band"``
+    strategy always goes through ``rolling_median``; any other strategy uses
+    ``moving_average`` when the window is larger than one and otherwise just
+    converts the values to float.
+    """
+    window = int(target.get("smooth_window", 1))
+
+    if target.get("strategy", "phase_point") == "phase_band":
+        return rolling_median(ys_grid, window)
+
+    return moving_average(ys_grid, window) if window > 1 else ys_grid.astype(float)
+
+
+# =============================================================================
+# 周期估计
+# =============================================================================
+
+def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
+    """Estimate the dominant period (in samples) from the FFT magnitude peak.
+
+    Returns the fallback of 60.0 when there are fewer than 8 samples, when
+    the mean-removed signal is (numerically) all zero, or when no positive
+    peak frequency can be found. Otherwise returns the reciprocal of the
+    strongest non-DC frequency, clipped to
+    ``[MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS]``.
+    """
+    fallback = 60.0
+    n = len(ys_arr)
+
+    if n < 8:
+        return fallback
+
+    centered = ys_arr - np.mean(ys_arr)
+
+    if np.allclose(centered, 0):
+        return fallback
+
+    freqs = np.fft.rfftfreq(n, d=1.0)
+
+    if len(freqs) <= 1:
+        return fallback
+
+    # Bin 0 is the DC component, so it is excluded from the peak search.
+    magnitude = np.abs(np.fft.rfft(centered)[1:])
+
+    if len(magnitude) == 0 or np.max(magnitude) <= 0:
+        return fallback
+
+    peak_freq = float(freqs[int(np.argmax(magnitude)) + 1])
+
+    if peak_freq <= 0:
+        return fallback
+
+    return float(np.clip(1.0 / peak_freq, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+
+def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float:
+    """Refine a period estimate using the autocorrelation peak near it.
+
+    The lag with the largest (unnormalised) autocorrelation is searched
+    between roughly 0.7x and 1.3x of ``init_period``, bounded below by
+    ``MIN_PERIOD_SECONDS`` and above by half the series length.
+
+    Returns:
+        The best lag, or ``init_period`` itself when there are fewer than 20
+        samples, the mean-removed signal is all zero, or the search range is
+        empty. The result is always clipped to
+        ``[MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS]``.
+    """
+    unchanged = float(np.clip(init_period, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+    n = len(ys_arr)
+
+    if n < 20:
+        return unchanged
+
+    centered = ys_arr - np.mean(ys_arr)
+
+    if np.allclose(centered, 0):
+        return unchanged
+
+    # Keep the non-negative lags only (index 0 is lag 0).
+    acf = np.correlate(centered, centered, mode="full")[n - 1:]
+
+    guess = int(round(init_period))
+    lo = max(int(MIN_PERIOD_SECONDS), int(max(2, guess * 0.7)))
+    hi = min(n // 2, int(max(lo + 1, guess * 1.3)))
+
+    if hi <= lo:
+        return unchanged
+
+    window = acf[lo:hi + 1]
+
+    if len(window) == 0:
+        return unchanged
+
+    best_lag = lo + int(np.argmax(window))
+
+    return float(np.clip(best_lag, MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS))
+
+
+def estimate_period_rough(ys_arr: np.ndarray) -> int:
+    """Return a rough integer period: FFT estimate refined by autocorrelation.
+
+    The result is rounded and clamped to
+    ``[int(MIN_PERIOD_SECONDS), int(MAX_PERIOD_SECONDS)]``.
+    """
+    refined = refine_period_by_autocorr(ys_arr, estimate_period_by_fft(ys_arr))
+    lowest, highest = int(MIN_PERIOD_SECONDS), int(MAX_PERIOD_SECONDS)
+
+    return int(max(lowest, min(highest, int(round(refined)))))
+
+
+# =============================================================================
+# 谷底检测
+# =============================================================================
+
+def find_valley_indices(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    expected_period: int,
+) -> List[int]:
+    """Locate one valley (local minimum) per cycle in the gridded series.
+
+    Steps:
+      1. Smooth the values with a moving average of about 8% of the period
+         (at least 3, at most 21 samples).
+      2. Collect local minima at or below the ``VALLEY_QUANTILE`` percentile;
+         if that yields fewer than ``MIN_FULL_CYCLES_FOR_TEMPLATE``
+         candidates, fall back to all local minima.
+      3. Keep candidates at least ~0.55 periods apart (in index distance);
+         a closer candidate replaces the previous pick only if it is lower.
+      4. Repeat the spacing check on timestamps instead of indices.
+
+    Returns:
+        Valley indices into the grid in ascending order, or an empty list
+        when the series is shorter than ``max(10, 2 * expected_period)`` or
+        has no local minimum.
+    """
+    n = len(ys_grid)
+
+    if n < max(10, expected_period * 2):
+        return []
+
+    period = max(3, int(expected_period))
+    smooth_window = min(max(3, int(round(period * 0.08))), 21)
+
+    ys_smooth = moving_average(ys_grid, smooth_window)
+    threshold = float(np.percentile(ys_smooth, VALLEY_QUANTILE))
+
+    # Interior point i is a local minimum when it is <= its left neighbour and
+    # strictly < its right neighbour (so a flat run picks its last sample).
+    inner = ys_smooth[1:-1]
+    is_local_min = (inner <= ys_smooth[:-2]) & (inner < ys_smooth[2:])
+
+    candidates = (np.flatnonzero(is_local_min & (inner <= threshold)) + 1).tolist()
+
+    if len(candidates) < MIN_FULL_CYCLES_FOR_TEMPLATE:
+        candidates = (np.flatnonzero(is_local_min) + 1).tolist()
+
+    if not candidates:
+        return []
+
+    min_distance = max(2, int(round(period * 0.55)))
+    selected = []
+
+    for idx in candidates:
+        if not selected or idx - selected[-1] >= min_distance:
+            selected.append(idx)
+        elif ys_smooth[idx] < ys_smooth[selected[-1]]:
+            selected[-1] = idx
+
+    if len(selected) < 2:
+        return selected
+
+    too_close = int(period * 0.55)
+    cleaned = [selected[0]]
+
+    for idx in selected[1:]:
+        gap = int(ts_grid[idx] - ts_grid[cleaned[-1]])
+
+        if gap >= too_close:
+            # Both in-range gaps and over-long gaps keep the valley.
+            cleaned.append(idx)
+        elif ys_smooth[idx] < ys_smooth[cleaned[-1]]:
+            cleaned[-1] = idx
+
+    return cleaned
+
+
+def detect_period_and_valleys(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+) -> Tuple[int, List[int]]:
+    """Estimate the cycle period and the valley indices of a gridded series.
+
+    Starts from ``estimate_period_rough``. When at least three valleys are
+    found, the period becomes the median valley-to-valley spacing, counting
+    only spacings within 0.55x-1.60x of the rough estimate. The period is
+    finally clamped to ``[MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS]``.
+
+    Returns:
+        ``(period, valleys)`` where ``valleys`` are indices into the grid.
+    """
+    rough = estimate_period_rough(ys_grid)
+    valleys = find_valley_indices(ts_grid, ys_grid, rough)
+
+    period = rough
+
+    if len(valleys) >= 3:
+        gaps = np.diff(ts_grid[valleys])
+        plausible = gaps[(gaps >= rough * 0.55) & (gaps <= rough * 1.60)]
+
+        if len(plausible) > 0:
+            period = int(round(float(np.median(plausible))))
+
+    period = max(int(MIN_PERIOD_SECONDS), min(int(MAX_PERIOD_SECONDS), period))
+
+    return int(period), valleys
+
+
+# =============================================================================
+# 模板构建
+# =============================================================================
+
+def build_templates_from_valleys(
+    ts_grid: np.ndarray,
+    ys_mid_grid: np.ndarray,
+    ys_band_grid: np.ndarray,
+    period: int,
+    valleys: List[int],
+    target: Dict,
+) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
+    """Build per-phase templates by overlaying valley-to-valley cycles.
+
+    Each pair of consecutive valleys whose spacing is within 0.55x-1.60x of
+    ``period`` is one cycle. Only the last ``MAX_CYCLES_FOR_TEMPLATE`` cycles
+    are used; each is stretched to exactly ``period`` samples by linear
+    interpolation.
+
+    Args:
+        ts_grid: Grid timestamps.
+        ys_mid_grid: Series used for the centre template.
+        ys_band_grid: Series used for the lower/upper templates.
+        period: Template length in samples.
+        valleys: Valley indices into the grid.
+        target: Reads ``strategy`` (default ``"phase_point"``),
+            ``band_low_q`` (default 10) and ``band_high_q`` (default 90).
+
+    Returns:
+        ``(mid, lower, upper)`` float arrays of length ``period``, or ``None``
+        when fewer than ``MIN_FULL_CYCLES_FOR_TEMPLATE`` usable cycles exist.
+        For ``"phase_band"`` the centre is the per-phase median and the
+        bounds are per-phase percentiles of the band series; for any other
+        strategy the centre is a recency-weighted mean and both bounds are
+        copies of it.
+    """
+    if period <= 1 or len(valleys) < MIN_FULL_CYCLES_FOR_TEMPLATE + 1:
+        return None
+
+    strategy = target.get("strategy", "phase_point")
+    low_q = float(target.get("band_low_q", 10))
+    high_q = float(target.get("band_high_q", 90))
+
+    spans = (
+        (start, stop, float(ts_grid[stop] - ts_grid[start]))
+        for start, stop in zip(valleys, valleys[1:])
+    )
+    pairs = [span for span in spans if period * 0.55 <= span[2] <= period * 1.60]
+
+    if len(pairs) < MIN_FULL_CYCLES_FOR_TEMPLATE:
+        return None
+
+    pairs = pairs[-MAX_CYCLES_FOR_TEMPLATE:]
+    total = len(pairs)
+
+    phase_grid = np.arange(period, dtype=float)
+    mid_rows, band_rows, weights = [], [], []
+
+    for rank, (start, stop, cycle_len) in enumerate(pairs, start=1):
+        window = slice(start, stop + 1)
+        seg_mid = ys_mid_grid[window]
+        seg_band = ys_band_grid[window]
+
+        if len(seg_mid) < 3 or len(seg_band) < 3:
+            continue
+
+        seg_ts = ts_grid[window]
+        # Rescale this cycle's time axis so that it spans [0, period].
+        stretched = (seg_ts - seg_ts[0]) / cycle_len * period
+
+        mid_rows.append(np.interp(phase_grid, stretched, seg_mid).astype(float))
+        band_rows.append(np.interp(phase_grid, stretched, seg_band).astype(float))
+        # Later cycles weigh more: weights rise from just above 0.5 to 1.0.
+        weights.append(0.5 + 0.5 * (rank / total))
+
+    if len(mid_rows) < MIN_FULL_CYCLES_FOR_TEMPLATE:
+        return None
+
+    mid_arr = np.vstack(mid_rows)
+
+    if strategy == "phase_band":
+        band_arr = np.vstack(band_rows)
+        mid_template = np.percentile(mid_arr, 50, axis=0)
+
+        # Bounds come from the band series' distribution, not from the
+        # series used for the centre line.
+        lower_template = np.percentile(band_arr, low_q, axis=0)
+        upper_template = np.percentile(band_arr, high_q, axis=0)
+    else:
+        mid_template = np.average(mid_arr, axis=0, weights=np.array(weights, dtype=float))
+        lower_template = mid_template.copy()
+        upper_template = mid_template.copy()
+
+    return (
+        mid_template.astype(float),
+        lower_template.astype(float),
+        upper_template.astype(float),
+    )
+
+
+def build_current_baseline(
+    ts_grid: np.ndarray,
+    ys_mid_grid: np.ndarray,
+    ys_band_grid: np.ndarray,
+    target: Dict,
+    tail_seconds: Optional[int] = None,
+) -> Optional[Tuple[int, int, np.ndarray, np.ndarray, np.ndarray]]:
+    """Detect the period and build templates from (the tail of) the series.
+
+    Args:
+        ts_grid: Grid timestamps.
+        ys_mid_grid: Series used for period detection and the centre template.
+        ys_band_grid: Series used for the lower/upper templates.
+        target: Per-target configuration forwarded to the template builder.
+        tail_seconds: When positive, only samples whose timestamp is within
+            this distance of the last timestamp are used.
+
+    Returns:
+        ``(period, phase_origin_ts, template, lower_template, upper_template)``
+        where ``phase_origin_ts`` is the rounded timestamp of the last valley,
+        or ``None`` when fewer than ``MIN_POINTS`` samples are available or no
+        template could be built.
+    """
+    if len(ys_mid_grid) < MIN_POINTS or len(ys_band_grid) < MIN_POINTS:
+        return None
+
+    ts_use, ys_mid_use, ys_band_use = ts_grid, ys_mid_grid, ys_band_grid
+
+    if tail_seconds is not None and tail_seconds > 0:
+        recent = ts_grid >= ts_grid[-1] - int(tail_seconds)
+        ts_use = ts_grid[recent]
+        ys_mid_use = ys_mid_grid[recent]
+        ys_band_use = ys_band_grid[recent]
+
+    if len(ys_mid_use) < MIN_POINTS or len(ys_band_use) < MIN_POINTS:
+        return None
+
+    period, valleys = detect_period_and_valleys(ts_use, ys_mid_use)
+
+    templates = build_templates_from_valleys(
+        ts_grid=ts_use,
+        ys_mid_grid=ys_mid_use,
+        ys_band_grid=ys_band_use,
+        period=period,
+        valleys=valleys,
+        target=target,
+    )
+
+    if templates is None or len(valleys) == 0:
+        return None
+
+    mid_template, lower_template, upper_template = templates
+    last_valley_ts = int(round(float(ts_use[valleys[-1]])))
+
+    return int(period), last_valley_ts, mid_template, lower_template, upper_template
+
+
+# =============================================================================
+# 模板预测
+# =============================================================================
+
+def circular_template_value(template: np.ndarray, phase: float) -> float:
+    """Linearly interpolate ``template`` at a (possibly fractional) phase.
+
+    The template is treated as circular: the phase is wrapped modulo the
+    template length and the last sample interpolates towards the first.
+    Returns 0.0 for an empty template.
+    """
+    size = len(template)
+
+    if size == 0:
+        return 0.0
+
+    wrapped = float(phase) % size
+    base = math.floor(wrapped)
+    # The extra modulo guards the case where float wrapping yields ``size``.
+    lo = int(base) % size
+    hi = (lo + 1) % size
+    weight = wrapped - base
+
+    return float((1.0 - weight) * template[lo] + weight * template[hi])
+
+
+def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
+    """Resample a circular template to ``new_period`` samples.
+
+    Returns a float copy when the length already matches. When either the
+    old or the new length is <= 1 the result is filled with the old
+    template's mean. Otherwise both templates are placed on a normalised
+    [0, 1) phase axis and the new samples are linearly interpolated, with the
+    old template repeated on both sides so the interpolation wraps around.
+    """
+    old_period = len(old_template)
+
+    if old_period == new_period:
+        return old_template.astype(float)
+
+    if old_period <= 1 or new_period <= 1:
+        return np.full(new_period, float(np.mean(old_template)), dtype=float)
+
+    old_phase = np.linspace(0.0, 1.0, old_period, endpoint=False)
+    new_phase = np.linspace(0.0, 1.0, new_period, endpoint=False)
+
+    # Three copies of the cycle: the one before, the cycle itself, the one after.
+    xp = np.concatenate([old_phase + offset for offset in (-1.0, 0.0, 1.0)])
+    fp = np.concatenate([old_template] * 3)
+
+    return np.interp(new_phase, xp, fp).astype(float)
+
+
+def predict_template_values(
+    template: np.ndarray,
+    period: int,
+    phase_origin_ts: int,
+    ts_list: List[int],
+) -> np.ndarray:
+    """Evaluate a periodic template at the given timestamps.
+
+    The phase of a timestamp is ``(int(ts) - int(phase_origin_ts)) % period``.
+    Because that phase is always a whole number, it lands exactly on a
+    template sample, so the values are read with one vectorised index lookup
+    instead of a per-timestamp interpolation call. This function sits in the
+    innermost loop of the phase-lock search.
+
+    Args:
+        template: Template samples; resampled to ``period`` samples first if
+            its length differs.
+        period: Cycle length in samples.
+        phase_origin_ts: Timestamp that maps to phase 0.
+        ts_list: Timestamps to predict for (truncated to integers).
+
+    Returns:
+        Float array with one value per timestamp; all zeros when
+        ``period <= 1``.
+    """
+    if period <= 1:
+        return np.zeros(len(ts_list), dtype=float)
+
+    if len(template) != period:
+        template = resample_template(template, period)
+
+    timestamps = np.asarray(ts_list).astype(np.int64)
+    # NumPy's % follows the sign of the divisor, like Python's, so phases
+    # are always in [0, period) even for timestamps before the origin.
+    phases = (timestamps - int(phase_origin_ts)) % int(period)
+
+    return np.asarray(template, dtype=float)[phases]
+
+
+def predict_state_bundle(
+    state: BaselineState,
+    ts_list: List[int],
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """Predict the centre, lower and upper template values for timestamps.
+
+    All three templates of ``state`` are evaluated with the state's period
+    and phase origin.
+
+    Returns:
+        ``(mid, lower, upper)`` arrays, one value per timestamp each.
+    """
+    period = int(state.period)
+    origin = int(state.phase_origin_ts)
+
+    mid, lower, upper = (
+        predict_template_values(
+            template=np.array(samples, dtype=float),
+            period=period,
+            phase_origin_ts=origin,
+            ts_list=ts_list,
+        )
+        for samples in (state.template, state.lower_template, state.upper_template)
+    )
+
+    return mid, lower, upper
+
+
+def normalize_origin_near(origin: int, period: int, near_ts: int) -> int:
+    """Shift ``origin`` by whole periods so it is the latest one <= ``near_ts``.
+
+    The result is the unique value congruent to ``origin`` modulo ``period``
+    in the half-open interval ``(near_ts - period, near_ts]``. It is computed
+    in closed form, so the cost no longer depends on how far ``origin`` is
+    from ``near_ts`` (stepping one period at a time takes hundreds of
+    millions of iterations for a stale origin against an epoch timestamp).
+
+    Args:
+        origin: Any timestamp that corresponds to phase 0.
+        period: Cycle length; when ``<= 1`` the origin is returned untouched.
+        near_ts: Timestamp the origin should be moved next to.
+
+    Returns:
+        The shifted origin as an int (or ``origin`` unchanged when
+        ``period <= 1``).
+    """
+    if period <= 1:
+        return origin
+
+    origin = int(origin)
+    period = int(period)
+    near_ts = int(near_ts)
+
+    # Python's % is non-negative for a positive divisor, so this is the
+    # distance from the aligned origin up to near_ts, in [0, period).
+    return near_ts - (near_ts - origin) % period
+
+
+def merge_template(
+    old_template: np.ndarray,
+    new_template: np.ndarray,
+    alpha: float,
+) -> np.ndarray:
+    """Blend two templates as ``(1 - alpha) * old + alpha * new``.
+
+    ``alpha`` is clipped to [0, 1]. When the lengths differ, the old template
+    is first resampled to the new template's length, so the result always has
+    the new template's length.
+    """
+    weight = float(np.clip(alpha, 0.0, 1.0))
+    target_len = len(new_template)
+
+    if len(old_template) == target_len:
+        base = old_template
+    else:
+        base = resample_template(old_template, target_len)
+
+    return ((1.0 - weight) * base + weight * new_template).astype(float)
+
+
+# =============================================================================
+# Phase Lock
+# =============================================================================
+
+def phase_lock_recent(
+    state: BaselineState,
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+) -> Tuple[int, int, np.ndarray, float]:
+    """Grid-search the period and phase origin that best fit recent data.
+
+    The state's centre template is resampled to each candidate period within
+    ``+/- PHASE_LOCK_PERIOD_SEARCH_RATIO`` of the current one and shifted
+    around the current origin; each candidate is scored by its mean absolute
+    error on the recent window plus a penalty of 0.5 per unit of period
+    change. The starting candidate is the state's own period and origin.
+
+    Returns:
+        ``(best_period, best_origin, best_pred, best_mae)`` where
+        ``best_pred`` is the prediction over the recent window. When the
+        state's period or template length is <= 1 no search is done and the
+        state's own values are returned with the error over the last
+        ``DETECT_WINDOW_SECONDS`` samples.
+    """
+    base_period = int(state.period)
+    base_origin = int(state.phase_origin_ts)
+    base_template = np.array(state.template, dtype=float)
+
+    if base_period <= 1 or len(base_template) <= 1:
+        ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()
+        pred = predict_template_values(base_template, base_period, base_origin, ts_recent)
+        actual = ys_model[-len(ts_recent):].astype(float)
+        mae = float(np.mean(np.abs(actual - pred))) if len(actual) else 0.0
+        return base_period, base_origin, pred, mae
+
+    window_seconds = max(
+        PHASE_LOCK_MIN_WINDOW_SECONDS,
+        min(PHASE_LOCK_MAX_WINDOW_SECONDS, int(base_period * 2)),
+    )
+
+    in_window = ts_grid >= ts_grid[-1] - window_seconds
+    ts_recent_arr = ts_grid[in_window].astype(int)
+    actual = ys_model[in_window].astype(float)
+
+    # Too few samples in the time window: use the last N samples instead.
+    if len(ts_recent_arr) < max(10, DETECT_WINDOW_SECONDS):
+        ts_recent_arr = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int)
+        actual = ys_model[-DETECT_WINDOW_SECONDS:].astype(float)
+
+    ts_recent = ts_recent_arr.tolist()
+    last_ts = int(ts_recent[-1])
+
+    p_min = max(
+        int(MIN_PERIOD_SECONDS),
+        int(round(base_period * (1.0 - PHASE_LOCK_PERIOD_SEARCH_RATIO))),
+    )
+    p_max = min(
+        int(MAX_PERIOD_SECONDS),
+        int(round(base_period * (1.0 + PHASE_LOCK_PERIOD_SEARCH_RATIO))),
+    )
+
+    def _mean_abs_error(prediction: np.ndarray) -> float:
+        return float(np.mean(np.abs(actual - prediction)))
+
+    best_period = base_period
+    best_origin = normalize_origin_near(base_origin, base_period, last_ts)
+    best_pred = predict_template_values(
+        template=resample_template(base_template, best_period),
+        period=best_period,
+        phase_origin_ts=best_origin,
+        ts_list=ts_recent,
+    )
+    best_mae = _mean_abs_error(best_pred)
+    # The starting candidate keeps the current period, so its penalty is zero.
+    best_score = best_mae + abs(best_period - base_period) * 0.5
+
+    for period in range(p_min, p_max + 1, PHASE_LOCK_PERIOD_STEP):
+        template = resample_template(base_template, period)
+        center_origin = normalize_origin_near(base_origin, period, last_ts)
+        reach = max(2, int(round(period * PHASE_LOCK_ORIGIN_SEARCH_RATIO)))
+        penalty = abs(period - base_period) * 0.5
+
+        for shift in range(-reach, reach + 1, PHASE_LOCK_ORIGIN_STEP):
+            origin = center_origin + shift
+
+            pred = predict_template_values(
+                template=template,
+                period=period,
+                phase_origin_ts=origin,
+                ts_list=ts_recent,
+            )
+
+            mae = _mean_abs_error(pred)
+            score = mae + penalty
+
+            if score < best_score:
+                best_period, best_origin = period, origin
+                best_pred, best_mae, best_score = pred, mae, score
+
+    best_origin = normalize_origin_near(best_origin, best_period, last_ts)
+
+    return int(best_period), int(best_origin), best_pred, float(best_mae)
+
+
+# =============================================================================
+# 异常检测
+# =============================================================================
+
+def max_consecutive_true(flags: np.ndarray) -> int:
+    """Return the length of the longest run of truthy entries in ``flags``."""
+    longest = 0
+    run = 0
+
+    for flag in flags:
+        run = run + 1 if bool(flag) else 0
+
+        if run > longest:
+            longest = run
+
+    return int(longest)
+
+
+def calc_point_bounds(
+    pred: np.ndarray,
+    abs_threshold: float,
+    rel_threshold: float,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """Return symmetric ``(lower, upper)`` bounds around a prediction.
+
+    The half-width at each point is the larger of ``abs_threshold`` and
+    ``|pred| * rel_threshold``.
+    """
+    margin = np.maximum(abs_threshold, np.abs(pred) * rel_threshold)
+    lower = pred - margin
+    upper = pred + margin
+
+    return lower, upper
+
+
+def calc_final_bounds(
+    state: BaselineState,
+    pred: np.ndarray,
+    lower_raw: np.ndarray,
+    upper_raw: np.ndarray,
+    target: Dict,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """Compute the final alerting bounds for a prediction.
+
+    Reads ``strategy`` (default ``"phase_point"``), ``abs_threshold``
+    (default 1.0), ``rel_threshold`` (default 0.25) and, for the band
+    strategy, ``band_pad_abs`` (default: ``abs_threshold``) from ``target``.
+    ``state`` is accepted but not used by this function.
+
+    Returns:
+        ``(lower, upper)``. For ``"phase_band"`` the raw band is widened on
+        both sides by ``max(band_pad_abs, |pred| * rel_threshold * 0.25)``;
+        for any other strategy the bounds come from ``calc_point_bounds``.
+    """
+    strategy = target.get("strategy", "phase_point")
+    abs_threshold = float(target.get("abs_threshold", 1.0))
+    rel_threshold = float(target.get("rel_threshold", 0.25))
+
+    if strategy != "phase_band":
+        return calc_point_bounds(pred, abs_threshold, rel_threshold)
+
+    pad_abs = float(target.get("band_pad_abs", abs_threshold))
+
+    # For vibration-like metrics the bounds act as a tolerance band around
+    # normal fluctuation rather than as hard limits.
+    padding = np.maximum(pad_abs, np.abs(pred) * rel_threshold * 0.25)
+
+    return lower_raw - padding, upper_raw + padding
+
+
+def detect_anomaly(
+    state: BaselineState,
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    ys_actual: np.ndarray,
+    target: Dict,
+) -> Tuple[bool, float, float, float, int, int, int, float]:
+    """Compare recent data against the phase-locked baseline.
+
+    The period and origin are first re-fitted with ``phase_lock_recent``;
+    the state's templates are then evaluated over the same recent window and
+    turned into bounds with ``calc_final_bounds``. For the ``"phase_band"``
+    strategy the values compared are ``ys_actual``, otherwise ``ys_model``.
+
+    Returns:
+        ``(is_anomaly, outside_ratio, mean_abs_err, mean_rel_err,
+        best_period, best_origin, max_outside_seconds, max_exceed_ratio)``.
+        ``max_outside_seconds`` is the longest run of consecutive samples
+        outside the bounds and ``max_exceed_ratio`` the largest excursion
+        relative to the band width. The thresholds can be overridden per
+        target via ``outside_ratio_threshold``, ``min_consecutive_outside``
+        and ``severe_exceed_ratio``.
+    """
+    best_period, best_origin, pred_recent, _ = phase_lock_recent(
+        state=state,
+        ts_grid=ts_grid,
+        ys_model=ys_model,
+    )
+
+    recent_len = len(pred_recent)
+
+    if recent_len <= 0:
+        return False, 0.0, 0.0, 0.0, best_period, best_origin, 0, 0.0
+
+    uses_band = target.get("strategy", "phase_point") == "phase_band"
+    observed = ys_actual if uses_band else ys_model
+    actual = observed[-recent_len:].astype(float)
+
+    # Same templates and bookkeeping as ``state``, but with the re-fitted
+    # period and origin; ``state`` itself is left untouched here.
+    tmp_state = BaselineState(
+        period=best_period,
+        phase_origin_ts=best_origin,
+        template=state.template,
+        lower_template=state.lower_template,
+        upper_template=state.upper_template,
+        strategy=state.strategy,
+        status=state.status,
+        clean_seconds=state.clean_seconds,
+        last_update_ts=state.last_update_ts,
+        last_seen_ts=state.last_seen_ts,
+        y_min=state.y_min,
+        y_max=state.y_max,
+    )
+
+    recent_ts = ts_grid[-recent_len:].astype(int).tolist()
+    pred, lower_raw, upper_raw = predict_state_bundle(tmp_state, recent_ts)
+
+    lower, upper = calc_final_bounds(
+        state=tmp_state,
+        pred=pred,
+        lower_raw=lower_raw,
+        upper_raw=upper_raw,
+        target=target,
+    )
+
+    # Distance beyond the nearer bound; zero while inside the band.
+    exceed = np.maximum(np.maximum(actual - upper, lower - actual), 0.0)
+    outside = exceed > 0
+    exceed_ratio = exceed / np.maximum(upper - lower, 1e-6)
+    abs_err = np.abs(actual - pred)
+
+    outside_ratio = float(np.mean(outside))
+    mean_abs_err = float(np.mean(abs_err))
+    mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1e-6)))
+
+    max_outside_seconds = max_consecutive_true(outside)
+    max_exceed_ratio = float(np.max(exceed_ratio)) if len(exceed_ratio) > 0 else 0.0
+
+    outside_ratio_threshold = float(
+        target.get("outside_ratio_threshold", OUTSIDE_RATIO_THRESHOLD)
+    )
+    min_consecutive_outside = int(
+        target.get("min_consecutive_outside", MIN_CONSECUTIVE_OUTSIDE)
+    )
+    severe_exceed_ratio = float(
+        target.get("severe_exceed_ratio", SEVERE_EXCEED_RATIO)
+    )
+
+    # Alerting policy (intent of the original note): a few isolated points
+    # outside the band should not alert by themselves; an alert needs a high
+    # share of outside points, a sustained run of them, or a severe excursion.
+    widespread = outside_ratio >= outside_ratio_threshold
+    sustained = max_outside_seconds >= min_consecutive_outside
+    severe = max_exceed_ratio >= severe_exceed_ratio
+    is_anomaly = widespread or sustained or severe
+
+    return (
+        is_anomaly,
+        outside_ratio,
+        mean_abs_err,
+        mean_rel_err,
+        int(best_period),
+        int(best_origin),
+        int(max_outside_seconds),
+        float(max_exceed_ratio),
+    )
+
+
+# =============================================================================
+# 状态管理
+# =============================================================================
+
+def create_initial_state(
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    ys_actual: np.ndarray,
+    target: Dict,
+    now_sec: int,
+) -> Optional[BaselineState]:
+    """Build the first baseline state for a series from its full history.
+
+    Returns ``None`` when no baseline can be built. Otherwise the state is
+    created as healthy, with ``clean_seconds`` preset to
+    ``period * MAX_CYCLES_FOR_TEMPLATE``, both timestamps set to ``now_sec``
+    and ``y_min`` / ``y_max`` taken from ``ys_actual``.
+    """
+    baseline = build_current_baseline(
+        ts_grid=ts_grid,
+        ys_mid_grid=ys_model,
+        ys_band_grid=ys_actual,
+        target=target,
+    )
+
+    if baseline is None:
+        return None
+
+    period, phase_origin_ts, template, lower_template, upper_template = baseline
+
+    # The state stores its templates as plain lists of floats.
+    templates = {
+        field: samples.astype(float).tolist()
+        for field, samples in (
+            ("template", template),
+            ("lower_template", lower_template),
+            ("upper_template", upper_template),
+        )
+    }
+
+    return BaselineState(
+        period=int(period),
+        phase_origin_ts=int(phase_origin_ts),
+        strategy=str(target.get("strategy", "phase_point")),
+        status=BASELINE_STATUS_HEALTHY,
+        clean_seconds=int(period * MAX_CYCLES_FOR_TEMPLATE),
+        last_update_ts=now_sec,
+        last_seen_ts=now_sec,
+        y_min=float(np.min(ys_actual)),
+        y_max=float(np.max(ys_actual)),
+        **templates,
+    )
+
+
+def apply_phase_lock_to_state(
+    state: BaselineState,
+    best_period: int,
+    best_origin: int,
+) -> None:
+    """Write a phase-lock result into ``state`` in place.
+
+    Does nothing when ``best_period <= 1``. Otherwise each of the three
+    templates whose length differs from ``best_period`` is resampled to it,
+    and the state's period and phase origin are overwritten.
+    """
+    best_period = int(best_period)
+
+    if best_period <= 1:
+        return
+
+    for field in ("template", "lower_template", "upper_template"):
+        samples = getattr(state, field)
+
+        if len(samples) != best_period:
+            resampled = resample_template(np.array(samples, dtype=float), best_period)
+            setattr(state, field, resampled.astype(float).tolist())
+
+    state.period = best_period
+    state.phase_origin_ts = int(best_origin)
+
+
+def maybe_update_state(
+    key: str,
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    ys_actual: np.ndarray,
+    target: Dict,
+) -> Tuple[Optional[BaselineState], bool, float, float, float, int, float]:
+    """Run one detection cycle for a series and maintain its baseline state.
+
+    Flow:
+      * No state yet: try to create one and return with zeroed metrics
+        (state is ``None`` if creation failed).
+      * Anomaly detected: mark the state as anomalous, reset
+        ``clean_seconds`` and leave the templates untouched.
+      * Otherwise: apply the phase-lock result; the first clean cycle after
+        an anomaly switches to "recovering"; later clean cycles accumulate
+        ``clean_seconds``. Once enough clean time has passed, a fresh
+        baseline is built from the recent tail and blended into the
+        templates, and the state becomes healthy again.
+
+    The state is stored in ``BASELINE_STATES`` under ``key``.
+
+    Returns:
+        ``(state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err,
+        max_outside_seconds, max_exceed_ratio)``.
+    """
+    now_sec = int(time.time())
+    state = BASELINE_STATES.get(key)
+
+    if state is None:
+        state = create_initial_state(
+            ts_grid=ts_grid,
+            ys_model=ys_model,
+            ys_actual=ys_actual,
+            target=target,
+            now_sec=now_sec,
+        )
+
+        if state is None:
+            return None, False, 0.0, 0.0, 0.0, 0, 0.0
+
+        BASELINE_STATES[key] = state
+
+        logger.info(
+            "初始化健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss",
+            key,
+            state.strategy,
+            state.period,
+            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+            state.clean_seconds,
+        )
+
+        return state, False, 0.0, 0.0, 0.0, 0, 0.0
+
+    # Time credited to this cycle: at least 1 and at most two poll intervals.
+    elapsed = min(max(1, now_sec - int(state.last_seen_ts)), POLL_INTERVAL * 2)
+    state.last_seen_ts = now_sec
+
+    (
+        is_anomaly,
+        outside_ratio,
+        mean_abs_err,
+        mean_rel_err,
+        best_period,
+        best_origin,
+        max_outside_seconds,
+        max_exceed_ratio,
+    ) = detect_anomaly(
+        state=state,
+        ts_grid=ts_grid,
+        ys_model=ys_model,
+        ys_actual=ys_actual,
+        target=target,
+    )
+
+    # The metric part of the result is the same on every path below.
+    metrics = (
+        outside_ratio,
+        mean_abs_err,
+        mean_rel_err,
+        max_outside_seconds,
+        max_exceed_ratio,
+    )
+
+    if is_anomaly:
+        state.status = BASELINE_STATUS_ANOMALY
+        state.clean_seconds = 0
+        BASELINE_STATES[key] = state
+
+        logger.warning(
+            "检测到异常，冻结模板 key=%s outside_ratio=%.2f max_outside=%ss max_exceed_ratio=%.2f mean_abs_err=%.4f mean_rel_err=%.4f",
+            key,
+            outside_ratio,
+            max_outside_seconds,
+            max_exceed_ratio,
+            mean_abs_err,
+            mean_rel_err,
+        )
+
+        return (state, True, *metrics)
+
+    old_period = int(state.period)
+    old_origin = int(state.phase_origin_ts)
+
+    apply_phase_lock_to_state(state, best_period, best_origin)
+
+    if old_period != state.period or old_origin != state.phase_origin_ts:
+        logger.info(
+            "phase-lock key=%s period %s -> %s origin %s -> %s",
+            key,
+            old_period,
+            state.period,
+            datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"),
+            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+        )
+
+    if state.status == BASELINE_STATUS_ANOMALY:
+        # First clean cycle after an anomaly: start counting clean time.
+        state.status = BASELINE_STATUS_RECOVERING
+        state.clean_seconds = elapsed
+        BASELINE_STATES[key] = state
+
+        logger.info(
+            "异常开始恢复 key=%s clean_seconds=%ss",
+            key,
+            state.clean_seconds,
+        )
+
+        return (state, False, *metrics)
+
+    if state.status != BASELINE_STATUS_RECOVERING:
+        state.status = BASELINE_STATUS_HEALTHY
+
+    state.clean_seconds += elapsed
+
+    min_clean_for_update = max(
+        RECOVERY_MIN_SECONDS,
+        int(state.period) * MIN_FULL_CYCLES_FOR_TEMPLATE,
+    )
+
+    if state.clean_seconds < min_clean_for_update:
+        BASELINE_STATES[key] = state
+        return (state, False, *metrics)
+
+    tail_seconds = min(
+        int(state.clean_seconds),
+        int(state.period) * MAX_CYCLES_FOR_TEMPLATE,
+    )
+
+    baseline = build_current_baseline(
+        ts_grid=ts_grid,
+        ys_mid_grid=ys_model,
+        ys_band_grid=ys_actual,
+        target=target,
+        tail_seconds=tail_seconds,
+    )
+
+    if baseline is None:
+        BASELINE_STATES[key] = state
+        return (state, False, *metrics)
+
+    new_period, new_origin, new_template, new_lower_template, new_upper_template = baseline
+
+    if state.status == BASELINE_STATUS_RECOVERING:
+        alpha = RECOVERY_EMA_ALPHA
+    else:
+        alpha = HEALTHY_EMA_ALPHA
+
+    fresh_templates = (
+        ("template", new_template),
+        ("lower_template", new_lower_template),
+        ("upper_template", new_upper_template),
+    )
+
+    for field, fresh in fresh_templates:
+        blended = merge_template(
+            np.array(getattr(state, field), dtype=float),
+            fresh,
+            alpha,
+        )
+        setattr(state, field, blended.astype(float).tolist())
+
+    state.period = int(new_period)
+    state.phase_origin_ts = int(new_origin)
+    state.status = BASELINE_STATUS_HEALTHY
+    state.last_update_ts = now_sec
+
+    if tail_seconds > 0 and len(ys_actual) >= tail_seconds:
+        y_window = ys_actual[-tail_seconds:]
+    else:
+        y_window = ys_actual
+
+    state.y_min = float(np.min(y_window))
+    state.y_max = float(np.max(y_window))
+
+    BASELINE_STATES[key] = state
+
+    logger.info(
+        "更新健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss alpha=%.2f",
+        key,
+        state.strategy,
+        state.period,
+        datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+        state.clean_seconds,
+        alpha,
+    )
+
+    return (state, False, *metrics)
+
+
+# =============================================================================
+# Prometheus 写入
+# =============================================================================
+
+def prom_escape_label_value(value: str) -> str:
+    """Escape a label value for the Prometheus text format.
+
+    The value is converted with ``str()``; backslash, newline and double
+    quote are replaced by ``\\\\``, ``\\n`` and ``\\"`` respectively.
+    """
+    escapes = str.maketrans({"\\": "\\\\", "\n": "\\n", '"': '\\"'})
+
+    return str(value).translate(escapes)
+
+
+def labels_to_str(labels: Dict[str, str]) -> str:
+    """Render labels as ``{k1="v1",k2="v2"}`` with keys in sorted order.
+
+    Values are escaped with ``prom_escape_label_value``. Returns an empty
+    string when ``labels`` is empty or ``None``.
+    """
+    if not labels:
+        return ""
+
+    rendered = ",".join(
+        f'{name}="{prom_escape_label_value(labels[name])}"'
+        for name in sorted(labels)
+    )
+
+    return "{" + rendered + "}"
+
+
+def write_series(
+    metric_name: str,
+    labels: Dict[str, str],
+    ts_list: List[int],
+    values: List[float],
+) -> bool:
+    """POST one labelled series to ``{VM_URL}/api/v1/import/prometheus``.
+
+    Each sample becomes a text line ``<metric><labels> <value> <timestamp>``
+    with the value formatted to six decimals and the rounded timestamp
+    multiplied by 1000 (presumably seconds to milliseconds). Samples that
+    cannot be converted or are not finite are skipped.
+
+    Returns:
+        ``True`` when the request succeeded. ``False`` when the inputs are
+        empty or differ in length, when no sample is usable, or when the
+        request fails (the failure is logged, not raised).
+    """
+    if not ts_list or not values or len(ts_list) != len(values):
+        return False
+
+    label_str = labels_to_str(labels)
+    lines = []
+
+    for raw_ts, raw_value in zip(ts_list, values):
+        try:
+            ts_sec = int(round(float(raw_ts)))
+            val = float(raw_value)
+        except Exception:
+            continue
+
+        if math.isfinite(ts_sec) and math.isfinite(val):
+            lines.append(f"{metric_name}{label_str} {val:.6f} {ts_sec * 1000}")
+
+    if not lines:
+        return False
+
+    # One sample per line, each line newline-terminated.
+    payload = "".join(line + "\n" for line in lines)
+
+    try:
+        resp = requests.post(
+            f"{VM_URL}/api/v1/import/prometheus",
+            data=payload.encode("utf-8"),
+            headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
+            timeout=10,
+        )
+        resp.raise_for_status()
+    except requests.RequestException as e:
+        logger.error("写入数据失败 metric=%s: %s", metric_name, e)
+        return False
+
+    return True
+
+
+def write_prediction_bundle(
+    pred_metric: str,
+    anomaly_metric: str,
+    labels: Dict[str, str],
+    ts_future: List[int],
+    pred_values: np.ndarray,
+    lower_values: np.ndarray,
+    upper_values: np.ndarray,
+    is_anomaly: bool,
+    outside_ratio: float,
+    mean_abs_err: float,
+    mean_rel_err: float,
+    max_outside_seconds: int,
+    max_exceed_ratio: float,
+    event_ts: int,
+) -> bool:
+    """Write the prediction band and the anomaly indicators for one series.
+
+    Three series are written over ``ts_future``: ``pred_metric`` and its
+    ``_lower`` / ``_upper`` variants. Six single-point series are written at
+    ``event_ts`` with an extra ``type="prediction_deviation"`` label:
+    ``anomaly_metric`` (1.0 or 0.0) and its ``_outside_ratio``,
+    ``_mean_abs_error``, ``_mean_rel_error``, ``_max_consecutive_outside``
+    and ``_max_exceed_ratio`` variants.
+
+    Every write is attempted even if an earlier one fails.
+
+    Returns:
+        ``True`` only when all nine writes succeeded.
+    """
+    band_series = (
+        (pred_metric, pred_values),
+        (f"{pred_metric}_lower", lower_values),
+        (f"{pred_metric}_upper", upper_values),
+    )
+
+    # A list (not a generator) so that no write is skipped after a failure.
+    results = [
+        write_series(
+            metric_name=name,
+            labels=labels,
+            ts_list=ts_future,
+            values=series.astype(float).tolist(),
+        )
+        for name, series in band_series
+    ]
+
+    anomaly_labels = dict(labels)
+    anomaly_labels["type"] = "prediction_deviation"
+
+    indicator_series = (
+        (anomaly_metric, 1.0 if is_anomaly else 0.0),
+        (f"{anomaly_metric}_outside_ratio", outside_ratio),
+        (f"{anomaly_metric}_mean_abs_error", mean_abs_err),
+        (f"{anomaly_metric}_mean_rel_error", mean_rel_err),
+        (f"{anomaly_metric}_max_consecutive_outside", float(max_outside_seconds)),
+        (f"{anomaly_metric}_max_exceed_ratio", float(max_exceed_ratio)),
+    )
+
+    results += [
+        write_series(
+            metric_name=name,
+            labels=anomaly_labels,
+            ts_list=[event_ts],
+            values=[value],
+        )
+        for name, value in indicator_series
+    ]
+
+    return all(results)
+
+
+# =============================================================================
+# 标签解析
+# =============================================================================
+
+_LABEL_PATTERN = re.compile(
+    r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
+)
+# One backslash escape inside a quoted value, and the escapes that get decoded.
+_LABEL_ESCAPE = re.compile(r"\\.")
+_LABEL_UNESCAPES = {'\\"': '"', "\\n": "\n", "\\\\": "\\"}
+
+
+def parse_labels_from_query(query: str) -> Dict[str, str]:
+    """Extract ``name="value"`` equality matchers from a selector string.
+
+    Only the text between the first ``{`` and the last ``}`` is scanned, and a
+    query without both braces yields an empty dict. Escapes in a value are
+    decoded in a single left-to-right pass, so an escaped backslash followed
+    by ``n`` stays a backslash plus ``n`` instead of turning into a newline.
+    """
+    if "{" not in query or "}" not in query:
+        return {}
+
+    # Both braces exist, so index()/rindex() cannot raise here.
+    label_part = query[query.index("{") + 1:query.rindex("}")]
+
+    def _unescape(seq):
+        # Escapes outside the table are kept verbatim.
+        return _LABEL_UNESCAPES.get(seq.group(0), seq.group(0))
+
+    return {
+        found.group(1): _LABEL_ESCAPE.sub(_unescape, found.group(2))
+        for found in _LABEL_PATTERN.finditer(label_part)
+    }
+
+
+def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]:
+    """Combine label mappings left to right; later values win on key clashes.
+
+    Falsy arguments (``None`` or an empty mapping) are skipped.
+    """
+    return {
+        name: text for part in dicts if part for name, text in part.items()
+    }
+
+
+def series_key(metric_name: str, labels: Dict[str, str]) -> str:  # metric name followed by labels_to_str(labels)
+    return "".join((metric_name, labels_to_str(labels)))
+
+
+# =============================================================================
+# 状态持久化
+# =============================================================================
+
+def load_state() -> None:
+    """Restore ``BASELINE_STATES`` from the JSON file at ``STATE_FILE``.
+
+    A missing file is a no-op. Entries that are not objects, lack a required
+    field, or are rejected by ``BaselineState`` are skipped individually, so
+    one bad entry does not discard the other saved states. If the file cannot
+    be read or parsed at all, ``BASELINE_STATES`` is left unchanged.
+    """
+    global BASELINE_STATES
+
+    if not os.path.exists(STATE_FILE):
+        return
+
+    # Hoisted out of the loop: the field list is the same for every entry.
+    required_fields = {
+        "period", "phase_origin_ts", "template", "lower_template",
+        "upper_template", "strategy", "status", "clean_seconds",
+        "last_update_ts", "last_seen_ts", "y_min", "y_max",
+    }
+
+    try:
+        with open(STATE_FILE, "r", encoding="utf-8") as f:
+            raw = json.load(f)
+
+        states = {}
+
+        for key, value in raw.get("baseline_states", {}).items():
+            if not isinstance(value, dict) or not required_fields.issubset(value):
+                continue
+            try:
+                states[key] = BaselineState(**value)
+            except (TypeError, ValueError) as e:
+                # For example an unexpected extra field; drop this entry only.
+                logger.warning("忽略无法解析的预测状态 key=%s: %s", key, e)
+
+        BASELINE_STATES = states
+        logger.info(
+            "已加载预测状态文件 %s，状态数量=%d",
+            STATE_FILE,
+            len(BASELINE_STATES),
+        )
+    except Exception as e:
+        logger.warning("加载预测状态文件失败，将重新学习: %s", e)
+
+
+def save_state() -> None:
+    """Persist ``BASELINE_STATES`` to ``STATE_FILE`` as JSON (best effort).
+
+    The data is written to ``STATE_FILE + ".tmp"`` first and then moved over
+    ``STATE_FILE`` with ``os.replace``; any exception is logged as a warning.
+    """
+    try:
+        snapshot = {name: asdict(state) for name, state in BASELINE_STATES.items()}
+        staging_path = STATE_FILE + ".tmp"
+
+        with open(staging_path, "w", encoding="utf-8") as out:
+            json.dump({"baseline_states": snapshot}, out, ensure_ascii=False, indent=2)
+
+        # Swap the finished file in only after the dump completed.
+        os.replace(staging_path, STATE_FILE)
+
+    except Exception as e:
+        logger.warning("保存预测状态文件失败: %s", e)
+
+
+# =============================================================================
+# 时间轴
+# =============================================================================
+
+def build_prediction_timestamps(
+    key: str,
+    last_real_ts: int,
+    now_sec: int,
+) -> Optional[List[int]]:
+    """Return the timestamps to predict, or ``None`` to skip this round.
+
+    The round is skipped when the newest real sample is more than
+    ``MAX_DATA_LAG_SECONDS`` behind ``now_sec``, or when ``last_real_ts`` is not
+    newer than the value stored for ``key`` in ``LAST_REAL_TS_WRITTEN``.
+    Otherwise the result runs from ``last_real_ts + 1`` to
+    ``last_real_ts + WRITE_HORIZON_SECONDS`` in one-second steps.
+    """
+    lag_seconds = now_sec - last_real_ts
+    if lag_seconds > MAX_DATA_LAG_SECONDS:
+        logger.warning(
+            "真实数据延迟过大，跳过预测 key=%s data_lag=%ss max=%ss",
+            key, lag_seconds, MAX_DATA_LAG_SECONDS,
+        )
+        return None
+
+    previous_ts = LAST_REAL_TS_WRITTEN.get(key)
+    already_written = previous_ts is not None and last_real_ts <= int(previous_ts)
+    if already_written:
+        logger.info(
+            "真实数据时间戳未推进，跳过重复写入 key=%s last_real_ts=%s last_written_real_ts=%s",
+            key, last_real_ts, previous_ts,
+        )
+        return None
+
+    # The horizon starts one second after the newest real sample.
+    first_ts = last_real_ts + 1
+    return list(range(first_ts, first_ts + WRITE_HORIZON_SECONDS))
+
+
+# =============================================================================
+# 主流程
+# =============================================================================
+
+def run_once() -> None:
+    """Run one polling round over every entry in ``PREDICT_TARGETS``.
+
+    Per target the steps are: fetch history, normalise it, preprocess the
+    values, update the baseline state, build the future timestamps, compute
+    the prediction with its bounds, and write everything out. A target is
+    skipped for this round when there are fewer than ``MIN_POINTS`` samples
+    (before or after normalisation), when no state is returned, when no
+    timestamps are produced, or when the write reports failure.
+
+    ``save_state()`` is called once after the loop over all targets.
+    """
+    round_label = datetime.now().strftime("%H:%M:%S")
+
+    def _clock(epoch_seconds) -> str:
+        # HH:MM:SS rendering of an epoch timestamp, for the summary log line.
+        return datetime.fromtimestamp(epoch_seconds).strftime("%H:%M:%S")
+
+    for target in PREDICT_TARGETS:
+        query = target["query"]
+        pred_metric = target["pred_metric"]
+        anomaly_metric = target["anomaly_metric"]
+
+        # Step 1: raw history, with an early exit on too few samples.
+        raw_ts, raw_ys = fetch_history(query)
+        if len(raw_ys) < MIN_POINTS:
+            logger.info("[%s] %s 数据不足（%d 点），跳过", round_label, query, len(raw_ys))
+            continue
+
+        # Step 2: normalised history; the same minimum applies again.
+        ts_grid, ys_grid_raw = normalize_history(raw_ts, raw_ys)
+        if len(ys_grid_raw) < MIN_POINTS:
+            logger.info(
+                "[%s] %s 清洗后数据不足（%d 点），跳过",
+                round_label, query, len(ys_grid_raw),
+            )
+            continue
+
+        # Step 3: model input; the raw grid is still passed on as ys_actual.
+        ys_grid_model = preprocess_values(ys_grid_raw, target)
+
+        # Step 4: output labels are the query's own labels overlaid with
+        # EXTRA_PREDICT_LABELS; together with the metric name they form the key.
+        query_labels = parse_labels_from_query(query)
+        write_labels = merge_labels(query_labels, EXTRA_PREDICT_LABELS)
+        key = series_key(pred_metric, write_labels)
+
+        # Step 5: state update; it also returns the anomaly flag and statistics.
+        update_result = maybe_update_state(
+            key=key,
+            ts_grid=ts_grid,
+            ys_model=ys_grid_model,
+            ys_actual=ys_grid_raw,
+            target=target,
+        )
+        (state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err,
+         max_outside_seconds, max_exceed_ratio) = update_result
+
+        if state is None:
+            logger.info("[%s] %s 暂无可用健康模板，等待学习", round_label, query)
+            continue
+
+        # Step 6: timestamps to predict; None or empty means "skip this round".
+        now_sec = int(time.time())
+        last_real_ts = int(ts_grid[-1])
+        data_lag = now_sec - last_real_ts
+        ts_future = build_prediction_timestamps(
+            key=key, last_real_ts=last_real_ts, now_sec=now_sec,
+        )
+        if not ts_future:
+            continue
+
+        # Step 7: prediction plus raw bounds, then the final bounds.
+        pred_values, lower_raw, upper_raw = predict_state_bundle(state, ts_future)
+        lower_values, upper_values = calc_final_bounds(
+            state=state,
+            pred=pred_values,
+            lower_raw=lower_raw,
+            upper_raw=upper_raw,
+            target=target,
+        )
+
+        # Step 8: write; LAST_REAL_TS_WRITTEN only advances when it succeeded.
+        written = write_prediction_bundle(
+            pred_metric=pred_metric,
+            anomaly_metric=anomaly_metric,
+            labels=write_labels,
+            ts_future=ts_future,
+            pred_values=pred_values,
+            lower_values=lower_values,
+            upper_values=upper_values,
+            is_anomaly=is_anomaly,
+            outside_ratio=outside_ratio,
+            mean_abs_err=mean_abs_err,
+            mean_rel_err=mean_rel_err,
+            max_outside_seconds=max_outside_seconds,
+            max_exceed_ratio=max_exceed_ratio,
+            event_ts=last_real_ts,
+        )
+        if not written:
+            logger.error("[%s] %s 写入预测数据失败", round_label, query)
+            continue
+
+        LAST_REAL_TS_WRITTEN[key] = last_real_ts
+
+        # Summary line for this target.
+        future_start = _clock(ts_future[0])
+        future_end = _clock(ts_future[-1])
+        last_real_str = _clock(last_real_ts)
+        origin_str = _clock(state.phase_origin_ts)
+        logger.info(
+            "[%s] %-40s → %-35s strategy=%s status=%s anomaly=%s outside=%.2f max_outside=%ss max_exceed=%.2f period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点，预测区间 %s ~ %s",
+            round_label, query, pred_metric, state.strategy, state.status,
+            is_anomaly, outside_ratio, max_outside_seconds, max_exceed_ratio,
+            state.period, origin_str, last_real_str, data_lag,
+            len(ts_future), future_start, future_end,
+        )
+
+    save_state()
+
+
+def main() -> None:
+    """Load saved state, log the configuration, then poll forever.
+
+    An exception escaping one round is logged; the loop then continues.
+    """
+    load_state()
+    logger.info(
+        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s",
+        VM_URL, HISTORY_MINUTES, HORIZON_SECONDS, WRITE_HORIZON_SECONDS,
+        POLL_INTERVAL, STATE_FILE, EXTRA_PREDICT_LABELS.get("forecast"),
+    )
+    while True:
+        try:
+            run_once()
+        except Exception:
+            logger.exception("本轮预测执行失败，将在下一轮重试")
+        time.sleep(POLL_INTERVAL)
+
+
+if __name__ == "__main__":  # start the polling service only when run as a script
+    main()
\ No newline at end of file

From f9b6506452a75ffd1f1f8beee1ebd1008fdc724d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Mon, 25 May 2026 13:46:29 +0800
Subject: [PATCH 30/36] feat(protoforge): fault update

---
 protoforge/core/demo.py    |   2 +-
 protoforge/core/fault.py   | 187 +++++++++++++++++++++++++++++++++++++
 protoforge/models/fault.py |   2 +
 web/src/views/Devices.vue  | 117 ++++++++++++++++++++---
 4 files changed, 296 insertions(+), 12 deletions(-)

diff --git a/protoforge/core/demo.py b/protoforge/core/demo.py
index ff0b333..b7ccae7 100644
--- a/protoforge/core/demo.py
+++ b/protoforge/core/demo.py
@@ -108,7 +108,7 @@ async def seed_demo_data(engine: Any, template_manager: Any) -> None:
             "points": [
                 {"name": "weight", "address": "net_weight", "data_type": "float32", "generator_type": "random", "min_value": 0.5, "max_value": 50.0},
                 {"name": "tare", "address": "tare_weight", "data_type": "float32", "generator_type": "fixed", "fixed_value": 2.5},
-                {"name": "stable", "address": "stable_flag", "data_type": "bool", "generator_type": "fixed", "fixed_value": true},
+                {"name": "stable", "address": "stable_flag", "data_type": "bool", "generator_type": "fixed", "fixed_value": True},
             ],
         },
     ]
diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py
index e72842d..5beba87 100644
--- a/protoforge/core/fault.py
+++ b/protoforge/core/fault.py
@@ -41,6 +41,7 @@
         name="刀具磨损",
         description="刀具切削刃磨损，切削阻力增大，主轴电流升高，振动增大，进给速率下降",
         category="mechanical",
+        scenario_type="trend_drift",
         default_duration=300.0,
         tags=["刀具", "磨损", "渐进"],
         point_faults=[
@@ -67,6 +68,7 @@
         name="刀具崩刃",
         description="刀具突发性崩刃，振动剧烈突增，主轴电流峰值，进给停止",
         category="mechanical",
+        scenario_type="sudden_spike",
         default_duration=15.0,
         tags=["刀具", "崩刃", "突发"],
         point_faults=[
@@ -93,6 +95,7 @@
         name="主轴过热",
         description="主轴长时间高负荷运转或冷却不足，电流持续偏高，转速因热保护下降",
         category="thermal",
+        scenario_type="trend_drift",
         default_duration=240.0,
         tags=["主轴", "过热", "渐进"],
         point_faults=[
@@ -117,6 +120,7 @@
         name="主轴轴承故障",
         description="主轴轴承磨损或润滑不足，振动幅度持续升高，伴随电流轻微上升",
         category="mechanical",
+        scenario_type="trend_drift",
         default_duration=360.0,
         tags=["主轴", "轴承", "渐进"],
         point_faults=[
@@ -141,6 +145,7 @@
         name="进给堵转",
         description="进给轴卡死，进给速率降为零，主轴电流急剧升高",
         category="process",
+        scenario_type="sudden_spike",
         default_duration=20.0,
         tags=["进给", "堵转", "突发"],
         point_faults=[
@@ -163,6 +168,7 @@
         name="振动异常",
         description="工件装夹松动或切削共振，三轴振动突然大幅增加",
         category="mechanical",
+        scenario_type="sudden_spike",
         default_duration=60.0,
         tags=["振动", "装夹", "突发"],
         point_faults=[
@@ -185,6 +191,7 @@
         name="切削液不足",
         description="切削液供给不足，冷却润滑失效，热量积累导致振动和电流缓慢升高",
         category="process",
+        scenario_type="trend_drift",
         default_duration=480.0,
         tags=["切削液", "冷却", "渐进"],
         point_faults=[
@@ -211,6 +218,7 @@
         name="电源波动",
         description="供电电压不稳定，主轴转速和进给速率出现随机波动",
         category="electrical",
+        scenario_type="high_noise",
         default_duration=90.0,
         tags=["电源", "波动", "突发"],
         point_faults=[
@@ -222,6 +230,176 @@
                              multiplier=1.0, noise_scale=150.0),
         ],
     ),
+
+    # ==================================================================
+    # 以下为新增故障类型
+    # ==================================================================
+
+    # ------------------------------------------------------------------
+    # 传感器强干扰 — 高噪声波动型
+    # 场景：电磁干扰、接地不良、信号线屏蔽失效等导致传感器读数剧烈抖动
+    # 特征：均值基本不变，但噪声幅度突然增大数倍，信号看起来"毛刺"严重
+    # 区别于真实故障：设备本身没有坏，只是采集信号质量变差
+    # 模式：瞬间注入，持续期间每次采样都叠加大幅随机噪声
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="sensor_noise",
+        name="传感器强干扰",
+        description=(
+            "【高噪声波动型】电磁干扰或接地不良导致传感器信号质量恶化。"
+            "均值基本不变，但每次采样叠加大幅随机噪声，曲线呈现密集毛刺。"
+            "典型场景：变频器附近的传感器、信号线屏蔽层破损、接地回路故障。"
+        ),
+        category="electrical",
+        scenario_type="high_noise",
+        default_duration=120.0,
+        tags=["传感器", "干扰", "噪声", "高噪声波动型"],
+        point_faults=[
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=8.0),
+            PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=2.5),
+            PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=2.5),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=3.0),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=80.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 换工件/换程序段 — 工况切换型（高速加工 → 低速精加工）
+    # 场景：CNC 机床切换加工程序，从粗加工切换到精加工
+    # 特征：转速降低、进给降低、电流降低，所有指标跳到新的正常范围并稳定
+    # 关键：这不是故障！数据本身没有坏，只是工况变了，正常范围完全不同
+    # 模式：STEP 阶跃，立即跳到新基线并在整个 duration 内保持
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="mode_switch_fine_machining",
+        name="切换精加工工况",
+        description=(
+            "【工况切换型】从粗加工切换到精加工程序段。"
+            "主轴转速升高、进给速率降低、切削电流降低，各指标立即跳到新的正常范围并保持稳定。"
+            "数据本身没有异常，但与粗加工基线相比会触发阈值告警。"
+            "典型场景：换刀后进入精加工、加工不同特征面、程序跳段。"
+        ),
+        category="process",
+        scenario_type="mode_switch",
+        default_duration=300.0,
+        tags=["工况切换", "精加工", "程序段", "工况切换型"],
+        point_faults=[
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.STEP,
+                             multiplier=1.4, noise_scale=30.0),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.STEP,
+                             multiplier=0.3, noise_scale=10.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.STEP,
+                             multiplier=0.55, noise_scale=0.5),
+            PointFaultConfig(point="vibration_x", mode=FaultMode.STEP,
+                             multiplier=0.6, noise_scale=0.1),
+            PointFaultConfig(point="vibration_y", mode=FaultMode.STEP,
+                             multiplier=0.6, noise_scale=0.1),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.STEP,
+                             multiplier=0.6, noise_scale=0.1),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 进入空载工况 — 工况切换型（加工中 → 空载运行）
+    # 场景：加工完成、等待上料、程序暂停，主轴空转
+    # 特征：进给降为 0，电流大幅下降到空载水平，转速维持，振动降低
+    # 模式：STEP 阶跃，立即切换到空载基线
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="mode_switch_idle",
+        name="切换空载工况",
+        description=(
+            "【工况切换型】机床进入空载运行状态（加工完成等待上料、程序暂停）。"
+            "进给速率降为零，主轴电流降至空载水平（约为加工时的 20-30%），"
+            "主轴转速维持，振动明显降低。"
+            "典型场景：换料等待、程序暂停、加工间隙、换刀等待。"
+        ),
+        category="process",
+        scenario_type="mode_switch",
+        default_duration=180.0,
+        tags=["工况切换", "空载", "等待", "工况切换型"],
+        point_faults=[
+            PointFaultConfig(point="feed_rate", mode=FaultMode.STEP,
+                             target_value=0.0, noise_scale=2.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.STEP,
+                             multiplier=0.22, noise_scale=0.3),
+            PointFaultConfig(point="vibration_x", mode=FaultMode.STEP,
+                             multiplier=0.25, noise_scale=0.05),
+            PointFaultConfig(point="vibration_y", mode=FaultMode.STEP,
+                             multiplier=0.25, noise_scale=0.05),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.STEP,
+                             multiplier=0.25, noise_scale=0.05),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 突发电流尖峰 — 突发脉冲型
+    # 场景：切削过程中遇到硬质夹杂物、刀具切入角突变、工件材质不均
+    # 特征：主轴电流瞬间冲高（持续 2-5 秒），然后恢复正常，其他指标基本不变
+    # 区别于刀具崩刃：电流尖峰后能自动恢复，不会导致停机
+    # 模式：瞬间注入，持续时间极短
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="current_spike",
+        name="突发电流尖峰",
+        description=(
+            "【突发脉冲型】切削过程中遇到硬质夹杂物或材质不均，主轴电流瞬间冲高后自动恢复。"
+            "电流短暂升至正常值的 3-4 倍，持续仅数秒，振动轻微抖动，进给基本不受影响。"
+            "典型场景：铸件内部硬质点、焊缝区域、材料硬度不均匀。"
+            "与刀具崩刃的区别：能自动恢复，不触发停机报警。"
+        ),
+        category="mechanical",
+        scenario_type="sudden_spike",
+        default_duration=5.0,
+        tags=["电流", "尖峰", "脉冲", "突发脉冲型"],
+        point_faults=[
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=3.5, noise_scale=1.5),
+            PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT,
+                             multiplier=2.0, noise_scale=0.5),
+            PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT,
+                             multiplier=2.0, noise_scale=0.5),
+            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
+                             multiplier=2.5, noise_scale=0.8),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 主轴负载异常 — 关系约束型
+    # 场景：刀具钝化但未完全磨损、切削参数不匹配、工件材料变硬
+    # 特征：主轴转速正常、进给速率正常，但主轴电流异常升高
+    # 关键：单看任何一个指标都"正常"，只有多指标关系才能发现异常
+    # 模式：渐进式，电流缓慢爬升，转速和进给保持不变
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="spindle_load_anomaly",
+        name="主轴负载异常",
+        description=(
+            "【关系约束型】主轴转速正常、进给速率正常，但主轴电流异常升高。"
+            "单看任何一个指标都在正常范围内，只有分析多指标关系才能发现异常。"
+            "物理含义：切削阻力增大（刀具钝化初期、材料变硬），"
+            "系统尚未触发保护降速，但电流已超出正常切削功率范围。"
+            "典型场景：刀具轻度钝化、切削液浓度不足、工件材料批次差异。"
+        ),
+        category="mechanical",
+        scenario_type="relation_constraint",
+        default_duration=240.0,
+        tags=["主轴", "负载", "关系约束", "关系约束型"],
+        point_faults=[
+            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
+                             multiplier=2.8, noise_scale=1.0),
+            # 转速和进给保持不变（multiplier=1.0），只叠加极小噪声维持真实感
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=15.0),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=5.0),
+        ],
+    ),
 ]
 
 # 按 id 索引
@@ -374,6 +552,15 @@ def _compute_value(
                 target = baseline * (1.0 + (pf.multiplier - 1.0) * intensity)
             else:
                 target = baseline
+        elif pf.mode == FaultMode.STEP:
+            # 阶跃模式：立即跳到新基线并在整个 duration 内保持（工况切换专用）
+            # 与 INSTANT 的区别：STEP 的 multiplier 表示新工况的正常倍数，不受 intensity 缩放
+            if pf.target_value is not None:
+                target = pf.target_value
+            elif pf.multiplier is not None:
+                target = baseline * pf.multiplier
+            else:
+                target = baseline
         else:
             # 渐进模式：随 progress 线性劣化
             if pf.target_value is not None:
diff --git a/protoforge/models/fault.py b/protoforge/models/fault.py
index cc038e0..025da96 100644
--- a/protoforge/models/fault.py
+++ b/protoforge/models/fault.py
@@ -8,6 +8,7 @@ class FaultMode(str, Enum):
     """故障注入模式"""
     INSTANT = "instant"       # 瞬间跳变到异常值，持续 duration 后恢复
     GRADUAL = "gradual"       # 渐进式劣化，随时间线性恶化，到 duration 时达到峰值后恢复
+    STEP = "step"             # 阶跃切换到新工况基线，整个 duration 内保持新基线（工况切换专用）
 
 
 class FaultStatus(str, Enum):
@@ -36,6 +37,7 @@ class FaultTypeDefinition(BaseModel):
     name: str
     description: str
     category: str                          # 故障分类：mechanical / electrical / thermal / process
+    scenario_type: str = "trend_drift"     # 异常场景类型：trend_drift / sudden_spike / high_noise / mode_switch / relation_constraint
     default_duration: float = 120.0        # 默认持续时间（秒）
     point_faults: list[PointFaultConfig] = Field(default_factory=list)
     tags: list[str] = Field(default_factory=list)
diff --git a/web/src/views/Devices.vue b/web/src/views/Devices.vue
index 65e0535..36141a4 100644
--- a/web/src/views/Devices.vue
+++ b/web/src/views/Devices.vue
@@ -92,24 +92,55 @@
       </n-modal>
 
       <!-- 故障注入 Modal -->
-      <n-modal v-model:show="showFaultModal" preset="card" title="故障注入" style="width:480px">
+      <n-modal v-model:show="showFaultModal" preset="card" title="故障注入" style="width:560px">
         <n-space vertical size="medium">
-          <n-text depth="3" style="font-size:13px">设备：{{ faultTargetDevice?.name }}</n-text>
+          <n-text depth="3" style="font-size:13px">目标设备：<n-text strong>{{ faultTargetDevice?.name }}</n-text></n-text>
+
           <n-form-item label="故障类型" label-placement="left" label-width="80">
             <n-select
               v-model:value="faultTypeId"
-              :options="faultTypeOptions"
+              :options="faultTypeGroupedOptions"
               placeholder="选择故障类型"
               @update:value="onFaultTypeChange"
             />
           </n-form-item>
-          <n-alert v-if="selectedFaultType" type="warning" :bordered="false" style="font-size:12px">
-            <div style="font-weight:500;margin-bottom:4px">{{ selectedFaultType.name }} · {{ faultCategoryLabel(selectedFaultType.category) }}</div>
-            <div style="color:#94a3b8">{{ selectedFaultType.description }}</div>
-            <div style="margin-top:6px;color:#94a3b8">
-              影响测点：{{ selectedFaultType.point_faults.map(p => p.point).join('、') }}
+
+          <!-- 场景说明卡片 -->
+          <div v-if="selectedFaultType" style="background:#1a1a2e;border:1px solid #2d2d4e;border-radius:8px;padding:14px 16px;">
+            <!-- 标题行：故障名 + 场景类型标签 + 分类标签 -->
+            <n-space align="center" style="margin-bottom:10px;flex-wrap:wrap;gap:6px">
+              <n-text strong style="font-size:14px">{{ selectedFaultType.name }}</n-text>
+              <n-tag :type="scenarioTagType(selectedFaultType.scenario_type)" size="small" round>
+                {{ scenarioTypeLabel(selectedFaultType.scenario_type) }}
+              </n-tag>
+              <n-tag size="small" :bordered="false" style="background:#2d2d4e;color:#94a3b8">
+                {{ faultCategoryLabel(selectedFaultType.category) }}
+              </n-tag>
+            </n-space>
+
+            <!-- 描述文本 -->
+            <n-text depth="3" style="font-size:12px;line-height:1.7;display:block;white-space:pre-wrap">{{ selectedFaultType.description }}</n-text>
+
+            <!-- 影响测点 -->
+            <div style="margin-top:10px;padding-top:10px;border-top:1px solid #2d2d4e">
+              <n-text depth="3" style="font-size:11px">影响测点：</n-text>
+              <n-space size="small" style="margin-top:4px;flex-wrap:wrap">
+                <n-tag
+                  v-for="pf in selectedFaultType.point_faults"
+                  :key="pf.point"
+                  size="tiny"
+                  :bordered="false"
+                  style="background:#2d2d4e;color:#e2e8f0;font-family:monospace"
+                >
+                  {{ pf.point }}
+                  <span style="color:#94a3b8;margin-left:4px">
+                    {{ pointFaultModeLabel(pf) }}
+                  </span>
+                </n-tag>
+              </n-space>
             </div>
-          </n-alert>
+          </div>
+
           <n-form-item label="持续时间" label-placement="left" label-width="80">
             <n-input-number
               v-model:value="faultDuration"
@@ -120,11 +151,15 @@
               <template #suffix>秒</template>
             </n-input-number>
           </n-form-item>
+
           <n-form-item label="故障强度" label-placement="left" label-width="80">
             <n-space vertical style="width:100%">
               <n-slider v-model:value="faultIntensity" :min="0.1" :max="1.0" :step="0.1" />
               <n-text depth="3" style="font-size:12px">
                 {{ faultIntensityLabel }}（{{ faultIntensity }}）
+                <span v-if="selectedFaultType?.scenario_type === 'mode_switch'" style="color:#f59e0b">
+                  · 工况切换型强度不影响切换幅度
+                </span>
               </n-text>
             </n-space>
           </n-form-item>
@@ -243,12 +278,17 @@ const columns = [
   },
   { title: '测点', key: 'points', width: 70, render: (row) => (row.points || []).length },
   {
-    title: '故障', key: 'fault', width: 90,
+    title: '故障', key: 'fault', width: 130,
     render: (row) => {
       const fault = activeFaults.value[row.id]
       if (!fault || fault.status === 'none') return h(NTag, { size: 'tiny', bordered: false }, () => '正常')
       const pct = Math.round((fault.progress || 0) * 100)
-      return h(NTag, { size: 'tiny', type: 'error', bordered: false }, () => `${fault.fault_type_name} ${pct}%`)
+      const ft = faultTypes.value.find(t => t.id === fault.fault_type_id)
+      const scenarioLabel = ft ? scenarioTypeLabel(ft.scenario_type) : ''
+      return h(NSpace, { size: 2, vertical: false, align: 'center' }, () => [
+        h(NTag, { size: 'tiny', type: 'error', bordered: false }, () => `${fault.fault_type_name} ${pct}%`),
+        scenarioLabel ? h(NTag, { size: 'tiny', bordered: false, style: 'font-size:10px;background:#2d1b1b;color:#f87171' }, () => scenarioLabel) : null,
+      ])
     }
   },
   {
@@ -351,6 +391,25 @@ const faultTypeOptions = computed(() =>
   faultTypes.value.map(t => ({ label: `${t.name}（${faultCategoryLabel(t.category)}）`, value: t.id }))
 )
 
+// 按场景类型分组的故障选项
+const SCENARIO_ORDER = ['trend_drift', 'sudden_spike', 'high_noise', 'mode_switch', 'relation_constraint']
+const faultTypeGroupedOptions = computed(() => {
+  const groups = {}
+  for (const t of faultTypes.value) {
+    const st = t.scenario_type || 'trend_drift'
+    if (!groups[st]) groups[st] = []
+    groups[st].push({ label: t.name, value: t.id })
+  }
+  return SCENARIO_ORDER
+    .filter(st => groups[st])
+    .map(st => ({
+      type: 'group',
+      label: scenarioTypeLabel(st),
+      key: st,
+      children: groups[st],
+    }))
+})
+
 const selectedFaultType = computed(() =>
   faultTypes.value.find(t => t.id === faultTypeId.value) || null
 )
@@ -368,6 +427,42 @@ function faultCategoryLabel(category) {
   return map[category] || category
 }
 
+function scenarioTypeLabel(scenarioType) {
+  const map = {
+    trend_drift: '趋势漂移型',
+    sudden_spike: '突发脉冲型',
+    high_noise: '高噪声波动型',
+    mode_switch: '工况切换型',
+    relation_constraint: '关系约束型',
+  }
+  return map[scenarioType] || scenarioType
+}
+
+function scenarioTagType(scenarioType) {
+  const map = {
+    trend_drift: 'warning',
+    sudden_spike: 'error',
+    high_noise: 'info',
+    mode_switch: 'success',
+    relation_constraint: 'default',
+  }
+  return map[scenarioType] || 'default'
+}
+
+function pointFaultModeLabel(pf) {
+  if (pf.mode === 'step') return '→ 阶跃'
+  if (pf.mode === 'gradual') {
+    if (pf.multiplier != null) return `→ ×${pf.multiplier}`
+    if (pf.target_value != null) return `→ ${pf.target_value}`
+  }
+  if (pf.mode === 'instant') {
+    if (pf.target_value != null) return `→ ${pf.target_value}`
+    if (pf.multiplier != null && pf.multiplier !== 1.0) return `→ ×${pf.multiplier}`
+    return '± 噪声'
+  }
+  return ''
+}
+
 function onFaultTypeChange(val) {
   const t = faultTypes.value.find(f => f.id === val)
   if (t && t.default_duration) faultDuration.value = t.default_duration

From 685ae6b79c28cf8094e20fc6f9f7c00a32d0fe4a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Mon, 25 May 2026 14:00:07 +0800
Subject: [PATCH 31/36] Revert "feat(protoforge): fault update"

This reverts commit f9b6506452a75ffd1f1f8beee1ebd1008fdc724d.
---
 protoforge/core/demo.py    |   2 +-
 protoforge/core/fault.py   | 187 -------------------------------------
 protoforge/models/fault.py |   2 -
 web/src/views/Devices.vue  | 117 +++--------------------
 4 files changed, 12 insertions(+), 296 deletions(-)

diff --git a/protoforge/core/demo.py b/protoforge/core/demo.py
index b7ccae7..ff0b333 100644
--- a/protoforge/core/demo.py
+++ b/protoforge/core/demo.py
@@ -108,7 +108,7 @@ async def seed_demo_data(engine: Any, template_manager: Any) -> None:
             "points": [
                 {"name": "weight", "address": "net_weight", "data_type": "float32", "generator_type": "random", "min_value": 0.5, "max_value": 50.0},
                 {"name": "tare", "address": "tare_weight", "data_type": "float32", "generator_type": "fixed", "fixed_value": 2.5},
-                {"name": "stable", "address": "stable_flag", "data_type": "bool", "generator_type": "fixed", "fixed_value": True},
+                {"name": "stable", "address": "stable_flag", "data_type": "bool", "generator_type": "fixed", "fixed_value": true},
             ],
         },
     ]
diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py
index 5beba87..e72842d 100644
--- a/protoforge/core/fault.py
+++ b/protoforge/core/fault.py
@@ -41,7 +41,6 @@
         name="刀具磨损",
         description="刀具切削刃磨损，切削阻力增大，主轴电流升高，振动增大，进给速率下降",
         category="mechanical",
-        scenario_type="trend_drift",
         default_duration=300.0,
         tags=["刀具", "磨损", "渐进"],
         point_faults=[
@@ -68,7 +67,6 @@
         name="刀具崩刃",
         description="刀具突发性崩刃，振动剧烈突增，主轴电流峰值，进给停止",
         category="mechanical",
-        scenario_type="sudden_spike",
         default_duration=15.0,
         tags=["刀具", "崩刃", "突发"],
         point_faults=[
@@ -95,7 +93,6 @@
         name="主轴过热",
         description="主轴长时间高负荷运转或冷却不足，电流持续偏高，转速因热保护下降",
         category="thermal",
-        scenario_type="trend_drift",
         default_duration=240.0,
         tags=["主轴", "过热", "渐进"],
         point_faults=[
@@ -120,7 +117,6 @@
         name="主轴轴承故障",
         description="主轴轴承磨损或润滑不足，振动幅度持续升高，伴随电流轻微上升",
         category="mechanical",
-        scenario_type="trend_drift",
         default_duration=360.0,
         tags=["主轴", "轴承", "渐进"],
         point_faults=[
@@ -145,7 +141,6 @@
         name="进给堵转",
         description="进给轴卡死，进给速率降为零，主轴电流急剧升高",
         category="process",
-        scenario_type="sudden_spike",
         default_duration=20.0,
         tags=["进给", "堵转", "突发"],
         point_faults=[
@@ -168,7 +163,6 @@
         name="振动异常",
         description="工件装夹松动或切削共振，三轴振动突然大幅增加",
         category="mechanical",
-        scenario_type="sudden_spike",
         default_duration=60.0,
         tags=["振动", "装夹", "突发"],
         point_faults=[
@@ -191,7 +185,6 @@
         name="切削液不足",
         description="切削液供给不足，冷却润滑失效，热量积累导致振动和电流缓慢升高",
         category="process",
-        scenario_type="trend_drift",
         default_duration=480.0,
         tags=["切削液", "冷却", "渐进"],
         point_faults=[
@@ -218,7 +211,6 @@
         name="电源波动",
         description="供电电压不稳定，主轴转速和进给速率出现随机波动",
         category="electrical",
-        scenario_type="high_noise",
         default_duration=90.0,
         tags=["电源", "波动", "突发"],
         point_faults=[
@@ -230,176 +222,6 @@
                              multiplier=1.0, noise_scale=150.0),
         ],
     ),
-
-    # ==================================================================
-    # 以下为新增故障类型
-    # ==================================================================
-
-    # ------------------------------------------------------------------
-    # 传感器强干扰 — 高噪声波动型
-    # 场景：电磁干扰、接地不良、信号线屏蔽失效等导致传感器读数剧烈抖动
-    # 特征：均值基本不变，但噪声幅度突然增大数倍，信号看起来"毛刺"严重
-    # 区别于真实故障：设备本身没有坏，只是采集信号质量变差
-    # 模式：瞬间注入，持续期间每次采样都叠加大幅随机噪声
-    # ------------------------------------------------------------------
-    FaultTypeDefinition(
-        id="sensor_noise",
-        name="传感器强干扰",
-        description=(
-            "【高噪声波动型】电磁干扰或接地不良导致传感器信号质量恶化。"
-            "均值基本不变，但每次采样叠加大幅随机噪声，曲线呈现密集毛刺。"
-            "典型场景：变频器附近的传感器、信号线屏蔽层破损、接地回路故障。"
-        ),
-        category="electrical",
-        scenario_type="high_noise",
-        default_duration=120.0,
-        tags=["传感器", "干扰", "噪声", "高噪声波动型"],
-        point_faults=[
-            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=8.0),
-            PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=2.5),
-            PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=2.5),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=3.0),
-            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=80.0),
-        ],
-    ),
-
-    # ------------------------------------------------------------------
-    # 换工件/换程序段 — 工况切换型（高速加工 → 低速精加工）
-    # 场景：CNC 机床切换加工程序，从粗加工切换到精加工
-    # 特征：转速降低、进给降低、电流降低，所有指标跳到新的正常范围并稳定
-    # 关键：这不是故障！数据本身没有坏，只是工况变了，正常范围完全不同
-    # 模式：STEP 阶跃，立即跳到新基线并在整个 duration 内保持
-    # ------------------------------------------------------------------
-    FaultTypeDefinition(
-        id="mode_switch_fine_machining",
-        name="切换精加工工况",
-        description=(
-            "【工况切换型】从粗加工切换到精加工程序段。"
-            "主轴转速升高、进给速率降低、切削电流降低，各指标立即跳到新的正常范围并保持稳定。"
-            "数据本身没有异常，但与粗加工基线相比会触发阈值告警。"
-            "典型场景：换刀后进入精加工、加工不同特征面、程序跳段。"
-        ),
-        category="process",
-        scenario_type="mode_switch",
-        default_duration=300.0,
-        tags=["工况切换", "精加工", "程序段", "工况切换型"],
-        point_faults=[
-            PointFaultConfig(point="spindle_speed", mode=FaultMode.STEP,
-                             multiplier=1.4, noise_scale=30.0),
-            PointFaultConfig(point="feed_rate", mode=FaultMode.STEP,
-                             multiplier=0.3, noise_scale=10.0),
-            PointFaultConfig(point="spindle_current", mode=FaultMode.STEP,
-                             multiplier=0.55, noise_scale=0.5),
-            PointFaultConfig(point="vibration_x", mode=FaultMode.STEP,
-                             multiplier=0.6, noise_scale=0.1),
-            PointFaultConfig(point="vibration_y", mode=FaultMode.STEP,
-                             multiplier=0.6, noise_scale=0.1),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.STEP,
-                             multiplier=0.6, noise_scale=0.1),
-        ],
-    ),
-
-    # ------------------------------------------------------------------
-    # 进入空载工况 — 工况切换型（加工中 → 空载运行）
-    # 场景：加工完成、等待上料、程序暂停，主轴空转
-    # 特征：进给降为 0，电流大幅下降到空载水平，转速维持，振动降低
-    # 模式：STEP 阶跃，立即切换到空载基线
-    # ------------------------------------------------------------------
-    FaultTypeDefinition(
-        id="mode_switch_idle",
-        name="切换空载工况",
-        description=(
-            "【工况切换型】机床进入空载运行状态（加工完成等待上料、程序暂停）。"
-            "进给速率降为零，主轴电流降至空载水平（约为加工时的 20-30%），"
-            "主轴转速维持，振动明显降低。"
-            "典型场景：换料等待、程序暂停、加工间隙、换刀等待。"
-        ),
-        category="process",
-        scenario_type="mode_switch",
-        default_duration=180.0,
-        tags=["工况切换", "空载", "等待", "工况切换型"],
-        point_faults=[
-            PointFaultConfig(point="feed_rate", mode=FaultMode.STEP,
-                             target_value=0.0, noise_scale=2.0),
-            PointFaultConfig(point="spindle_current", mode=FaultMode.STEP,
-                             multiplier=0.22, noise_scale=0.3),
-            PointFaultConfig(point="vibration_x", mode=FaultMode.STEP,
-                             multiplier=0.25, noise_scale=0.05),
-            PointFaultConfig(point="vibration_y", mode=FaultMode.STEP,
-                             multiplier=0.25, noise_scale=0.05),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.STEP,
-                             multiplier=0.25, noise_scale=0.05),
-        ],
-    ),
-
-    # ------------------------------------------------------------------
-    # 突发电流尖峰 — 突发脉冲型
-    # 场景：切削过程中遇到硬质夹杂物、刀具切入角突变、工件材质不均
-    # 特征：主轴电流瞬间冲高（持续 2-5 秒），然后恢复正常，其他指标基本不变
-    # 区别于刀具崩刃：电流尖峰后能自动恢复，不会导致停机
-    # 模式：瞬间注入，持续时间极短
-    # ------------------------------------------------------------------
-    FaultTypeDefinition(
-        id="current_spike",
-        name="突发电流尖峰",
-        description=(
-            "【突发脉冲型】切削过程中遇到硬质夹杂物或材质不均，主轴电流瞬间冲高后自动恢复。"
-            "电流短暂升至正常值的 3-4 倍，持续仅数秒，振动轻微抖动，进给基本不受影响。"
-            "典型场景：铸件内部硬质点、焊缝区域、材料硬度不均匀。"
-            "与刀具崩刃的区别：能自动恢复，不触发停机报警。"
-        ),
-        category="mechanical",
-        scenario_type="sudden_spike",
-        default_duration=5.0,
-        tags=["电流", "尖峰", "脉冲", "突发脉冲型"],
-        point_faults=[
-            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             multiplier=3.5, noise_scale=1.5),
-            PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT,
-                             multiplier=2.0, noise_scale=0.5),
-            PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT,
-                             multiplier=2.0, noise_scale=0.5),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
-                             multiplier=2.5, noise_scale=0.8),
-        ],
-    ),
-
-    # ------------------------------------------------------------------
-    # 主轴负载异常 — 关系约束型
-    # 场景：刀具钝化但未完全磨损、切削参数不匹配、工件材料变硬
-    # 特征：主轴转速正常、进给速率正常，但主轴电流异常升高
-    # 关键：单看任何一个指标都"正常"，只有多指标关系才能发现异常
-    # 模式：渐进式，电流缓慢爬升，转速和进给保持不变
-    # ------------------------------------------------------------------
-    FaultTypeDefinition(
-        id="spindle_load_anomaly",
-        name="主轴负载异常",
-        description=(
-            "【关系约束型】主轴转速正常、进给速率正常，但主轴电流异常升高。"
-            "单看任何一个指标都在正常范围内，只有分析多指标关系才能发现异常。"
-            "物理含义：切削阻力增大（刀具钝化初期、材料变硬），"
-            "系统尚未触发保护降速，但电流已超出正常切削功率范围。"
-            "典型场景：刀具轻度钝化、切削液浓度不足、工件材料批次差异。"
-        ),
-        category="mechanical",
-        scenario_type="relation_constraint",
-        default_duration=240.0,
-        tags=["主轴", "负载", "关系约束", "关系约束型"],
-        point_faults=[
-            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
-                             multiplier=2.8, noise_scale=1.0),
-            # 转速和进给保持不变（multiplier=1.0），只叠加极小噪声维持真实感
-            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=15.0),
-            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=5.0),
-        ],
-    ),
 ]
 
 # 按 id 索引
@@ -552,15 +374,6 @@ def _compute_value(
                 target = baseline * (1.0 + (pf.multiplier - 1.0) * intensity)
             else:
                 target = baseline
-        elif pf.mode == FaultMode.STEP:
-            # 阶跃模式：立即跳到新基线并在整个 duration 内保持（工况切换专用）
-            # 与 INSTANT 的区别：STEP 的 multiplier 表示新工况的正常倍数，不受 intensity 缩放
-            if pf.target_value is not None:
-                target = pf.target_value
-            elif pf.multiplier is not None:
-                target = baseline * pf.multiplier
-            else:
-                target = baseline
         else:
             # 渐进模式：随 progress 线性劣化
             if pf.target_value is not None:
diff --git a/protoforge/models/fault.py b/protoforge/models/fault.py
index 025da96..cc038e0 100644
--- a/protoforge/models/fault.py
+++ b/protoforge/models/fault.py
@@ -8,7 +8,6 @@ class FaultMode(str, Enum):
     """故障注入模式"""
     INSTANT = "instant"       # 瞬间跳变到异常值，持续 duration 后恢复
     GRADUAL = "gradual"       # 渐进式劣化，随时间线性恶化，到 duration 时达到峰值后恢复
-    STEP = "step"             # 阶跃切换到新工况基线，整个 duration 内保持新基线（工况切换专用）
 
 
 class FaultStatus(str, Enum):
@@ -37,7 +36,6 @@ class FaultTypeDefinition(BaseModel):
     name: str
     description: str
     category: str                          # 故障分类：mechanical / electrical / thermal / process
-    scenario_type: str = "trend_drift"     # 异常场景类型：trend_drift / sudden_spike / high_noise / mode_switch / relation_constraint
     default_duration: float = 120.0        # 默认持续时间（秒）
     point_faults: list[PointFaultConfig] = Field(default_factory=list)
     tags: list[str] = Field(default_factory=list)
diff --git a/web/src/views/Devices.vue b/web/src/views/Devices.vue
index 36141a4..65e0535 100644
--- a/web/src/views/Devices.vue
+++ b/web/src/views/Devices.vue
@@ -92,55 +92,24 @@
       </n-modal>
 
       <!-- 故障注入 Modal -->
-      <n-modal v-model:show="showFaultModal" preset="card" title="故障注入" style="width:560px">
+      <n-modal v-model:show="showFaultModal" preset="card" title="故障注入" style="width:480px">
         <n-space vertical size="medium">
-          <n-text depth="3" style="font-size:13px">目标设备：<n-text strong>{{ faultTargetDevice?.name }}</n-text></n-text>
-
+          <n-text depth="3" style="font-size:13px">设备：{{ faultTargetDevice?.name }}</n-text>
           <n-form-item label="故障类型" label-placement="left" label-width="80">
             <n-select
               v-model:value="faultTypeId"
-              :options="faultTypeGroupedOptions"
+              :options="faultTypeOptions"
               placeholder="选择故障类型"
               @update:value="onFaultTypeChange"
             />
           </n-form-item>
-
-          <!-- 场景说明卡片 -->
-          <div v-if="selectedFaultType" style="background:#1a1a2e;border:1px solid #2d2d4e;border-radius:8px;padding:14px 16px;">
-            <!-- 标题行：故障名 + 场景类型标签 + 分类标签 -->
-            <n-space align="center" style="margin-bottom:10px;flex-wrap:wrap;gap:6px">
-              <n-text strong style="font-size:14px">{{ selectedFaultType.name }}</n-text>
-              <n-tag :type="scenarioTagType(selectedFaultType.scenario_type)" size="small" round>
-                {{ scenarioTypeLabel(selectedFaultType.scenario_type) }}
-              </n-tag>
-              <n-tag size="small" :bordered="false" style="background:#2d2d4e;color:#94a3b8">
-                {{ faultCategoryLabel(selectedFaultType.category) }}
-              </n-tag>
-            </n-space>
-
-            <!-- 描述文本 -->
-            <n-text depth="3" style="font-size:12px;line-height:1.7;display:block;white-space:pre-wrap">{{ selectedFaultType.description }}</n-text>
-
-            <!-- 影响测点 -->
-            <div style="margin-top:10px;padding-top:10px;border-top:1px solid #2d2d4e">
-              <n-text depth="3" style="font-size:11px">影响测点：</n-text>
-              <n-space size="small" style="margin-top:4px;flex-wrap:wrap">
-                <n-tag
-                  v-for="pf in selectedFaultType.point_faults"
-                  :key="pf.point"
-                  size="tiny"
-                  :bordered="false"
-                  style="background:#2d2d4e;color:#e2e8f0;font-family:monospace"
-                >
-                  {{ pf.point }}
-                  <span style="color:#94a3b8;margin-left:4px">
-                    {{ pointFaultModeLabel(pf) }}
-                  </span>
-                </n-tag>
-              </n-space>
+          <n-alert v-if="selectedFaultType" type="warning" :bordered="false" style="font-size:12px">
+            <div style="font-weight:500;margin-bottom:4px">{{ selectedFaultType.name }} · {{ faultCategoryLabel(selectedFaultType.category) }}</div>
+            <div style="color:#94a3b8">{{ selectedFaultType.description }}</div>
+            <div style="margin-top:6px;color:#94a3b8">
+              影响测点：{{ selectedFaultType.point_faults.map(p => p.point).join('、') }}
             </div>
-          </div>
-
+          </n-alert>
           <n-form-item label="持续时间" label-placement="left" label-width="80">
             <n-input-number
               v-model:value="faultDuration"
@@ -151,15 +120,11 @@
               <template #suffix>秒</template>
             </n-input-number>
           </n-form-item>
-
           <n-form-item label="故障强度" label-placement="left" label-width="80">
             <n-space vertical style="width:100%">
               <n-slider v-model:value="faultIntensity" :min="0.1" :max="1.0" :step="0.1" />
               <n-text depth="3" style="font-size:12px">
                 {{ faultIntensityLabel }}（{{ faultIntensity }}）
-                <span v-if="selectedFaultType?.scenario_type === 'mode_switch'" style="color:#f59e0b">
-                  · 工况切换型强度不影响切换幅度
-                </span>
               </n-text>
             </n-space>
           </n-form-item>
@@ -278,17 +243,12 @@ const columns = [
   },
   { title: '测点', key: 'points', width: 70, render: (row) => (row.points || []).length },
   {
-    title: '故障', key: 'fault', width: 130,
+    title: '故障', key: 'fault', width: 90,
     render: (row) => {
       const fault = activeFaults.value[row.id]
       if (!fault || fault.status === 'none') return h(NTag, { size: 'tiny', bordered: false }, () => '正常')
       const pct = Math.round((fault.progress || 0) * 100)
-      const ft = faultTypes.value.find(t => t.id === fault.fault_type_id)
-      const scenarioLabel = ft ? scenarioTypeLabel(ft.scenario_type) : ''
-      return h(NSpace, { size: 2, vertical: false, align: 'center' }, () => [
-        h(NTag, { size: 'tiny', type: 'error', bordered: false }, () => `${fault.fault_type_name} ${pct}%`),
-        scenarioLabel ? h(NTag, { size: 'tiny', bordered: false, style: 'font-size:10px;background:#2d1b1b;color:#f87171' }, () => scenarioLabel) : null,
-      ])
+      return h(NTag, { size: 'tiny', type: 'error', bordered: false }, () => `${fault.fault_type_name} ${pct}%`)
     }
   },
   {
@@ -391,25 +351,6 @@ const faultTypeOptions = computed(() =>
   faultTypes.value.map(t => ({ label: `${t.name}（${faultCategoryLabel(t.category)}）`, value: t.id }))
 )
 
-// 按场景类型分组的故障选项
-const SCENARIO_ORDER = ['trend_drift', 'sudden_spike', 'high_noise', 'mode_switch', 'relation_constraint']
-const faultTypeGroupedOptions = computed(() => {
-  const groups = {}
-  for (const t of faultTypes.value) {
-    const st = t.scenario_type || 'trend_drift'
-    if (!groups[st]) groups[st] = []
-    groups[st].push({ label: t.name, value: t.id })
-  }
-  return SCENARIO_ORDER
-    .filter(st => groups[st])
-    .map(st => ({
-      type: 'group',
-      label: scenarioTypeLabel(st),
-      key: st,
-      children: groups[st],
-    }))
-})
-
 const selectedFaultType = computed(() =>
   faultTypes.value.find(t => t.id === faultTypeId.value) || null
 )
@@ -427,42 +368,6 @@ function faultCategoryLabel(category) {
   return map[category] || category
 }
 
-function scenarioTypeLabel(scenarioType) {
-  const map = {
-    trend_drift: '趋势漂移型',
-    sudden_spike: '突发脉冲型',
-    high_noise: '高噪声波动型',
-    mode_switch: '工况切换型',
-    relation_constraint: '关系约束型',
-  }
-  return map[scenarioType] || scenarioType
-}
-
-function scenarioTagType(scenarioType) {
-  const map = {
-    trend_drift: 'warning',
-    sudden_spike: 'error',
-    high_noise: 'info',
-    mode_switch: 'success',
-    relation_constraint: 'default',
-  }
-  return map[scenarioType] || 'default'
-}
-
-function pointFaultModeLabel(pf) {
-  if (pf.mode === 'step') return '→ 阶跃'
-  if (pf.mode === 'gradual') {
-    if (pf.multiplier != null) return `→ ×${pf.multiplier}`
-    if (pf.target_value != null) return `→ ${pf.target_value}`
-  }
-  if (pf.mode === 'instant') {
-    if (pf.target_value != null) return `→ ${pf.target_value}`
-    if (pf.multiplier != null && pf.multiplier !== 1.0) return `→ ×${pf.multiplier}`
-    return '± 噪声'
-  }
-  return ''
-}
-
 function onFaultTypeChange(val) {
   const t = faultTypes.value.find(f => f.id === val)
   if (t && t.default_duration) faultDuration.value = t.default_duration

From 02174daba0a6b5f0c770243c5f47f082ea2e5d51 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Tue, 26 May 2026 10:47:45 +0800
Subject: [PATCH 32/36] feat(protoforge): update protoforge

---
 protoforge/core/engine.py                     |   8 +
 protoforge/core/fault.py                      | 290 ++++++++++--------
 protoforge/protocols/fanuc/server.py          |  79 ++++-
 protoforge/templates/fanuc/fanuc_0if_cnc.json |  69 ++---
 protoforge/templates/modbus/fanuc_cnc.json    |  58 +++-
 web/src/views/Devices.vue                     |   2 +-
 6 files changed, 318 insertions(+), 188 deletions(-)

diff --git a/protoforge/core/engine.py b/protoforge/core/engine.py
index 059f10e..d8b72aa 100644
--- a/protoforge/core/engine.py
+++ b/protoforge/core/engine.py
@@ -293,6 +293,14 @@ async def _tick_loop(self) -> None:
         while self._running:
             for instance in self._devices.values():
                 instance.tick()
+                # 将 DeviceInstance._point_values 同步到协议服务器，保证协议层读到最新值
+                server = self._protocol_servers.get(instance.protocol)
+                if server and hasattr(server, '_behaviors'):
+                    behavior = server._behaviors.get(instance.id)
+                    if behavior is not None:
+                        behavior._values.update(instance._point_values)
+                        if hasattr(behavior, 'sync_from_point_values'):
+                            behavior.sync_from_point_values(instance._point_values)
             for scenario in self._scenario_instances.values():
                 scenario.tick()
             await asyncio.sleep(1.0)
diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py
index e72842d..11b61a7 100644
--- a/protoforge/core/fault.py
+++ b/protoforge/core/fault.py
@@ -32,194 +32,228 @@
 BUILTIN_FAULT_TYPES: list[FaultTypeDefinition] = [
 
     # ------------------------------------------------------------------
-    # 刀具磨损 — 最常见的机加工故障
-    # 特征：切削阻力增大 → 主轴电流缓慢爬升，振动幅度增大，进给速率被系统压低
-    # 模式：渐进式，持续数分钟，模拟刀具从轻度磨损到需要换刀的过程
-    # ------------------------------------------------------------------
-    FaultTypeDefinition(
-        id="tool_wear",
-        name="刀具磨损",
-        description="刀具切削刃磨损，切削阻力增大，主轴电流升高，振动增大，进给速率下降",
-        category="mechanical",
-        default_duration=300.0,
-        tags=["刀具", "磨损", "渐进"],
-        point_faults=[
-            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
-                             multiplier=2.2, noise_scale=0.8),
-            PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL,
-                             multiplier=3.0, noise_scale=0.3),
-            PointFaultConfig(point="vibration_y", mode=FaultMode.GRADUAL,
-                             multiplier=3.0, noise_scale=0.3),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL,
-                             multiplier=3.5, noise_scale=0.4),
-            PointFaultConfig(point="feed_rate", mode=FaultMode.GRADUAL,
-                             multiplier=0.45, noise_scale=20.0),
-        ],
-    ),
-
-    # ------------------------------------------------------------------
-    # 刀具崩刃 — 突发性刀具失效
-    # 特征：瞬间冲击 → 振动突增，电流瞬间峰值，进给立即停止
-    # 模式：瞬间注入，持续时间短（机床通常会触发报警停机）
+    # 进给堵转 — 工件夹紧松动或切削量过大导致进给卡死
+    # 特征：进给速率瞬间降为0，主轴负载和电流急剧升高，主轴仍在转（区别于崩刃）
+    # 模式：瞬间注入
     # ------------------------------------------------------------------
     FaultTypeDefinition(
-        id="tool_breakage",
-        name="刀具崩刃",
-        description="刀具突发性崩刃，振动剧烈突增，主轴电流峰值，进给停止",
-        category="mechanical",
-        default_duration=15.0,
-        tags=["刀具", "崩刃", "突发"],
+        id="feed_stall",
+        name="进给堵转",
+        description="进给轴卡死，进给速率降为零，主轴负载和电流急剧升高，主轴转速维持（区别于崩刃停主轴）",
+        category="process",
+        default_duration=20.0,
+        tags=["进给", "堵转", "突发"],
         point_faults=[
-            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             multiplier=4.5, noise_scale=2.0),
-            PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT,
-                             multiplier=8.0, noise_scale=1.5),
-            PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT,
-                             multiplier=8.0, noise_scale=1.5),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
-                             multiplier=10.0, noise_scale=2.0),
             PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
                              target_value=0.0, noise_scale=0.0),
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             multiplier=2.8, noise_scale=5.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=3.8, noise_scale=1.5),
         ],
     ),
 
     # ------------------------------------------------------------------
     # 主轴过热 — 长时间高负荷或冷却系统故障
-    # 特征：主轴电流持续偏高，转速因热保护逐渐降低
+    # 特征：主轴负载和电流持续偏高，转速因热保护逐渐降低
     # 模式：渐进式，持续时间较长
     # ------------------------------------------------------------------
     FaultTypeDefinition(
         id="spindle_overheat",
         name="主轴过热",
-        description="主轴长时间高负荷运转或冷却不足，电流持续偏高，转速因热保护下降",
+        description="主轴长时间高负荷运转或冷却不足，spindle_load和spindle_current持续偏高，转速因热保护渐进下降",
         category="thermal",
         default_duration=240.0,
         tags=["主轴", "过热", "渐进"],
         point_faults=[
+            PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL,
+                             multiplier=1.6, noise_scale=3.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
                              multiplier=1.8, noise_scale=1.2),
             PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL,
                              multiplier=0.6, noise_scale=50.0),
-            PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL,
-                             multiplier=1.5, noise_scale=0.2),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL,
-                             multiplier=1.5, noise_scale=0.2),
         ],
     ),
 
     # ------------------------------------------------------------------
-    # 主轴轴承故障 — 轴承磨损或润滑不足
-    # 特征：振动频率特征变化，整体振动幅度升高，电流略升
-    # 模式：渐进式
+    # 电源波动 — 供电不稳定
+    # 特征：主轴转速和进给速率出现随机波动，电流不稳定
+    # 模式：瞬间注入（持续期间持续抖动）
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="power_fluctuation",
+        name="电源波动",
+        description="供电电压不稳定，主轴转速和进给速率出现随机波动",
+        category="electrical",
+        default_duration=90.0,
+        tags=["电源", "波动", "突发"],
+        point_faults=[
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=300.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=5.0),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=150.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 刀具磨损加剧 — 主轴负载趋势漂移
+    # 特征：spindle_load 基线随时间缓慢爬升（趋势漂移型），电流同步升高
+    # 场景：刀具从轻度磨损到需要换刀的完整过程
+    # 模式：渐进式，持续时间长
     # ------------------------------------------------------------------
     FaultTypeDefinition(
-        id="spindle_bearing_fault",
-        name="主轴轴承故障",
-        description="主轴轴承磨损或润滑不足，振动幅度持续升高，伴随电流轻微上升",
-        category="mechanical",
-        default_duration=360.0,
-        tags=["主轴", "轴承", "渐进"],
+        id="tool_wear_progressive",
+        name="刀具磨损加剧",
+        description="刀具磨损导致切削阻力持续增大，spindle_load基线缓慢爬升至1.8倍，spindle_current同步升高；进给速度由G代码控制不受影响",
+        category="tool",
+        default_duration=600.0,
+        tags=["刀具", "磨损", "负载", "趋势漂移"],
         point_faults=[
-            PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL,
-                             multiplier=4.0, noise_scale=0.5),
-            PointFaultConfig(point="vibration_y", mode=FaultMode.GRADUAL,
-                             multiplier=4.0, noise_scale=0.5),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL,
-                             multiplier=5.0, noise_scale=0.8),
+            PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL,
+                             multiplier=1.8, noise_scale=3.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
-                             multiplier=1.3, noise_scale=0.5),
+                             multiplier=1.7, noise_scale=1.5),
         ],
     ),
 
     # ------------------------------------------------------------------
-    # 进给堵转 — 工件夹紧松动或切削量过大导致进给卡死
-    # 特征：进给速率瞬间降为 0，主轴电流急剧升高
-    # 模式：瞬间注入
+    # 刀具崩刃 — 主轴负载突发脉冲
+    # 特征：spindle_load 瞬间冲高（可超120%，FANUC最大输出200%），进给停止，CNC停主轴
+    # 场景：刀具突发性失效，机床触发过载报警并停机
+    # 模式：瞬间注入，持续时间极短
     # ------------------------------------------------------------------
     FaultTypeDefinition(
-        id="feed_stall",
-        name="进给堵转",
-        description="进给轴卡死，进给速率降为零，主轴电流急剧升高",
-        category="process",
-        default_duration=20.0,
-        tags=["进给", "堵转", "突发"],
+        id="tool_breakage_sudden",
+        name="刀具崩刃",
+        description="刀具突发性崩刃，spindle_load瞬间冲高至正常值3.2倍（可超120%，FANUC最大输出200%），进给停止，CNC触发过载报警并停主轴",
+        category="tool",
+        default_duration=10.0,
+        tags=["刀具", "崩刃", "突发", "过载"],
         point_faults=[
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             multiplier=3.2, noise_scale=8.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=4.0, noise_scale=3.0),
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             target_value=0.0, noise_scale=0.0),
             PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
                              target_value=0.0, noise_scale=0.0),
+            PointFaultConfig(point="alarm_status", mode=FaultMode.INSTANT,
+                             target_value=1.0, noise_scale=0.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 过载保护触发 — 负载/进给反向联动异常（关系约束型）
+    # 特征：负载超限后CNC自动降进给速率，负载高企与进给降速同时出现
+    # 场景：切削参数过激进，CNC自适应保护介入
+    # 模式：瞬间注入（持续期间维持异常关系）
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="tool_overload_protection",
+        name="过载保护触发",
+        description="主轴负载超限，CNC自动降低进给速率保护刀具，负载高企与进给降速同时出现",
+        category="tool",
+        default_duration=120.0,
+        tags=["刀具", "过载", "进给", "关系约束"],
+        point_faults=[
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             multiplier=1.9, noise_scale=4.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             multiplier=3.8, noise_scale=1.5),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
-                             multiplier=5.0, noise_scale=1.0),
+                             multiplier=1.8, noise_scale=2.0),
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             multiplier=0.35, noise_scale=15.0),
         ],
     ),
 
     # ------------------------------------------------------------------
-    # 振动异常 — 工件装夹松动或共振
-    # 特征：三轴振动突然大幅增加，其他指标基本正常
-    # 模式：瞬间注入
+    # 空切检测 — 刀具未接触工件（工况切换型）
+    # 特征：spindle_load 跌至空载区间（5-15%），主轴转速和进给速率保持正常
+    # 场景：工件装夹偏移、程序坐标错误、工件提前切完
+    # 模式：瞬间注入（均值跳变，方差不变）
     # ------------------------------------------------------------------
     FaultTypeDefinition(
-        id="vibration_spike",
-        name="振动异常",
-        description="工件装夹松动或切削共振，三轴振动突然大幅增加",
-        category="mechanical",
-        default_duration=60.0,
-        tags=["振动", "装夹", "突发"],
+        id="air_cutting",
+        name="空切检测",
+        description="刀具未接触工件，spindle_load跌至空载区间(5-15%)，spindle_current降至空转水平，转速进给保持正常",
+        category="tool",
+        default_duration=180.0,
+        tags=["刀具", "空切", "工况切换", "负载"],
         point_faults=[
-            PointFaultConfig(point="vibration_x", mode=FaultMode.INSTANT,
-                             multiplier=6.0, noise_scale=1.0),
-            PointFaultConfig(point="vibration_y", mode=FaultMode.INSTANT,
-                             multiplier=6.0, noise_scale=1.0),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.INSTANT,
-                             multiplier=7.0, noise_scale=1.2),
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             target_value=8.0, noise_scale=2.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             target_value=2.5, noise_scale=0.3),
         ],
     ),
 
     # ------------------------------------------------------------------
-    # 切削液不足 — 冷却润滑失效
-    # 特征：热量积累 → 振动缓慢升高，电流缓慢升高，进给略降
-    # 模式：渐进式，速度较慢
+    # 积屑瘤 — 切屑粘附刀刃导致周期性负载突刺
+    # 特征：spindle_load 在正常基线上出现间歇性冲高后恢复，不是持续爬升
+    #       突刺幅度约1.5-2倍基线，持续1-3秒后自行恢复，周期不固定
+    # 场景：低速切削、切削液不足、韧性材料（铝合金、不锈钢）加工时常见
+    # 模式：瞬间注入（noise_scale 大，模拟随机突刺效果）
     # ------------------------------------------------------------------
     FaultTypeDefinition(
-        id="coolant_failure",
-        name="切削液不足",
-        description="切削液供给不足，冷却润滑失效，热量积累导致振动和电流缓慢升高",
-        category="process",
-        default_duration=480.0,
-        tags=["切削液", "冷却", "渐进"],
+        id="built_up_edge",
+        name="积屑瘤",
+        description="切屑粘附刀刃，spindle_load在正常基线上出现间歇性突刺（1.5-2倍），突刺后自行恢复，区别于磨损的持续爬升",
+        category="tool",
+        default_duration=300.0,
+        tags=["刀具", "积屑瘤", "突刺", "低速切削"],
         point_faults=[
-            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
-                             multiplier=1.6, noise_scale=0.8),
-            PointFaultConfig(point="vibration_x", mode=FaultMode.GRADUAL,
-                             multiplier=2.0, noise_scale=0.3),
-            PointFaultConfig(point="vibration_y", mode=FaultMode.GRADUAL,
-                             multiplier=2.0, noise_scale=0.3),
-            PointFaultConfig(point="vibration_z", mode=FaultMode.GRADUAL,
-                             multiplier=2.5, noise_scale=0.4),
-            PointFaultConfig(point="feed_rate", mode=FaultMode.GRADUAL,
-                             multiplier=0.75, noise_scale=15.0),
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             multiplier=1.7, noise_scale=12.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=1.6, noise_scale=4.0),
         ],
     ),
 
     # ------------------------------------------------------------------
-    # 电源波动 — 供电不稳定
-    # 特征：主轴转速和进给速率出现随机波动，电流不稳定
-    # 模式：瞬间注入（持续期间持续抖动）
+    # 刀具涂层剥落 — 负载阶跃后在新基线稳定
+    # 特征：spindle_load 出现一次阶跃式跳升（区别于缓慢爬升的磨损），
+    #       然后在新的高基线上稳定波动，不会继续爬升也不会恢复
+    # 场景：涂层质量问题或切削条件恶劣导致涂层突然失效
+    # 模式：瞬间注入（立即跳到新基线，持续维持）
     # ------------------------------------------------------------------
     FaultTypeDefinition(
-        id="power_fluctuation",
-        name="电源波动",
-        description="供电电压不稳定，主轴转速和进给速率出现随机波动",
-        category="electrical",
-        default_duration=90.0,
-        tags=["电源", "波动", "突发"],
+        id="coating_spalling",
+        name="刀具涂层剥落",
+        description="刀具涂层突然失效，spindle_load阶跃式跳升至1.5倍后在新基线稳定波动，区别于磨损的缓慢爬升和崩刃的瞬间冲高",
+        category="tool",
+        default_duration=600.0,
+        tags=["刀具", "涂层", "阶跃", "工况切换"],
         point_faults=[
-            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=300.0),
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             multiplier=1.5, noise_scale=3.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=5.0),
+                             multiplier=1.4, noise_scale=1.5),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 换刀装夹偏移 — 刀具伸出量异常导致负载整体偏高
+    # 特征：换刀后 spindle_load 整体偏高（1.4-1.6倍），波动规律正常，
+    #       不是空切（负载不低），不是磨损（不随时间爬升）
+    # 场景：刀具伸出量偏长、刀柄锥面未清洁、刀具型号装错
+    # 模式：瞬间注入（均值整体偏移，方差不变）
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="tool_offset_error",
+        name="换刀装夹偏移",
+        description="换刀后刀具伸出量或装夹位置异常，spindle_load整体偏高(1.4-1.6倍)，波动规律正常，不随时间变化，区别于磨损和空切",
+        category="tool",
+        default_duration=3600.0,
+        tags=["刀具", "装夹", "工况切换", "负载偏移"],
+        point_faults=[
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             multiplier=1.5, noise_scale=3.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             multiplier=1.4, noise_scale=1.5),
             PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
-                             multiplier=1.0, noise_scale=150.0),
+                             multiplier=1.0, noise_scale=5.0),
         ],
     ),
 ]
@@ -312,10 +346,10 @@ def apply(self, device: Any) -> None:
                 continue
             baseline = fault.baseline_values.get(pf.point, 0.0)
             if baseline == 0.0:
-                # 基线为 0 时用当前值兜底，避免乘法无效
-                try:
-                    baseline = float(device._point_values[pf.point]) or 1.0
-                except (TypeError, ValueError):
+                # 基线为0说明注入时设备处于换刀/停机状态
+                # target_value 模式可以直接执行（如崩刃归零、空切归空载）
+                # multiplier 模式跳过，避免在零基线上产生无意义的值
+                if pf.target_value is None:
                     continue
 
             device._point_values[pf.point] = self._compute_value(
diff --git a/protoforge/protocols/fanuc/server.py b/protoforge/protocols/fanuc/server.py
index b0d6f15..e711347 100644
--- a/protoforge/protocols/fanuc/server.py
+++ b/protoforge/protocols/fanuc/server.py
@@ -22,8 +22,11 @@ def __init__(self, points: list[dict]):
             "program": "O0001",
             "speed_override": 100,
             "feed_override": 100,
-            "spindle_speed": 3000,
-            "feed_rate": 500,
+            "spindle_speed": 0.0,
+            "feed_rate": 0.0,
+            "spindle_current": 0.0,
+            "spindle_load": 0.0,
+            "tool_number": 1,
             "absolute_pos": [0.0] * 5,
             "machine_pos": [0.0] * 5,
             "relative_pos": [0.0] * 5,
@@ -32,6 +35,32 @@ def __init__(self, points: list[dict]):
         for p in points:
             self._values[p["name"]] = p.get("fixed_value", 0)
 
+    def sync_from_point_values(self, point_values: dict[str, Any]) -> None:
+        """Copy values from DeviceInstance._point_values into _cnc_status so the protocol-layer data stays consistent with the generator."""
+        mapping = {  # point name -> _cnc_status key, or (key, index) for a slot inside a list-valued entry
+            "spindle_speed": "spindle_speed",
+            "feed_rate": "feed_rate",
+            "spindle_current": "spindle_current",
+            "spindle_load": "spindle_load",
+            "tool_number": "tool_number",
+            "alarm_status": "alarm",
+            "run_mode": "mode",
+            "execution_status": "execution",
+            "program_name": "program",
+            "x_absolute": ("absolute_pos", 0),
+            "y_absolute": ("absolute_pos", 1),
+            "z_absolute": ("absolute_pos", 2),
+        }
+        for point_name, status_key in mapping.items():
+            if point_name not in point_values:
+                continue  # points absent from the input leave _cnc_status untouched
+            val = point_values[point_name]
+            if isinstance(status_key, tuple):
+                key, idx = status_key
+                self._cnc_status[key][idx] = float(val)  # list slot receives the value coerced to float
+            else:
+                self._cnc_status[status_key] = val  # scalar entries are stored as-is, without conversion
+
     async def generate_value(self, point_config: dict[str, Any]) -> Any:
         name = point_config.get("name", "")
         return self._values.get(name, 0)
@@ -144,6 +173,12 @@ def _process_focas(self, data: bytes) -> bytes | None:
             return self._handle_cnc_rdspindlespd(req_id)
         elif func_id == 0x0111:
             return self._handle_cnc_rdfeed(req_id)
+        elif func_id == 0x0112:
+            return self._handle_cnc_rdspload(req_id)
+        elif func_id == 0x0113:
+            return self._handle_cnc_rdspmeter(req_id)
+        elif func_id == 0x0114:
+            return self._handle_cnc_toolnum(req_id)
         elif func_id == 0x0120:
             return self._handle_cnc_alarm(req_id)
         elif func_id == 0x0130:
@@ -247,7 +282,7 @@ def _handle_cnc_rdspindlespd(self, req_id: int) -> bytes:
 
     def _handle_cnc_rdfeed(self, req_id: int) -> bytes:
         behavior = next(iter(self._behaviors.values()), None)
-        feed = behavior._cnc_status.get("feed_rate", 500) if behavior else 500
+        feed = behavior._cnc_status.get("feed_rate", 0.0) if behavior else 0.0
 
         resp = bytearray()
         resp += struct.pack("<H", 0x0111)
@@ -256,6 +291,42 @@ def _handle_cnc_rdfeed(self, req_id: int) -> bytes:
         resp += struct.pack("<d", float(feed))
         return bytes(resp)
 
+    def _handle_cnc_rdspload(self, req_id: int) -> bytes:
+        """cnc_rdspload — spindle load ratio (%), a native FANUC FOCAS2 interface; returns the packed response bytes."""
+        behavior = next(iter(self._behaviors.values()), None)  # first registered behavior, or None when there is none
+        load = behavior._cnc_status.get("spindle_load", 0.0) if behavior else 0.0  # 0.0 when the key or the behavior is missing
+
+        resp = bytearray()
+        resp += struct.pack("<H", 0x0112)  # same function id that _process_focas dispatches to this handler
+        resp += struct.pack("<I", req_id)  # request id passed in by the caller
+        resp += struct.pack("<I", 0x00000000)  # presumably a "success" status word — TODO confirm against the client
+        resp += struct.pack("<d", float(load))  # payload: little-endian 8-byte double
+        return bytes(resp)
+
+    def _handle_cnc_rdspmeter(self, req_id: int) -> bytes:
+        """cnc_rdspmeter — spindle current (A); returns the packed response bytes."""
+        behavior = next(iter(self._behaviors.values()), None)  # first registered behavior, or None when there is none
+        current = behavior._cnc_status.get("spindle_current", 0.0) if behavior else 0.0  # 0.0 when the key or the behavior is missing
+
+        resp = bytearray()
+        resp += struct.pack("<H", 0x0113)  # same function id that _process_focas dispatches to this handler
+        resp += struct.pack("<I", req_id)  # request id passed in by the caller
+        resp += struct.pack("<I", 0x00000000)  # presumably a "success" status word — TODO confirm against the client
+        resp += struct.pack("<d", float(current))  # payload: little-endian 8-byte double
+        return bytes(resp)
+
+    def _handle_cnc_toolnum(self, req_id: int) -> bytes:
+        """cnc_toolnum — current tool number; returns the packed response bytes."""
+        behavior = next(iter(self._behaviors.values()), None)  # first registered behavior, or None when there is none
+        tool = behavior._cnc_status.get("tool_number", 1) if behavior else 1  # tool 1 when the key or the behavior is missing
+
+        resp = bytearray()
+        resp += struct.pack("<H", 0x0114)  # same function id that _process_focas dispatches to this handler
+        resp += struct.pack("<I", req_id)  # request id passed in by the caller
+        resp += struct.pack("<I", 0x00000000)  # presumably a "success" status word — TODO confirm against the client
+        resp += struct.pack("<H", int(tool))  # payload: little-endian unsigned 16-bit, unlike the double used for load/current
+        return bytes(resp)
+
     def _handle_cnc_alarm(self, req_id: int) -> bytes:
         behavior = next(iter(self._behaviors.values()), None)
         alarm = behavior._cnc_status.get("alarm", 0) if behavior else 0
@@ -311,6 +382,8 @@ async def read_points(self, device_id: str) -> list[PointValue]:
         config = self._device_configs.get(device_id)
         if not behavior or not config:
             return []
+        # 将 _point_values 同步到 _cnc_status，保证 FOCAS 协议响应与生成器数据一致
+        behavior.sync_from_point_values(behavior._values)
         now = time.time()
         return [PointValue(name=p.name, value=behavior.get_value(p.name), timestamp=now) for p in config.points]
 
diff --git a/protoforge/templates/fanuc/fanuc_0if_cnc.json b/protoforge/templates/fanuc/fanuc_0if_cnc.json
index 39437f3..0f0e362 100644
--- a/protoforge/templates/fanuc/fanuc_0if_cnc.json
+++ b/protoforge/templates/fanuc/fanuc_0if_cnc.json
@@ -62,77 +62,68 @@
             "address": "spindle_speed",
             "data_type": "float32",
             "unit": "RPM",
-            "description": "主轴转速",
+            "description": "主轴转速，按加工程序阶梯切换：换刀归零，粗铣2000，半精铣4000，精铣6000",
             "access": "r",
-            "generator_type": "sawtooth",
-            "min_value": 1000,
+            "generator_type": "script",
+            "min_value": 0,
             "max_value": 8000,
-            "generator_config": {"period": 120}
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: target = 0\nelif t < 120: target = 2000\nelif t < 150: target = 0\nelif t < 240: target = 4000\nelif t < 270: target = 0\nelse: target = 6000\nnoise = random.gauss(0, 8) if target > 0 else 0; result = round(max(0, target + noise), 1)"
+            }
         },
         {
             "name": "feed_rate",
             "address": "feed_rate",
             "data_type": "float32",
             "unit": "mm/min",
-            "description": "进给速度",
+            "description": "进给速度，与加工工步联动：换刀时为0，粗铣800，半精铣500，精铣300",
             "access": "r",
-            "generator_type": "sine",
-            "min_value": 100,
+            "generator_type": "script",
+            "min_value": 0,
             "max_value": 5000,
-            "generator_config": {"period": 60, "phase": 0.0}
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: target = 0\nelif t < 120: target = 800\nelif t < 150: target = 0\nelif t < 240: target = 500\nelif t < 270: target = 0\nelse: target = 300\nnoise = random.gauss(0, 5) if target > 0 else 0; result = round(max(0, target + noise), 1)"
+            }
         },
         {
             "name": "spindle_current",
             "address": "spindle_current",
             "data_type": "float32",
             "unit": "A",
-            "description": "主轴电流",
-            "access": "r",
-            "generator_type": "sine",
-            "min_value": 8.0,
-            "max_value": 32.0,
-            "generator_config": {"period": 120, "phase": 0.5}
-        },
-        {
-            "name": "vibration_x",
-            "address": "vibration_x",
-            "data_type": "float32",
-            "unit": "m/s²",
-            "description": "X轴振动加速度",
+            "description": "主轴电流，与工步联动：换刀时伺服保持电流约2.5A，粗铣18-24A，半精铣12-18A，精铣8-13A",
             "access": "r",
             "generator_type": "script",
-            "min_value": 0.1,
-            "max_value": 2.5,
+            "min_value": 0.0,
+            "max_value": 40.0,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 90); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)"
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: base, noise_std = 2.5, 0.3\nelif t < 120: base, noise_std = 21.0, 1.2\nelif t < 150: base, noise_std = 2.5, 0.3\nelif t < 240: base, noise_std = 15.0, 1.2\nelif t < 270: base, noise_std = 2.5, 0.3\nelse: base, noise_std = 10.0, 1.2\nresult = round(max(0, base + random.gauss(0, noise_std)), 2)"
             }
         },
         {
-            "name": "vibration_y",
-            "address": "vibration_y",
+            "name": "spindle_load",
+            "address": "spindle_load",
             "data_type": "float32",
-            "unit": "m/s²",
-            "description": "Y轴振动加速度",
+            "unit": "%",
+            "description": "主轴负载率(0-100%)，与工步联动：换刀时0%，粗铣45-60%，半精铣35-50%，精铣25-40%",
             "access": "r",
             "generator_type": "script",
-            "min_value": 0.1,
-            "max_value": 2.5,
+            "min_value": 0.0,
+            "max_value": 120.0,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 75 + 1.0); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)"
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: base = 0\nelif t < 120: base = 52.0\nelif t < 150: base = 0\nelif t < 240: base = 42.0\nelif t < 270: base = 0\nelse: base = 32.0\nnoise = random.gauss(0, 2.5) if base > 0 else 0; result = round(max(0, min(120.0, base + noise)), 2)"
             }
         },
         {
-            "name": "vibration_z",
-            "address": "vibration_z",
-            "data_type": "float32",
-            "unit": "m/s²",
-            "description": "Z轴振动加速度",
+            "name": "tool_number",
+            "address": "tool_number",
+            "data_type": "uint16",
+            "description": "当前刀号，与工步联动：换刀阶段切换，粗铣T01，半精铣T02，精铣T03",
             "access": "r",
             "generator_type": "script",
-            "min_value": 0.1,
-            "max_value": 3.0,
+            "min_value": 1,
+            "max_value": 12,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; base = 0.7 + 0.4 * math.sin(2 * math.pi * elapsed / 60 + 2.1); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)"
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 120: result = 1\nelif t < 240: result = 2\nelse: result = 3"
             }
         },
         {
diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json
index 43622cf..3a21815 100644
--- a/protoforge/templates/modbus/fanuc_cnc.json
+++ b/protoforge/templates/modbus/fanuc_cnc.json
@@ -11,36 +11,42 @@
             "address": "0",
             "data_type": "uint16",
             "unit": "RPM",
-            "description": "主轴实际转速",
+            "description": "主轴实际转速，与工步联动：换刀归零，粗铣2000，半精铣4000，精铣6000",
             "access": "r",
-            "generator_type": "sawtooth",
+            "generator_type": "script",
             "min_value": 0,
-            "max_value": 12000,
-            "generator_config": {"period": 180}
+            "max_value": 8000,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: target = 0\nelif t < 120: target = 2000\nelif t < 150: target = 0\nelif t < 240: target = 4000\nelif t < 270: target = 0\nelse: target = 6000\nnoise = random.gauss(0, 8) if target > 0 else 0; result = round(max(0, target + noise), 1)"
+            }
         },
         {
             "name": "feed_rate",
             "address": "1",
             "data_type": "float32",
             "unit": "mm/min",
-            "description": "实际进给速度",
+            "description": "实际进给速度，与工步联动：换刀时0，粗铣800，半精铣500，精铣300",
             "access": "r",
-            "generator_type": "sine",
-            "min_value": 200.0,
-            "max_value": 3000.0,
-            "generator_config": {"period": 90, "phase": 1.0}
+            "generator_type": "script",
+            "min_value": 0,
+            "max_value": 5000,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: target = 0\nelif t < 120: target = 800\nelif t < 150: target = 0\nelif t < 240: target = 500\nelif t < 270: target = 0\nelse: target = 300\nnoise = random.gauss(0, 5) if target > 0 else 0; result = round(max(0, target + noise), 1)"
+            }
         },
         {
             "name": "spindle_current",
             "address": "2",
             "data_type": "float32",
             "unit": "A",
-            "description": "主轴电流",
+            "description": "主轴电流，与工步联动：换刀伺服保持2.5A，粗铣21A，半精铣15A，精铣10A",
             "access": "r",
-            "generator_type": "sine",
-            "min_value": 8.0,
-            "max_value": 35.0,
-            "generator_config": {"period": 120, "phase": 2.0}
+            "generator_type": "script",
+            "min_value": 0.0,
+            "max_value": 40.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: base, noise_std = 2.5, 0.3\nelif t < 120: base, noise_std = 21.0, 1.2\nelif t < 150: base, noise_std = 2.5, 0.3\nelif t < 240: base, noise_std = 15.0, 1.2\nelif t < 270: base, noise_std = 2.5, 0.3\nelse: base, noise_std = 10.0, 1.2\nresult = round(max(0, base + random.gauss(0, noise_std)), 2)"
+            }
         },
         {
             "name": "vibration_x",
@@ -84,6 +90,20 @@
                 "script": "elapsed = context['elapsed']; base = 0.7 + 0.4 * math.sin(2 * math.pi * elapsed / 58 + 2.8); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)"
             }
         },
+        {
+            "name": "spindle_load",
+            "address": "29",
+            "data_type": "float32",
+            "unit": "%",
+            "description": "主轴负载率(0-100%)，与工步联动：换刀时0%，粗铣45-60%，半精铣35-50%，精铣25-40%",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.0,
+            "max_value": 120.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: base = 0\nelif t < 120: base = 52.0\nelif t < 150: base = 0\nelif t < 240: base = 42.0\nelif t < 270: base = 0\nelse: base = 32.0\nnoise = random.gauss(0, 2.5) if base > 0 else 0; result = round(max(0, min(120.0, base + noise)), 2)"
+            }
+        },
         {
             "name": "spindle_override",
             "address": "3",
@@ -126,10 +146,14 @@
             "name": "tool_no",
             "address": "7",
             "data_type": "uint16",
-            "description": "当前刀具号T",
+            "description": "当前刀具号，与工步联动：粗铣T1，半精铣T2，精铣T3",
             "access": "r",
-            "generator_type": "fixed",
-            "fixed_value": 3
+            "generator_type": "script",
+            "min_value": 1,
+            "max_value": 12,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 120: result = 1\nelif t < 240: result = 2\nelse: result = 3"
+            }
         },
         {
             "name": "abs_x",
diff --git a/web/src/views/Devices.vue b/web/src/views/Devices.vue
index 65e0535..3a33068 100644
--- a/web/src/views/Devices.vue
+++ b/web/src/views/Devices.vue
@@ -364,7 +364,7 @@ const faultIntensityLabel = computed(() => {
 })
 
 function faultCategoryLabel(category) {
-  const map = { mechanical: '机械', thermal: '热', electrical: '电气', process: '工艺' }
+  const map = { mechanical: '机械', thermal: '热', electrical: '电气', process: '工艺', tool: '刀具' }
   return map[category] || category
 }
 

From 58b2e3685ec83a1b4520874510da9d7ce0542ef4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 27 May 2026 09:36:02 +0800
Subject: [PATCH 33/36] fix(scene): split Fanuc CNC Modbus template into per-station scenes

---
 protoforge/templates/modbus/fanuc_cnc.json    |  24 +-
 .../templates/modbus/fanuc_cnc_finish.json    | 259 ++++++++++++++++++
 .../modbus/fanuc_cnc_semi_finish.json         | 259 ++++++++++++++++++
 3 files changed, 529 insertions(+), 13 deletions(-)
 create mode 100644 protoforge/templates/modbus/fanuc_cnc_finish.json
 create mode 100644 protoforge/templates/modbus/fanuc_cnc_semi_finish.json

diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json
index 3a21815..3ff30df 100644
--- a/protoforge/templates/modbus/fanuc_cnc.json
+++ b/protoforge/templates/modbus/fanuc_cnc.json
@@ -11,13 +11,13 @@
             "address": "0",
             "data_type": "uint16",
             "unit": "RPM",
-            "description": "主轴实际转速，与工步联动：换刀归零，粗铣2000，半精铣4000，精铣6000",
+            "description": "主轴实际转速，粗加工工位：空闲为0，启动后稳定在约2000RPM",
             "access": "r",
             "generator_type": "script",
             "min_value": 0,
             "max_value": 8000,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: target = 0\nelif t < 120: target = 2000\nelif t < 150: target = 0\nelif t < 240: target = 4000\nelif t < 270: target = 0\nelse: target = 6000\nnoise = random.gauss(0, 8) if target > 0 else 0; result = round(max(0, target + noise), 1)"
+                "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 20:\n    target = 0\nelif t < 30:\n    target = 2000 * ((t - 20) / 10.0)\nelif t < 155:\n    target = 2000\nelif t < 165:\n    target = 2000 * (1 - ((t - 155) / 10.0))\nelse:\n    target = 0\nnoise = random.gauss(0, 15) if target > 100 else 0\nresult = int(round(max(0, min(8000, target + noise))))"
             }
         },
         {
@@ -25,13 +25,13 @@
             "address": "1",
             "data_type": "float32",
             "unit": "mm/min",
-            "description": "实际进给速度，与工步联动：换刀时0，粗铣800，半精铣500，精铣300",
+            "description": "粗加工实际进给速度：切入阶段中低速，稳定粗加工约800mm/min，空闲和主轴启动阶段为0",
             "access": "r",
             "generator_type": "script",
             "min_value": 0,
             "max_value": 5000,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: target = 0\nelif t < 120: target = 800\nelif t < 150: target = 0\nelif t < 240: target = 500\nelif t < 270: target = 0\nelse: target = 300\nnoise = random.gauss(0, 5) if target > 0 else 0; result = round(max(0, target + noise), 1)"
+                "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 30:\n    target = 0\nelif t < 40:\n    target = 500 + 300 * ((t - 30) / 10.0)\nelif t < 140:\n    target = 800\nelif t < 155:\n    target = 400\nelse:\n    target = 0\nnoise = random.gauss(0, 25) if target > 0 else 0\nresult = round(max(0, min(5000, target + noise)), 1)"
             }
         },
         {
@@ -39,13 +39,13 @@
             "address": "2",
             "data_type": "float32",
             "unit": "A",
-            "description": "主轴电流，与工步联动：换刀伺服保持2.5A，粗铣21A，半精铣15A，精铣10A",
+            "description": "粗加工主轴电流：空闲低电流，启动阶段中等电流，切入瞬间升高，稳定粗加工约19~24A",
             "access": "r",
             "generator_type": "script",
             "min_value": 0.0,
             "max_value": 40.0,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: base, noise_std = 2.5, 0.3\nelif t < 120: base, noise_std = 21.0, 1.2\nelif t < 150: base, noise_std = 2.5, 0.3\nelif t < 240: base, noise_std = 15.0, 1.2\nelif t < 270: base, noise_std = 2.5, 0.3\nelse: base, noise_std = 10.0, 1.2\nresult = round(max(0, base + random.gauss(0, noise_std)), 2)"
+                "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 20:\n    base, noise_std = 2.2, 0.25\nelif t < 30:\n    base, noise_std = 6.0, 0.8\nelif t < 40:\n    base, noise_std = 23.5, 1.5\nelif t < 140:\n    base, noise_std = 21.5, 1.3\nelif t < 155:\n    base, noise_std = 6.0, 0.8\nelif t < 165:\n    base, noise_std = 4.0, 0.5\nelse:\n    base, noise_std = 2.2, 0.25\nresult = round(max(0, min(40.0, base + random.gauss(0, noise_std))), 2)"
             }
         },
         {
@@ -95,13 +95,13 @@
             "address": "29",
             "data_type": "float32",
             "unit": "%",
-            "description": "主轴负载率(0-100%)，与工步联动：换刀时0%，粗铣45-60%，半精铣35-50%，精铣25-40%",
+            "description": "粗加工主轴负载率，常态0~100%，短时允许到120%；粗加工稳定阶段约45~65%，切入瞬间可能更高",
             "access": "r",
             "generator_type": "script",
             "min_value": 0.0,
             "max_value": 120.0,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 30: base = 0\nelif t < 120: base = 52.0\nelif t < 150: base = 0\nelif t < 240: base = 42.0\nelif t < 270: base = 0\nelse: base = 32.0\nnoise = random.gauss(0, 2.5) if base > 0 else 0; result = round(max(0, min(120.0, base + noise)), 2)"
+                "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 20:\n    base, noise_std = 0.0, 0.0\nelif t < 30:\n    base, noise_std = 8.0, 2.0\nelif t < 40:\n    base, noise_std = 65.0, 4.0\nelif t < 140:\n    base, noise_std = 55.0, 4.0\nelif t < 155:\n    base, noise_std = 8.0, 2.0\nelif t < 165:\n    base, noise_std = 3.0, 1.0\nelse:\n    base, noise_std = 0.0, 0.0\nnoise = random.gauss(0, noise_std) if noise_std > 0 else 0\nresult = round(max(0, min(120.0, base + noise)), 2)"
             }
         },
         {
@@ -146,14 +146,12 @@
             "name": "tool_no",
             "address": "7",
             "data_type": "uint16",
-            "description": "当前刀具号，与工步联动：粗铣T1，半精铣T2，精铣T3",
+            "description": "当前刀具号，粗加工工位固定使用T1",
             "access": "r",
-            "generator_type": "script",
+            "generator_type": "fixed",
             "min_value": 1,
             "max_value": 12,
-            "generator_config": {
-                "script": "elapsed = context['elapsed']; t = elapsed % 360\nif t < 120: result = 1\nelif t < 240: result = 2\nelse: result = 3"
-            }
+            "fixed_value": 1
         },
         {
             "name": "abs_x",
diff --git a/protoforge/templates/modbus/fanuc_cnc_finish.json b/protoforge/templates/modbus/fanuc_cnc_finish.json
new file mode 100644
index 0000000..1243e59
--- /dev/null
+++ b/protoforge/templates/modbus/fanuc_cnc_finish.json
@@ -0,0 +1,259 @@
+{
+    "id": "modbus_fanuc_cnc_finish",
+    "name": "Fanuc CNC 精铣工位",
+    "protocol": "modbus_tcp",
+    "description": "FANUC Series 0i-MF数控系统，精铣工位：主轴约6000RPM，进给约300mm/min，切深小，表面粗糙度Ra0.8~1.6，要求主轴稳定性高",
+    "manufacturer": "FANUC",
+    "model": "0i-MF",
+    "points": [
+        {
+            "name": "spindle_speed",
+            "address": "0",
+            "data_type": "uint16",
+            "unit": "RPM",
+            "description": "主轴实际转速，精铣工位：空闲为0，启动后稳定在约6000RPM，精铣对转速稳定性要求高，波动小",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0,
+            "max_value": 8000,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 120\nif t < 12:\n    target = 0\nelif t < 28:\n    target = 6000 * ((t - 12) / 16.0)\nelif t < 95:\n    target = 6000\nelif t < 110:\n    target = 6000 * (1 - ((t - 95) / 15.0))\nelse:\n    target = 0\nnoise = random.gauss(0, 8) if target > 100 else 0\nresult = int(round(max(0, min(8000, target + noise))))"
+            }
+        },
+        {
+            "name": "feed_rate",
+            "address": "1",
+            "data_type": "float32",
+            "unit": "mm/min",
+            "description": "精铣实际进给速度：切入阶段低速，稳定精铣约300mm/min，精铣进给慢且稳定，波动小",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0,
+            "max_value": 5000,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 120\nif t < 28:\n    target = 0\nelif t < 36:\n    target = 150 + 150 * ((t - 28) / 8.0)\nelif t < 90:\n    target = 300\nelif t < 100:\n    target = 150\nelse:\n    target = 0\nnoise = random.gauss(0, 6) if target > 0 else 0\nresult = round(max(0, min(5000, target + noise)), 1)"
+            }
+        },
+        {
+            "name": "spindle_current",
+            "address": "2",
+            "data_type": "float32",
+            "unit": "A",
+            "description": "精铣主轴电流：空闲约2A，启动约4A，切入峰值约11A，稳定精铣约7~10A，精铣切深小电流低且稳定",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.0,
+            "max_value": 40.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 120\nif t < 12:\n    base, noise_std = 2.2, 0.2\nelif t < 28:\n    base, noise_std = 4.0, 0.4\nelif t < 36:\n    base, noise_std = 11.0, 0.8\nelif t < 90:\n    base, noise_std = 8.5, 0.5\nelif t < 100:\n    base, noise_std = 4.0, 0.4\nelif t < 110:\n    base, noise_std = 3.0, 0.3\nelse:\n    base, noise_std = 2.2, 0.2\nresult = round(max(0, min(40.0, base + random.gauss(0, noise_std))), 2)"
+            }
+        },
+        {
+            "name": "vibration_x",
+            "address": "23",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "X轴振动加速度，精铣切深小但转速高，振动幅值小、频率高",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.02,
+            "max_value": 1.5,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 120\nif 28 <= t < 100:\n    base = 0.18 + 0.1 * math.sin(2 * math.pi * elapsed / 28 + 0.8)\nelse:\n    base = 0.05\nnoise = random.uniform(-0.04, 0.04)\nresult = round(max(0.02, base + noise), 3)"
+            }
+        },
+        {
+            "name": "vibration_y",
+            "address": "25",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Y轴振动加速度，精铣切深小但转速高，振动幅值小、频率高",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.02,
+            "max_value": 1.5,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 120\nif 28 <= t < 100:\n    base = 0.18 + 0.1 * math.sin(2 * math.pi * elapsed / 22 + 1.8)\nelse:\n    base = 0.05\nnoise = random.uniform(-0.04, 0.04)\nresult = round(max(0.02, base + noise), 3)"
+            }
+        },
+        {
+            "name": "vibration_z",
+            "address": "27",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Z轴振动加速度，精铣切深小但转速高，振动幅值小、频率高",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.02,
+            "max_value": 1.5,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 120\nif 28 <= t < 100:\n    base = 0.2 + 0.12 * math.sin(2 * math.pi * elapsed / 18 + 2.8)\nelse:\n    base = 0.06\nnoise = random.uniform(-0.05, 0.05)\nresult = round(max(0.02, base + noise), 3)"
+            }
+        },
+        {
+            "name": "spindle_load",
+            "address": "29",
+            "data_type": "float32",
+            "unit": "%",
+            "description": "精铣主轴负载率，稳定阶段约15~28%，切入瞬间约32%，精铣切深小负载低且稳定，短时允许到120%",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.0,
+            "max_value": 120.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 120\nif t < 12:\n    base, noise_std = 0.0, 0.0\nelif t < 28:\n    base, noise_std = 4.0, 1.0\nelif t < 36:\n    base, noise_std = 32.0, 2.0\nelif t < 90:\n    base, noise_std = 22.0, 1.5\nelif t < 100:\n    base, noise_std = 4.0, 1.0\nelif t < 110:\n    base, noise_std = 1.5, 0.5\nelse:\n    base, noise_std = 0.0, 0.0\nnoise = random.gauss(0, noise_std) if noise_std > 0 else 0\nresult = round(max(0, min(120.0, base + noise)), 2)"
+            }
+        },
+        {
+            "name": "spindle_override",
+            "address": "3",
+            "data_type": "uint16",
+            "unit": "%",
+            "description": "主轴倍率",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 100
+        },
+        {
+            "name": "feed_override",
+            "address": "4",
+            "data_type": "uint16",
+            "unit": "%",
+            "description": "进给倍率",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 100
+        },
+        {
+            "name": "running_mode",
+            "address": "5",
+            "data_type": "uint16",
+            "description": "运行模式(1=MDI 2=AUTO 3=JOG 4=EDIT 5=HANDLE)",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 2
+        },
+        {
+            "name": "exec_status",
+            "address": "6",
+            "data_type": "uint16",
+            "description": "执行状态(0=空闲 1=运行 2=暂停 3=M00停 4=M01停)",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 1
+        },
+        {
+            "name": "tool_no",
+            "address": "7",
+            "data_type": "uint16",
+            "description": "当前刀具号，精铣工位固定使用T3",
+            "access": "r",
+            "generator_type": "fixed",
+            "min_value": 1,
+            "max_value": 12,
+            "fixed_value": 3
+        },
+        {
+            "name": "abs_x",
+            "address": "8",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "X轴绝对坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -800.0,
+            "max_value": 800.0
+        },
+        {
+            "name": "abs_y",
+            "address": "10",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "Y轴绝对坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -500.0,
+            "max_value": 500.0
+        },
+        {
+            "name": "abs_z",
+            "address": "12",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "Z轴绝对坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -600.0,
+            "max_value": 200.0
+        },
+        {
+            "name": "mach_x",
+            "address": "14",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "X轴机械坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -800.0,
+            "max_value": 800.0
+        },
+        {
+            "name": "mach_y",
+            "address": "16",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "Y轴机械坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -500.0,
+            "max_value": 500.0
+        },
+        {
+            "name": "mach_z",
+            "address": "18",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "Z轴机械坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -600.0,
+            "max_value": 200.0
+        },
+        {
+            "name": "part_count",
+            "address": "20",
+            "data_type": "uint16",
+            "description": "加工计数",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0,
+            "max_value": 99999,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; result = min(int(elapsed / 120), 99999)"
+            }
+        },
+        {
+            "name": "cycle_time",
+            "address": "21",
+            "data_type": "uint16",
+            "unit": "s",
+            "description": "循环时间，精铣单件约120s",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 120
+        },
+        {
+            "name": "alarm_no",
+            "address": "22",
+            "data_type": "uint16",
+            "description": "报警号(0=无报警)",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 0
+        }
+    ],
+    "protocol_config": {
+        "slave_id": 8
+    },
+    "tags": ["CNC", "FANUC", "数控", "机床", "发那科", "精铣"]
+}
diff --git a/protoforge/templates/modbus/fanuc_cnc_semi_finish.json b/protoforge/templates/modbus/fanuc_cnc_semi_finish.json
new file mode 100644
index 0000000..9efc897
--- /dev/null
+++ b/protoforge/templates/modbus/fanuc_cnc_semi_finish.json
@@ -0,0 +1,259 @@
+{
+    "id": "modbus_fanuc_cnc_semi_finish",
+    "name": "Fanuc CNC 半精铣工位",
+    "protocol": "modbus_tcp",
+    "description": "FANUC Series 0i-MF数控系统，半精铣工位：主轴约4000RPM，进给约500mm/min，切深中等，表面粗糙度Ra3.2~6.3",
+    "manufacturer": "FANUC",
+    "model": "0i-MF",
+    "points": [
+        {
+            "name": "spindle_speed",
+            "address": "0",
+            "data_type": "uint16",
+            "unit": "RPM",
+            "description": "主轴实际转速，半精铣工位：空闲为0，启动后稳定在约4000RPM",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0,
+            "max_value": 8000,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 150\nif t < 15:\n    target = 0\nelif t < 28:\n    target = 4000 * ((t - 15) / 13.0)\nelif t < 120:\n    target = 4000\nelif t < 133:\n    target = 4000 * (1 - ((t - 120) / 13.0))\nelse:\n    target = 0\nnoise = random.gauss(0, 20) if target > 100 else 0\nresult = int(round(max(0, min(8000, target + noise))))"
+            }
+        },
+        {
+            "name": "feed_rate",
+            "address": "1",
+            "data_type": "float32",
+            "unit": "mm/min",
+            "description": "半精铣实际进给速度：切入阶段低速，稳定半精铣约500mm/min，退刀阶段降速",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0,
+            "max_value": 5000,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 150\nif t < 28:\n    target = 0\nelif t < 38:\n    target = 250 + 250 * ((t - 28) / 10.0)\nelif t < 115:\n    target = 500\nelif t < 128:\n    target = 250\nelse:\n    target = 0\nnoise = random.gauss(0, 15) if target > 0 else 0\nresult = round(max(0, min(5000, target + noise)), 1)"
+            }
+        },
+        {
+            "name": "spindle_current",
+            "address": "2",
+            "data_type": "float32",
+            "unit": "A",
+            "description": "半精铣主轴电流：空闲约2A，启动约5A，切入峰值约17A，稳定半精铣约13~16A",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.0,
+            "max_value": 40.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 150\nif t < 15:\n    base, noise_std = 2.2, 0.25\nelif t < 28:\n    base, noise_std = 5.0, 0.6\nelif t < 38:\n    base, noise_std = 17.0, 1.2\nelif t < 115:\n    base, noise_std = 14.5, 0.9\nelif t < 128:\n    base, noise_std = 5.0, 0.6\nelif t < 138:\n    base, noise_std = 3.5, 0.4\nelse:\n    base, noise_std = 2.2, 0.25\nresult = round(max(0, min(40.0, base + random.gauss(0, noise_std))), 2)"
+            }
+        },
+        {
+            "name": "vibration_x",
+            "address": "23",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "X轴振动加速度，半精铣切深中等，振动幅值中等",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.05,
+            "max_value": 2.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 150\nif 28 <= t < 128:\n    base = 0.35 + 0.2 * math.sin(2 * math.pi * elapsed / 55 + 0.8)\nelse:\n    base = 0.08\nnoise = random.uniform(-0.08, 0.08)\nresult = round(max(0.05, base + noise), 3)"
+            }
+        },
+        {
+            "name": "vibration_y",
+            "address": "25",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Y轴振动加速度，半精铣切深中等，振动幅值中等",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.05,
+            "max_value": 2.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 150\nif 28 <= t < 128:\n    base = 0.35 + 0.2 * math.sin(2 * math.pi * elapsed / 45 + 1.8)\nelse:\n    base = 0.08\nnoise = random.uniform(-0.08, 0.08)\nresult = round(max(0.05, base + noise), 3)"
+            }
+        },
+        {
+            "name": "vibration_z",
+            "address": "27",
+            "data_type": "float32",
+            "unit": "m/s²",
+            "description": "Z轴振动加速度，半精铣切深中等，振动幅值中等",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.05,
+            "max_value": 2.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 150\nif 28 <= t < 128:\n    base = 0.4 + 0.22 * math.sin(2 * math.pi * elapsed / 38 + 2.8)\nelse:\n    base = 0.1\nnoise = random.uniform(-0.1, 0.1)\nresult = round(max(0.05, base + noise), 3)"
+            }
+        },
+        {
+            "name": "spindle_load",
+            "address": "29",
+            "data_type": "float32",
+            "unit": "%",
+            "description": "半精铣主轴负载率，稳定阶段约30~45%，切入瞬间约48%，短时允许到120%",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0.0,
+            "max_value": 120.0,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; t = elapsed % 150\nif t < 15:\n    base, noise_std = 0.0, 0.0\nelif t < 28:\n    base, noise_std = 6.0, 1.5\nelif t < 38:\n    base, noise_std = 48.0, 3.0\nelif t < 115:\n    base, noise_std = 38.0, 2.5\nelif t < 128:\n    base, noise_std = 6.0, 1.5\nelif t < 138:\n    base, noise_std = 2.5, 0.8\nelse:\n    base, noise_std = 0.0, 0.0\nnoise = random.gauss(0, noise_std) if noise_std > 0 else 0\nresult = round(max(0, min(120.0, base + noise)), 2)"
+            }
+        },
+        {
+            "name": "spindle_override",
+            "address": "3",
+            "data_type": "uint16",
+            "unit": "%",
+            "description": "主轴倍率",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 100
+        },
+        {
+            "name": "feed_override",
+            "address": "4",
+            "data_type": "uint16",
+            "unit": "%",
+            "description": "进给倍率",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 100
+        },
+        {
+            "name": "running_mode",
+            "address": "5",
+            "data_type": "uint16",
+            "description": "运行模式(1=MDI 2=AUTO 3=JOG 4=EDIT 5=HANDLE)",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 2
+        },
+        {
+            "name": "exec_status",
+            "address": "6",
+            "data_type": "uint16",
+            "description": "执行状态(0=空闲 1=运行 2=暂停 3=M00停 4=M01停)",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 1
+        },
+        {
+            "name": "tool_no",
+            "address": "7",
+            "data_type": "uint16",
+            "description": "当前刀具号，半精铣工位固定使用T2",
+            "access": "r",
+            "generator_type": "fixed",
+            "min_value": 1,
+            "max_value": 12,
+            "fixed_value": 2
+        },
+        {
+            "name": "abs_x",
+            "address": "8",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "X轴绝对坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -800.0,
+            "max_value": 800.0
+        },
+        {
+            "name": "abs_y",
+            "address": "10",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "Y轴绝对坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -500.0,
+            "max_value": 500.0
+        },
+        {
+            "name": "abs_z",
+            "address": "12",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "Z轴绝对坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -600.0,
+            "max_value": 200.0
+        },
+        {
+            "name": "mach_x",
+            "address": "14",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "X轴机械坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -800.0,
+            "max_value": 800.0
+        },
+        {
+            "name": "mach_y",
+            "address": "16",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "Y轴机械坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -500.0,
+            "max_value": 500.0
+        },
+        {
+            "name": "mach_z",
+            "address": "18",
+            "data_type": "float32",
+            "unit": "mm",
+            "description": "Z轴机械坐标",
+            "access": "r",
+            "generator_type": "random",
+            "min_value": -600.0,
+            "max_value": 200.0
+        },
+        {
+            "name": "part_count",
+            "address": "20",
+            "data_type": "uint16",
+            "description": "加工计数",
+            "access": "r",
+            "generator_type": "script",
+            "min_value": 0,
+            "max_value": 99999,
+            "generator_config": {
+                "script": "elapsed = context['elapsed']; result = min(int(elapsed / 150), 99999)"
+            }
+        },
+        {
+            "name": "cycle_time",
+            "address": "21",
+            "data_type": "uint16",
+            "unit": "s",
+            "description": "循环时间，半精铣单件约150s",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 150
+        },
+        {
+            "name": "alarm_no",
+            "address": "22",
+            "data_type": "uint16",
+            "description": "报警号(0=无报警)",
+            "access": "r",
+            "generator_type": "fixed",
+            "fixed_value": 0
+        }
+    ],
+    "protocol_config": {
+        "slave_id": 7
+    },
+    "tags": ["CNC", "FANUC", "数控", "机床", "发那科", "半精铣"]
+}

From 736baadcb2c8073cff474fcad2e2dd6c6662abb9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Wed, 27 May 2026 13:53:54 +0800
Subject: [PATCH 34/36] fix(scene): update scene cnc

---
 protoforge/templates/modbus/fanuc_cnc.json | 34 +++++++++++-----------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/protoforge/templates/modbus/fanuc_cnc.json b/protoforge/templates/modbus/fanuc_cnc.json
index 3ff30df..ebbc8f5 100644
--- a/protoforge/templates/modbus/fanuc_cnc.json
+++ b/protoforge/templates/modbus/fanuc_cnc.json
@@ -11,13 +11,13 @@
             "address": "0",
             "data_type": "uint16",
             "unit": "RPM",
-            "description": "主轴实际转速，粗加工工位：空闲为0，启动后稳定在约2000RPM",
+            "description": "主轴实际转速，粗加工工位：空闲为0，启动后稳定在约2000RPM，每件节拍有随机差异",
             "access": "r",
             "generator_type": "script",
             "min_value": 0,
             "max_value": 8000,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 20:\n    target = 0\nelif t < 30:\n    target = 2000 * ((t - 20) / 10.0)\nelif t < 155:\n    target = 2000\nelif t < 165:\n    target = 2000 * (1 - ((t - 155) / 10.0))\nelse:\n    target = 0\nnoise = random.gauss(0, 15) if target > 100 else 0\nresult = int(round(max(0, min(8000, target + noise))))"
+                "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'spd_{cycle_idx}'\nif ck not in cache:\n    rng = random.Random(cycle_idx * 7 + 1)\n    cache[ck] = {'dur': rng.randint(-8, 10), 'spd': rng.uniform(-40, 40)}\noff = cache[ck]\nperiod = 180 + off['dur']; t = elapsed % period\nramp = 8; cut_end = period - 25 - ramp\nif t < 20:\n    target = 0\nelif t < 20 + ramp:\n    target = (2000 + off['spd']) * ((t - 20) / ramp)\nelif t < cut_end:\n    target = 2000 + off['spd']\nelif t < cut_end + ramp:\n    target = (2000 + off['spd']) * (1 - (t - cut_end) / ramp)\nelse:\n    target = 0\nnoise = random.gauss(0, 12) if target > 100 else 0\nresult = int(round(max(0, min(8000, target + noise))))"
             }
         },
         {
@@ -25,13 +25,13 @@
             "address": "1",
             "data_type": "float32",
             "unit": "mm/min",
-            "description": "粗加工实际进给速度：切入阶段中低速，稳定粗加工约800mm/min，空闲和主轴启动阶段为0",
+            "description": "粗加工实际进给速度：切入爬升，稳定粗铣约800mm/min含拐角减速扰动，退出降速，空闲为0",
             "access": "r",
             "generator_type": "script",
             "min_value": 0,
             "max_value": 5000,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 30:\n    target = 0\nelif t < 40:\n    target = 500 + 300 * ((t - 30) / 10.0)\nelif t < 140:\n    target = 800\nelif t < 155:\n    target = 400\nelse:\n    target = 0\nnoise = random.gauss(0, 25) if target > 0 else 0\nresult = round(max(0, min(5000, target + noise)), 1)"
+                "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'feed_{cycle_idx}'\nif ck not in cache:\n    rng = random.Random(cycle_idx * 7 + 2)\n    cache[ck] = {'dur': rng.randint(-8, 10), 'feed': rng.uniform(-30, 50)}\noff = cache[ck]\nperiod = 180 + off['dur']; t = elapsed % period\ncut_end = period - 25 - 8\nif t < 30:\n    target = 0\nelif t < 40:\n    target = 400 + 400 * ((t - 30) / 10.0)\nelif t < cut_end:\n    base_feed = 800 + off['feed']\n    corner = 80 * math.sin(2 * math.pi * elapsed / 23.7) * max(0, math.sin(2 * math.pi * elapsed / 41.3))\n    target = base_feed + corner\nelif t < cut_end + 12:\n    target = 350\nelse:\n    target = 0\nnoise = random.gauss(0, 18) if target > 0 else 0\nresult = round(max(0, min(5000, target + noise)), 1)"
             }
         },
         {
@@ -39,13 +39,13 @@
             "address": "2",
             "data_type": "float32",
             "unit": "A",
-            "description": "粗加工主轴电流：空闲低电流，启动阶段中等电流，切入瞬间升高，稳定粗加工约19~24A",
+            "description": "粗加工主轴电流：空闲约2A，启动约6A，切入峰值约23A，稳定粗铣约19~24A含细碎波动，退刀降低",
             "access": "r",
             "generator_type": "script",
             "min_value": 0.0,
             "max_value": 40.0,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 20:\n    base, noise_std = 2.2, 0.25\nelif t < 30:\n    base, noise_std = 6.0, 0.8\nelif t < 40:\n    base, noise_std = 23.5, 1.5\nelif t < 140:\n    base, noise_std = 21.5, 1.3\nelif t < 155:\n    base, noise_std = 6.0, 0.8\nelif t < 165:\n    base, noise_std = 4.0, 0.5\nelse:\n    base, noise_std = 2.2, 0.25\nresult = round(max(0, min(40.0, base + random.gauss(0, noise_std))), 2)"
+                "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'cur_{cycle_idx}'\nif ck not in cache:\n    rng = random.Random(cycle_idx * 7 + 3)\n    cache[ck] = {'dur': rng.randint(-8, 10), 'cur': rng.uniform(-1.5, 2.0)}\noff = cache[ck]\nperiod = 180 + off['dur']; t = elapsed % period\ncut_end = period - 25 - 8\nif t < 20:\n    base, noise_std = 2.2, 0.2\nelif t < 20 + 8:\n    base, noise_std = 6.0, 0.7\nelif t < 40:\n    base, noise_std = 23.5 + off['cur'], 1.8\nelif t < cut_end:\n    drift = 0.8 * math.sin(2 * math.pi * elapsed / 37.4) + 0.5 * math.sin(2 * math.pi * elapsed / 19.1)\n    base, noise_std = 21.5 + off['cur'] + drift, 1.0\nelif t < cut_end + 12:\n    base, noise_std = 6.0, 0.7\nelif t < cut_end + 20:\n    base, noise_std = 3.5, 0.4\nelse:\n    base, noise_std = 2.2, 0.2\nresult = round(max(0, min(40.0, base + random.gauss(0, noise_std))), 2)"
             }
         },
         {
@@ -53,13 +53,13 @@
             "address": "23",
             "data_type": "float32",
             "unit": "m/s²",
-            "description": "X轴振动加速度",
+            "description": "X轴振动加速度，粗铣切削时约0.4~0.9m/s²，空闲时接近0",
             "access": "r",
             "generator_type": "script",
-            "min_value": 0.1,
+            "min_value": 0.0,
             "max_value": 2.5,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 85 + 0.8); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)"
+                "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'vx_{cycle_idx}'\nif ck not in cache:\n    rng = random.Random(cycle_idx * 7 + 4)\n    cache[ck] = {'dur': rng.randint(-8, 10)}\nperiod = 180 + cache[ck]['dur']; t = elapsed % period\ncut_end = period - 25 - 8\ncutting = 40 <= t < cut_end\nif cutting:\n    base = 0.55 + 0.22 * math.sin(2 * math.pi * elapsed / 85 + 0.8) + 0.1 * math.sin(2 * math.pi * elapsed / 17.3)\n    noise = random.uniform(-0.12, 0.12)\nelse:\n    base = 0.04\n    noise = random.uniform(-0.02, 0.02)\nresult = round(max(0.0, base + noise), 3)"
             }
         },
         {
@@ -67,13 +67,13 @@
             "address": "25",
             "data_type": "float32",
             "unit": "m/s²",
-            "description": "Y轴振动加速度",
+            "description": "Y轴振动加速度，粗铣切削时约0.4~0.9m/s²，空闲时接近0",
             "access": "r",
             "generator_type": "script",
-            "min_value": 0.1,
+            "min_value": 0.0,
             "max_value": 2.5,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; base = 0.5 + 0.3 * math.sin(2 * math.pi * elapsed / 70 + 1.8); noise = random.uniform(-0.15, 0.15); result = round(max(0.1, base + noise), 3)"
+                "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'vy_{cycle_idx}'\nif ck not in cache:\n    rng = random.Random(cycle_idx * 7 + 5)\n    cache[ck] = {'dur': rng.randint(-8, 10)}\nperiod = 180 + cache[ck]['dur']; t = elapsed % period\ncut_end = period - 25 - 8\ncutting = 40 <= t < cut_end\nif cutting:\n    base = 0.5 + 0.2 * math.sin(2 * math.pi * elapsed / 70 + 1.8) + 0.08 * math.sin(2 * math.pi * elapsed / 13.7)\n    noise = random.uniform(-0.1, 0.1)\nelse:\n    base = 0.04\n    noise = random.uniform(-0.02, 0.02)\nresult = round(max(0.0, base + noise), 3)"
             }
         },
         {
@@ -81,13 +81,13 @@
             "address": "27",
             "data_type": "float32",
             "unit": "m/s²",
-            "description": "Z轴振动加速度",
+            "description": "Z轴振动加速度，粗铣切削时约0.5~1.1m/s²（Z向切深方向幅值略大），空闲时接近0",
             "access": "r",
             "generator_type": "script",
-            "min_value": 0.1,
+            "min_value": 0.0,
             "max_value": 3.0,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; base = 0.7 + 0.4 * math.sin(2 * math.pi * elapsed / 58 + 2.8); noise = random.uniform(-0.2, 0.2); result = round(max(0.1, base + noise), 3)"
+                "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'vz_{cycle_idx}'\nif ck not in cache:\n    rng = random.Random(cycle_idx * 7 + 6)\n    cache[ck] = {'dur': rng.randint(-8, 10)}\nperiod = 180 + cache[ck]['dur']; t = elapsed % period\ncut_end = period - 25 - 8\ncutting = 40 <= t < cut_end\nif cutting:\n    base = 0.65 + 0.28 * math.sin(2 * math.pi * elapsed / 58 + 2.8) + 0.12 * math.sin(2 * math.pi * elapsed / 11.2)\n    noise = random.uniform(-0.15, 0.15)\nelse:\n    base = 0.05\n    noise = random.uniform(-0.02, 0.02)\nresult = round(max(0.0, base + noise), 3)"
             }
         },
         {
@@ -95,13 +95,13 @@
             "address": "29",
             "data_type": "float32",
             "unit": "%",
-            "description": "粗加工主轴负载率，常态0~100%，短时允许到120%；粗加工稳定阶段约45~65%，切入瞬间可能更高",
+            "description": "粗加工主轴负载率，稳定粗铣约48~68%含刀路扰动，切入瞬间约65~75%，空闲接近0，短时允许到120%",
             "access": "r",
             "generator_type": "script",
             "min_value": 0.0,
             "max_value": 120.0,
             "generator_config": {
-                "script": "elapsed = context['elapsed']; t = elapsed % 180\nif t < 20:\n    base, noise_std = 0.0, 0.0\nelif t < 30:\n    base, noise_std = 8.0, 2.0\nelif t < 40:\n    base, noise_std = 65.0, 4.0\nelif t < 140:\n    base, noise_std = 55.0, 4.0\nelif t < 155:\n    base, noise_std = 8.0, 2.0\nelif t < 165:\n    base, noise_std = 3.0, 1.0\nelse:\n    base, noise_std = 0.0, 0.0\nnoise = random.gauss(0, noise_std) if noise_std > 0 else 0\nresult = round(max(0, min(120.0, base + noise)), 2)"
+                "script": "elapsed = context['elapsed']\ncycle_idx = int(elapsed // 180)\nck = f'load_{cycle_idx}'\nif ck not in cache:\n    rng = random.Random(cycle_idx * 7 + 7)\n    cache[ck] = {'dur': rng.randint(-8, 10), 'load': rng.uniform(-4.0, 6.0)}\noff = cache[ck]\nperiod = 180 + off['dur']; t = elapsed % period\ncut_end = period - 25 - 8\nif t < 20:\n    base, noise_std = 0.0, 0.0\nelif t < 20 + 8:\n    base, noise_std = 6.0, 1.5\nelif t < 40:\n    base, noise_std = 68.0 + off['load'], 3.5\nelif t < cut_end:\n    drift = 4.5 * math.sin(2 * math.pi * elapsed / 37.4) + 2.5 * math.sin(2 * math.pi * elapsed / 19.1) + 1.5 * math.sin(2 * math.pi * elapsed / 7.3)\n    base, noise_std = 56.0 + off['load'] + drift, 2.5\nelif t < cut_end + 12:\n    base, noise_std = 6.0, 1.5\nelif t < cut_end + 20:\n    base, noise_std = 2.0, 0.8\nelse:\n    base, noise_std = 0.0, 0.0\nnoise = random.gauss(0, noise_std) if noise_std > 0 else 0\nresult = round(max(0, min(120.0, base + noise)), 2)"
             }
         },
         {

From c9269967283d9cbe7b3f6b384ead70c7ccf2fab7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Thu, 28 May 2026 13:44:34 +0800
Subject: [PATCH 35/36] fix(fault): update fault

---
 ai/predictor/README.md     |  27 +++
 ai/predictor/__init__.py   |  30 +++
 ai/predictor/anomaly.py    | 242 ++++++++++++++++++++
 ai/predictor/config.py     | 186 ++++++++++++++++
 ai/predictor/discovery.py  |  84 +++++++
 ai/predictor/models.py     | 114 ++++++++++
 ai/predictor/phase_lock.py | 152 +++++++++++++
 ai/predictor/profiling.py  | 256 +++++++++++++++++++++
 ai/predictor/service.py    | 333 ++++++++++++++++++++++++++++
 ai/predictor/signal.py     | 335 ++++++++++++++++++++++++++++
 ai/predictor/state.py      | 328 +++++++++++++++++++++++++++
 ai/predictor/storage.py    | 438 ++++++++++++++++++++++++++++++++++++
 ai/predictor/template.py   | 384 ++++++++++++++++++++++++++++++++
 ai/pridict_v5.py           | 442 ++++++++++++++++++++++++++-----------
 protoforge/core/fault.py   |  71 +++++-
 15 files changed, 3282 insertions(+), 140 deletions(-)
 create mode 100644 ai/predictor/README.md
 create mode 100644 ai/predictor/__init__.py
 create mode 100644 ai/predictor/anomaly.py
 create mode 100644 ai/predictor/config.py
 create mode 100644 ai/predictor/discovery.py
 create mode 100644 ai/predictor/models.py
 create mode 100644 ai/predictor/phase_lock.py
 create mode 100644 ai/predictor/profiling.py
 create mode 100644 ai/predictor/service.py
 create mode 100644 ai/predictor/signal.py
 create mode 100644 ai/predictor/state.py
 create mode 100644 ai/predictor/storage.py
 create mode 100644 ai/predictor/template.py

diff --git a/ai/predictor/README.md b/ai/predictor/README.md
new file mode 100644
index 0000000..8fcc022
--- /dev/null
+++ b/ai/predictor/README.md
@@ -0,0 +1,27 @@
+```text
+ai/predictor/
+  ├── __init__.py      # 公开 API：PredictorService, run()
+  ├── config.py        # 所有常量，大部分支持环境变量覆盖
+  ├── models.py        # BaselineState, MetricProfile 数据类
+  ├── discovery.py     # VM 设备/指标发现
+  ├── signal.py        # 纯信号处理：平滑、FFT+自相关周期估计、谷底检测
+  ├── template.py      # 模板构建、预测、重采样、EMA 融合
+  ├── phase_lock.py    # Phase-lock 相位对齐
+  ├── anomaly.py       # 异常检测：边界计算、越界统计、三条件判断
+  ├── state.py         # 状态机：HEALTHY/ANOMALY/RECOVERING 生命周期
+  ├── profiling.py     # 自适应配置推断：infer_metric_profile, refresh_targets
+  ├── storage.py       # VM 读写、标签工具、状态持久化
+  └── service.py       # PredictorService 主类（run_once / run）
+```
+启动方式：
+```python
+from ai.predictor import run
+run()
+# 或
+from ai.predictor import PredictorService
+PredictorService(vm_url="http://vm:8428").run()
+```
+主要改进：
+- 全局变量（BASELINE_STATES、LAST_REAL_TS_WRITTEN、_TARGETS_CACHE）全部移入 PredictorService 实例属性
+- IO 与计算完全分离：signal.py、template.py、anomaly.py 均为纯函数，无网络请求
+- 每个模块顶部有职责说明，每个公开函数有完整 docstring
diff --git a/ai/predictor/__init__.py b/ai/predictor/__init__.py
new file mode 100644
index 0000000..9e45810
--- /dev/null
+++ b/ai/predictor/__init__.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+"""
+predictor
+~~~~~~~~~
+ProtoForge 预测服务 package。
+
+对外暴露：
+- ``PredictorService``：预测服务主类，支持 run() 一键启动
+- ``run()``：便捷入口，使用默认配置启动服务
+
+快速启动::
+
+    from ai.predictor import run
+    run()
+
+或自定义配置::
+
+    from ai.predictor import PredictorService
+    svc = PredictorService(vm_url="http://vm:8428", poll_interval=60)
+    svc.run()
+"""
+
+from .service import PredictorService
+
+__all__ = ["PredictorService", "run"]
+
+
+def run() -> None:
+    """使用默认配置启动预测服务（一行启动）。"""
+    PredictorService().run()
diff --git a/ai/predictor/anomaly.py b/ai/predictor/anomaly.py
new file mode 100644
index 0000000..779d38e
--- /dev/null
+++ b/ai/predictor/anomaly.py
@@ -0,0 +1,242 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.anomaly
+~~~~~~~~~~~~~~~~~
+异常检测：判断当前信号是否偏离健康基线。
+
+职责：
+- 计算预测边界（phase_point 用对称阈值，phase_band 用分位数带）
+- 应用物理上下限兜底（来自 override 文件）
+- 统计越界比例、连续越界秒数、最大越界倍数
+- 综合三个条件判断是否触发异常（任一条件满足即触发）
+
+依赖：predictor.phase_lock, predictor.template, predictor.config, predictor.models
+"""
+
+from typing import Dict, Tuple
+
+import numpy as np
+
+from . import config
+from .models import BaselineState
+from .phase_lock import phase_lock_recent
+from .template import predict_state_bundle
+
+
+def max_consecutive_true(flags: np.ndarray) -> int:
+    """
+    计算布尔数组中最长连续 True 的长度。
+
+    用于统计最长连续越界秒数，是异常判断的条件之一。
+
+    Args:
+        flags: 布尔数组（True 表示该点越界）
+
+    Returns:
+        最长连续 True 的长度（整数）。
+    """
+    max_count = 0
+    current = 0
+    for flag in flags:
+        if bool(flag):
+            current += 1
+            max_count = max(max_count, current)
+        else:
+            current = 0
+    return int(max_count)
+
+
+def calc_point_bounds(
+    pred: np.ndarray,
+    abs_threshold: float,
+    rel_threshold: float,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    计算 phase_point 策略的对称预测边界。
+
+    边界宽度 = max(abs_threshold, |pred| * rel_threshold)，
+    取两者较大值，保证在小值区域有最小绝对容忍度。
+
+    Args:
+        pred: 预测中值数组
+        abs_threshold: 绝对误差阈值
+        rel_threshold: 相对误差阈值（相对于预测值的比例）
+
+    Returns:
+        (lower, upper) 边界数组对。
+    """
+    threshold = np.maximum(abs_threshold, np.abs(pred) * rel_threshold)
+    return pred - threshold, pred + threshold
+
+
+def calc_final_bounds(
+    state: BaselineState,
+    pred: np.ndarray,
+    lower_raw: np.ndarray,
+    upper_raw: np.ndarray,
+    target: Dict,
+) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    计算最终预测边界，综合策略、动态填充和物理上下限。
+
+    phase_band 策略：
+        在分位数模板边界基础上，叠加动态填充（band_pad_abs 和相对填充取较大值），
+        覆盖正常的尖峰波动，避免误报。
+
+    phase_point 策略：
+        直接用对称阈值计算边界。
+
+    物理上下限（可选）：
+        来自 override 文件的 hard_max / hard_min，对边界做最终 clip。
+
+    Args:
+        state: 当前基线状态（本函数未读取；策略取自 target 的 strategy 字段）
+        pred: 预测中值数组
+        lower_raw: 模板下界数组（phase_band 为分位数，phase_point 等于 pred）
+        upper_raw: 模板上界数组
+        target: target dict，包含策略（strategy）、阈值和物理上下限配置
+
+    Returns:
+        (lower, upper) 最终边界数组对。
+    """
+    strategy = target.get("strategy", "phase_point")
+    abs_threshold = float(target.get("abs_threshold", 1.0))
+    rel_threshold = float(target.get("rel_threshold", 0.25))
+
+    if strategy == "phase_band":
+        pad_abs = float(target.get("band_pad_abs", abs_threshold))
+        # 动态填充：取绝对填充 pad_abs 与相对填充 |pred| * rel_threshold * 0.25 的较大值
+        dynamic_pad = np.maximum(pad_abs, np.abs(pred) * rel_threshold * 0.25)
+        lower = lower_raw - dynamic_pad
+        upper = upper_raw + dynamic_pad
+    else:
+        lower, upper = calc_point_bounds(pred, abs_threshold, rel_threshold)
+
+    # 物理上下限兜底（来自 override 文件，可选）
+    hard_max = target.get("hard_max")
+    hard_min = target.get("hard_min")
+    if hard_max is not None:
+        upper = np.minimum(upper, float(hard_max))
+    if hard_min is not None:
+        lower = np.maximum(lower, float(hard_min))
+
+    return lower, upper
+
+
+def detect_anomaly(
+    state: BaselineState,
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    ys_actual: np.ndarray,
+    target: Dict,
+) -> Tuple[bool, float, float, float, int, int, int, float]:
+    """
+    检测当前信号是否偏离健康基线，返回完整的诊断指标。
+
+    流程：
+    1. phase-lock 对齐：在最近窗口内找最优 (period, origin)
+    2. 用对齐后的参数预测最近窗口的值
+    3. 计算越界统计量
+    4. 满足以下任一条件即判定为异常：
+       - 越界比例 >= outside_ratio_threshold
+       - 连续越界秒数 >= min_consecutive_outside
+       - 最大越界倍数 >= severe_exceed_ratio（单点严重越界立即报警）
+
+    Args:
+        state: 当前基线状态
+        ts_grid: 均匀 1 秒网格的时间戳数组
+        ys_model: 平滑后的信号（phase_point 用于比较）
+        ys_actual: 原始信号（phase_band 用于比较）
+        target: target dict，包含阈值配置
+
+    Returns:
+        (is_anomaly, outside_ratio, mean_abs_err, mean_rel_err,
+         best_period, best_origin, max_outside_seconds, max_exceed_ratio)
+    """
+    best_period, best_origin, pred_recent, _ = phase_lock_recent(
+        state=state,
+        ts_grid=ts_grid,
+        ys_model=ys_model,
+        target=target,
+    )
+
+    recent_len = len(pred_recent)
+    if recent_len <= 0:
+        return False, 0.0, 0.0, 0.0, best_period, best_origin, 0, 0.0
+
+    # phase_band 用原始信号比较（保留真实波动），phase_point 用平滑信号
+    if target.get("strategy", "phase_point") == "phase_band":
+        actual = ys_actual[-recent_len:].astype(float)
+    else:
+        actual = ys_model[-recent_len:].astype(float)
+
+    # 用 phase-lock 后的最优参数重新预测（临时 state，不修改原始 state）
+    tmp_state = BaselineState(
+        period=best_period,
+        phase_origin_ts=best_origin,
+        template=state.template,
+        lower_template=state.lower_template,
+        upper_template=state.upper_template,
+        strategy=state.strategy,
+        status=state.status,
+        clean_seconds=state.clean_seconds,
+        last_update_ts=state.last_update_ts,
+        last_seen_ts=state.last_seen_ts,
+        y_min=state.y_min,
+        y_max=state.y_max,
+    )
+
+    recent_ts = ts_grid[-recent_len:].astype(int).tolist()
+    pred, lower_raw, upper_raw = predict_state_bundle(tmp_state, recent_ts)
+
+    lower, upper = calc_final_bounds(
+        state=tmp_state,
+        pred=pred,
+        lower_raw=lower_raw,
+        upper_raw=upper_raw,
+        target=target,
+    )
+
+    # 计算越界量（负值表示在边界内，clip 到 0）
+    above_upper = actual - upper
+    below_lower = lower - actual
+    exceed = np.maximum(np.maximum(above_upper, below_lower), 0.0)
+    outside = exceed > 0
+
+    band_width = np.maximum(upper - lower, 1e-6)
+    exceed_ratio = exceed / band_width  # 越界量相对于边界宽度的倍数
+
+    abs_err = np.abs(actual - pred)
+    outside_ratio = float(np.mean(outside))
+    mean_abs_err = float(np.mean(abs_err))
+    mean_rel_err = float(np.mean(abs_err / np.maximum(np.abs(pred), 1e-6)))
+    max_outside_seconds = max_consecutive_true(outside)
+    max_exceed_ratio = float(np.max(exceed_ratio)) if len(exceed_ratio) > 0 else 0.0
+
+    # 从 target 读取阈值，允许每个指标独立配置
+    outside_ratio_threshold = float(
+        target.get("outside_ratio_threshold", config.OUTSIDE_RATIO_THRESHOLD)
+    )
+    min_consecutive_outside = int(
+        target.get("min_consecutive_outside", config.MIN_CONSECUTIVE_OUTSIDE)
+    )
+    severe_exceed_ratio = float(
+        target.get("severe_exceed_ratio", config.SEVERE_EXCEED_RATIO)
+    )
+
+    is_anomaly = (
+        outside_ratio >= outside_ratio_threshold
+        or max_outside_seconds >= min_consecutive_outside
+        or max_exceed_ratio >= severe_exceed_ratio
+    )
+
+    return (
+        is_anomaly,
+        outside_ratio,
+        mean_abs_err,
+        mean_rel_err,
+        int(best_period),
+        int(best_origin),
+        int(max_outside_seconds),
+        float(max_exceed_ratio),
+    )
diff --git a/ai/predictor/config.py b/ai/predictor/config.py
new file mode 100644
index 0000000..ed5c11b
--- /dev/null
+++ b/ai/predictor/config.py
@@ -0,0 +1,186 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.config
+~~~~~~~~~~~~~~~~
+所有运行时配置常量，集中在此处管理。
+
+大部分参数支持通过环境变量覆盖，方便容器化部署时无需修改代码。
+环境变量前缀统一为 ``PROTOFORGE_``。
+"""
+
+import os
+from typing import List
+
+# ---------------------------------------------------------------------------
+# VictoriaMetrics connection
+# ---------------------------------------------------------------------------
+
+#: VM HTTP address; defaults to the local host
+VM_URL: str = os.environ.get("PROTOFORGE_VM_URL", "http://localhost:8428")
+
+# ---------------------------------------------------------------------------
+# State persistence
+# ---------------------------------------------------------------------------
+
+#: Path of the healthy-template state file (JSON), so learning progress can be restored after a restart
+STATE_FILE: str = os.environ.get(
+    "PROTOFORGE_STATE_FILE",
+    "/tmp/protoforge_predictor_state_v14.json",
+)
+
+# ---------------------------------------------------------------------------
+# Polling and prediction time windows
+# ---------------------------------------------------------------------------
+
+#: Time window of history to fetch (minutes)
+HISTORY_MINUTES: int = int(os.environ.get("PROTOFORGE_HISTORY_MINUTES", "30"))
+
+#: Nominal prediction horizon (seconds)
+HORIZON_SECONDS: int = int(os.environ.get("PROTOFORGE_HORIZON_SECONDS", "120"))
+
+#: Polling interval (seconds)
+POLL_INTERVAL: int = int(os.environ.get("PROTOFORGE_POLL_INTERVAL", "30"))
+
+#: Number of prediction points actually written to VM = min(HORIZON_SECONDS, POLL_INTERVAL)
+#: Avoids writing too many future points, which makes Grafana charts show "prediction jumps"
+WRITE_HORIZON_SECONDS: int = min(HORIZON_SECONDS, POLL_INTERVAL)
+
+#: VM query step
+QUERY_STEP: str = "1s"
+
+#: Minimum number of history points required before modelling starts
+MIN_POINTS: int = 120
+
+# ---------------------------------------------------------------------------
+# Period detection range
+# ---------------------------------------------------------------------------
+
+#: Shortest allowed period (seconds)
+MIN_PERIOD_SECONDS: int = 5
+
+#: Longest allowed period (seconds)
+MAX_PERIOD_SECONDS: int = 3600
+
+# ---------------------------------------------------------------------------
+# Template learning parameters
+# ---------------------------------------------------------------------------
+
+#: Minimum number of complete cycles needed to build a template
+MIN_FULL_CYCLES_FOR_TEMPLATE: int = 3
+
+#: Maximum number of most-recent cycles used to build a template (keeps stale data out)
+MAX_CYCLES_FOR_TEMPLATE: int = 8
+
+#: During valley detection, only points below this percentile count as valley candidates
+VALLEY_QUANTILE: int = 45
+
+#: Template EMA update step in the healthy state (smaller = more conservative, slower change)
+HEALTHY_EMA_ALPHA: float = 0.10
+
+#: Template EMA update step in the recovering state (more aggressive than healthy, to catch up faster)
+RECOVERY_EMA_ALPHA: float = 0.25
+
+# ---------------------------------------------------------------------------
+# Default anomaly-detection thresholds
+# ---------------------------------------------------------------------------
+
+#: Detection window (seconds): only the most recent data in this window is used to judge anomalies
+DETECT_WINDOW_SECONDS: int = 30
+
+#: Minimum recovery duration (seconds): after an anomaly clears, stay stable this long before learning resumes
+RECOVERY_MIN_SECONDS: int = 60
+
+#: Outside-ratio threshold: alarm when the fraction of out-of-bounds points in the window reaches this value
+OUTSIDE_RATIO_THRESHOLD: float = 0.60
+
+#: Consecutive-outside threshold (seconds): alarm when points stay out of bounds for this many seconds in a row
+MIN_CONSECUTIVE_OUTSIDE: int = 5
+
+#: Severe-exceed multiple: alarm when a single point exceeds the bound by this multiple of the band width
+SEVERE_EXCEED_RATIO: float = 1.8
+
+#: Maximum allowed lag of real data (seconds): beyond this the data pipeline is treated as broken and prediction is skipped
+MAX_DATA_LAG_SECONDS: int = 180
+
+# ---------------------------------------------------------------------------
+# Phase-lock search parameters
+# ---------------------------------------------------------------------------
+
+#: Shortest history window used by phase-lock (seconds)
+PHASE_LOCK_MIN_WINDOW_SECONDS: int = 45
+
+#: Longest history window used by phase-lock (seconds)
+PHASE_LOCK_MAX_WINDOW_SECONDS: int = 180
+
+#: Period search range (as a fraction of the base period); infer_metric_profile may override it per target
+PHASE_LOCK_PERIOD_SEARCH_RATIO: float = 0.12
+
+#: Phase-origin search range (as a fraction of the period)
+PHASE_LOCK_ORIGIN_SEARCH_RATIO: float = 0.35
+
+#: Period search step (seconds)
+PHASE_LOCK_PERIOD_STEP: int = 1
+
+#: Phase-origin search step (seconds)
+PHASE_LOCK_ORIGIN_STEP: int = 1
+
+# ---------------------------------------------------------------------------
+# Monitored-metric whitelist
+# ---------------------------------------------------------------------------
+
+#: Names of the metrics monitored by default
+_DEFAULT_MONITORED_METRICS: List[str] = [
+    "feed_rate",
+    "spindle_speed",
+    "spindle_current",
+    "spindle_load",
+    "vibration_x",
+    "vibration_y",
+    "vibration_z",
+]
+
+#: Metric list actually used; can be overridden via the PROTOFORGE_MONITORED_METRICS environment variable
+#: Format: comma-separated metric names, e.g. "feed_rate,spindle_speed"
+MONITORED_METRICS: List[str] = [
+    m.strip()
+    for m in os.environ.get(
+        "PROTOFORGE_MONITORED_METRICS",
+        ",".join(_DEFAULT_MONITORED_METRICS),
+    ).split(",")
+    if m.strip()
+]
+
+# ---------------------------------------------------------------------------
+# Manual upper/lower-limit override file
+# ---------------------------------------------------------------------------
+
+#: Path of the override file; a missing file is ignored (no error)
+#: File format (JSON):
+#:   {
+#:     "device-id": {
+#:       "metric_name": {"hard_max": 35.0, "hard_min": 0.0}
+#:     }
+#:   }
+OVERRIDE_FILE: str = os.environ.get(
+    "PROTOFORGE_PREDICTOR_OVERRIDE",
+    "/etc/protoforge/predictor_override.json",
+)
+
+# ---------------------------------------------------------------------------
+# Target-list refresh interval
+# ---------------------------------------------------------------------------
+
+#: Interval between re-discoveries of devices and metrics (seconds)
+TARGETS_REFRESH_INTERVAL: int = int(
+    os.environ.get("PROTOFORGE_TARGETS_REFRESH", "60")
+)
+
+# ---------------------------------------------------------------------------
+# Extra labels attached when writing to VM
+# ---------------------------------------------------------------------------
+
+#: Labels attached to every prediction metric, to tell predicted data from raw data in Grafana
+EXTRA_PREDICT_LABELS = {
+    "forecast": "phase_band_health_v14",
+    "source": "protoforge",
+}
diff --git a/ai/predictor/discovery.py b/ai/predictor/discovery.py
new file mode 100644
index 0000000..c8b829a
--- /dev/null
+++ b/ai/predictor/discovery.py
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.discovery
+~~~~~~~~~~~~~~~~~~~
+Layer 1：设备与指标发现。
+
+职责：
+- 从 VictoriaMetrics 查询所有在线设备（device_id 标签值）
+- 探测指定设备上哪些指标名当前有数据
+
+本模块只做网络查询，不包含任何预测或统计逻辑。
+
+依赖：requests
+"""
+
+import logging
+from typing import List
+
+import requests
+
+logger = logging.getLogger(__name__)
+
+
+def discover_device_ids(vm_url: str) -> List[str]:
+    """
+    Query VictoriaMetrics for every value of the ``device_id`` label.
+
+    Calls VM's label-values endpoint. A request/HTTP failure or an undecodable body
+    (plain ValueError on older ``requests``) is logged and yields an empty list.
+
+    Args:
+        vm_url: VM HTTP address, e.g. "http://localhost:8428"
+
+    Returns:
+        List of device IDs with falsy entries (such as "") filtered out.
+        Empty list when the query fails or the payload carries no ``data``.
+    """
+    try:
+        resp = requests.get(
+            f"{vm_url}/api/v1/label/device_id/values",
+            timeout=10,
+        )
+        resp.raise_for_status()
+        return [v for v in (resp.json().get("data") or []) if v]
+    except (requests.RequestException, ValueError) as e:
+        logger.error("发现 device_id 失败: %s", e)
+        return []
+
+
+def discover_metrics_for_device(
+    vm_url: str,
+    device_id: str,
+    candidates: List[str],
+) -> List[str]:
+    """
+    Probe which candidate metrics currently return data for one device.
+
+    Issues one instant query per name in ``candidates``; a metric counts as
+    present only when the response carries a non-empty ``result``.
+
+    Args:
+        vm_url: VM HTTP address
+        device_id: Device identifier, used as the ``device_id`` label value in the query
+        candidates: Metric names to probe, e.g. ["feed_rate", "spindle_speed"]
+
+    Returns:
+        Metric names that returned data, in the order given by ``candidates``.
+        A failed probe is logged and skipped; the remaining metrics are still probed.
+    """
+    found: List[str] = []
+    for metric in candidates:
+        try:
+            resp = requests.get(
+                f"{vm_url}/api/v1/query",
+                params={"query": f'{metric}{{device_id="{device_id}"}}'},
+                timeout=5,
+            )
+            resp.raise_for_status()
+            if (resp.json().get("data") or {}).get("result"):
+                found.append(metric)
+        except (requests.RequestException, ValueError) as e:
+            # Log instead of swallowing: a failed probe silently drops the metric from monitoring
+            logger.warning("探测指标失败 device=%s metric=%s: %s", device_id, metric, e)
+    return found
diff --git a/ai/predictor/models.py b/ai/predictor/models.py
new file mode 100644
index 0000000..8b48bf5
--- /dev/null
+++ b/ai/predictor/models.py
@@ -0,0 +1,114 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.models
+~~~~~~~~~~~~~~~~
+纯数据结构定义，不包含任何业务逻辑或 IO 操作。
+
+包含：
+- ``BaselineState``：单个指标的健康模板状态，记录周期、模板曲线、健康状态等
+- ``MetricProfile``：从历史数据统计出的指标特征，驱动策略和阈值的自动推断
+- 状态常量：HEALTHY / ANOMALY / RECOVERING
+"""
+
+from dataclasses import dataclass, field
+from typing import List
+
+# ---------------------------------------------------------------------------
+# Baseline status constants
+# ---------------------------------------------------------------------------
+
+#: Running normally; the template keeps learning and updating
+BASELINE_STATUS_HEALTHY = "healthy"
+
+#: Anomaly detected; the template is frozen so fault data is not learned
+BASELINE_STATUS_ANOMALY = "anomaly"
+
+#: Anomaly has cleared; waiting for stability before learning resumes
+BASELINE_STATUS_RECOVERING = "recovering"
+
+
+# ---------------------------------------------------------------------------
+# 基线状态
+# ---------------------------------------------------------------------------
+
+@dataclass
+class BaselineState:
+    """
+    Healthy-baseline state of a single metric.
+
+    Each (device_id, metric) pair has its own BaselineState instance, which
+    stores the metric's periodic template and its current health status.
+
+    Attributes:
+        period: Detected machining-cycle length (seconds)
+        phase_origin_ts: Phase-origin timestamp (Unix seconds), used to compute the current phase
+        template: Median template curve used for prediction; length equals period
+        lower_template: Lower-bound template curve (a quantile under phase_band, otherwise equal to template)
+        upper_template: Upper-bound template curve (a quantile under phase_band, otherwise equal to template)
+        strategy: Prediction strategy, "phase_point" or "phase_band"
+        status: Current health status, one of the BASELINE_STATUS_* constants
+        clean_seconds: Seconds of continuous healthy operation, used to decide whether the template may be updated
+        last_update_ts: Timestamp of the last template update (Unix seconds)
+        last_seen_ts: Timestamp at which this metric was last processed (Unix seconds), used to compute elapsed time
+        y_min: Minimum value over a recent span, kept as a value-range reference
+        y_max: Maximum value over a recent span, kept as a value-range reference
+    """
+
+    period: int
+    phase_origin_ts: int
+    template: List[float]
+    lower_template: List[float]
+    upper_template: List[float]
+    strategy: str
+    status: str
+    clean_seconds: int
+    last_update_ts: int
+    last_seen_ts: int
+    y_min: float
+    y_max: float
+
+
+# ---------------------------------------------------------------------------
+# 指标特征（自适应配置推断结果）
+# ---------------------------------------------------------------------------
+
+@dataclass
+class MetricProfile:
+    """
+    Metric characteristics computed from history, used to infer the strategy and thresholds.
+
+    Produced by ``profiling.infer_metric_profile()`` and then turned into an
+    execution-layer target dict by ``profiling.build_target()``.
+
+    Attributes:
+        device_id: Device identifier, matching the device_id label value in VM
+        metric: Metric name, e.g. "feed_rate" or "spindle_current"
+        p5: 5th percentile of the active segment (after idle values are filtered out)
+        p95: 95th percentile of the active segment
+        iqr: p95 - p5, reflecting the normal fluctuation range
+        cv: Coefficient of variation (std / mean), a measure of signal stability
+            cv < 0.15 → stable signal (finish-milling style) → phase_point
+            cv >= 0.15 → fluctuating signal (rough-milling load, vibration) → phase_band
+        strategy: Automatically inferred prediction strategy, "phase_point" or "phase_band"
+        abs_threshold: Absolute-error threshold (computed automatically)
+        rel_threshold: Relative-error threshold (computed automatically)
+        band_low_q: Lower-bound quantile for phase_band (default 5)
+        band_high_q: Upper-bound quantile for phase_band (default 95)
+        band_pad_abs: Extra padding width for phase_band, to cover normal spikes
+        phase_lock_period_search_ratio: Phase-lock period search range (relative fraction)
+            Derived from the measured period jitter: the less stable the period, the wider the range
+    """
+
+    device_id: str
+    metric: str
+    p5: float
+    p95: float
+    iqr: float
+    cv: float
+    strategy: str
+    abs_threshold: float
+    rel_threshold: float
+    band_low_q: float
+    band_high_q: float
+    band_pad_abs: float
+    phase_lock_period_search_ratio: float
diff --git a/ai/predictor/phase_lock.py b/ai/predictor/phase_lock.py
new file mode 100644
index 0000000..b38acaf
--- /dev/null
+++ b/ai/predictor/phase_lock.py
@@ -0,0 +1,152 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.phase_lock
+~~~~~~~~~~~~~~~~~~~~
+Phase-lock 相位对齐：在每次预测前动态校正周期和相位原点。
+
+职责：
+- 在基准周期附近搜索最优 (period, origin) 组合
+- 最小化最近时间窗口内的预测 MAE
+- 支持 target 级别的搜索范围配置（粗铣工位周期抖动大，需要更宽的范围）
+
+依赖：predictor.template, predictor.config, predictor.models
+"""
+
+import logging
+from typing import Dict, Optional, Tuple
+
+import numpy as np
+
+from . import config
+from .models import BaselineState
+from .template import (
+    normalize_origin_near,
+    predict_template_values,
+    resample_template,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def phase_lock_recent(
+    state: BaselineState,
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    target: Optional[Dict] = None,
+) -> Tuple[int, int, np.ndarray, float]:
+    """
+    Search the recent time window for the best (period, phase_origin) pair.
+
+    Search strategy:
+    1. Pick the search window (about two periods, clipped to the configured min/max)
+    2. Enumerate periods in [base_period * (1 - ratio), base_period * (1 + ratio)]
+    3. For each period, enumerate phase origins within origin ± origin_shift
+    4. Keep the pair with the smallest MAE + period-deviation penalty
+       (the penalty discourages drifting to periods far from the baseline)
+
+    Args:
+        state: Current baseline state (supplies the base period, origin and template)
+        ts_grid: Timestamp array; assumed to be a uniform 1-second grid — TODO confirm with caller
+        ys_model: Smoothed signal aligned with ts_grid (used to compute the MAE)
+        target: Target dict; may carry phase_lock_period_search_ratio /
+                phase_lock_origin_search_ratio to override the default search ranges
+
+    Returns:
+        Tuple (best_period, best_origin, best_pred, best_mae):
+        - best_period: Best period (integer seconds)
+        - best_origin: Best phase origin (Unix seconds)
+        - best_pred: Predicted values under the best parameters (one per timestamp in the search window)
+        - best_mae: MAE of the best pair
+    """
+    base_period = int(state.period)
+    base_origin = int(state.phase_origin_ts)
+    base_template = np.array(state.template, dtype=float)
+
+    # Read the search ranges from target so that individual targets can use wider ranges
+    period_search_ratio = float(
+        (target or {}).get("phase_lock_period_search_ratio", config.PHASE_LOCK_PERIOD_SEARCH_RATIO)
+    )
+    origin_search_ratio = float(
+        (target or {}).get("phase_lock_origin_search_ratio", config.PHASE_LOCK_ORIGIN_SEARCH_RATIO)
+    )
+
+    # Degenerate baseline (period or template too short): return the baseline prediction as-is
+    if base_period <= 1 or len(base_template) <= 1:
+        ts_recent = ts_grid[-config.DETECT_WINDOW_SECONDS :].astype(int).tolist()
+        pred = predict_template_values(base_template, base_period, base_origin, ts_recent)
+        actual = ys_model[-len(ts_recent) :].astype(float)
+        mae = float(np.mean(np.abs(actual - pred))) if len(actual) else 0.0
+        return base_period, base_origin, pred, mae
+
+    # Search window: about two periods, clipped to [min, max]
+    window_seconds = max(
+        config.PHASE_LOCK_MIN_WINDOW_SECONDS,
+        min(config.PHASE_LOCK_MAX_WINDOW_SECONDS, int(base_period * 2)),
+    )
+
+    cutoff = ts_grid[-1] - window_seconds
+    mask = ts_grid >= cutoff
+    ts_recent_arr = ts_grid[mask].astype(int)
+    actual = ys_model[mask].astype(float)
+
+    # Too few points inside the window: fall back to a fixed-length tail
+    if len(ts_recent_arr) < max(10, config.DETECT_WINDOW_SECONDS):
+        ts_recent_arr = ts_grid[-config.DETECT_WINDOW_SECONDS :].astype(int)
+        actual = ys_model[-config.DETECT_WINDOW_SECONDS :].astype(float)
+
+    ts_recent = ts_recent_arr.tolist()
+    last_ts = int(ts_recent[-1])
+
+    # Period search range
+    p_min = max(
+        int(config.MIN_PERIOD_SECONDS),
+        int(round(base_period * (1.0 - period_search_ratio))),
+    )
+    p_max = min(
+        int(config.MAX_PERIOD_SECONDS),
+        int(round(base_period * (1.0 + period_search_ratio))),
+    )
+
+    # Start from the baseline parameters (zero period penalty, so score == MAE)
+    best_period = base_period
+    best_origin = normalize_origin_near(base_origin, base_period, last_ts)
+    best_pred = predict_template_values(
+        template=resample_template(base_template, best_period),
+        period=best_period,
+        phase_origin_ts=best_origin,
+        ts_list=ts_recent,
+    )
+    best_mae = float(np.mean(np.abs(actual - best_pred)))
+    best_score = best_mae
+
+    for period in range(p_min, p_max + 1, config.PHASE_LOCK_PERIOD_STEP):
+        template = resample_template(base_template, period)
+        center_origin = normalize_origin_near(base_origin, period, last_ts)
+        origin_shift = max(2, int(round(period * origin_search_ratio)))
+
+        # Penalty: 0.5 per second of deviation from the base period, so noise cannot
+        # pull the lock far from the baseline. It depends on the period only.
+        penalty = abs(period - base_period) * 0.5
+
+        for shift in range(-origin_shift, origin_shift + 1, config.PHASE_LOCK_ORIGIN_STEP):
+            origin = center_origin + shift
+            pred = predict_template_values(
+                template=template,
+                period=period,
+                phase_origin_ts=origin,
+                ts_list=ts_recent,
+            )
+            mae = float(np.mean(np.abs(actual - pred)))
+            score = mae + penalty
+
+            if score < best_score:
+                best_period = period
+                best_origin = origin
+                best_pred = pred
+                best_mae = mae
+                best_score = score
+
+    # Normalise the final origin to lie near the latest timestamp
+    best_origin = normalize_origin_near(best_origin, best_period, last_ts)
+
+    return int(best_period), int(best_origin), best_pred, float(best_mae)
diff --git a/ai/predictor/profiling.py b/ai/predictor/profiling.py
new file mode 100644
index 0000000..5330500
--- /dev/null
+++ b/ai/predictor/profiling.py
@@ -0,0 +1,256 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.profiling
+~~~~~~~~~~~~~~~~~~~
+Layer 2：自适应配置推断。
+
+职责：
+- 从历史数据统计指标特征（p5/p95/IQR/cv/周期抖动率）
+- 自动推断预测策略（phase_point vs phase_band）和阈值
+- 加载人工上下限覆盖文件（override）
+- 将 MetricProfile 转换为执行层 target dict
+- 完整的发现 + 推断流程（refresh_targets）
+
+依赖：predictor.storage, predictor.discovery, predictor.signal, predictor.models, predictor.config
+"""
+
+import json
+import logging
+import os
+from typing import Dict, List, Optional
+
+import numpy as np
+
+from . import config
+from .discovery import discover_device_ids, discover_metrics_for_device
+from .models import MetricProfile
+from .signal import estimate_period_rough, find_valley_indices
+from .storage import fetch_history, normalize_history
+
+logger = logging.getLogger(__name__)
+
+
+def infer_metric_profile(
+    vm_url: str,
+    device_id: str,
+    metric: str,
+) -> Optional[MetricProfile]:
+    """
+    Fetch history, compute active-segment statistics and infer the strategy and thresholds.
+
+    Inference rules:
+    - Idle filtering: values at or below p10 are excluded so idle readings do not drag thresholds down
+    - strategy: cv < 0.15 → phase_point (stable signal), otherwise phase_band (fluctuating signal)
+    - abs_threshold: max(IQR * 0.8, std * 2.0), where "IQR" here means p95 - p5
+    - rel_threshold: min(0.30, cv * 1.5)
+    - band_pad_abs: max(IQR * 0.3, std), to cover normal spikes
+    - phase_lock_period_search_ratio: derived from the period jitter, clipped to [0.12, 0.25]
+
+    Args:
+        vm_url: VM HTTP address
+        device_id: Device identifier
+        metric: Metric name
+
+    Returns:
+        A MetricProfile, or None when fewer than config.MIN_POINTS samples are available.
+    """
+    ts_raw, ys_raw = fetch_history(
+        vm_url=vm_url,
+        query=f'{metric}{{device_id="{device_id}"}}',
+        minutes=config.HISTORY_MINUTES,
+    )
+
+    if len(ys_raw) < config.MIN_POINTS:
+        return None
+
+    arr = np.array(ys_raw, dtype=float)
+
+    # Drop the idle segment: keep only active values (strictly above p10)
+    p10_val = float(np.percentile(arr, 10))
+    active = arr[arr > p10_val]
+    if len(active) < 30:
+        active = arr  # too few values survive the filter; use the unfiltered data
+
+    mean_val = float(np.mean(active))
+    std_val = float(np.std(active))
+    cv = std_val / max(abs(mean_val), 1e-6)
+    p5 = float(np.percentile(active, 5))
+    p95 = float(np.percentile(active, 95))
+    iqr = p95 - p5
+
+    # Pick the strategy automatically: cv measures signal stability
+    strategy = "phase_point" if cv < 0.15 else "phase_band"
+
+    # Thresholds. NOTE(review): a "(p95 - p5) * 0.05" floor would always lose to iqr * 0.8, so it is omitted
+    abs_threshold = max(iqr * 0.8, std_val * 2.0)
+    rel_threshold = min(0.30, cv * 1.5)
+
+    # Tolerance-band padding for phase_band
+    band_pad_abs = max(iqr * 0.3, std_val)
+
+    # Phase-lock search range: estimate the period jitter from history
+    ts_grid, ys_grid = normalize_history(ts_raw, ys_raw)
+    period_search_ratio = config.PHASE_LOCK_PERIOD_SEARCH_RATIO  # default
+
+    if len(ys_grid) >= config.MIN_POINTS:
+        rough_period = estimate_period_rough(ys_grid)
+        if rough_period > config.MIN_PERIOD_SECONDS:
+            valleys = find_valley_indices(ts_grid, ys_grid, rough_period)
+            if len(valleys) >= 3:
+                diffs = np.diff(ts_grid[valleys].astype(float))
+                valid = diffs[
+                    (diffs > rough_period * 0.5) & (diffs < rough_period * 2.0)
+                ]
+                if len(valid) >= 2:
+                    # Twice the period's coefficient of variation is the search range, clipped to [0.12, 0.25]
+                    period_cv = float(np.std(valid) / max(np.mean(valid), 1e-6))
+                    period_search_ratio = float(np.clip(period_cv * 2.0, 0.12, 0.25))
+
+    logger.info(
+        "推断指标特征 device=%s metric=%s cv=%.3f strategy=%s "
+        "abs_thr=%.3f rel_thr=%.3f period_search=%.2f",
+        device_id, metric, cv, strategy,
+        abs_threshold, rel_threshold, period_search_ratio,
+    )
+
+    return MetricProfile(
+        device_id=device_id,
+        metric=metric,
+        p5=p5,
+        p95=p95,
+        iqr=iqr,
+        cv=cv,
+        strategy=strategy,
+        abs_threshold=abs_threshold,
+        rel_threshold=rel_threshold,
+        band_low_q=5.0,
+        band_high_q=95.0,
+        band_pad_abs=band_pad_abs,
+        phase_lock_period_search_ratio=period_search_ratio,
+    )
+
+
+def load_overrides(path: str) -> Dict:
+    """
+    Load the manual limit-override file.
+
+    Expected JSON layout:
+        {"device-id": {"metric_name": {"hard_max": 35.0, "hard_min": 0.0}}}
+
+    Args:
+        path: Path of the override file
+
+    Returns:
+        The override mapping; ``{}`` when the file is missing, unreadable,
+        not valid JSON, or its top level is not a JSON object.
+    """
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+        if not isinstance(data, dict):
+            raise ValueError(f"top-level JSON must be an object, got {type(data).__name__}")
+    except FileNotFoundError:
+        return {}
+    except (OSError, ValueError) as e:
+        logger.warning("加载 override 文件失败 %s: %s", path, e)
+        return {}
+    return data
+
+
+def build_target(profile: MetricProfile, overrides: Dict) -> Dict:
+    """
+    Convert a MetricProfile into the target dict consumed by the prediction layer.
+
+    The target dict carries the per-metric configuration:
+    - query / pred_metric / anomaly_metric
+    - strategy / thresholds / smoothing window
+    - phase-lock search ranges
+    - physical limits (optional, from the override file)
+
+    Args:
+        profile: Metric characteristics inferred from history
+        overrides: Override mapping (from load_overrides), keyed by device ID then metric name
+
+    Returns:
+        The target dict; the detection thresholds are taken from ``config``.
+    """
+    device_overrides = (overrides.get(profile.device_id) or {}).get(profile.metric) or {}
+
+    target: Dict = {
+        "query": f'{profile.metric}{{device_id="{profile.device_id}"}}',
+        "pred_metric": f"{profile.metric}_predicted",
+        "anomaly_metric": f"{profile.metric}_anomaly",
+        "strategy": profile.strategy,
+        "abs_threshold": profile.abs_threshold,
+        "rel_threshold": profile.rel_threshold,
+        # phase_band uses a larger smoothing window to suppress impulse noise
+        "smooth_window": 5 if profile.strategy == "phase_band" else 2,
+        "outside_ratio_threshold": config.OUTSIDE_RATIO_THRESHOLD,
+        "min_consecutive_outside": config.MIN_CONSECUTIVE_OUTSIDE,
+        "severe_exceed_ratio": config.SEVERE_EXCEED_RATIO,
+        "phase_lock_period_search_ratio": profile.phase_lock_period_search_ratio,
+        # The origin search range is 2.5x the period search range, capped at 0.45
+        "phase_lock_origin_search_ratio": min(
+            0.45, profile.phase_lock_period_search_ratio * 2.5
+        ),
+        # Physical limits (optional, from the override file); None when not overridden
+        "hard_max": device_overrides.get("hard_max"),
+        "hard_min": device_overrides.get("hard_min"),
+    }
+
+    if profile.strategy == "phase_band":
+        target.update({
+            "band_low_q": profile.band_low_q,
+            "band_high_q": profile.band_high_q,
+            "band_pad_abs": profile.band_pad_abs,
+        })
+
+    return target
+
+
+def refresh_targets(
+    vm_url: str,
+    monitored_metrics: List[str],
+    override_path: str,
+) -> List[Dict]:
+    """
+    Full discovery + inference pass: discover devices, infer each metric's configuration, return the targets.
+
+    Steps:
+    1. Discover all device_id values from VM
+    2. Probe which monitored metrics have data on each device
+    3. Infer a MetricProfile for each metric that has data
+    4. Convert each MetricProfile into a target dict
+
+    If no device_id is found, an empty list is returned and the caller decides whether to keep its old list.
+
+    Args:
+        vm_url: VM HTTP address
+        monitored_metrics: Candidate metric names to probe
+        override_path: Path of the override file
+
+    Returns:
+        List of target dicts, one per (device_id, metric) pair for which a profile could be inferred.
+    """
+    logger.info("开始发现设备和指标...")
+    override_map = load_overrides(override_path)
+
+    discovered = discover_device_ids(vm_url)
+    if not discovered:
+        logger.warning("未发现任何 device_id")
+        return []
+
+    built: List[Dict] = []
+    for dev in discovered:
+        for name in discover_metrics_for_device(vm_url, dev, monitored_metrics):
+            inferred = infer_metric_profile(vm_url, dev, name)
+            if inferred is None:
+                continue  # not enough history for this metric yet
+            built.append(build_target(inferred, override_map))
+
+    logger.info(
+        "目标列表已更新：%d 台设备，%d 个指标目标",
+        len(discovered),
+        len(built),
+    )
+    return built
diff --git a/ai/predictor/service.py b/ai/predictor/service.py
new file mode 100644
index 0000000..1e9dca7
--- /dev/null
+++ b/ai/predictor/service.py
@@ -0,0 +1,333 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.service
+~~~~~~~~~~~~~~~~~
+主服务类：组装所有模块，驱动预测主循环。
+
+职责：
+- 持有所有运行时状态（baseline_states、last_written、targets）
+- 按 TARGETS_REFRESH_INTERVAL 定期重新发现设备和指标
+- 每轮轮询：拉取历史数据 → 更新状态 → 预测 → 写入 VM
+- 每轮结束后持久化状态到文件
+
+依赖：所有其他 predictor 子模块
+"""
+
+import logging
+import time
+from datetime import datetime
+from typing import Dict, List, Optional
+
+from . import config
+from .anomaly import calc_final_bounds
+from .models import BaselineState
+from .profiling import refresh_targets
+from .signal import preprocess_values
+from .state import maybe_update_state
+from .storage import (
+    fetch_history,
+    load_state,
+    merge_labels,
+    normalize_history,
+    parse_labels_from_query,
+    save_state,
+    series_key,
+    write_prediction_bundle,
+)
+from .template import predict_state_bundle
+
+logger = logging.getLogger(__name__)
+
+
+class PredictorService:
+    """
+    Main prediction service class.
+
+    Keeps all runtime state on the instance, so each instance holds its own state.
+    run() starts the main loop; run_once() executes a single prediction round.
+
+    Attributes:
+        _vm_url: VM HTTP address
+        _state_file: Path of the state persistence file
+        _history_minutes: Time window of history to fetch (minutes)
+        _write_horizon: Number of prediction points written to VM per round (one per second)
+        _poll_interval: Polling interval (seconds)
+        _targets_refresh_interval: Target-list refresh interval (seconds)
+        _monitored_metrics: Names of the metrics to monitor
+        _override_file: Path of the manual limit-override file
+        _extra_labels: Extra labels attached when writing to VM
+        _states: Mapping of series key → BaselineState (runtime state)
+        _last_written: Mapping of series key → timestamp of the real data point behind the last write
+        _targets: Current target list
+        _targets_last_refresh: Time of the last successful target-list refresh
+    """
+
+    def __init__(
+        self,
+        vm_url: str = config.VM_URL,
+        state_file: str = config.STATE_FILE,
+        history_minutes: int = config.HISTORY_MINUTES,
+        write_horizon: int = config.WRITE_HORIZON_SECONDS,
+        poll_interval: int = config.POLL_INTERVAL,
+        targets_refresh_interval: int = config.TARGETS_REFRESH_INTERVAL,
+        monitored_metrics: Optional[List[str]] = None,
+        override_file: str = config.OVERRIDE_FILE,
+        extra_labels: Optional[Dict[str, str]] = None,
+    ) -> None:
+        self._vm_url = vm_url
+        self._state_file = state_file
+        self._history_minutes = history_minutes
+        self._write_horizon = write_horizon
+        self._poll_interval = poll_interval
+        self._targets_refresh_interval = targets_refresh_interval
+        self._monitored_metrics = monitored_metrics or config.MONITORED_METRICS
+        self._override_file = override_file
+        self._extra_labels = extra_labels or config.EXTRA_PREDICT_LABELS
+
+        self._states: Dict[str, BaselineState] = {}
+        self._last_written: Dict[str, int] = {}
+        self._targets: List[Dict] = []
+        self._targets_last_refresh: float = 0.0
+
+    # ------------------------------------------------------------------
+    # Target-list management
+    # ------------------------------------------------------------------
+
+    def _refresh_targets_if_needed(self) -> None:
+        """
+        Re-discover devices and metrics once targets_refresh_interval seconds have elapsed.
+
+        Discovery also runs whenever the target list is empty. If it yields nothing, the current list and refresh time are kept.
+        """
+        now = time.time()
+        if now - self._targets_last_refresh < self._targets_refresh_interval and self._targets:
+            return
+
+        new_targets = refresh_targets(
+            vm_url=self._vm_url,
+            monitored_metrics=self._monitored_metrics,
+            override_path=self._override_file,
+        )
+
+        if new_targets:
+            self._targets = new_targets
+            self._targets_last_refresh = now
+        else:
+            logger.warning("发现流程未产生任何有效目标，保持现有目标列表")
+
+    # ------------------------------------------------------------------
+    # Prediction time axis
+    # ------------------------------------------------------------------
+
+    def _build_prediction_timestamps(
+        self,
+        key: str,
+        last_real_ts: int,
+        now_sec: int,
+    ) -> Optional[List[int]]:
+        """
+        Build the prediction timestamps (write_horizon points starting at last_real_ts + 1).
+
+        Writing is skipped in two cases:
+        1. The real data lags too far behind (data pipeline problem)
+        2. The real-data timestamp has not advanced (the same batch would be written again)
+
+        Args:
+            key: Series identifier
+            last_real_ts: Timestamp of the newest real data point (Unix seconds)
+            now_sec: Current timestamp (Unix seconds)
+
+        Returns:
+            List of prediction timestamps, or None when writing is skipped.
+        """
+        data_lag = now_sec - last_real_ts
+
+        if data_lag > config.MAX_DATA_LAG_SECONDS:
+            logger.warning(
+                "真实数据延迟过大，跳过预测 key=%s data_lag=%ss max=%ss",
+                key, data_lag, config.MAX_DATA_LAG_SECONDS,
+            )
+            return None
+
+        last_written_real_ts = self._last_written.get(key)
+        if last_written_real_ts is not None and last_real_ts <= int(last_written_real_ts):
+            logger.info(
+                "真实数据时间戳未推进，跳过重复写入 key=%s last_real_ts=%s last_written=%s",
+                key, last_real_ts, last_written_real_ts,
+            )
+            return None
+
+        return [last_real_ts + i + 1 for i in range(self._write_horizon)]
+
+    # ------------------------------------------------------------------
+    # Single prediction round
+    # ------------------------------------------------------------------
+
+    def run_once(self) -> None:
+        """
+        Run one prediction round: for every target, fetch data, update state and write the predictions.
+
+        State is persisted to file at the end of the round (not when the target list is empty).
+        """
+        now_str = datetime.now().strftime("%H:%M:%S")
+
+        self._refresh_targets_if_needed()
+
+        if not self._targets:
+            logger.warning("[%s] 目标列表为空，等待设备发现完成", now_str)
+            return
+
+        for target in self._targets:
+            query = target["query"]
+            pred_metric = target["pred_metric"]
+            anomaly_metric = target["anomaly_metric"]
+            strategy = target.get("strategy", "phase_point")
+            smooth_window = int(target.get("smooth_window", 1))
+
+            # 1. Fetch history
+            ts, ys = fetch_history(
+                vm_url=self._vm_url,
+                query=query,
+                minutes=self._history_minutes,
+            )
+
+            if len(ys) < config.MIN_POINTS:
+                logger.info("[%s] %s 数据不足（%d 点），跳过", now_str, query, len(ys))
+                continue
+
+            ts_grid, ys_grid_raw = normalize_history(ts, ys)
+
+            if len(ys_grid_raw) < config.MIN_POINTS:
+                logger.info(
+                    "[%s] %s 清洗后数据不足（%d 点），跳过",
+                    now_str, query, len(ys_grid_raw),
+                )
+                continue
+
+            # 2. Preprocess (smoothing)
+            ys_grid_model = preprocess_values(ys_grid_raw, strategy, smooth_window)
+
+            # 3. Build the labels used for writing
+            base_labels = parse_labels_from_query(query)
+            write_labels = merge_labels(base_labels, self._extra_labels)
+            key = series_key(pred_metric, write_labels)
+
+            # 4. Update state (anomaly detection + template learning)
+            (
+                state,
+                is_anomaly,
+                outside_ratio,
+                mean_abs_err,
+                mean_rel_err,
+                max_outside_seconds,
+                max_exceed_ratio,
+            ) = maybe_update_state(
+                key=key,
+                ts_grid=ts_grid,
+                ys_model=ys_grid_model,
+                ys_actual=ys_grid_raw,
+                target=target,
+                states=self._states,
+            )
+
+            if state is None:
+                logger.info("[%s] %s 暂无可用健康模板，等待学习", now_str, query)
+                continue
+
+            # 5. Build the prediction timestamps
+            now_sec = int(time.time())
+            last_real_ts = int(ts_grid[-1])
+            data_lag = now_sec - last_real_ts
+
+            ts_future = self._build_prediction_timestamps(
+                key=key,
+                last_real_ts=last_real_ts,
+                now_sec=now_sec,
+            )
+
+            if not ts_future:
+                continue
+
+            # 6. Predict
+            pred_values, lower_raw, upper_raw = predict_state_bundle(state, ts_future)
+            lower_values, upper_values = calc_final_bounds(
+                state=state,
+                pred=pred_values,
+                lower_raw=lower_raw,
+                upper_raw=upper_raw,
+                target=target,
+            )
+
+            # 7. Write to VM
+            ok = write_prediction_bundle(
+                vm_url=self._vm_url,
+                pred_metric=pred_metric,
+                anomaly_metric=anomaly_metric,
+                labels=write_labels,
+                ts_future=ts_future,
+                pred_values=pred_values,
+                lower_values=lower_values,
+                upper_values=upper_values,
+                is_anomaly=is_anomaly,
+                outside_ratio=outside_ratio,
+                mean_abs_err=mean_abs_err,
+                mean_rel_err=mean_rel_err,
+                max_outside_seconds=max_outside_seconds,
+                max_exceed_ratio=max_exceed_ratio,
+                event_ts=last_real_ts,
+            )
+
+            if not ok:
+                logger.error("[%s] %s 写入预测数据失败", now_str, query)
+                continue
+
+            self._last_written[key] = last_real_ts
+
+            # 8. Log a summary line
+            future_start = datetime.fromtimestamp(ts_future[0]).strftime("%H:%M:%S")
+            future_end = datetime.fromtimestamp(ts_future[-1]).strftime("%H:%M:%S")
+            last_real_str = datetime.fromtimestamp(last_real_ts).strftime("%H:%M:%S")
+            origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S")
+
+            logger.info(
+                "[%s] %-50s → %-35s strategy=%s status=%s anomaly=%s "
+                "outside=%.2f max_outside=%ss max_exceed=%.2f "
+                "period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点，预测区间 %s ~ %s",
+                now_str, query, pred_metric,
+                state.strategy, state.status, is_anomaly,
+                outside_ratio, max_outside_seconds, max_exceed_ratio,
+                state.period, origin_str, last_real_str, data_lag,
+                len(ts_future), future_start, future_end,
+            )
+
+        save_state(self._state_file, self._states)
+
+    # ------------------------------------------------------------------
+    # Main loop
+    # ------------------------------------------------------------------
+
+    def run(self) -> None:
+        """
+        Start the prediction service main loop.
+
+        Loads persisted state, then loops forever: run_once(), sleep poll_interval seconds. NOTE(review): an exception escaping run_once() ends the loop.
+        """
+        self._states = load_state(self._state_file)
+
+        logger.info(
+            "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds "
+            "轮询间隔=%ds state=%s forecast=%s override=%s refresh=%ds",
+            self._vm_url,
+            self._history_minutes,
+            config.HORIZON_SECONDS,
+            self._write_horizon,
+            self._poll_interval,
+            self._state_file,
+            self._extra_labels.get("forecast", ""),
+            self._override_file,
+            self._targets_refresh_interval,
+        )
+
+        while True:
+            self.run_once()
+            time.sleep(self._poll_interval)
diff --git a/ai/predictor/signal.py b/ai/predictor/signal.py
new file mode 100644
index 0000000..e948160
--- /dev/null
+++ b/ai/predictor/signal.py
@@ -0,0 +1,335 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.signal
+~~~~~~~~~~~~~~~~
+纯信号处理与周期估计，不包含任何 IO 操作。
+
+职责：
+- 滚动中位数、移动平均等平滑算法
+- 基于 FFT + 自相关的周期估计
+- 谷底检测（用于模板构建的相位对齐）
+- 原始数据预处理（根据策略选择平滑方式）
+
+本模块所有函数均为纯函数，输入 numpy 数组，输出 numpy 数组或基本类型。
+
+依赖：numpy
+"""
+
+import math
+from typing import Dict, List, Tuple
+
+import numpy as np
+
+from . import config
+
+
+def rolling_median(arr: np.ndarray, window: int) -> np.ndarray:
+    """
+    Smooth ``arr`` with a centred rolling median (edge-padded).
+
+    The median is robust to impulse noise. An even ``window`` is bumped to the
+    next odd number so the padding is symmetric.
+
+    Args:
+        arr: Input array (any 1-D array-like).
+        window: Window size in samples; when ``window <= 1`` or the input is
+            shorter than ``window`` the input is only copied as float.
+
+    Returns:
+        Float array with the same length as ``arr``.
+    """
+    values = np.array(arr, dtype=float)
+    if window <= 1 or len(values) < window:
+        return values
+
+    if window % 2 == 0:
+        window += 1
+
+    padded = np.pad(values, window // 2, mode="edge")
+    # Vectorised median over all windows (sliding_window_view needs NumPy >= 1.20).
+    windows = np.lib.stride_tricks.sliding_window_view(padded, window)
+    return np.median(windows, axis=-1)
+
+
+def moving_average(arr: np.ndarray, window: int) -> np.ndarray:
+    """
+    Smooth ``arr`` with a uniform-weight centred moving average.
+
+    The input is edge-padded by half a window on each side so the output keeps
+    the input length. An even ``window`` is bumped to the next odd number.
+
+    Args:
+        arr: Input array.
+        window: Window size in samples.
+
+    Returns:
+        Float array with the same length as ``arr``; when ``window <= 1`` or
+        the array is shorter than ``window`` it is just a float copy.
+    """
+    as_float = arr.astype(float)
+    too_short = len(arr) < window
+    if window <= 1 or too_short:
+        return as_float
+
+    width = window + 1 if window % 2 == 0 else window
+    half = width // 2
+    weights = np.ones(width, dtype=float) / width
+    extended = np.pad(as_float, (half, half), mode="edge")
+    return np.convolve(extended, weights, mode="valid")
+
+
+def preprocess_values(
+    ys_grid: np.ndarray,
+    strategy: str,
+    smooth_window: int,
+) -> np.ndarray:
+    """
+    Smooth the raw values with the filter that matches the strategy.
+
+    - "phase_band": rolling median
+    - anything else: moving average
+
+    Args:
+        ys_grid: Raw value array.
+        strategy: "phase_point" or "phase_band".
+        smooth_window: Smoothing window size; ``<= 1`` disables smoothing.
+
+    Returns:
+        Float array with the same length as the input.
+    """
+    use_median = strategy == "phase_band"
+    if not use_median and not (smooth_window > 1):
+        # Nothing to smooth: hand back a float copy.
+        return ys_grid.astype(float)
+
+    smoother = rolling_median if use_median else moving_average
+    return smoother(ys_grid, smooth_window)
+
+
+# ---------------------------------------------------------------------------
+# 周期估计
+# ---------------------------------------------------------------------------
+
+def estimate_period_by_fft(ys_arr: np.ndarray) -> float:
+    """
+    Coarse estimate of the dominant period from the FFT magnitude spectrum.
+
+    The mean is removed, the non-DC bin with the largest magnitude is picked
+    and its frequency (sample spacing 1.0) is inverted into a period. The
+    result is clipped to [MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS].
+
+    Args:
+        ys_arr: Evenly sampled value array.
+
+    Returns:
+        Estimated period as a float; 60.0 when there are fewer than 8 samples,
+        the signal is (numerically) constant, or no usable peak exists.
+    """
+    fallback = 60.0
+    sample_count = len(ys_arr)
+    if sample_count < 8:
+        return fallback
+
+    detrended = ys_arr - np.mean(ys_arr)
+    if np.allclose(detrended, 0):
+        return fallback
+
+    spectrum = np.fft.rfft(detrended)
+    bin_freqs = np.fft.rfftfreq(sample_count, d=1.0)
+    if len(bin_freqs) <= 1:
+        return fallback
+
+    # Drop the DC bin (index 0) before looking for the strongest component.
+    magnitudes = np.abs(spectrum[1:])
+    if len(magnitudes) == 0 or np.max(magnitudes) <= 0:
+        return fallback
+
+    peak_freq = float(bin_freqs[int(np.argmax(magnitudes)) + 1])
+    if peak_freq <= 0:
+        return fallback
+
+    lo, hi = config.MIN_PERIOD_SECONDS, config.MAX_PERIOD_SECONDS
+    return float(np.clip(1.0 / peak_freq, lo, hi))
+
+
+def refine_period_by_autocorr(ys_arr: np.ndarray, init_period: float) -> float:
+    """
+    Refine a coarse period by locating the autocorrelation peak near it.
+
+    Lags from roughly 0.7x to 1.3x ``init_period`` are searched (bounded below
+    by MIN_PERIOD_SECONDS and above by half the sample count).
+
+    Args:
+        ys_arr: Evenly sampled value array.
+        init_period: Initial period estimate, e.g. from the FFT step.
+
+    Returns:
+        The best lag as a float, clipped to the configured period range; the
+        clipped ``init_period`` when the search cannot be carried out.
+    """
+    lo, hi = config.MIN_PERIOD_SECONDS, config.MAX_PERIOD_SECONDS
+
+    def clipped(value: float) -> float:
+        return float(np.clip(value, lo, hi))
+
+    size = len(ys_arr)
+    if size < 20:
+        return clipped(init_period)
+
+    detrended = ys_arr - np.mean(ys_arr)
+    if np.allclose(detrended, 0):
+        return clipped(init_period)
+
+    # Full autocorrelation, keeping only the non-negative lags.
+    autocorr = np.correlate(detrended, detrended, mode="full")[size - 1:]
+
+    guess = int(round(init_period))
+    first_lag = max(int(lo), int(max(2, guess * 0.7)))
+    last_lag = min(size // 2, int(max(first_lag + 1, guess * 1.3)))
+    candidates = autocorr[first_lag : last_lag + 1]
+    if last_lag <= first_lag or len(candidates) == 0:
+        return clipped(init_period)
+    return clipped(first_lag + int(np.argmax(candidates)))
+
+
+def estimate_period_rough(ys_arr: np.ndarray) -> int:
+    """
+    Estimate the signal period in whole samples with a two-step method.
+
+    An FFT estimate is refined by autocorrelation, rounded to an integer and
+    clamped to [MIN_PERIOD_SECONDS, MAX_PERIOD_SECONDS].
+
+    Args:
+        ys_arr: Evenly sampled value array.
+
+    Returns:
+        Estimated period as an int.
+    """
+    refined = refine_period_by_autocorr(ys_arr, estimate_period_by_fft(ys_arr))
+    floor = int(config.MIN_PERIOD_SECONDS)
+    ceiling = int(config.MAX_PERIOD_SECONDS)
+    return int(max(floor, min(ceiling, int(round(refined)))))
+
+
+# ---------------------------------------------------------------------------
+# 谷底检测
+# ---------------------------------------------------------------------------
+
+def find_valley_indices(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+    expected_period: int,
+) -> List[int]:
+    """
+    Detect periodic valleys (candidate cycle start points) in a series.
+
+    Steps:
+    1. Lightly smooth the signal with a moving average to suppress noise
+    2. Take local minima at or below the VALLEY_QUANTILE percentile as candidates
+    3. If there are too few candidates, relax to all local minima
+    4. Enforce a minimum spacing, keeping the lowest point within a spacing
+    5. Clean by gap plausibility (0.55x to 1.60x the expected period)
+
+    Args:
+        ts_grid: Timestamp array of the uniform 1-second grid
+        ys_grid: Matching value array
+        expected_period: Expected period, used for the spacing and gap checks
+
+    Returns:
+        Valley indices into the arrays, in ascending order.
+        An empty list when there is not enough data.
+    """
+    n = len(ys_grid)
+    if n < max(10, expected_period * 2):
+        return []
+
+    period = max(3, int(expected_period))
+    # Smoothing window is about 8% of the period, kept between 3 and 21 samples
+    smooth_window = min(max(3, int(round(period * 0.08))), 21)
+    ys_smooth = moving_average(ys_grid, smooth_window)
+
+    threshold = float(np.percentile(ys_smooth, config.VALLEY_QUANTILE))
+
+    # First pass: only local minima that are at or below the threshold
+    candidates = [
+        i for i in range(1, n - 1)
+        if (
+            ys_smooth[i] <= ys_smooth[i - 1]
+            and ys_smooth[i] < ys_smooth[i + 1]
+            and ys_smooth[i] <= threshold
+        )
+    ]
+
+    # Too few candidates: relax and take every local minimum
+    if len(candidates) < config.MIN_FULL_CYCLES_FOR_TEMPLATE:
+        candidates = [
+            i for i in range(1, n - 1)
+            if ys_smooth[i] <= ys_smooth[i - 1] and ys_smooth[i] < ys_smooth[i + 1]
+        ]
+
+    if not candidates:
+        return []
+
+    # Minimum-spacing filter: within one spacing keep the lowest point
+    min_distance = max(2, int(round(period * 0.55)))
+    selected: List[int] = []
+    for idx in candidates:
+        if not selected:
+            selected.append(idx)
+        elif idx - selected[-1] >= min_distance:
+            selected.append(idx)
+        elif ys_smooth[idx] < ys_smooth[selected[-1]]:
+            selected[-1] = idx
+
+    if len(selected) < 2:
+        return selected
+
+    # Gap plausibility pass: too-small gaps keep the lower point, too-large gaps are accepted
+    cleaned = [selected[0]]
+    for idx in selected[1:]:
+        diff = int(ts_grid[idx] - ts_grid[cleaned[-1]])
+        if int(period * 0.55) <= diff <= int(period * 1.60):
+            cleaned.append(idx)
+        elif diff < int(period * 0.55):
+            # Gap too small: keep whichever of the two is lower
+            if ys_smooth[idx] < ys_smooth[cleaned[-1]]:
+                cleaned[-1] = idx
+        else:
+            # Gap too large (a valley was probably missed): accept as is
+            cleaned.append(idx)
+
+    return cleaned
+
+
+def detect_period_and_valleys(
+    ts_grid: np.ndarray,
+    ys_grid: np.ndarray,
+) -> Tuple[int, List[int]]:
+    """
+    Estimate the period and locate the valleys in one pass.
+
+    A rough period drives the valley search; with at least three valleys the
+    period is then replaced by the median of the plausible valley gaps
+    (0.55x to 1.60x the rough period), if any such gap exists.
+
+    Args:
+        ts_grid: Timestamp array of the uniform 1-second grid.
+        ys_grid: Matching value array.
+
+    Returns:
+        ``(period, valley_indices)`` where ``period`` is an int clamped to the
+        configured range and ``valley_indices`` is the list of valley indices.
+    """
+    rough_period = estimate_period_rough(ys_grid)
+    valley_idx = find_valley_indices(ts_grid, ys_grid, rough_period)
+
+    final_period = rough_period
+    if len(valley_idx) >= 3:
+        gaps = np.diff(ts_grid[valley_idx])
+        plausible = (gaps >= rough_period * 0.55) & (gaps <= rough_period * 1.60)
+        if plausible.any():
+            final_period = int(round(float(np.median(gaps[plausible]))))
+
+    floor = int(config.MIN_PERIOD_SECONDS)
+    ceiling = int(config.MAX_PERIOD_SECONDS)
+    return int(max(floor, min(ceiling, final_period))), valley_idx
diff --git a/ai/predictor/state.py b/ai/predictor/state.py
new file mode 100644
index 0000000..d7adedd
--- /dev/null
+++ b/ai/predictor/state.py
@@ -0,0 +1,328 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.state
+~~~~~~~~~~~~~~~
+状态生命周期管理：BaselineState 的创建、更新和 phase-lock 应用。
+
+职责：
+- 首次见到某指标时初始化健康基线
+- 每轮轮询时运行异常检测，更新状态机（HEALTHY / ANOMALY / RECOVERING）
+- 健康/恢复状态下用 EMA 渐进更新模板
+- 将 phase-lock 结果写回 state
+
+本模块不做任何 IO，states 字典由调用方（service.py）持有和传入。
+
+依赖：predictor.template, predictor.anomaly, predictor.config, predictor.models
+"""
+
+import logging
+import time
+from datetime import datetime
+from typing import Dict, Optional, Tuple
+
+import numpy as np
+
+from . import config
+from .anomaly import detect_anomaly
+from .models import (
+    BASELINE_STATUS_ANOMALY,
+    BASELINE_STATUS_HEALTHY,
+    BASELINE_STATUS_RECOVERING,
+    BaselineState,
+)
+from .template import (
+    build_current_baseline,
+    merge_template,
+    resample_template,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def create_initial_state(
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    ys_actual: np.ndarray,
+    target: Dict,
+    now_sec: int,
+) -> Optional[BaselineState]:
+    """
+    Build the first healthy baseline state for a series.
+
+    Called the first time a series is seen. The state starts as HEALTHY with
+    ``clean_seconds`` pre-credited as ``period * MAX_CYCLES_FOR_TEMPLATE``.
+
+    Args:
+        ts_grid: Timestamp array of the uniform 1-second grid.
+        ys_model: Smoothed signal, passed on as ``ys_mid_grid``.
+        ys_actual: Raw signal, passed on as ``ys_band_grid`` and used for the
+            ``y_min`` / ``y_max`` statistics.
+        target: Target dict; ``strategy``, ``band_low_q`` and ``band_high_q``
+            are read (defaults "phase_point", 5.0 and 95.0).
+        now_sec: Current Unix time in seconds.
+
+    Returns:
+        The new BaselineState, or None when no baseline could be built.
+    """
+    chosen_strategy = str(target.get("strategy", "phase_point"))
+
+    built = build_current_baseline(
+        ts_grid=ts_grid,
+        ys_mid_grid=ys_model,
+        ys_band_grid=ys_actual,
+        strategy=chosen_strategy,
+        band_low_q=float(target.get("band_low_q", 5.0)),
+        band_high_q=float(target.get("band_high_q", 95.0)),
+    )
+    if built is None:
+        return None
+
+    cycle, origin_ts, mid, lower, upper = built
+
+    return BaselineState(
+        period=int(cycle),
+        phase_origin_ts=int(origin_ts),
+        template=mid.astype(float).tolist(),
+        lower_template=lower.astype(float).tolist(),
+        upper_template=upper.astype(float).tolist(),
+        strategy=chosen_strategy,
+        status=BASELINE_STATUS_HEALTHY,
+        # Start with several full cycles of credit: the history is trusted.
+        clean_seconds=int(cycle * config.MAX_CYCLES_FOR_TEMPLATE),
+        last_update_ts=now_sec,
+        last_seen_ts=now_sec,
+        y_min=float(np.min(ys_actual)),
+        y_max=float(np.max(ys_actual)),
+    )
+
+
+def apply_phase_lock_to_state(
+    state: BaselineState,
+    best_period: int,
+    best_origin: int,
+) -> None:
+    """
+    Write a phase-lock result back into ``state`` (mutates it in place).
+
+    Any of the three templates whose length differs from the new period is
+    resampled to that length. Periods of 1 or less are ignored entirely.
+
+    Args:
+        state: Baseline state to update in place.
+        best_period: Period chosen by phase-lock; coerced with ``int()``.
+        best_origin: Phase origin chosen by phase-lock; coerced with ``int()``.
+
+    Returns:
+        None. ``state`` is modified in place.
+    """
+    locked_period = int(best_period)
+    if locked_period <= 1:
+        # Degenerate period: leave the state untouched.
+        return
+
+    # Bring every template to the locked period's length when it differs.
+    for attr in ("template", "lower_template", "upper_template"):
+        current = getattr(state, attr)
+        if len(current) == locked_period:
+            continue
+        resized = resample_template(
+            np.array(current, dtype=float), locked_period
+        )
+        setattr(state, attr, resized.astype(float).tolist())
+
+    # Record the lock itself last, after the templates have been resized.
+    state.period = locked_period
+    state.phase_origin_ts = int(best_origin)
+
+
+def maybe_update_state(
+    key: str,
+    ts_grid: np.ndarray,
+    ys_model: np.ndarray,
+    ys_actual: np.ndarray,
+    target: Dict,
+    states: Dict[str, BaselineState],
+) -> Tuple[Optional[BaselineState], bool, float, float, float, int, float]:
+    """
+    Core state update: run anomaly detection and advance the baseline state machine.
+
+    State machine transitions:
+    - no state -> initialise -> HEALTHY (returns; no anomaly detection this round)
+    - any status + anomaly -> ANOMALY (template frozen, clean_seconds reset to 0)
+    - ANOMALY + normal -> RECOVERING (clean time starts counting from this round)
+    - RECOVERING + normal + enough clean time + new baseline -> HEALTHY
+    - HEALTHY/RECOVERING + normal + enough clean time -> template merged (EMA)
+
+    Args:
+        key: Unique series identifier (key into the ``states`` dict)
+        ts_grid: Timestamp array of the uniform 1-second grid
+        ys_model: Smoothed signal
+        ys_actual: Raw signal
+        target: Target dict holding the strategy and threshold settings
+        states: State dict for all series (owned by the caller, modified in place)
+
+    Returns:
+        (state, is_anomaly, outside_ratio, mean_abs_err, mean_rel_err,
+         max_outside_seconds, max_exceed_ratio)
+        ``state`` is None when no initial baseline could be built; skip this round.
+    """
+    now_sec = int(time.time())
+    state = states.get(key)
+
+    # First time this series is seen: initialise a healthy baseline
+    if state is None:
+        state = create_initial_state(
+            ts_grid=ts_grid,
+            ys_model=ys_model,
+            ys_actual=ys_actual,
+            target=target,
+            now_sec=now_sec,
+        )
+
+        if state is None:
+            return None, False, 0.0, 0.0, 0.0, 0, 0.0
+
+        states[key] = state
+        logger.info(
+            "初始化健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss",
+            key,
+            state.strategy,
+            state.period,
+            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+            state.clean_seconds,
+        )
+        return state, False, 0.0, 0.0, 0.0, 0, 0.0
+
+    # Seconds since the last processing round, used to accumulate clean_seconds
+    elapsed = max(1, now_sec - int(state.last_seen_ts))
+    elapsed = min(elapsed, config.POLL_INTERVAL * 2)  # cap so a long outage cannot inflate clean_seconds
+    state.last_seen_ts = now_sec
+
+    (
+        is_anomaly,
+        outside_ratio,
+        mean_abs_err,
+        mean_rel_err,
+        best_period,
+        best_origin,
+        max_outside_seconds,
+        max_exceed_ratio,
+    ) = detect_anomaly(
+        state=state,
+        ts_grid=ts_grid,
+        ys_model=ys_model,
+        ys_actual=ys_actual,
+        target=target,
+    )
+
+    # Anomaly: freeze the template so faulty data is never learned
+    if is_anomaly:
+        state.status = BASELINE_STATUS_ANOMALY
+        state.clean_seconds = 0
+        states[key] = state
+        logger.warning(
+            "检测到异常，冻结模板 key=%s outside_ratio=%.2f max_outside=%ss "
+            "max_exceed_ratio=%.2f mean_abs_err=%.4f mean_rel_err=%.4f",
+            key, outside_ratio, max_outside_seconds,
+            max_exceed_ratio, mean_abs_err, mean_rel_err,
+        )
+        return state, True, outside_ratio, mean_abs_err, mean_rel_err, max_outside_seconds, max_exceed_ratio
+
+    # Normal: apply the phase-lock result
+    old_period = int(state.period)
+    old_origin = int(state.phase_origin_ts)
+    apply_phase_lock_to_state(state, best_period, best_origin)
+
+    if old_period != state.period or old_origin != state.phase_origin_ts:
+        logger.info(
+            "phase-lock key=%s period %s -> %s origin %s -> %s",
+            key, old_period, state.period,
+            datetime.fromtimestamp(old_origin).strftime("%H:%M:%S"),
+            datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+        )
+
+    # The anomaly just cleared: enter recovery and wait for stability before learning again
+    if state.status == BASELINE_STATUS_ANOMALY:
+        state.status = BASELINE_STATUS_RECOVERING
+        state.clean_seconds = elapsed
+        states[key] = state
+        logger.info("异常开始恢复 key=%s clean_seconds=%ss", key, state.clean_seconds)
+        return state, False, outside_ratio, mean_abs_err, mean_rel_err, max_outside_seconds, max_exceed_ratio
+
+    # Accumulate clean (healthy) time
+    if state.status == BASELINE_STATUS_RECOVERING:
+        state.clean_seconds += elapsed
+    else:
+        state.status = BASELINE_STATUS_HEALTHY
+        state.clean_seconds += elapsed
+
+    # Not enough clean time yet: do not update the template
+    min_clean_for_update = max(
+        config.RECOVERY_MIN_SECONDS,
+        int(state.period) * config.MIN_FULL_CYCLES_FOR_TEMPLATE,
+    )
+    if state.clean_seconds < min_clean_for_update:
+        states[key] = state
+        return state, False, outside_ratio, mean_abs_err, mean_rel_err, max_outside_seconds, max_exceed_ratio
+
+    # Enough clean time: update the template from the most recent data (EMA)
+    tail_seconds = min(
+        int(state.clean_seconds),
+        int(state.period) * config.MAX_CYCLES_FOR_TEMPLATE,
+    )
+
+    strategy = str(target.get("strategy", "phase_point"))
+    band_low_q = float(target.get("band_low_q", 5.0))
+    band_high_q = float(target.get("band_high_q", 95.0))
+
+    baseline = build_current_baseline(
+        ts_grid=ts_grid,
+        ys_mid_grid=ys_model,
+        ys_band_grid=ys_actual,
+        strategy=strategy,
+        band_low_q=band_low_q,
+        band_high_q=band_high_q,
+        tail_seconds=tail_seconds,
+    )
+
+    if baseline is None:
+        states[key] = state
+        return state, False, outside_ratio, mean_abs_err, mean_rel_err, max_outside_seconds, max_exceed_ratio
+
+    new_period, new_origin, new_template, new_lower_template, new_upper_template = baseline
+
+    # Recovery uses its own alpha (RECOVERY_EMA_ALPHA) instead of the healthy one
+    alpha = config.RECOVERY_EMA_ALPHA if state.status == BASELINE_STATUS_RECOVERING else config.HEALTHY_EMA_ALPHA
+
+    state.template = merge_template(
+        np.array(state.template, dtype=float), new_template, alpha
+    ).astype(float).tolist()
+    state.lower_template = merge_template(
+        np.array(state.lower_template, dtype=float), new_lower_template, alpha
+    ).astype(float).tolist()
+    state.upper_template = merge_template(
+        np.array(state.upper_template, dtype=float), new_upper_template, alpha
+    ).astype(float).tolist()
+
+    state.period = int(new_period)
+    state.phase_origin_ts = int(new_origin)
+    state.status = BASELINE_STATUS_HEALTHY
+    state.last_update_ts = now_sec
+
+    # Refresh the value-range statistics (y_min / y_max) from the recent tail
+    if tail_seconds > 0 and len(ys_actual) >= tail_seconds:
+        state.y_min = float(np.min(ys_actual[-tail_seconds:]))
+        state.y_max = float(np.max(ys_actual[-tail_seconds:]))
+    else:
+        state.y_min = float(np.min(ys_actual))
+        state.y_max = float(np.max(ys_actual))
+
+    states[key] = state
+    logger.info(
+        "更新健康模板 key=%s strategy=%s period=%ss origin=%s clean=%ss alpha=%.2f",
+        key, state.strategy, state.period,
+        datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S"),
+        state.clean_seconds, alpha,
+    )
+
+    return state, False, outside_ratio, mean_abs_err, mean_rel_err, max_outside_seconds, max_exceed_ratio
diff --git a/ai/predictor/storage.py b/ai/predictor/storage.py
new file mode 100644
index 0000000..f614528
--- /dev/null
+++ b/ai/predictor/storage.py
@@ -0,0 +1,438 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.storage
+~~~~~~~~~~~~~~~~~
+VictoriaMetrics 读写层，封装所有网络 IO。
+
+职责：
+- 从 VM 拉取历史时序数据（query_range）
+- 将预测结果和异常指标写入 VM（import/prometheus）
+- 标签字符串的序列化与解析
+- 状态文件的持久化读写
+
+本模块不包含任何预测或异常检测逻辑，只负责数据的搬运和格式转换。
+
+依赖：requests, numpy
+"""
+
+import json
+import logging
+import math
+import os
+import re
+from dataclasses import asdict
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Tuple
+
+import numpy as np
+import requests
+
+from .models import BaselineState
+
+logger = logging.getLogger(__name__)
+
+# Regex used to parse PromQL label strings; matches key="value" pairs
+_LABEL_PATTERN = re.compile(
+    r'\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"((?:\\.|[^"])*)"\s*'
+)
+
+
+# ---------------------------------------------------------------------------
+# 历史数据读取
+# ---------------------------------------------------------------------------
+
+def fetch_history(
+    vm_url: str,
+    query: str,
+    minutes: int,
+    step: str = "1s",
+) -> Tuple[List[float], List[float]]:
+    """
+    Fetch recent samples for one query from VictoriaMetrics.
+
+    Issues a GET to ``{vm_url}/api/v1/query_range`` covering the last
+    ``minutes`` minutes and reads only the first series of the result.
+
+    Args:
+        vm_url: Base HTTP address of VM, e.g. "http://localhost:8428".
+        query: PromQL expression, e.g. 'feed_rate{device_id="fanuc-cnc"}'.
+        minutes: How many minutes of history to request.
+        step: Query step, "1s" by default.
+
+    Returns:
+        ``(timestamps, values)`` as two equally long lists of finite floats;
+        two empty lists on request failure, unparsable body or empty result.
+    """
+    window_end = datetime.now()
+    window_start = window_end - timedelta(minutes=minutes)
+    request_params = {
+        "query": query,
+        "start": window_start.timestamp(),
+        "end": window_end.timestamp(),
+        "step": step,
+    }
+    try:
+        resp = requests.get(
+            f"{vm_url}/api/v1/query_range",
+            params=request_params,
+            timeout=10,
+        )
+        resp.raise_for_status()
+    except requests.RequestException as e:
+        logger.error("拉取历史数据失败 query=%s: %s", query, e)
+        return [], []
+
+    try:
+        series = resp.json().get("data", {}).get("result", [])
+    except Exception as e:
+        logger.error("解析 VM 响应失败 query=%s: %s", query, e)
+        return [], []
+
+    timestamps: List[float] = []
+    samples: List[float] = []
+    if not series:
+        return timestamps, samples
+    for pair in series[0].get("values", []):
+        if len(pair) < 2:
+            continue
+        try:
+            t, y = float(pair[0]), float(pair[1])
+        except (TypeError, ValueError):
+            continue
+        # Drop NaN / Inf so later numpy maths stays well defined.
+        if math.isfinite(t) and math.isfinite(y):
+            timestamps.append(t)
+            samples.append(y)
+
+    return timestamps, samples
+
+
+def normalize_history(
+    ts: List[float],
+    ys: List[float],
+) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Resample raw samples onto a uniform 1-second grid.
+
+    Steps:
+    1. Round timestamps to whole seconds; for duplicates the last value wins
+    2. Sort by timestamp
+    3. Fill missing seconds by linear interpolation
+
+    Samples that cannot be converted to float, or whose timestamp or value is
+    NaN / Inf, are skipped.
+
+    Args:
+        ts: Raw timestamps (Unix seconds, may be fractional).
+        ys: Values matching ``ts``.
+
+    Returns:
+        ``(ts_grid, ys_grid)`` float arrays on a 1-second grid. Two empty
+        arrays when the input is empty, mismatched in length or has no usable
+        sample; a single usable sample is returned as-is.
+    """
+    if not ts or not ys or len(ts) != len(ys):
+        return np.array([]), np.array([])
+
+    # De-duplicate per second: a later sample overwrites an earlier one.
+    by_second: Dict[int, float] = {}
+    for t, y in zip(ts, ys):
+        try:
+            t_val = float(t)
+            y_val = float(y)
+        except (TypeError, ValueError, OverflowError):
+            continue
+        # Check finiteness BEFORE rounding: round(inf) raises OverflowError and
+        # round(nan) raises ValueError, so non-finite stamps must never reach it.
+        if math.isfinite(t_val) and math.isfinite(y_val):
+            by_second[int(round(t_val))] = y_val
+
+    if not by_second:
+        return np.array([]), np.array([])
+
+    seconds = sorted(by_second)
+    ts_clean = np.array(seconds, dtype=float)
+    ys_clean = np.array([by_second[s] for s in seconds], dtype=float)
+
+    if len(ts_clean) < 2:
+        return ts_clean, ys_clean
+
+    start_sec, end_sec = seconds[0], seconds[-1]
+    # Build the uniform grid and interpolate onto it.
+    ts_grid = np.arange(start_sec, end_sec + 1, 1, dtype=float)
+    ys_grid = np.interp(ts_grid, ts_clean, ys_clean)
+    return ts_grid, ys_grid
+
+
+# ---------------------------------------------------------------------------
+# 标签工具
+# ---------------------------------------------------------------------------
+
+def prom_escape_label_value(value: str) -> str:
+    """Escape backslashes, newlines and double quotes in a Prometheus label value."""
+    # Backslash must be handled first so the escapes added below are not doubled.
+    escaped = str(value)
+    for raw, replacement in (("\\", "\\\\"), ("\n", "\\n"), ('"', '\\"')):
+        escaped = escaped.replace(raw, replacement)
+
+    return escaped
+
+
+def labels_to_str(labels: Dict[str, str]) -> str:
+    """
+    Serialise a label dict as a sorted Prometheus label block ("" when empty).
+
+    Example:
+        {"device_id": "fanuc-cnc", "source": "protoforge"}
+        → '{device_id="fanuc-cnc",source="protoforge"}'
+    """
+    if not labels:
+        return ""
+    # Sorted keys make the output deterministic for a given dict.
+    rendered = ",".join(
+        f'{name}="{prom_escape_label_value(labels[name])}"' for name in sorted(labels)
+    )
+    return "{" + rendered + "}"
+
+
+def parse_labels_from_query(query: str) -> Dict[str, str]:
+    """
+    Extract the label matchers of a PromQL selector as a dict.
+
+    Only ``key="value"`` pairs between the first "{" and the last "}" are
+    read. Escapes are decoded in a single left-to-right pass so an escaped
+    backslash followed by "n" or a quote is not mistaken for another escape.
+
+    Example:
+        'feed_rate{device_id="fanuc-cnc"}' → {"device_id": "fanuc-cnc"}
+    """
+    labels: Dict[str, str] = {}
+
+    open_pos = query.find("{")
+    close_pos = query.rfind("}")
+    if open_pos < 0 or close_pos < 0:
+        return labels
+
+    # Inverse of prom_escape_label_value; unknown escapes are left untouched.
+    decoded = {"\\": "\\", '"': '"', "n": "\n"}
+
+    def unescape(match):
+        return decoded.get(match.group(1), match.group(0))
+
+    for match in _LABEL_PATTERN.finditer(query[open_pos + 1 : close_pos]):
+        labels[match.group(1)] = re.sub(r"\\(.)", unescape, match.group(2))
+
+    return labels
+
+
+def merge_labels(*dicts: Dict[str, str]) -> Dict[str, str]:
+    """Merge label dicts left to right; later dicts override earlier keys."""
+    merged: Dict[str, str] = {}
+    # Falsy arguments (None, empty dicts) are skipped.
+    for mapping in filter(None, dicts):
+        merged.update(mapping)
+    return merged
+
+
+def series_key(metric_name: str, labels: Dict[str, str]) -> str:
+    """Return the metric name followed by its serialised labels (BaselineState dict key)."""
+    return "".join((metric_name, labels_to_str(labels)))
+
+
+# ---------------------------------------------------------------------------
+# 数据写入
+# ---------------------------------------------------------------------------
+
+def write_series(
+    vm_url: str,
+    metric_name: str,
+    labels: Dict[str, str],
+    ts_list: List[int],
+    values: List[float],
+) -> bool:
+    """
+    Write one series to VictoriaMetrics in Prometheus text exposition format.
+
+    Points are POSTed to ``{vm_url}/api/v1/import/prometheus``; non-finite points are dropped.
+
+    Args:
+        vm_url: Base HTTP address of VM.
+        metric_name: Metric name.
+        labels: Label dict attached to every point.
+        ts_list: Timestamps in Unix seconds.
+        values: Values matching ``ts_list``.
+
+    Returns:
+        True when the request succeeded; False when the input is empty or
+        mismatched, no valid point remains, or the request failed.
+    """
+    if not ts_list or not values or len(ts_list) != len(values):
+        return False
+
+    label_str = labels_to_str(labels)
+    lines: List[str] = []
+    for t, y in zip(ts_list, values):
+        try:
+            ts_val = float(t)
+            val = float(y)
+        except (TypeError, ValueError, OverflowError):
+            continue
+        # Check finiteness before rounding (round(inf) raises OverflowError); VM gets milliseconds.
+        if math.isfinite(ts_val) and math.isfinite(val):
+            lines.append(f"{metric_name}{label_str} {val:.6f} {int(round(ts_val)) * 1000}")
+
+    if not lines:
+        return False
+
+    try:
+        resp = requests.post(
+            f"{vm_url}/api/v1/import/prometheus",
+            data=("\n".join(lines) + "\n").encode("utf-8"),
+            headers={"Content-Type": "text/plain; version=0.0.4; charset=utf-8"},
+            timeout=10,
+        )
+        resp.raise_for_status()
+        return True
+    except requests.RequestException as e:
+        logger.error("写入数据失败 metric=%s: %s", metric_name, e)
+        return False
+
+
+def write_prediction_bundle(
+    vm_url: str,
+    pred_metric: str,
+    anomaly_metric: str,
+    labels: Dict[str, str],
+    ts_future: List[int],
+    pred_values: np.ndarray,
+    lower_values: np.ndarray,
+    upper_values: np.ndarray,
+    is_anomaly: bool,
+    outside_ratio: float,
+    mean_abs_err: float,
+    mean_rel_err: float,
+    max_outside_seconds: int,
+    max_exceed_ratio: float,
+    event_ts: int,
+) -> bool:
+    """
+    Write the complete prediction bundle for one metric:
+    - median forecast curve (``pred_metric``)
+    - lower bound curve (``{pred_metric}_lower``)
+    - upper bound curve (``{pred_metric}_upper``)
+    - anomaly flag (``anomaly_metric``, 0 or 1)
+    - five diagnostics (``{anomaly_metric}_outside_ratio`` and friends)
+
+    Args:
+        vm_url: Base HTTP address of VM.
+        pred_metric: Forecast metric name, e.g. "feed_rate_predicted".
+        anomaly_metric: Anomaly metric name, e.g. "feed_rate_anomaly".
+        labels: Labels attached to every written series.
+        ts_future: Forecast timestamps (Unix seconds).
+        pred_values: Median forecast array.
+        lower_values: Lower bound array.
+        upper_values: Upper bound array.
+        is_anomaly: Whether the series is currently judged anomalous.
+        outside_ratio: Share of points outside the band in the detection window.
+        mean_abs_err: Mean absolute error.
+        mean_rel_err: Mean relative error.
+        max_outside_seconds: Longest run of consecutive out-of-band seconds.
+        max_exceed_ratio: Largest exceedance relative to the band width.
+        event_ts: Timestamp used for the flag and the diagnostics.
+
+    Returns:
+        True only when all nine writes succeeded. Every write is attempted
+        even if an earlier one failed.
+    """
+    # Diagnostic series carry an extra ``type`` label for filtering.
+    anomaly_labels = {**labels, "type": "prediction_deviation"}
+
+    def write_curve(suffix: str, curve: np.ndarray) -> bool:
+        name = f"{pred_metric}{suffix}"
+        return write_series(vm_url, name, labels, ts_future, curve.tolist())
+
+    def write_diag(suffix: str, value: float) -> bool:
+        name = f"{anomaly_metric}{suffix}"
+        return write_series(vm_url, name, anomaly_labels, [event_ts], [value])
+
+    # A list (not a generator) so every write runs even after a failure.
+    outcomes = [
+        write_curve("", pred_values),
+        write_curve("_lower", lower_values),
+        write_curve("_upper", upper_values),
+        write_diag("", 1.0 if is_anomaly else 0.0),
+        write_diag("_outside_ratio", outside_ratio),
+        write_diag("_mean_abs_error", mean_abs_err),
+        write_diag("_mean_rel_error", mean_rel_err),
+        write_diag("_max_consecutive_outside", float(max_outside_seconds)),
+        write_diag("_max_exceed_ratio", float(max_exceed_ratio)),
+    ]
+    return all(outcomes)
+
+
+# ---------------------------------------------------------------------------
+# 状态持久化
+# ---------------------------------------------------------------------------
+
+def load_state(path: str) -> Dict[str, BaselineState]:
+    """
+    Load the baseline states of all series from a JSON file.
+
+    A missing file yields an empty dict (normal first start). Malformed or
+    incomplete entries are skipped instead of aborting the whole load.
+
+    Args:
+        path: State file path.
+
+    Returns:
+        Dict mapping series key to BaselineState.
+    """
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            raw = json.load(f)
+        entries = raw.get("baseline_states", {}).items()
+    except FileNotFoundError:
+        return {}
+    except Exception as e:
+        logger.warning("加载状态文件失败，将重新学习: %s", e)
+        return {}
+
+    required_fields = {
+        "period", "phase_origin_ts", "template", "lower_template", "upper_template", "strategy",
+        "status", "clean_seconds", "last_update_ts", "last_seen_ts", "y_min", "y_max",
+    }
+    states: Dict[str, BaselineState] = {}
+    for key, value in entries:
+        try:
+            if required_fields.issubset(value):
+                states[key] = BaselineState(**value)
+        except TypeError as e:
+            logger.warning("跳过无法解析的状态条目 key=%s: %s", key, e)
+    logger.info("已加载状态文件 %s，共 %d 条记录", path, len(states))
+    return states
+
+
+def save_state(path: str, states: Dict[str, BaselineState]) -> None:
+    """
+    Persist the baseline states of all series to a JSON file.
+
+    The document is written to ``path + ".tmp"`` and then moved over ``path``
+    with os.replace, so a crash mid-write does not leave a truncated state
+    file. Any failure is logged as a warning and swallowed.
+
+    Args:
+        path: State file path.
+        states: Dict mapping series key to BaselineState.
+    """
+    try:
+        serialised = {}
+        for key, state in states.items():
+            serialised[key] = asdict(state)
+        document = {"baseline_states": serialised}
+
+        staging_path = path + ".tmp"
+        with open(staging_path, "w", encoding="utf-8") as f:
+            json.dump(document, f, ensure_ascii=False, indent=2)
+        os.replace(staging_path, path)
+    except Exception as e:
+        logger.warning("保存状态文件失败: %s", e)
diff --git a/ai/predictor/template.py b/ai/predictor/template.py
new file mode 100644
index 0000000..86d8170
--- /dev/null
+++ b/ai/predictor/template.py
@@ -0,0 +1,384 @@
+# -*- coding: utf-8 -*-
+"""
+predictor.template
+~~~~~~~~~~~~~~~~~~
+模板的构建、预测、重采样与融合，不包含任何 IO 操作。
+
+职责：
+- 从历史谷底片段构建周期模板（中值/分位数）
+- 基于模板和相位原点预测未来值
+- 模板重采样（周期变化时对齐长度）
+- EMA 融合新旧模板（渐进式学习）
+- 相位原点规整化
+
+依赖：numpy, predictor.signal, predictor.config, predictor.models
+"""
+
+import math
+from typing import List, Optional, Tuple
+
+import numpy as np
+
+from . import config
+from .models import BaselineState
+from .signal import moving_average
+
+
+# ---------------------------------------------------------------------------
+# 模板构建
+# ---------------------------------------------------------------------------
+
+def build_templates_from_valleys(
+    ts_grid: np.ndarray,
+    ys_mid_grid: np.ndarray,
+    ys_band_grid: np.ndarray,
+    period: int,
+    valleys: List[int],
+    strategy: str,
+    band_low_q: float,
+    band_high_q: float,
+) -> Optional[Tuple[np.ndarray, np.ndarray, np.ndarray]]:
+    """
+    Build mid, lower and upper template curves from valley-to-valley cycles.
+
+    Every pair of consecutive valleys delimits one cycle. Each accepted cycle
+    is resampled onto a common grid of `period` points and the cycles are
+    then aggregated per phase:
+    - "phase_band": median of the mid signal, percentiles of the band signal
+    - any other strategy: weighted mean of the mid signal (newer cycles
+      weigh more); lower and upper are copies of that mean
+
+    Args:
+        ts_grid: Timestamps aligned index-by-index with both signals.
+        ys_mid_grid: Signal aggregated into the mid template.
+        ys_band_grid: Signal aggregated into the lower/upper percentiles.
+        period: Expected cycle length; also the template length.
+        valleys: Indices into the grids that mark cycle boundaries.
+        strategy: "phase_band" or "phase_point".
+        band_low_q: Percentile for the lower template (phase_band only).
+        band_high_q: Percentile for the upper template (phase_band only).
+
+    Returns:
+        (mid, lower, upper) float arrays of length `period`, or None when
+        too few usable cycles are available.
+    """
+    if period <= 1 or len(valleys) < config.MIN_FULL_CYCLES_FOR_TEMPLATE + 1:
+        return None
+    min_cycles = config.MIN_FULL_CYCLES_FOR_TEMPLATE
+
+    # Accept only cycles lasting between 0.55x and 1.60x the expected period.
+    cycles = []
+    for start, stop in zip(valleys[:-1], valleys[1:]):
+        span = float(ts_grid[stop] - ts_grid[start])
+        if period * 0.55 <= span <= period * 1.60:
+            cycles.append((start, stop, span))
+    if len(cycles) < min_cycles:
+        return None
+
+    # Only the newest MAX_CYCLES_FOR_TEMPLATE cycles contribute.
+    cycles = cycles[-config.MAX_CYCLES_FOR_TEMPLATE:]
+    n_cycles = len(cycles)
+
+    phase_axis = np.arange(period, dtype=float)
+    mid_rows: List[np.ndarray] = []
+    band_rows: List[np.ndarray] = []
+    row_weights: List[float] = []
+
+    for rank, (start, stop, span) in enumerate(cycles, start=1):
+        window = slice(start, stop + 1)
+        seg_ts = ts_grid[window]
+        mid_y = ys_mid_grid[window]
+        band_y = ys_band_grid[window]
+        if min(len(mid_y), len(band_y)) < 3:
+            continue
+
+        # Stretch the cycle's elapsed time onto [0, period], then sample the shared phase grid.
+        warped = (seg_ts - seg_ts[0]) / span * period
+        mid_rows.append(np.interp(phase_axis, warped, mid_y).astype(float))
+        band_rows.append(np.interp(phase_axis, warped, band_y).astype(float))
+        # Recency weight grows linearly with the cycle's rank, reaching 1.0 for the newest.
+        row_weights.append(0.5 + 0.5 * (rank / n_cycles))
+
+    if len(mid_rows) < min_cycles:
+        return None
+
+    mid_arr = np.vstack(mid_rows)
+    band_arr = np.vstack(band_rows)
+    w_arr = np.array(row_weights, dtype=float)
+
+    if strategy != "phase_band":
+        # Weighted mean; the band collapses onto the mid curve.
+        mid_template = np.average(mid_arr, axis=0, weights=w_arr)
+        lower_template = mid_template.copy()
+        upper_template = mid_template.copy()
+    else:
+        # Per-phase median of the mid signal and percentiles of the band signal.
+        mid_template = np.percentile(mid_arr, 50, axis=0)
+        lower_template = np.percentile(band_arr, band_low_q, axis=0)
+        upper_template = np.percentile(band_arr, band_high_q, axis=0)
+
+    return (
+        mid_template.astype(float),
+        lower_template.astype(float),
+        upper_template.astype(float),
+    )
+
+
+def build_current_baseline(
+    ts_grid: np.ndarray,
+    ys_mid_grid: np.ndarray,
+    ys_band_grid: np.ndarray,
+    strategy: str,
+    band_low_q: float,
+    band_high_q: float,
+    tail_seconds: Optional[int] = None,
+) -> Optional[Tuple[int, int, np.ndarray, np.ndarray, np.ndarray]]:
+    """
+    Derive a baseline (period, phase origin and three templates) from history.
+
+    The period and valleys come from detect_period_and_valleys and the
+    templates from build_templates_from_valleys. The phase origin is the
+    timestamp of the last detected valley, rounded to an integer.
+
+    Args:
+        ts_grid: Timestamps aligned index-by-index with both signals.
+        ys_mid_grid: Signal used for period detection and the mid template.
+        ys_band_grid: Signal used for the lower/upper templates.
+        strategy: "phase_point" or "phase_band".
+        band_low_q: Lower percentile forwarded to the template builder.
+        band_high_q: Upper percentile forwarded to the template builder.
+        tail_seconds: When positive, only samples whose timestamp is within
+            this many seconds of the last timestamp are used.
+
+    Returns:
+        (period, phase_origin_ts, template, lower_template, upper_template),
+        or None when there are too few samples or no template can be built.
+    """
+    from .signal import detect_period_and_valleys
+
+    if len(ys_mid_grid) < config.MIN_POINTS or len(ys_band_grid) < config.MIN_POINTS:
+        return None
+
+    ts_use, ys_mid_use, ys_band_use = ts_grid, ys_mid_grid, ys_band_grid
+    if tail_seconds is not None and tail_seconds > 0:
+        # Keep only the trailing window that ends at the last timestamp.
+        recent = ts_grid >= ts_grid[-1] - int(tail_seconds)
+        ts_use = ts_grid[recent]
+        ys_mid_use = ys_mid_grid[recent]
+        ys_band_use = ys_band_grid[recent]
+
+        # Trimming may have left too little data; without trimming the
+        # length check above already covers these arrays.
+        if min(len(ys_mid_use), len(ys_band_use)) < config.MIN_POINTS:
+            return None
+
+    period, valleys = detect_period_and_valleys(ts_use, ys_mid_use)
+    templates = build_templates_from_valleys(
+        ts_grid=ts_use,
+        ys_mid_grid=ys_mid_use,
+        ys_band_grid=ys_band_use,
+        period=period,
+        valleys=valleys,
+        strategy=strategy,
+        band_low_q=band_low_q,
+        band_high_q=band_high_q,
+    )
+
+    if templates is None or len(valleys) == 0:
+        return None
+
+    mid, lower, upper = templates
+
+    # The most recent valley anchors phase zero.
+    last_valley_ts = float(ts_use[valleys[-1]])
+    return int(period), int(round(last_valley_ts)), mid, lower, upper
+
+
+# ---------------------------------------------------------------------------
+# 模板预测
+# ---------------------------------------------------------------------------
+
+def circular_template_value(template: np.ndarray, phase: float) -> float:
+    """
+    Read a template at a fractional phase, wrapping around at its end.
+
+    Linear interpolation between neighbours; the last sample's neighbour is the first.
+
+    Args:
+        template: Template samples, one per unit of phase.
+        phase: Position to read; taken modulo len(template).
+    Returns:
+        The interpolated value, or 0.0 for an empty template.
+    """
+    size = len(template)
+    if not size:
+        return 0.0
+
+    wrapped = float(phase) % size
+    whole = math.floor(wrapped)
+    left = int(whole) % size
+    weight = wrapped - whole
+    return float((1.0 - weight) * template[left] + weight * template[(left + 1) % size])
+
+
+def resample_template(old_template: np.ndarray, new_period: int) -> np.ndarray:
+    """
+    Resample a cyclic template to a new period length.
+
+    The template is placed on the phase interval [0, 1) and tiled three times
+    so that interpolation wraps cleanly at both ends. An empty template
+    resamples to zeros instead of NaN, in line with the 0.0 that
+    circular_template_value returns for an empty template.
+
+    Args:
+        old_template: Source template (1-D array or sequence of numbers).
+        new_period: Number of samples in the result.
+    Returns:
+        Float array of length new_period.
+    """
+    old_template = np.asarray(old_template, dtype=float)
+    old_period = len(old_template)
+    if old_period == new_period:
+        return old_template.astype(float)
+    if old_period == 0:
+        return np.zeros(new_period, dtype=float)
+    if old_period == 1 or new_period <= 1:
+        return np.full(new_period, float(np.mean(old_template)), dtype=float)
+
+    old_x = np.linspace(0.0, 1.0, old_period, endpoint=False)
+    new_x = np.linspace(0.0, 1.0, new_period, endpoint=False)
+    old_x_ext = np.concatenate([old_x - 1.0, old_x, old_x + 1.0])
+    old_y_ext = np.concatenate([old_template, old_template, old_template])
+    return np.interp(new_x, old_x_ext, old_y_ext).astype(float)
+
+
+def predict_template_values(
+    template: np.ndarray,
+    period: int,
+    phase_origin_ts: int,
+    ts_list: List[int],
+) -> np.ndarray:
+    """
+    Evaluate a cyclic template at a list of timestamps.
+
+    Each timestamp maps to the phase (ts - phase_origin_ts) mod period, and
+    the template is read at that phase. A template whose length differs from
+    `period` is resampled to `period` samples first.
+
+    Args:
+        template: Template samples.
+        period: Cycle length; a period <= 1 produces all zeros.
+        phase_origin_ts: Timestamp at which the phase is zero.
+        ts_list: Timestamps to evaluate (each is truncated with int()).
+
+    Returns:
+        Float array with one value per entry of ts_list.
+    """
+    if period <= 1:
+        return np.zeros(len(ts_list), dtype=float)
+
+    curve = template if len(template) == period else resample_template(template, period)
+
+    def value_at(ts) -> float:
+        phase = (int(ts) - int(phase_origin_ts)) % period
+        return circular_template_value(curve, phase)
+
+    return np.array([value_at(ts) for ts in ts_list], dtype=float)
+
+
+def predict_state_bundle(
+    state: BaselineState,
+    ts_list: List[int],
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """
+    Evaluate the three templates of a BaselineState at a list of timestamps.
+
+    All three curves share the state's period and phase origin and are
+    evaluated with predict_template_values.
+
+    Args:
+        state: Baseline state; its period, phase_origin_ts, template,
+            lower_template and upper_template fields are read.
+        ts_list: Timestamps to evaluate.
+
+    Returns:
+        (mid, lower, upper) arrays, each with one value per timestamp.
+    """
+    period = int(state.period)
+    origin = int(state.phase_origin_ts)
+
+    # Same period and origin for every curve; only the samples differ.
+    def project(samples) -> np.ndarray:
+        # Convert the stored samples to a float array before evaluating.
+        return predict_template_values(
+            template=np.array(samples, dtype=float),
+            period=period,
+            phase_origin_ts=origin,
+            ts_list=ts_list,
+        )
+
+    mid = project(state.template)
+    lower = project(state.lower_template)
+    upper = project(state.upper_template)
+
+    return mid, lower, upper
+
+
+def normalize_origin_near(origin: int, period: int, near_ts: int) -> int:
+    """
+    Shift a phase origin by whole periods so it lands at or just before near_ts.
+
+    The result satisfies origin <= near_ts < origin + period. It is computed
+    with a single floor division rather than by stepping one period at a
+    time, so the cost no longer grows with the distance between origin and
+    near_ts (for example when the stored origin is very old).
+
+    Args:
+        origin: Current phase origin (a timestamp).
+        period: Cycle length, in the same unit as the timestamps.
+        near_ts: Timestamp the origin should be moved next to.
+
+    Returns:
+        The shifted origin. When period <= 1 the input origin is returned
+        unchanged.
+    """
+    if period <= 1:
+        return origin
+
+    origin = int(origin)
+    period = int(period)
+    near_ts = int(near_ts)
+
+    # Floor division rounds toward negative infinity, so one expression
+    # covers origins both before and after near_ts.
+    periods_to_shift = (near_ts - origin) // period
+    return origin + periods_to_shift * period
+
+
+def merge_template(
+    old_template: np.ndarray,
+    new_template: np.ndarray,
+    alpha: float,
+) -> np.ndarray:
+    """
+    Blend an old and a new template with an exponential-moving-average step.
+
+    merged = (1 - alpha) * old + alpha * new
+
+    When the lengths differ, the old template is first resampled to the
+    length of the new one. A larger alpha gives the new template more weight.
+
+    Args:
+        old_template: Previously learned template.
+        new_template: Freshly built template.
+        alpha: Step size; clipped to [0, 1].
+
+    Returns:
+        Blended float array with the length of new_template.
+    """
+    weight = float(np.clip(alpha, 0.0, 1.0))
+    previous = old_template
+    if len(previous) != len(new_template):
+        previous = resample_template(previous, len(new_template))
+    blended = previous * (1.0 - weight) + new_template * weight
+    return blended.astype(float)
diff --git a/ai/pridict_v5.py b/ai/pridict_v5.py
index 6894a66..dde0b11 100644
--- a/ai/pridict_v5.py
+++ b/ai/pridict_v5.py
@@ -1,27 +1,31 @@
 # -*- coding: utf-8 -*-
 """
-ProtoForge Predictor v12
+ProtoForge Predictor v13
 
 核心能力：
-1. feed_rate / spindle_speed / spindle_current 使用 phase-lock 点预测。
-2. vibration_x / vibration_y / vibration_z 使用 phase-band 预测带。
-3. vibration 类指标：
-   - predicted 使用平滑后的中位数模板，用于趋势参考。
-   - upper/lower 使用原始波动分位数模板 + padding，用于正常波动容忍带。
-   - 偶发越界不直接报警，只有持续越界 / 高比例越界 / 严重越界才报警。
-4. 预测起点锚定最后一个真实点 last_real_ts，避免时间错位。
-5. 异常期间冻结健康模板，不学习故障数据。
-6. 故障恢复后等待稳定，再恢复模板学习。
-7. 写入：
-   - xxx_predicted
-   - xxx_predicted_upper
-   - xxx_predicted_lower
-   - xxx_anomaly
-   - xxx_anomaly_outside_ratio
-   - xxx_anomaly_mean_abs_error
-   - xxx_anomaly_mean_rel_error
-   - xxx_anomaly_max_consecutive_outside
-   - xxx_anomaly_max_exceed_ratio
+1. 支持三个独立 CNC 工位：粗铣(fanuc-cnc)、半精铣(fanuc-cnc-semi-finish)、精铣(fanuc-cnc-finish)
+2. 覆盖指标：feed_rate / spindle_speed / spindle_current / spindle_load
+3. feed_rate / spindle_speed / spindle_current 使用 phase-lock 点预测。
+4. spindle_load 使用 phase_band 预测带（多频漂移容忍）。
+5. vibration_x / vibration_y / vibration_z 使用 phase-band 预测带。
+6. 各工位独立阈值配置，匹配实际量程差异：
+   - 粗铣：spindle_speed~2000RPM, feed_rate~800mm/min, spindle_current~21A, spindle_load~56%
+   - 半精铣：spindle_speed~4000RPM, feed_rate~500mm/min, spindle_current~14.5A, spindle_load~38%
+   - 精铣：spindle_speed~6000RPM, feed_rate~300mm/min, spindle_current~8.5A, spindle_load~22%
+7. 粗铣周期含随机抖动(±10s)，phase-lock 搜索范围扩大至 ±18%。
+8. 预测起点锚定最后一个真实点 last_real_ts，避免时间错位。
+9. 异常期间冻结健康模板，不学习故障数据。
+10. 故障恢复后等待稳定，再恢复模板学习。
+11. 写入：
+    - xxx_predicted
+    - xxx_predicted_upper
+    - xxx_predicted_lower
+    - xxx_anomaly
+    - xxx_anomaly_outside_ratio
+    - xxx_anomaly_mean_abs_error
+    - xxx_anomaly_mean_rel_error
+    - xxx_anomaly_max_consecutive_outside
+    - xxx_anomaly_max_exceed_ratio
 """
 
 import json
@@ -55,7 +59,7 @@
 # =============================================================================
 
 VM_URL = "http://localhost:8428"
-STATE_FILE = "/tmp/protoforge_predictor_state_v12.json"
+STATE_FILE = "/tmp/protoforge_predictor_state_v14.json"
 
 HISTORY_MINUTES = 30
 HORIZON_SECONDS = 120
@@ -86,6 +90,7 @@
 
 MAX_DATA_LAG_SECONDS = 180
 
+# 默认 phase-lock 搜索参数（精铣/半精铣：固定周期，搜索范围窄）
 PHASE_LOCK_MIN_WINDOW_SECONDS = 45
 PHASE_LOCK_MAX_WINDOW_SECONDS = 180
 PHASE_LOCK_PERIOD_SEARCH_RATIO = 0.12
@@ -95,105 +100,260 @@
 
 
 # =============================================================================
-# 指标配置
+# 监控指标白名单（可通过环境变量 PROTOFORGE_MONITORED_METRICS 覆盖）
 # =============================================================================
 
-PREDICT_TARGETS = [
-    {
-        "query": 'feed_rate{device_id="fanuc-cnc"}',
-        "pred_metric": "feed_rate_predicted",
-        "anomaly_metric": "feed_rate_anomaly",
-        "strategy": "phase_point",
-        "abs_threshold": 400.0,
-        "rel_threshold": 0.25,
-        "smooth_window": 1,
-        "outside_ratio_threshold": 0.60,
-        "min_consecutive_outside": 5,
-        "severe_exceed_ratio": 1.8,
-    },
-    {
-        "query": 'spindle_speed{device_id="fanuc-cnc"}',
-        "pred_metric": "spindle_speed_predicted",
-        "anomaly_metric": "spindle_speed_anomaly",
-        "strategy": "phase_point",
-        "abs_threshold": 500.0,
-        "rel_threshold": 0.25,
-        "smooth_window": 1,
-        "outside_ratio_threshold": 0.60,
-        "min_consecutive_outside": 5,
-        "severe_exceed_ratio": 1.8,
-    },
-    {
-        "query": 'spindle_current{device_id="fanuc-cnc"}',
-        "pred_metric": "spindle_current_predicted",
-        "anomaly_metric": "spindle_current_anomaly",
-        "strategy": "phase_point",
-        "abs_threshold": 5.0,
-        "rel_threshold": 0.25,
-        "smooth_window": 1,
+_DEFAULT_MONITORED_METRICS = [
+    "feed_rate",
+    "spindle_speed",
+    "spindle_current",
+    "spindle_load",
+    "vibration_x",
+    "vibration_y",
+    "vibration_z",
+]
+
+MONITORED_METRICS: List[str] = [
+    m.strip()
+    for m in os.environ.get(
+        "PROTOFORGE_MONITORED_METRICS",
+        ",".join(_DEFAULT_MONITORED_METRICS),
+    ).split(",")
+    if m.strip()
+]
+
+# Optional manual limit-override file; it is ignored when it does not exist.
+# Format: {"device-id": {"metric_name": {"hard_max": 35.0, "hard_min": 0.0}}}
+OVERRIDE_FILE = os.environ.get(
+    "PROTOFORGE_PREDICTOR_OVERRIDE",
+    "/etc/protoforge/predictor_override.json",
+)
+
+# Interval, in seconds, between refreshes of the target list.
+TARGETS_REFRESH_INTERVAL = int(os.environ.get("PROTOFORGE_TARGETS_REFRESH", "60"))
+
+# Runtime cache of prediction targets and the time of its last refresh.
+_TARGETS_CACHE: List[Dict] = []
+_TARGETS_LAST_REFRESH: float = 0.0
+
+
+# =============================================================================
+# Layer 1: 设备与指标发现
+# =============================================================================
+
+def discover_device_ids() -> List[str]:
+    """Ask the server at VM_URL for all device_id label values; return [] if the request fails."""
+    endpoint = f"{VM_URL}/api/v1/label/device_id/values"
+    try:
+        reply = requests.get(endpoint, timeout=10)
+        reply.raise_for_status()
+        label_values = reply.json().get("data", [])
+        # Drop empty label values.
+        return list(filter(None, label_values))
+    except requests.RequestException as err:
+        logger.error("发现 device_id 失败: %s", err)
+        return []
+
+
+def discover_metrics_for_device(device_id: str) -> List[str]:
+    """Return the monitored metrics for which an instant query finds a series of this device."""
+    found: List[str] = []
+    for metric in MONITORED_METRICS:
+        try:
+            resp = requests.get(
+                f"{VM_URL}/api/v1/query",
+                params={"query": f'{metric}{{device_id="{device_id}"}}'},
+                timeout=5,
+            )
+            resp.raise_for_status()
+            if resp.json().get("data", {}).get("result"):
+                found.append(metric)
+        except requests.RequestException as e:  # skip this metric, keep probing the rest
+            logger.warning("探测指标失败 device=%s metric=%s: %s", device_id, metric, e)
+    return found
+
+
+# =============================================================================
+# Layer 2: 自适应配置推断
+# =============================================================================
+
+@dataclass
+class MetricProfile:
+    """Per-metric statistics derived from history; they drive the automatic choice of strategy and thresholds."""
+    device_id: str
+    metric: str
+    p5: float           # 5th percentile of the active segment
+    p95: float          # 95th percentile of the active segment
+    iqr: float          # p95 - p5
+    cv: float           # coefficient of variation, std/mean (a stability measure)
+    strategy: str       # "phase_point" or "phase_band"
+    abs_threshold: float
+    rel_threshold: float
+    band_low_q: float
+    band_high_q: float
+    band_pad_abs: float
+    phase_lock_period_search_ratio: float
+
+
+def infer_metric_profile(device_id: str, metric: str) -> Optional["MetricProfile"]:
+    """
+    Fetch history and infer strategy and thresholds; None below MIN_POINTS samples.
+
+    Idle filter: values at or below the 10th percentile are dropped (skipped if < 30 remain).
+    Strategy: cv < 0.15 selects phase_point (steady signal), otherwise phase_band.
+    Phase-lock search ratio: 2x the CV of valley spacing, clipped to [0.12, 0.25].
+    """
+    ts_raw, ys_raw = fetch_history(f'{metric}{{device_id="{device_id}"}}')
+    if len(ys_raw) < MIN_POINTS:
+        return None
+
+    arr = np.array(ys_raw, dtype=float)
+
+    # Idle filter: keep only values strictly above the 10th percentile.
+    p10_val = float(np.percentile(arr, 10))
+    active = arr[arr > p10_val]
+    if len(active) < 30:
+        active = arr  # too few points survived the filter: use the unfiltered data
+
+    mean_val = float(np.mean(active))
+    std_val = float(np.std(active))
+    cv = std_val / max(abs(mean_val), 1e-6)
+    p5 = float(np.percentile(active, 5))
+    p95 = float(np.percentile(active, 95))
+    iqr = p95 - p5
+
+    # Pick the strategy from the coefficient of variation.
+    strategy = "phase_point" if cv < 0.15 else "phase_band"
+
+    # Absolute threshold: max of 0.8*iqr, 0.05*(p95 - p5), 2*std. NOTE(review): iqr == p95 - p5, so the 2nd term never wins - confirm the intended range.
+    abs_threshold = max(iqr * 0.8, (p95 - p5) * 0.05, std_val * 2.0)
+    rel_threshold = min(0.30, cv * 1.5)
+
+    # phase_band padding: the larger of 0.3*iqr and one standard deviation.
+    band_pad_abs = max(iqr * 0.3, std_val)
+
+    # Phase-lock period search ratio: estimated from the spread of valley-to-valley
+    # intervals around a rough period estimate; otherwise the module default is kept.
+    ts_grid, ys_grid = normalize_history(ts_raw, ys_raw)
+    period_search_ratio = PHASE_LOCK_PERIOD_SEARCH_RATIO  # default
+    if len(ys_grid) >= MIN_POINTS:
+        rough_period = estimate_period_rough(ys_grid)
+        if rough_period > MIN_PERIOD_SECONDS:
+            # Estimate period jitter from the spacing between valleys.
+            valleys = find_valley_indices(ts_grid, ys_grid, rough_period)
+            if len(valleys) >= 3:
+                diffs = np.diff(ts_grid[valleys].astype(float))
+                valid = diffs[(diffs > rough_period * 0.5) & (diffs < rough_period * 2.0)]
+                if len(valid) >= 2:
+                    period_cv = float(np.std(valid) / max(np.mean(valid), 1e-6))
+                    period_search_ratio = float(np.clip(period_cv * 2.0, 0.12, 0.25))
+
+    logger.info(
+        "推断指标特征 device=%s metric=%s cv=%.3f strategy=%s abs_thr=%.3f rel_thr=%.3f period_search=%.2f",
+        device_id, metric, cv, strategy, abs_threshold, rel_threshold, period_search_ratio,
+    )
+
+    return MetricProfile(
+        device_id=device_id,
+        metric=metric,
+        p5=p5,
+        p95=p95,
+        iqr=iqr,
+        cv=cv,
+        strategy=strategy,
+        abs_threshold=abs_threshold,
+        rel_threshold=rel_threshold,
+        band_low_q=5.0,
+        band_high_q=95.0,
+        band_pad_abs=band_pad_abs,
+        phase_lock_period_search_ratio=period_search_ratio,
+    )
+
+
+def load_overrides() -> Dict:
+    """Load the optional limit-override JSON; {} if missing, unreadable or not a JSON object."""
+    overrides: Dict = {}
+    if os.path.exists(OVERRIDE_FILE):
+        try:
+            with open(OVERRIDE_FILE, "r", encoding="utf-8") as f:
+                overrides = json.load(f)
+        except Exception as e:
+            logger.warning("加载 override 文件失败 %s: %s", OVERRIDE_FILE, e)
+    return overrides if isinstance(overrides, dict) else {}
+
+
+def build_target(profile: MetricProfile, overrides: Dict) -> Dict:
+    """Convert a MetricProfile into the target dict consumed by the prediction loop."""
+    device_overrides = overrides.get(profile.device_id, {}).get(profile.metric, {})
+
+    target: Dict = {
+        "query": f'{profile.metric}{{device_id="{profile.device_id}"}}',
+        "pred_metric": f"{profile.metric}_predicted",
+        "anomaly_metric": f"{profile.metric}_anomaly",
+        "strategy": profile.strategy,
+        "abs_threshold": profile.abs_threshold,
+        "rel_threshold": profile.rel_threshold,
+        "smooth_window": 5 if profile.strategy == "phase_band" else 2,
         "outside_ratio_threshold": 0.60,
         "min_consecutive_outside": 5,
         "severe_exceed_ratio": 1.8,
-    },
-    {
-        "query": 'vibration_x{device_id="fanuc-cnc"}',
-        "pred_metric": "vibration_x_predicted",
-        "anomaly_metric": "vibration_x_anomaly",
-        "strategy": "phase_band",
-
-        # vibration 类指标噪声、尖峰较多，不建议用很窄的阈值。
-        "abs_threshold": 0.18,
-        "rel_threshold": 0.55,
-
-        # 平滑只用于相位锁定和 predicted 中位趋势。
-        "smooth_window": 5,
-
-        # upper/lower 用原始值分位数，范围放宽，覆盖正常尖峰。
-        "band_low_q": 1,
-        "band_high_q": 99,
-        "band_pad_abs": 0.15,
-
-        # 偶发越界容忍。
-        "outside_ratio_threshold": 0.70,
-        "min_consecutive_outside": 5,
-        "severe_exceed_ratio": 2.0,
-    },
-    {
-        "query": 'vibration_y{device_id="fanuc-cnc"}',
-        "pred_metric": "vibration_y_predicted",
-        "anomaly_metric": "vibration_y_anomaly",
-        "strategy": "phase_band",
-        "abs_threshold": 0.18,
-        "rel_threshold": 0.55,
-        "smooth_window": 5,
-        "band_low_q": 1,
-        "band_high_q": 99,
-        "band_pad_abs": 0.15,
-        "outside_ratio_threshold": 0.70,
-        "min_consecutive_outside": 5,
-        "severe_exceed_ratio": 2.0,
-    },
-    {
-        "query": 'vibration_z{device_id="fanuc-cnc"}',
-        "pred_metric": "vibration_z_predicted",
-        "anomaly_metric": "vibration_z_anomaly",
-        "strategy": "phase_band",
-        "abs_threshold": 0.18,
-        "rel_threshold": 0.55,
-        "smooth_window": 5,
-        "band_low_q": 1,
-        "band_high_q": 99,
-        "band_pad_abs": 0.15,
-        "outside_ratio_threshold": 0.70,
-        "min_consecutive_outside": 5,
-        "severe_exceed_ratio": 2.0,
-    },
-]
+        "phase_lock_period_search_ratio": profile.phase_lock_period_search_ratio,
+        "phase_lock_origin_search_ratio": min(
+            0.45, profile.phase_lock_period_search_ratio * 2.5
+        ),
+        # Optional physical limits taken from the override data (None when absent).
+        "hard_max": device_overrides.get("hard_max"),
+        "hard_min": device_overrides.get("hard_min"),
+    }
+
+    if profile.strategy == "phase_band":
+        target.update({
+            "band_low_q": profile.band_low_q,
+            "band_high_q": profile.band_high_q,
+            "band_pad_abs": profile.band_pad_abs,
+        })
+
+    return target
+
+
+def refresh_targets_if_needed() -> None:
+    """
+    Rebuild the cached target list once TARGETS_REFRESH_INTERVAL seconds have passed.
+
+    Discovery also runs whenever the cache is empty (e.g. on the first call).
+    The cache and its timestamp are replaced only when at least one target
+    was produced; otherwise the existing list is kept.
+    """
+    global _TARGETS_CACHE, _TARGETS_LAST_REFRESH
+
+    started = time.time()
+    cache_is_fresh = started - _TARGETS_LAST_REFRESH < TARGETS_REFRESH_INTERVAL
+    if cache_is_fresh and _TARGETS_CACHE:
+        return
+
+    logger.info("开始发现设备和指标...")
+    overrides = load_overrides()
+    device_ids = discover_device_ids()
+    if not device_ids:
+        logger.warning("未发现任何 device_id，保持现有目标列表")
+        return
+
+    fresh_targets: List[Dict] = []
+    for device_id in device_ids:
+        for metric in discover_metrics_for_device(device_id):
+            profile = infer_metric_profile(device_id, metric)
+            if profile is None:
+                continue
+            fresh_targets.append(build_target(profile, overrides))
+
+    if not fresh_targets:
+        logger.warning("发现流程未产生任何有效目标，保持现有目标列表")
+        return
+
+    _TARGETS_CACHE = fresh_targets
+    _TARGETS_LAST_REFRESH = started
+    logger.info("目标列表已更新：%d 台设备，%d 个指标目标", len(device_ids), len(fresh_targets))
 
-EXTRA_PREDICT_LABELS = {
-    "forecast": "phase_band_health_v12",
-    "source": "protoforge",
-}
 
 BASELINE_STATUS_HEALTHY = "healthy"
 BASELINE_STATUS_ANOMALY = "anomaly"
@@ -612,8 +772,6 @@ def build_templates_from_valleys(
 
     if strategy == "phase_band":
         mid_template = np.percentile(mid_arr, 50, axis=0)
-
-        # upper/lower 使用原始值分布，而不是平滑值分布。
         lower_template = np.percentile(band_arr, low_q, axis=0)
         upper_template = np.percentile(band_arr, high_q, axis=0)
     else:
@@ -794,17 +952,28 @@ def merge_template(
 
 # =============================================================================
 # Phase Lock
+# 支持 target 级别的 phase_lock_period_search_ratio / phase_lock_origin_search_ratio
+# 粗铣工位周期含随机抖动(±10s)，需要更宽的搜索范围
 # =============================================================================
 
 def phase_lock_recent(
     state: BaselineState,
     ts_grid: np.ndarray,
     ys_model: np.ndarray,
+    target: Optional[Dict] = None,
 ) -> Tuple[int, int, np.ndarray, float]:
     base_period = int(state.period)
     base_origin = int(state.phase_origin_ts)
     base_template = np.array(state.template, dtype=float)
 
+    # 从 target 读取搜索范围，允许粗铣工位使用更宽的范围
+    period_search_ratio = float(
+        (target or {}).get("phase_lock_period_search_ratio", PHASE_LOCK_PERIOD_SEARCH_RATIO)
+    )
+    origin_search_ratio = float(
+        (target or {}).get("phase_lock_origin_search_ratio", PHASE_LOCK_ORIGIN_SEARCH_RATIO)
+    )
+
     if base_period <= 1 or len(base_template) <= 1:
         ts_recent = ts_grid[-DETECT_WINDOW_SECONDS:].astype(int).tolist()
         pred = predict_template_values(base_template, base_period, base_origin, ts_recent)
@@ -832,11 +1001,11 @@ def phase_lock_recent(
 
     p_min = max(
         int(MIN_PERIOD_SECONDS),
-        int(round(base_period * (1.0 - PHASE_LOCK_PERIOD_SEARCH_RATIO))),
+        int(round(base_period * (1.0 - period_search_ratio))),
     )
     p_max = min(
         int(MAX_PERIOD_SECONDS),
-        int(round(base_period * (1.0 + PHASE_LOCK_PERIOD_SEARCH_RATIO))),
+        int(round(base_period * (1.0 + period_search_ratio))),
     )
 
     best_period = base_period
@@ -855,7 +1024,7 @@ def phase_lock_recent(
     for period in range(p_min, p_max + 1, PHASE_LOCK_PERIOD_STEP):
         template = resample_template(base_template, period)
         center_origin = normalize_origin_near(base_origin, period, last_ts)
-        origin_shift = max(2, int(round(period * PHASE_LOCK_ORIGIN_SEARCH_RATIO)))
+        origin_shift = max(2, int(round(period * origin_search_ratio)))
 
         for shift in range(-origin_shift, origin_shift + 1, PHASE_LOCK_ORIGIN_STEP):
             origin = center_origin + shift
@@ -925,7 +1094,6 @@ def calc_final_bounds(
     if strategy == "phase_band":
         pad_abs = float(target.get("band_pad_abs", abs_threshold))
 
-        # 对 vibration 类指标：边界更像正常波动容忍带，不是硬边界。
         dynamic_pad = np.maximum(
             pad_abs,
             np.abs(pred) * rel_threshold * 0.25,
@@ -933,10 +1101,18 @@ def calc_final_bounds(
 
         lower = lower_raw - dynamic_pad
         upper = upper_raw + dynamic_pad
+    else:
+        lower, upper = calc_point_bounds(pred, abs_threshold, rel_threshold)
 
-        return lower, upper
+    # 物理上下限兜底（来自 override 文件，可选）
+    hard_max = target.get("hard_max")
+    hard_min = target.get("hard_min")
+    if hard_max is not None:
+        upper = np.minimum(upper, float(hard_max))
+    if hard_min is not None:
+        lower = np.maximum(lower, float(hard_min))
 
-    return calc_point_bounds(pred, abs_threshold, rel_threshold)
+    return lower, upper
 
 
 def detect_anomaly(
@@ -950,6 +1126,7 @@ def detect_anomaly(
         state=state,
         ts_grid=ts_grid,
         ys_model=ys_model,
+        target=target,
     )
 
     recent_len = len(pred_recent)
@@ -1018,11 +1195,6 @@ def detect_anomaly(
         target.get("severe_exceed_ratio", SEVERE_EXCEED_RATIO)
     )
 
-    # 核心优化：
-    # 1. 偶发 1~3 个点越界不报警。
-    # 2. 持续越界才报警。
-    # 3. 高比例越界才报警。
-    # 4. 严重越界才立即报警。
     is_anomaly = (
         outside_ratio >= outside_ratio_threshold
         or max_outside_seconds >= min_consecutive_outside
@@ -1653,7 +1825,13 @@ def build_prediction_timestamps(
 def run_once() -> None:
     now_str = datetime.now().strftime("%H:%M:%S")
 
-    for target in PREDICT_TARGETS:
+    refresh_targets_if_needed()
+
+    if not _TARGETS_CACHE:
+        logger.warning("[%s] 目标列表为空，等待设备发现完成", now_str)
+        return
+
+    for target in _TARGETS_CACHE:
         query = target["query"]
         pred_metric = target["pred_metric"]
         anomaly_metric = target["anomaly_metric"]
@@ -1749,7 +1927,7 @@ def run_once() -> None:
         origin_str = datetime.fromtimestamp(state.phase_origin_ts).strftime("%H:%M:%S")
 
         logger.info(
-            "[%s] %-40s → %-35s strategy=%s status=%s anomaly=%s outside=%.2f max_outside=%ss max_exceed=%.2f period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点，预测区间 %s ~ %s",
+            "[%s] %-50s → %-35s strategy=%s status=%s anomaly=%s outside=%.2f max_outside=%ss max_exceed=%.2f period=%ss origin=%s last_real=%s lag=%ss 写入 %d 点，预测区间 %s ~ %s",
             now_str,
             query,
             pred_metric,
@@ -1775,7 +1953,7 @@ def main() -> None:
     load_state()
 
     logger.info(
-        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s",
+        "预测服务启动 VM=%s 历史窗口=%dmin 理论预测窗口=%ds 实际写入窗口=%ds 轮询间隔=%ds state=%s forecast=%s override=%s refresh=%ds",
         VM_URL,
         HISTORY_MINUTES,
         HORIZON_SECONDS,
@@ -1783,6 +1961,8 @@ def main() -> None:
         POLL_INTERVAL,
         STATE_FILE,
         EXTRA_PREDICT_LABELS["forecast"],
+        OVERRIDE_FILE,
+        TARGETS_REFRESH_INTERVAL,
     )
 
     while True:
@@ -1791,4 +1971,4 @@ def main() -> None:
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py
index 11b61a7..2182bf1 100644
--- a/protoforge/core/fault.py
+++ b/protoforge/core/fault.py
@@ -32,24 +32,77 @@
 BUILTIN_FAULT_TYPES: list[FaultTypeDefinition] = [
 
     # ------------------------------------------------------------------
-    # 进给堵转 — 工件夹紧松动或切削量过大导致进给卡死
-    # 特征：进给速率瞬间降为0，主轴负载和电流急剧升高，主轴仍在转（区别于崩刃）
-    # 模式：瞬间注入
+    # 进给堵转（粗铣）— fanuc-cnc
+    # 量程：spindle_speed~2000RPM, feed_rate~800mm/min,
+    #        spindle_current~21A, spindle_load~56%
+    # 堵转目标：load→92%, current→38A，转速维持+轻微抖动
     # ------------------------------------------------------------------
     FaultTypeDefinition(
-        id="feed_stall",
-        name="进给堵转",
-        description="进给轴卡死，进给速率降为零，主轴负载和电流急剧升高，主轴转速维持（区别于崩刃停主轴）",
+        id="feed_stall_rough",
+        name="进给堵转（粗铣）",
+        description="粗铣进给轴卡死，进给速率降为零，主轴负载升至~92%，电流升至~38A，主轴转速维持（区别于崩刃停主轴）",
         category="process",
         default_duration=20.0,
-        tags=["进给", "堵转", "突发"],
+        tags=["进给", "堵转", "突发", "粗铣"],
         point_faults=[
             PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
                              target_value=0.0, noise_scale=0.0),
             PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
-                             multiplier=2.8, noise_scale=5.0),
+                             target_value=92.0, noise_scale=4.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             multiplier=3.8, noise_scale=1.5),
+                             target_value=38.0, noise_scale=1.5),
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=30.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 进给堵转（半精铣）— fanuc-cnc-semi-finish
+    # 量程：spindle_speed~4000RPM, feed_rate~500mm/min,
+    #        spindle_current~14.5A, spindle_load~38%
+    # 堵转目标：load→68%, current→26A，转速维持+轻微抖动
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="feed_stall_semi",
+        name="进给堵转（半精铣）",
+        description="半精铣进给轴卡死，进给速率降为零，主轴负载升至~68%，电流升至~26A，主轴转速维持（区别于崩刃停主轴）",
+        category="process",
+        default_duration=20.0,
+        tags=["进给", "堵转", "突发", "半精铣"],
+        point_faults=[
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             target_value=0.0, noise_scale=0.0),
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             target_value=68.0, noise_scale=3.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             target_value=26.0, noise_scale=1.2),
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=50.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 进给堵转（精铣）— fanuc-cnc-finish
+    # 量程：spindle_speed~6000RPM, feed_rate~300mm/min,
+    #        spindle_current~8.5A, spindle_load~22%
+    # 堵转目标：load→40%, current→15A，转速维持+轻微抖动
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="feed_stall_finish",
+        name="进给堵转（精铣）",
+        description="精铣进给轴卡死，进给速率降为零，主轴负载升至~40%，电流升至~15A，主轴转速维持（区别于崩刃停主轴）",
+        category="process",
+        default_duration=20.0,
+        tags=["进给", "堵转", "突发", "精铣"],
+        point_faults=[
+            PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
+                             target_value=0.0, noise_scale=0.0),
+            PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
+                             target_value=40.0, noise_scale=2.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
+                             target_value=15.0, noise_scale=0.8),
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
+                             multiplier=1.0, noise_scale=80.0),
         ],
     ),
 

From 07fc5d6897f2ab7b228fa16d748cd9ad1c74abb9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=B0=91=E6=99=BA?=
 <zhangshaozhi@bailongma-inc.com>
Date: Fri, 29 May 2026 10:09:13 +0800
Subject: [PATCH 36/36] fix(fault): sample fault targets from min/max ranges

---
 FAULT_INJECTION.md         |  47 ++++++++++---
 protoforge/core/fault.py   | 133 +++++++++++++++++++++++++++----------
 protoforge/models/fault.py |   8 ++-
 3 files changed, 142 insertions(+), 46 deletions(-)

diff --git a/FAULT_INJECTION.md b/FAULT_INJECTION.md
index 951648d..22746bf 100644
--- a/FAULT_INJECTION.md
+++ b/FAULT_INJECTION.md
@@ -160,19 +160,48 @@ DELETE /api/v1/devices/{device_id}/fault
 
 ---
 
-### spindle_overheat — 主轴过热
+### spindle_overheat_rough — 主轴过热（粗铣）
 
 - **分类**：thermal
-- **模式**：渐进式
+- **模式**：渐进式（绝对目标值）
 - **默认持续时间**：240 秒
-- **真实场景**：长时间高负荷或冷却系统故障，热保护机制逐渐降低转速
+- **真实场景**：粗铣主轴长时间高负荷或冷却不足，负载/电流持续高位，热保护渐进降速
 
-| 测点 | 变化方向 | 峰值倍率 |
-|------|---------|---------|
-| `spindle_current` | 升高 | ×1.8 |
-| `spindle_speed` | 降低 | ×0.6 |
-| `vibration_x` | 升高 | ×1.5 |
-| `vibration_z` | 升高 | ×1.5 |
+| 测点 | 变化方向 | 目标范围（注入时随机采样） |
+|------|---------|--------------------------|
+| `spindle_load` | 持续升高 | →78~92% |
+| `spindle_current` | 持续升高 | →30~38A |
+| `spindle_speed` | 渐进降低 | →1200~1600 RPM |
+
+---
+
+### spindle_overheat_semi — 主轴过热（半精铣）
+
+- **分类**：thermal
+- **模式**：渐进式（绝对目标值）
+- **默认持续时间**：240 秒
+- **真实场景**：半精铣主轴长时间高负荷或冷却不足，负载/电流持续高位，热保护渐进降速
+
+| 测点 | 变化方向 | 目标范围（注入时随机采样） |
+|------|---------|--------------------------|
+| `spindle_load` | 持续升高 | →65~78% |
+| `spindle_current` | 持续升高 | →21~27A |
+| `spindle_speed` | 渐进降低 | →2400~2900 RPM |
+
+---
+
+### spindle_overheat_finish — 主轴过热（精铣）
+
+- **分类**：thermal
+- **模式**：渐进式（绝对目标值）
+- **默认持续时间**：240 秒
+- **真实场景**：精铣主轴长时间高负荷或冷却不足，负载/电流持续高位，热保护渐进降速
+
+| 测点 | 变化方向 | 目标范围（注入时随机采样） |
+|------|---------|--------------------------|
+| `spindle_load` | 持续升高 | →42~55% |
+| `spindle_current` | 持续升高 | →13~17A |
+| `spindle_speed` | 渐进降低 | →3600~4200 RPM |
 
 ---
 
diff --git a/protoforge/core/fault.py b/protoforge/core/fault.py
index 2182bf1..857bf90 100644
--- a/protoforge/core/fault.py
+++ b/protoforge/core/fault.py
@@ -35,12 +35,12 @@
     # 进给堵转（粗铣）— fanuc-cnc
     # 量程：spindle_speed~2000RPM, feed_rate~800mm/min,
     #        spindle_current~21A, spindle_load~56%
-    # 堵转目标：load→92%, current→38A，转速维持+轻微抖动
+    # 堵转目标：load→85~100%, current→34~42A，转速维持+轻微抖动
     # ------------------------------------------------------------------
     FaultTypeDefinition(
         id="feed_stall_rough",
         name="进给堵转（粗铣）",
-        description="粗铣进给轴卡死，进给速率降为零，主轴负载升至~92%，电流升至~38A，主轴转速维持（区别于崩刃停主轴）",
+        description="粗铣进给轴卡死，进给速率降为零，主轴负载升至85~100%，电流升至34~42A，主轴转速维持（区别于崩刃停主轴）",
         category="process",
         default_duration=20.0,
         tags=["进给", "堵转", "突发", "粗铣"],
@@ -48,9 +48,9 @@
             PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
                              target_value=0.0, noise_scale=0.0),
             PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
-                             target_value=92.0, noise_scale=4.0),
+                             target_min=85.0, target_max=100.0, noise_scale=4.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             target_value=38.0, noise_scale=1.5),
+                             target_min=34.0, target_max=42.0, noise_scale=1.5),
             PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
                              multiplier=1.0, noise_scale=30.0),
         ],
@@ -60,12 +60,12 @@
     # 进给堵转（半精铣）— fanuc-cnc-semi-finish
     # 量程：spindle_speed~4000RPM, feed_rate~500mm/min,
     #        spindle_current~14.5A, spindle_load~38%
-    # 堵转目标：load→68%, current→26A，转速维持+轻微抖动
+    # 堵转目标：load→62~75%, current→23~29A，转速维持+轻微抖动
     # ------------------------------------------------------------------
     FaultTypeDefinition(
         id="feed_stall_semi",
         name="进给堵转（半精铣）",
-        description="半精铣进给轴卡死，进给速率降为零，主轴负载升至~68%，电流升至~26A，主轴转速维持（区别于崩刃停主轴）",
+        description="半精铣进给轴卡死，进给速率降为零，主轴负载升至62~75%，电流升至23~29A，主轴转速维持（区别于崩刃停主轴）",
         category="process",
         default_duration=20.0,
         tags=["进给", "堵转", "突发", "半精铣"],
@@ -73,9 +73,9 @@
             PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
                              target_value=0.0, noise_scale=0.0),
             PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
-                             target_value=68.0, noise_scale=3.0),
+                             target_min=62.0, target_max=75.0, noise_scale=3.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             target_value=26.0, noise_scale=1.2),
+                             target_min=23.0, target_max=29.0, noise_scale=1.2),
             PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
                              multiplier=1.0, noise_scale=50.0),
         ],
@@ -85,12 +85,12 @@
     # 进给堵转（精铣）— fanuc-cnc-finish
     # 量程：spindle_speed~6000RPM, feed_rate~300mm/min,
     #        spindle_current~8.5A, spindle_load~22%
-    # 堵转目标：load→40%, current→15A，转速维持+轻微抖动
+    # 堵转目标：load→36~45%, current→13~17A，转速维持+轻微抖动
     # ------------------------------------------------------------------
     FaultTypeDefinition(
         id="feed_stall_finish",
         name="进给堵转（精铣）",
-        description="精铣进给轴卡死，进给速率降为零，主轴负载升至~40%，电流升至~15A，主轴转速维持（区别于崩刃停主轴）",
+        description="精铣进给轴卡死，进给速率降为零，主轴负载升至36~45%，电流升至13~17A，主轴转速维持（区别于崩刃停主轴）",
         category="process",
         default_duration=20.0,
         tags=["进给", "堵转", "突发", "精铣"],
@@ -98,33 +98,79 @@
             PointFaultConfig(point="feed_rate", mode=FaultMode.INSTANT,
                              target_value=0.0, noise_scale=0.0),
             PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
-                             target_value=40.0, noise_scale=2.0),
+                             target_min=36.0, target_max=45.0, noise_scale=2.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             target_value=15.0, noise_scale=0.8),
+                             target_min=13.0, target_max=17.0, noise_scale=0.8),
             PointFaultConfig(point="spindle_speed", mode=FaultMode.INSTANT,
                              multiplier=1.0, noise_scale=80.0),
         ],
     ),
 
     # ------------------------------------------------------------------
-    # 主轴过热 — 长时间高负荷或冷却系统故障
-    # 特征：主轴负载和电流持续偏高，转速因热保护逐渐降低
-    # 模式：渐进式，持续时间较长
+    # 主轴过热（粗铣）— fanuc-cnc
+    # 基线：spindle_speed~2000RPM, spindle_current~21A, spindle_load~56%
+    # 过热目标范围：load 78~92%，current 30~38A，转速降至 1200~1600RPM
+    # 范围模拟不同冷却状态、负荷历史、环境温度下的个体差异
+    # 模式：渐进式；全部用 target_min/max，避免 multiplier 在空载基线=0 时失效
     # ------------------------------------------------------------------
     FaultTypeDefinition(
-        id="spindle_overheat",
-        name="主轴过热",
-        description="主轴长时间高负荷运转或冷却不足，spindle_load和spindle_current持续偏高，转速因热保护渐进下降",
+        id="spindle_overheat_rough",
+        name="主轴过热（粗铣）",
+        description="粗铣主轴长时间高负荷或冷却不足，spindle_load渐进升至78~92%，spindle_current升至30~38A，转速因热保护渐进降至1200~1600RPM",
         category="thermal",
         default_duration=240.0,
-        tags=["主轴", "过热", "渐进"],
+        tags=["主轴", "过热", "渐进", "粗铣"],
         point_faults=[
             PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL,
-                             multiplier=1.6, noise_scale=3.0),
+                             target_min=78.0, target_max=92.0, noise_scale=3.5),
             PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
-                             multiplier=1.8, noise_scale=1.2),
+                             target_min=30.0, target_max=38.0, noise_scale=1.5),
             PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL,
-                             multiplier=0.6, noise_scale=50.0),
+                             target_min=1200, target_max=1600, noise_scale=40.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 主轴过热（半精铣）— fanuc-cnc-semi-finish
+    # 基线：spindle_speed~4000RPM, spindle_current~14.5A, spindle_load~38%
+    # 过热目标范围：load 65~78%，current 21~27A，转速降至 2400~2900RPM
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="spindle_overheat_semi",
+        name="主轴过热（半精铣）",
+        description="半精铣主轴长时间高负荷或冷却不足，spindle_load渐进升至65~78%，spindle_current升至21~27A，转速因热保护渐进降至2400~2900RPM",
+        category="thermal",
+        default_duration=240.0,
+        tags=["主轴", "过热", "渐进", "半精铣"],
+        point_faults=[
+            PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL,
+                             target_min=65.0, target_max=78.0, noise_scale=3.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
+                             target_min=21.0, target_max=27.0, noise_scale=1.2),
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL,
+                             target_min=2400, target_max=2900, noise_scale=50.0),
+        ],
+    ),
+
+    # ------------------------------------------------------------------
+    # 主轴过热（精铣）— fanuc-cnc-finish
+    # 基线：spindle_speed~6000RPM, spindle_current~8.5A, spindle_load~22%
+    # 过热目标范围：load 42~55%，current 13~17A，转速降至 3600~4200RPM
+    # ------------------------------------------------------------------
+    FaultTypeDefinition(
+        id="spindle_overheat_finish",
+        name="主轴过热（精铣）",
+        description="精铣主轴长时间高负荷或冷却不足，spindle_load渐进升至42~55%，spindle_current升至13~17A，转速因热保护渐进降至3600~4200RPM",
+        category="thermal",
+        default_duration=240.0,
+        tags=["主轴", "过热", "渐进", "精铣"],
+        point_faults=[
+            PointFaultConfig(point="spindle_load", mode=FaultMode.GRADUAL,
+                             target_min=42.0, target_max=55.0, noise_scale=2.0),
+            PointFaultConfig(point="spindle_current", mode=FaultMode.GRADUAL,
+                             target_min=13.0, target_max=17.0, noise_scale=0.8),
+            PointFaultConfig(point="spindle_speed", mode=FaultMode.GRADUAL,
+                             target_min=3600, target_max=4200, noise_scale=60.0),
         ],
     ),
 
@@ -230,15 +276,15 @@
     FaultTypeDefinition(
         id="air_cutting",
         name="空切检测",
-        description="刀具未接触工件，spindle_load跌至空载区间(5-15%)，spindle_current降至空转水平，转速进给保持正常",
+        description="刀具未接触工件，spindle_load跌至空载区间(4-12%)，spindle_current降至空转水平，转速进给保持正常",
         category="tool",
         default_duration=180.0,
         tags=["刀具", "空切", "工况切换", "负载"],
         point_faults=[
             PointFaultConfig(point="spindle_load", mode=FaultMode.INSTANT,
-                             target_value=8.0, noise_scale=2.0),
+                             target_min=4.0, target_max=12.0, noise_scale=2.0),
             PointFaultConfig(point="spindle_current", mode=FaultMode.INSTANT,
-                             target_value=2.5, noise_scale=0.3),
+                             target_min=2.0, target_max=3.5, noise_scale=0.3),
         ],
     ),
 
@@ -356,6 +402,13 @@ def inject(self, device: Any, request: FaultInjectRequest) -> FaultInfo:
                 except (TypeError, ValueError):
                     baseline[pf.point] = 0.0
 
+        # 对有范围定义的测点，注入时随机采样一个实际目标值
+        # 使每次注入的故障严重程度有所不同，模拟真实场景的个体差异
+        resolved_targets: dict[str, float] = {}
+        for pf in fault_type.point_faults:
+            if pf.target_min is not None and pf.target_max is not None:
+                resolved_targets[pf.point] = random.uniform(pf.target_min, pf.target_max)
+
         fault = ActiveFault(
             fault_id=uuid.uuid4().hex[:12],
             device_id=device.id,
@@ -365,10 +418,11 @@ def inject(self, device: Any, request: FaultInjectRequest) -> FaultInfo:
             duration=duration,
             started_at=time.time(),
             baseline_values=baseline,
+            resolved_targets=resolved_targets,
         )
         self._active[device.id] = fault
-        logger.info("Fault injected: device=%s type=%s duration=%.0fs",
-                    device.id, fault_type.id, duration)
+        logger.info("Fault injected: device=%s type=%s duration=%.0fs resolved_targets=%s",
+                    device.id, fault_type.id, duration, resolved_targets)
         return self._to_info(fault, fault_type)
 
     def apply(self, device: Any) -> None:
@@ -400,13 +454,14 @@ def apply(self, device: Any) -> None:
             baseline = fault.baseline_values.get(pf.point, 0.0)
             if baseline == 0.0:
                 # 基线为0说明注入时设备处于换刀/停机状态
-                # target_value 模式可以直接执行（如崩刃归零、空切归空载）
+                # target_value / resolved_targets 模式可以直接执行
                 # multiplier 模式跳过，避免在零基线上产生无意义的值
-                if pf.target_value is None:
+                if pf.target_value is None and pf.point not in fault.resolved_targets:
                     continue
 
+            resolved_target = fault.resolved_targets.get(pf.point)
             device._point_values[pf.point] = self._compute_value(
-                pf, baseline, progress, fault.intensity
+                pf, baseline, progress, fault.intensity, resolved_target
             )
 
     def clear(self, device_id: str) -> bool:
@@ -451,20 +506,26 @@ def _compute_value(
         baseline: float,
         progress: float,
         intensity: float,
+        resolved_target: Optional[float] = None,
     ) -> float:
-        """根据故障配置和当前进度计算覆盖值"""
+        """根据故障配置和当前进度计算覆盖值。
+
+        目标值优先级：resolved_target（注入时随机采样）> target_value（固定值）> multiplier
+        """
+        # 确定本次注入的实际目标值
+        effective_target: Optional[float] = resolved_target if resolved_target is not None else pf.target_value
+
         if pf.mode == FaultMode.INSTANT:
-            # 瞬间模式：直接用目标值，不随时间变化
-            if pf.target_value is not None:
-                target = pf.target_value
+            if effective_target is not None:
+                target = effective_target
             elif pf.multiplier is not None:
                 target = baseline * (1.0 + (pf.multiplier - 1.0) * intensity)
             else:
                 target = baseline
         else:
             # 渐进模式：随 progress 线性劣化
-            if pf.target_value is not None:
-                target = baseline + (pf.target_value - baseline) * progress * intensity
+            if effective_target is not None:
+                target = baseline + (effective_target - baseline) * progress * intensity
             elif pf.multiplier is not None:
                 target = baseline * (1.0 + (pf.multiplier - 1.0) * progress * intensity)
             else:
diff --git a/protoforge/models/fault.py b/protoforge/models/fault.py
index cc038e0..9928332 100644
--- a/protoforge/models/fault.py
+++ b/protoforge/models/fault.py
@@ -25,6 +25,11 @@ class PointFaultConfig(BaseModel):
     target_value: Optional[float] = None
     multiplier: Optional[float] = None     # 异常值 = 当前正常值 × multiplier
 
+    # 目标值范围：注入时在 [target_min, target_max] 内随机采样一个实际目标值
+    # 设置后会覆盖 target_value，使每次注入的故障严重程度有所不同
+    target_min: Optional[float] = None
+    target_max: Optional[float] = None
+
     # GRADUAL 模式：从当前值线性劣化到 target_value 或 multiplier 倍
     # 劣化程度 = progress(0~1) × (target - baseline)
     noise_scale: float = 0.0               # 叠加随机噪声幅度，模拟真实抖动
@@ -59,7 +64,8 @@ class ActiveFault(BaseModel):
     duration: float = 120.0
     started_at: float = 0.0
     cleared_at: Optional[float] = None
-    baseline_values: dict[str, float] = Field(default_factory=dict)  # 注入时的正常基线值
+    baseline_values: dict[str, float] = Field(default_factory=dict)   # 注入时的正常基线值
+    resolved_targets: dict[str, float] = Field(default_factory=dict)  # 注入时随机采样的实际目标值
 
 
 class FaultInfo(BaseModel):