diff --git a/CHANGELOG.md b/CHANGELOG.md index 287a22f..b601f78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,24 @@ All notable changes to a3s-observer will be documented in this file. +## [0.11.0] — SecurityAction: privesc / injection / open-port + +### Added + +- **`AgentEvent::SecurityAction { pid, kind, detail }`** — one rare-and-loud, in-kernel-filtered + event for the security-sensitive syscalls an agent rarely makes but that matter when it does. + Three kinds, all on a single `SEC_EVENTS` ring: + - `setuid-root` — `setuid`/`setresuid`/`setreuid` setting (e)uid 0 from a non-root caller + (privilege escalation, including the EPERM-bound *attempt*). Gated to the thread-group leader, + so glibc's NPTL setxid broadcast doesn't fan out one escalation into N duplicate events. + - `ptrace` — `ptrace(ATTACH|SEIZE)` of another process (`detail` = target pid): process injection. + - `bind` — `bind()` to a fixed **non-loopback** port (`detail` = port): an off-host-reachable + listener. Loopback (127.0.0.0/8) binds are filtered as common local-helper noise. + + Group escalation (`setgid`) and loopback-only binds are intentionally out of scope. Validated + live on Linux 6.8 (all three fire with correct `detail`, verifier loads clean, multithreaded + `setuid` deduped to one event), and adversarially reviewed before release. 
+ ## [0.10.0] — richer signals: exit-signal, LLM model, dest-port, uid ### Added diff --git a/Cargo.lock b/Cargo.lock index c2615de..13614fe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,7 +4,7 @@ version = 4 [[package]] name = "a3s-observer" -version = "0.10.0" +version = "0.11.0" dependencies = [ "serde", "serde_json", @@ -13,7 +13,7 @@ dependencies = [ [[package]] name = "a3s-observer-collector" -version = "0.10.0" +version = "0.11.0" dependencies = [ "a3s-observer", "a3s-observer-common", @@ -28,11 +28,11 @@ dependencies = [ [[package]] name = "a3s-observer-common" -version = "0.10.0" +version = "0.11.0" [[package]] name = "a3s-observer-ebpf" -version = "0.10.0" +version = "0.11.0" dependencies = [ "a3s-observer-common", "aya-ebpf", diff --git a/Cargo.toml b/Cargo.toml index f212df6..e5e4af5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "a3s-observer" -version = "0.10.0" +version = "0.11.0" edition = "2021" license = "MIT" description = "General-purpose, language-agnostic eBPF observability for AI agents (LLM calls, tools, files, network egress)." diff --git a/README.md b/README.md index ccdb8fa..b0b66d0 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,7 @@ latency / TTFT, or plaintext) / **where** (peer IP / hostname). 
| `unlink`\* | `sys_enter_unlinkat` | `FileDelete` — files deleted (`A3S_OBSERVER_FILES=1`) | | `ssl`\* | OpenSSL `SSL_write` / `SSL_read` uprobes | `SslContent` — request/response plaintext (`A3S_OBSERVER_SSL=1`) | | `llm-api`\* | parsed from `SslContent` | `LlmApi` — **model** + token usage (`A3S_OBSERVER_SSL=1`) | +| `security` | `setuid` / `ptrace` / `bind` syscalls | `SecurityAction` — privilege escalation (→root) / process injection / opened a listening port (rare + in-kernel-filtered) | Userspace enriches each event with **identity** (k8s cgroup→pod, `/proc` comm+ppid, or an in-kernel `comm` fallback for short-lived processes), a `(pid,fd)→peer` **correlation**, and diff --git a/a3s-observer-collector/Cargo.toml b/a3s-observer-collector/Cargo.toml index e53f2ce..d722722 100644 --- a/a3s-observer-collector/Cargo.toml +++ b/a3s-observer-collector/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "a3s-observer-collector" -version = "0.10.0" +version = "0.11.0" edition = "2021" license = "MIT" description = "a3s-observer collector: loads the eBPF probes and exports enriched events." 
diff --git a/a3s-observer-collector/src/main.rs b/a3s-observer-collector/src/main.rs index 094dbed..ccabe1b 100644 --- a/a3s-observer-collector/src/main.rs +++ b/a3s-observer-collector/src/main.rs @@ -11,8 +11,8 @@ use a3s_observer::{ KubeResolver, LogExporter, Provider, ServiceClassifier, SniClassifier, }; use a3s_observer_common::{ - ConnectEvent, DnsEvent, ExecEvent, ExitEvent, FileEvent, LlmEvent, SslEvent, TlsEvent, - FILE_DELETE_FLAG, + ConnectEvent, DnsEvent, ExecEvent, ExitEvent, FileEvent, LlmEvent, SecEvent, SslEvent, + TlsEvent, FILE_DELETE_FLAG, SEC_BIND, SEC_PTRACE, SEC_SETUID, }; use anyhow::Context as _; use aya::{ @@ -74,6 +74,11 @@ async fn main() -> anyhow::Result<()> { ("read_exit", "sys_exit_read"), ("recv_exit", "sys_exit_recvfrom"), ("sock_close", "sys_enter_close"), + ("sec_setuid", "sys_enter_setuid"), + ("sec_setresuid", "sys_enter_setresuid"), + ("sec_setreuid", "sys_enter_setreuid"), + ("sec_ptrace", "sys_enter_ptrace"), + ("sec_bind", "sys_enter_bind"), ]; if files { probes.push(("file_open", "sys_enter_openat")); @@ -173,6 +178,10 @@ async fn main() -> anyhow::Result<()> { ebpf.take_map("SSL_EVENTS") .context("`SSL_EVENTS` missing")?, )?; + let mut sec_ring = RingBuf::try_from( + ebpf.take_map("SEC_EVENTS") + .context("`SEC_EVENTS` missing")?, + )?; // Cumulative count of events dropped because a ring was full (data-loss visibility). 
let drops: PerCpuArray<_, u64> = PerCpuArray::try_from(ebpf.take_map("DROPS").context("`DROPS` missing")?)?; @@ -220,6 +229,7 @@ async fn main() -> anyhow::Result<()> { file = stats.file, llm = stats.llm, ssl = stats.ssl, + sec = stats.sec, dropped, output_dropped, "a3s-observer: events in the last 60s (dropped = cumulative ring-full, \ @@ -259,6 +269,25 @@ async fn main() -> anyhow::Result<()> { }); } } + while let Some(item) = sec_ring.next() { + if let Some(ev) = read_pod::(&item) { + let kind = match ev.kind { + SEC_SETUID => "setuid-root", + SEC_PTRACE => "ptrace", + SEC_BIND => "bind", + _ => continue, + }; + emit(exporter.as_ref(), &mut stats, EnrichedEvent { + identity: identity_for(&resolver, ev.pid, &ev.comm), + provider: None, + event: AgentEvent::SecurityAction { + pid: ev.pid, + kind, + detail: ev.detail, + }, + }); + } + } // Drain connect BEFORE tls so a same-poll ClientHello finds its peer. while let Some(item) = connect_ring.next() { if let Some(ev) = read_pod::(&item) { @@ -415,6 +444,7 @@ async fn main() -> anyhow::Result<()> { file = stats.file, llm = stats.llm, ssl = stats.ssl, + sec = stats.sec, "a3s-observer-collector: stopped (final window)" ); Ok(()) @@ -512,6 +542,7 @@ struct Stats { file: u64, llm: u64, ssl: u64, + sec: u64, } /// Export an event and count it by kind for the throughput report. @@ -526,6 +557,7 @@ fn emit(exporter: &dyn Exporter, stats: &mut Stats, ev: EnrichedEvent) { AgentEvent::LlmCall { .. } => stats.llm += 1, AgentEvent::SslContent { .. } => stats.ssl += 1, AgentEvent::LlmApi { .. } => stats.llm += 1, + AgentEvent::SecurityAction { .. 
} => stats.sec += 1, } exporter.export(&ev); } diff --git a/a3s-observer-common/Cargo.toml b/a3s-observer-common/Cargo.toml index 2cc02a6..c149e8c 100644 --- a/a3s-observer-common/Cargo.toml +++ b/a3s-observer-common/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "a3s-observer-common" -version = "0.10.0" +version = "0.11.0" edition = "2021" license = "MIT" description = "Shared no_std types crossing the eBPF <-> userspace boundary for a3s-observer." diff --git a/a3s-observer-common/src/lib.rs b/a3s-observer-common/src/lib.rs index e52d8ee..80c67c2 100644 --- a/a3s-observer-common/src/lib.rs +++ b/a3s-observer-common/src/lib.rs @@ -133,3 +133,19 @@ pub struct SslEvent { pub comm: [u8; 16], pub data: [u8; SSL_SNAP_LEN], } + +/// A security-sensitive action — rare and high-signal, filtered in-kernel so volume stays near +/// zero. One event/ring covers several syscalls (privilege escalation, process injection, opening +/// a listening port) instead of a probe-per-syscall sprawl — keeps the model + ring count bounded. 
+#[repr(C)]
+#[derive(Clone, Copy)]
+pub struct SecEvent {
+    pub pid: u32,
+    pub kind: u32, // one of the SEC_* constants below
+    pub detail: u64, // SEC_SETUID: always 0 · SEC_PTRACE: target pid · SEC_BIND: port (host order)
+    pub comm: [u8; 16],
+}
+
+pub const SEC_SETUID: u32 = 1; // setuid/setresuid/setreuid → root from a non-root caller (privesc)
+pub const SEC_PTRACE: u32 = 2; // ptrace(ATTACH|SEIZE) of another process (injection)
+pub const SEC_BIND: u32 = 3; // bind() to a fixed non-loopback port (off-host-reachable listener)
diff --git a/a3s-observer-ebpf/Cargo.toml b/a3s-observer-ebpf/Cargo.toml
index e9c70a9..997b990 100644
--- a/a3s-observer-ebpf/Cargo.toml
+++ b/a3s-observer-ebpf/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "a3s-observer-ebpf"
-version = "0.10.0"
+version = "0.11.0"
 edition = "2021"
 license = "MIT"
 publish = false
diff --git a/a3s-observer-ebpf/src/main.rs b/a3s-observer-ebpf/src/main.rs
index ce4965e..a9e25b6 100644
--- a/a3s-observer-ebpf/src/main.rs
+++ b/a3s-observer-ebpf/src/main.rs
@@ -2,8 +2,9 @@
 #![no_main]
 
 use a3s_observer_common::{
-    ConnectEvent, DnsEvent, ExecEvent, ExitEvent, FileEvent, LlmEvent, SslEvent, TlsEvent,
-    ARGV_SLOTS, ARG_LEN, DNS_SNAP_LEN, FILE_DELETE_FLAG, PATH_SNAP_LEN, SSL_SNAP_LEN, TLS_SNAP_LEN,
+    ConnectEvent, DnsEvent, ExecEvent, ExitEvent, FileEvent, LlmEvent, SecEvent, SslEvent,
+    TlsEvent, ARGV_SLOTS, ARG_LEN, DNS_SNAP_LEN, FILE_DELETE_FLAG, PATH_SNAP_LEN, SEC_BIND,
+    SEC_PTRACE, SEC_SETUID, SSL_SNAP_LEN, TLS_SNAP_LEN,
 };
 use aya_ebpf::{
     helpers::gen::bpf_probe_read_user,
@@ -39,6 +40,11 @@ static FILE_EVENTS: RingBuf = RingBuf::with_byte_size(256 * 1024, 0);
 #[map]
 static LLM_EVENTS: RingBuf = RingBuf::with_byte_size(64 * 1024, 0);
 
+// Security-sensitive actions (privesc / injection / open-port). In-kernel-filtered to the loud
+// cases, so this stays near-empty — a small ring is plenty.
+#[map]
+static SEC_EVENTS: RingBuf = RingBuf::with_byte_size(64 * 1024, 0);
+
 // Count of events dropped because a ring was full — data-loss visibility under extreme load.
 #[map]
 static DROPS: PerCpuArray<u64> = PerCpuArray::with_max_entries(1, 0);
@@ -376,6 +382,151 @@ fn try_connect(ctx: &TracePointContext) -> Result<u32, i64> {
     Ok(0)
 }
 
+// ---- security-sensitive actions: privesc (setuid) / injection (ptrace) / open-port (bind) ----
+//
+// One ring, in-kernel-filtered to the loud cases. These syscalls are rare for a normal agent, so
+// when one fires it's worth a look — that's the whole point of a separate "rare and loud" tier.
+
+// Publish one `SecEvent` on `SEC_EVENTS`, stamped with the caller's tgid and comm. `kind` is a
+// `SEC_*` constant and `detail` is kind-specific (0 / target pid / port). When `reserve_or_drop`
+// yields no ring slot the event is skipped.
+fn emit_sec(kind: u32, detail: u64) {
+    let Some(mut entry) = reserve_or_drop::<SecEvent>(&SEC_EVENTS) else {
+        return;
+    };
+    let ev = entry.as_mut_ptr();
+    // SAFETY: `ev` points at the slot just reserved; all four `SecEvent` fields are written
+    // before the entry is submitted.
+    unsafe {
+        (*ev).pid = (bpf_get_current_pid_tgid() >> 32) as u32;
+        (*ev).kind = kind;
+        (*ev).detail = detail;
+        (*ev).comm = bpf_get_current_comm().unwrap_or_default();
+    }
+    entry.submit(0);
+}
+
+// Escalation TO root from a non-root caller — the loud case. Dropping privs (root → nobody, which
+// every daemon does at boot) is noise and is filtered out. NOTE: legitimate setuid-root tools
+// (sudo/su/passwd) also fire here — it's a genuine privilege transition, expected to pair with a
+// ToolExec of the setuid binary, not inherently malicious.
+fn try_setuid_to(target: u32) {
+    // glibc broadcasts setuid/setresuid/setreuid to EVERY thread (NPTL setxid), so one logical
+    // escalation fires this per-thread — the same fanout do_exit has. Emit once, from the
+    // thread-group leader (tgid == tid), matching the proc_exit convention. (A raw setuid syscall
+    // from a non-leader thread is thus missed — vanishingly rare vs the glibc/single-threaded paths.)
+    let id = bpf_get_current_pid_tgid();
+    if (id >> 32) as u32 != id as u32 {
+        return;
+    }
+    // Low half of uid_gid is the caller's uid: only a non-root caller requesting uid 0 is loud.
+    if target == 0 && (bpf_get_current_uid_gid() as u32) != 0 {
+        emit_sec(SEC_SETUID, 0);
+    }
+}
+
+#[tracepoint]
+pub fn sec_setuid(ctx: TracePointContext) -> u32 {
+    try_sec_setuid(&ctx).unwrap_or(0)
+}
+fn try_sec_setuid(ctx: &TracePointContext) -> Result<u32, i64> {
+    let uid: u64 = unsafe { ctx.read_at(16)? }; // sys_enter_setuid: uid_t uid @16
+    try_setuid_to(uid as u32);
+    Ok(0)
+}
+
+#[tracepoint]
+pub fn sec_setresuid(ctx: TracePointContext) -> u32 {
+    try_sec_setresuid(&ctx).unwrap_or(0)
+}
+fn try_sec_setresuid(ctx: &TracePointContext) -> Result<u32, i64> {
+    // sys_enter_setresuid: ruid @16, euid @24, suid @32 — the euid grants effective privilege.
+    let euid: u64 = unsafe { ctx.read_at(24)? };
+    try_setuid_to(euid as u32);
+    Ok(0)
+}
+
+#[tracepoint]
+pub fn sec_setreuid(ctx: TracePointContext) -> u32 {
+    try_sec_setreuid(&ctx).unwrap_or(0)
+}
+fn try_sec_setreuid(ctx: &TracePointContext) -> Result<u32, i64> {
+    // sys_enter_setreuid: ruid @16, euid @24 — euid is the effective uid being set (the privesc
+    // path os.setreuid / seteuid take, which neither setuid nor setresuid catches).
+    let euid: u64 = unsafe { ctx.read_at(24)? };
+    try_setuid_to(euid as u32);
+    Ok(0)
+}
+
+#[tracepoint]
+pub fn sec_ptrace(ctx: TracePointContext) -> u32 {
+    try_sec_ptrace(&ctx).unwrap_or(0)
+}
+fn try_sec_ptrace(ctx: &TracePointContext) -> Result<u32, i64> {
+    // sys_enter_ptrace: long request @16, long pid @24.
+    let request: u64 = unsafe { ctx.read_at(16)? };
+    let target: u64 = unsafe { ctx.read_at(24)? };
+    // PTRACE_ATTACH = 16, PTRACE_SEIZE = 0x4206 — the gateway to memory/register injection
+    // (you must attach before POKE*). TRACEME = 0 is benign self-trace. Skip self-targeting.
+    let self_pid = (bpf_get_current_pid_tgid() >> 32) as u32;
+    if (request == 16 || request == 0x4206) && target as u32 != self_pid {
+        emit_sec(SEC_PTRACE, target);
+    }
+    Ok(0)
+}
+
+#[tracepoint]
+pub fn sec_bind(ctx: TracePointContext) -> u32 {
+    try_sec_bind(&ctx).unwrap_or(0)
+}
+fn try_sec_bind(ctx: &TracePointContext) -> Result<u32, i64> {
+    // sys_enter_bind: int fd @16, struct sockaddr *umyaddr @24, int addrlen @32 — same shape as connect.
+    let addr_ptr: *const u8 = unsafe { ctx.read_at(24)? };
+    let addrlen: u64 = unsafe { ctx.read_at(32)? };
+    if addrlen < 8 {
+        return Ok(0);
+    }
+    let mut fam = [0u8; 2];
+    if unsafe { bpf_probe_read_user_buf(addr_ptr, &mut fam) }.is_err() {
+        return Ok(0);
+    }
+    let family = u16::from_ne_bytes(fam);
+    if family != 2 && family != 10 {
+        return Ok(0); // AF_INET / AF_INET6 only
+    }
+    // Skip loopback binds — local-only helper sockets (runtime debug/metrics servers) are common
+    // noise; an off-host-reachable listener is the loud case. Covers IPv4 127.0.0.0/8, IPv6 ::1
+    // and IPv4-mapped ::ffff:127.0.0.0/8, so `localhost` is quiet whichever family it resolves to.
+    if family == 2 {
+        let mut oct = [0u8; 1];
+        let _ = unsafe { bpf_probe_read_user_buf(addr_ptr.add(4), &mut oct) }; // first octet of sin_addr
+        if oct[0] == 127 {
+            return Ok(0);
+        }
+    } else if addrlen >= 24 {
+        // sockaddr_in6: sin6_addr is the 16 bytes at offset 8 (network order); compare it as two
+        // big-endian u64 halves. A failed read leaves the bind unfiltered (it is reported).
+        let mut hi = [0u8; 8];
+        let mut lo = [0u8; 8];
+        let got_hi = unsafe { bpf_probe_read_user_buf(addr_ptr.add(8), &mut hi) }.is_ok();
+        let got_lo = unsafe { bpf_probe_read_user_buf(addr_ptr.add(16), &mut lo) }.is_ok();
+        let hi = u64::from_be_bytes(hi);
+        let lo = u64::from_be_bytes(lo);
+        // hi == 0 with lo == 1 is ::1; hi == 0 with lo == 0x0000_ffff_7fxx_xxxx is ::ffff:127.x.x.x.
+        if got_hi && got_lo && hi == 0 && (lo == 1 || (lo >> 24) == 0xffff_7f) {
+            return Ok(0);
+        }
+    }
+    let mut port = [0u8; 2];
+    let _ = unsafe { bpf_probe_read_user_buf(addr_ptr.add(2), &mut port) }; // sin_port (network order)
+    let port = u16::from_be_bytes(port);
+    // port 0 = kernel picks (a client's ephemeral source port); a fixed port = a server listening.
+    if port != 0 {
+        emit_sec(SEC_BIND, port as u64);
+    }
+    Ok(0)
+}
+
 // ---- DNS query (sys_enter_sendto to :53) ----
 // Detects a UDP DNS query by the dest port (sockaddr @ offset 48) and copies the packet;
 // userspace parses the question name. Connected-UDP sends (NULL dest addr) aren't covered.
diff --git a/deploy/daemonset.yaml b/deploy/daemonset.yaml index e556749..b55d88c 100644 --- a/deploy/daemonset.yaml +++ b/deploy/daemonset.yaml @@ -25,7 +25,7 @@ spec: terminationGracePeriodSeconds: 30 # the collector flushes a final report on SIGTERM containers: - name: a3s-observer - image: 10.12.111.133:49164/a3s/observer:0.10.0 # mirror of ghcr.io/a3s-lab/observer:0.10.0 + image: 10.12.111.133:49164/a3s/observer:0.11.0 # mirror of ghcr.io/a3s-lab/observer:0.11.0 securityContext: # eBPF load + tracepoint attach requires privileged. (Verified: a non-root # process with only CAP_BPF+CAP_PERFMON fails to attach — the tracefs tracepoint diff --git a/src/model.rs b/src/model.rs index d94eadf..47bb1e1 100644 --- a/src/model.rs +++ b/src/model.rs @@ -78,6 +78,19 @@ pub enum AgentEvent { prompt_tokens: Option, completion_tokens: Option, }, + /// A security-sensitive action — rare and high-signal, filtered in-kernel: privilege escalation + /// (`setuid`/`setresuid`/`setreuid` → root from non-root — note legitimate `sudo`/`su` also fire + /// this; it's a real transition, expected to pair with a `ToolExec`), process injection (`ptrace` + /// attach/seize of another process), or opening an off-host-reachable listening port (`bind` to a + /// fixed non-loopback port). `kind` names which; `detail` is kind-specific. Group (`setgid`) and + /// loopback-only binds are intentionally out of scope. + SecurityAction { + pid: u32, + /// "setuid-root" (privesc) | "ptrace" (injection) | "bind" (opened a port). + kind: &'static str, + /// ptrace: target pid · bind: port · setuid-root: 0. + detail: u64, + }, } /// An [`AgentEvent`] tagged with the resolved [`Identity`] and, for LLM calls, the