diff --git a/src/bpf/gpuprobe.bpf.c b/src/bpf/gpuprobe.bpf.c
index 8cbc8c5..e66c294 100644
--- a/src/bpf/gpuprobe.bpf.c
+++ b/src/bpf/gpuprobe.bpf.c
@@ -64,7 +64,7 @@ int memleak_cuda_malloc(struct pt_regs *ctx)
 	__u32 pid, key0 = 0;
 
 	e.size = (__u64)PT_REGS_PARM2(ctx);
-	dev_ptr = (__u64) PT_REGS_PARM1(ctx);
+	dev_ptr = (__u64)PT_REGS_PARM1(ctx);
 	pid = (__u32)bpf_get_current_pid_tgid();
 
 	e.event_type = CUDA_MALLOC;
@@ -102,10 +102,11 @@ int memleak_cuda_malloc_ret(struct pt_regs *ctx)
 	if (!map_ptr) {
 		return -1;
 	}
-	dev_ptr = *(__u64*)map_ptr;
+	dev_ptr = *(__u64 *)map_ptr;
 
 	// read the value copied into `*devPtr` by `cudaMalloc` from userspace
-	if (bpf_probe_read_user(&e->device_addr, sizeof(void *), (void*)dev_ptr)) {
+	if (bpf_probe_read_user(&e->device_addr, sizeof(void *),
+				(void *)dev_ptr)) {
 		return -1;
 	}
 
@@ -151,9 +152,11 @@ int trace_cuda_free_ret(struct pt_regs *ctx)
 }
 
 struct kernel_launch_event {
-	__u64 timestamp;
+	__u64 start; //< timestamp
+	__u64 end; //< timestamp
 	__u64 kern_offset;
 	__u32 pid;
+	__s32 ret;
 };
 
 struct {
@@ -163,17 +166,62 @@ struct {
 	__uint(max_entries, 10240);
 } kernel_launch_events_queue SEC(".maps");
 
+/**
+ * In-flight `cudaLaunchKernel` calls, keyed by the value the entry probe
+ * stores in `kernel_launch_event.pid`. The entry probe inserts a record; the
+ * return probe completes it, pushes it to `kernel_launch_events_queue`, and
+ * deletes it again.
+ */
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, __u32);
+	__type(value, struct kernel_launch_event);
+	__uint(max_entries, 1024);
+} kernel_launch_pid_to_event SEC(".maps");
+
 SEC("uprobe/cudaKernelLaunch")
 int trace_cuda_launch_kernel(struct pt_regs *ctx)
 {
-	struct kernel_launch_event e;
-	void *kern_offset;
+	// zero-initialise: `end` and `ret` are only filled in by the uretprobe,
+	// but the whole struct is copied into the map below
+	struct kernel_launch_event e = {};
 
-	e.timestamp = bpf_ktime_get_ns();
+	e.start = bpf_ktime_get_ns();
 	e.kern_offset = (__u64)PT_REGS_PARM1(ctx);
 	e.pid = (__u32)bpf_get_current_pid_tgid();
 
-	return bpf_map_push_elem(&kernel_launch_events_queue, &e, 0);
+	return bpf_map_update_elem(&kernel_launch_pid_to_event, &e.pid, &e, 0);
+}
+
+/**
+ * Completes the record stored by `trace_cuda_launch_kernel` with the return
+ * code and end timestamp of `cudaLaunchKernel`, then queues it for userspace.
+ */
+SEC("uretprobe/cudaKernelLaunch")
+int trace_cuda_launch_kernel_ret(struct pt_regs *ctx)
+{
+	struct kernel_launch_event *e;
+	__u32 pid;
+	__s32 ret;
+
+	pid = (__u32)bpf_get_current_pid_tgid();
+	e = bpf_map_lookup_elem(&kernel_launch_pid_to_event, &pid);
+
+	if (!e || e->pid != pid)
+		return -1;
+
+	e->ret = (__s32)PT_REGS_RC(ctx);
+	e->end = bpf_ktime_get_ns();
+
+	ret = bpf_map_push_elem(&kernel_launch_events_queue, e, 0);
+	if (ret != 0) {
+		bpf_printk("call failed\n");
+	}
+
+	// the launch is complete whether or not the push succeeded: drop the
+	// in-flight record so entries do not accumulate in the map
+	bpf_map_delete_elem(&kernel_launch_pid_to_event, &pid);
+	return ret;
 }
 
 /**
diff --git a/src/gpuprobe/cuda_error.rs b/src/gpuprobe/cuda_error.rs
index 73cadfb..47eb069 100644
--- a/src/gpuprobe/cuda_error.rs
+++ b/src/gpuprobe/cuda_error.rs
@@ -7,7 +7,7 @@ use super::GpuprobeError;
 #[repr(i32)]
 #[derive(std::cmp::PartialEq, std::cmp::Eq, std::hash::Hash, Clone, Copy, Debug)]
 pub enum CudaErrorT {
-    CudaSuccess,
+    CudaSuccess = 0,
     CudaErrorInvalidValue,
     CudaErrorMemoryAllocation,
     UnsupportedErrorType,
@@ -28,6 +28,7 @@ impl CudaErrorT {
 pub enum EventType {
     CudaMalloc,
     CudaFree,
+    CudaLaunchKernel,
 }
 
 impl ToString for EventType {
@@ -35,6 +36,7 @@ impl ToString for EventType {
         match self {
             Self::CudaMalloc => "cudaMalloc",
             Self::CudaFree => "cudaFree",
+            Self::CudaLaunchKernel => "cudaLaunchKernel",
         }
         .to_string()
     }
diff --git a/src/gpuprobe/gpuprobe_cudatrace.rs b/src/gpuprobe/gpuprobe_cudatrace.rs
index 8c6a3e0..e8c1964 100644
--- a/src/gpuprobe/gpuprobe_cudatrace.rs
+++ b/src/gpuprobe/gpuprobe_cudatrace.rs
@@ -9,7 +9,10 @@ use std::collections::{BTreeMap, HashMap};
 
 use libbpf_rs::{MapCore, UprobeOpts};
 
-use super::{Gpuprobe, GpuprobeError};
+use super::{
+    cuda_error::{CudaError, CudaErrorT, EventType},
+    Gpuprobe, GpuprobeError,
+};
 
 /// contains implementations for the cudatrace program
 impl Gpuprobe {
@@ -22,6 +25,12 @@ impl Gpuprobe {
             ..Default::default()
         };
 
+        let opts_launch_kernel_ret = UprobeOpts {
+            func_name: "cudaLaunchKernel".to_string(),
+            retprobe: true,
+            ..Default::default()
+        };
+
         let cuda_launch_kernel_uprobe_link = self
             .skel
             .skel
@@ -30,7 +39,16 @@ impl Gpuprobe {
             .attach_uprobe_with_opts(-1, &self.opts.libcudart_path, 0, opts_launch_kernel)
             .map_err(|_| GpuprobeError::AttachError)?;
 
+        let cuda_launch_kernel_uretprobe_link = self
+            .skel
+            .skel
+            .progs
+            .trace_cuda_launch_kernel_ret
+            .attach_uprobe_with_opts(-1, &self.opts.libcudart_path, 0, opts_launch_kernel_ret)
+            .map_err(|_| GpuprobeError::AttachError)?;
+
         self.links.links.trace_cuda_launch_kernel = Some(cuda_launch_kernel_uprobe_link);
+        self.links.links.trace_cuda_launch_kernel_ret = Some(cuda_launch_kernel_uretprobe_link);
         Ok(())
     }
 
@@ -60,8 +78,18 @@ impl Gpuprobe {
                     ));
                 }
             };
-            self.glob_process_table.create_entry(event.pid)?;
-            self.cudatrace_state.handle_event(event)?;
+
+            // failed launches go to `err_state` rather than `cudatrace_state`
+            if event.is_error() {
+                self.err_state.insert(CudaError {
+                    pid: event.pid,
+                    event: EventType::CudaLaunchKernel,
+                    error: CudaErrorT::from_int(event.ret),
+                })?;
+            } else {
+                self.glob_process_table.create_entry(event.pid)?;
+                self.cudatrace_state.handle_event(event)?;
+            }
         }
 
         Ok(())
@@ -117,9 +145,15 @@ impl CudatraceState {
 }
 
+/// Mirrors `struct kernel_launch_event` in `gpuprobe.bpf.c`. `#[repr(C)]`
+/// keeps the fields in declaration order, which reading an event from raw
+/// bytes relies on.
+#[repr(C)]
 struct KernelLaunchEvent {
-    timestamp: u64,
+    start: u64,
+    end: u64,
     kern_offset: u64,
     pid: u32,
+    ret: i32,
 }
 
 impl KernelLaunchEvent {
@@ -134,4 +168,9 @@ impl KernelLaunchEvent {
         // 2. The byte array is at least as large as the struct
         unsafe { Some(std::ptr::read_unaligned(bytes.as_ptr() as *const Self)) }
     }
+
+    /// Return true iff the event is an error
+    pub fn is_error(&self) -> bool {
+        self.ret != CudaErrorT::CudaSuccess as i32
+    }
 }
diff --git a/src/gpuprobe/gpuprobe_memleak.rs b/src/gpuprobe/gpuprobe_memleak.rs
index 5b926bb..7cb5d39 100644
--- a/src/gpuprobe/gpuprobe_memleak.rs
+++ b/src/gpuprobe/gpuprobe_memleak.rs
@@ -100,22 +100,18 @@ impl Gpuprobe {
                 }
             };
 
-            match event.is_error() {
-                false => {
-                    self.glob_process_table.create_entry(event.pid)?;
-                    self.memleak_state.handle_event(event)?;
-                }
-                true => {
-                    let err = CudaError {
-                        pid: event.pid,
-                        event: match event.event_type {
-                            0 => super::cuda_error::EventType::CudaMalloc,
-                            _ => super::cuda_error::EventType::CudaFree,
-                        },
-                        error: CudaErrorT::from_int(event.ret),
-                    };
-                    self.err_state.insert(err)?;
-                }
+            if event.is_error() {
+                self.err_state.insert(CudaError {
+                    pid: event.pid,
+                    event: match event.event_type {
+                        0 => super::cuda_error::EventType::CudaMalloc,
+                        _ => super::cuda_error::EventType::CudaFree,
+                    },
+                    error: CudaErrorT::from_int(event.ret),
+                })?;
+            } else {
+                self.glob_process_table.create_entry(event.pid)?;
+                self.memleak_state.handle_event(event)?;
             }
         }
         Ok(())
diff --git a/src/gpuprobe/mod.rs b/src/gpuprobe/mod.rs
index e870d93..97200ab 100644
--- a/src/gpuprobe/mod.rs
+++ b/src/gpuprobe/mod.rs
@@ -83,6 +83,7 @@ const DEFAULT_LINKS: GpuprobeLinks = GpuprobeLinks {
     trace_cuda_free: None,
     trace_cuda_free_ret: None,
     trace_cuda_launch_kernel: None,
+    trace_cuda_launch_kernel_ret: None,
     trace_cuda_memcpy: None,
     trace_cuda_memcpy_ret: None,
 };