Class: Vivarium::Daemon

Inherits:
Object
  • Object
show all
Defined in:
lib/vivarium.rb

Constant Summary collapse

BPF_PROGRAM_TEMPLATE =
<<~CLANG
  #include <linux/socket.h>
  #include <uapi/linux/in.h>
  #include <uapi/linux/in6.h>
  #include <uapi/linux/ip.h>
  #include <uapi/linux/udp.h>

  #ifndef SOCK_STREAM
  #define SOCK_STREAM 1
  #endif
  #ifndef SOCK_DGRAM
  #define SOCK_DGRAM 2
  #endif

  struct net;
  struct sock;
  struct sk_buff;
  struct task_struct;
  struct kernel_siginfo;
  struct cred;
  struct user_namespace;
  struct linux_binprm;

  // Minimal stand-ins for the kernel's struct path / struct file. Only f_path
  // is exposed: __off pads the struct so that f_path sits at the byte offset
  // substituted for __VIVARIUM_F_PATH_OFFSET__ before the program is compiled.
  struct path {
    void *mnt;
    void *dentry;
  };
  struct file {
    char __off[__VIVARIUM_F_PATH_OFFSET__];
    struct path f_path;
  };

  struct qstr {
    union {
      struct {
        u64 hash_len;
      };
      struct {
        u32 hash;
        u32 len;
      };
    };
    const unsigned char *name;
  };

  // Two partial views of the kernel's dentry, each padded up to a single
  // member of interest. The pad sizes are placeholders that are replaced with
  // detected offsets before compilation.
  //
  // dentry_base: used to copy a dentry out of kernel memory and reach d_name.
  struct dentry_base {
    char __pad[__VIVARIUM_DENTRY_D_NAME_OFFSET__];
    struct qstr d_name;
  };

  // dentry: used to follow d_parent directly from a hook argument.
  struct dentry {
    char __pad[__VIVARIUM_DENTRY_D_PARENT_OFFSET__];
    struct dentry *d_parent;
  };

  struct sockaddr_t {
    u16 sa_family;
    unsigned char sa_data[14];
  };

  struct sockaddr_in_t {
    u16 sin_family;
    u16 sin_port;
    u32 sin_addr;
    unsigned char pad[8];
  };

  struct sockaddr_in6_t {
    u16 sin6_family;
    u16 sin6_port;
    u32 sin6_flowinfo;
    unsigned char sin6_addr[16];
    u32 sin6_scope_id;
  };

  struct sockaddr_port_t {
    u16 family;
    u16 port;
  };

  struct iovec_t {
    void *iov_base;
    unsigned long iov_len;
  };

  struct user_msghdr_t {
    void *msg_name;
    int msg_namelen;
    struct iovec_t *msg_iov;
    unsigned long msg_iovlen;
    void *msg_control;
    unsigned long msg_controllen;
    unsigned int msg_flags;
  };

  struct mmsghdr_t {
    struct user_msghdr_t msg_hdr;
    unsigned int msg_len;
  };

  struct sk_buff_t {
    unsigned char *head;
    unsigned char *data;
    u32 len;
    u16 mac_header;
    u16 network_header;
    u16 transport_header;
  };

  // trace_id is a 128-bit value carried as two u64 halves (hi/lo). They are
  // kept as flat scalar fields (not a nested struct) because rbbcc/Fiddle's
  // CParser cannot decode nested-struct members of a BPF map value type.
  struct event_t {
    u64 ktime_ns;
    u32 pid;
    u32 tid;
    u32 uid;
    u32 gid;
    u64 trace_id_hi;
    u64 trace_id_lo;
    u64 span_id;
    u64 parent_span_id;
    char comm[#{EVENT_COMM_SIZE}];
    char event_name[16];
    char payload[#{EVENT_PAYLOAD_SIZE}];
    u64 dropped_since_last;
  };

  // Per-thread OpenTelemetry context. trace_id (hi/lo) is issued by userspace
  // at target registration and inherited by spawned children; span_id is
  // re-issued per tid (root in userspace, children at fork).
  struct otel_ctx_t {
    u64 trace_id_hi;
    u64 trace_id_lo;
    u64 span_id;
    u64 parent_span_id;
  };

  // config_root_targets:    pid -> 1 for explicitly registered target processes.
  // config_spawned_targets: tid -> 1 for tasks forked (directly or indirectly)
  //                         from a target; filled by the fork tracepoint.
  // dns_connected_tids:     tid -> 1 after the thread connect()ed to port 53.
  // otel_ctx:               tid -> trace/span identifiers for that thread.
  // events:                 ring buffer carrying struct event_t records.
  // drop_counter:           single slot counting events lost because the ring
  //                         buffer had no room.
  BPF_HASH(config_root_targets, u32, u8, 1024);
  BPF_HASH(config_spawned_targets, u32, u8, 8192);
  BPF_HASH(dns_connected_tids, u32, u8, 8192);
  BPF_HASH(otel_ctx, u32, struct otel_ctx_t, 8192);
  BPF_RINGBUF_OUTPUT(events, #{EVENTS_RINGBUF_PAGES});
  BPF_ARRAY(drop_counter, u64, 1);

  // Builds a pseudo-random 64-bit span id from two 32-bit draws of
  // bpf_get_prandom_u32(): one fills the upper half, the other the lower half.
  static __always_inline u64 rand_span_id()
  {
    u64 upper = (u64)bpf_get_prandom_u32();
    u64 lower = (u64)bpf_get_prandom_u32();

    return (upper << 32) | lower;
  }

  // Returns 1 when the calling task should be traced, 0 otherwise. A task
  // qualifies when its pid is flagged (value 1) in config_root_targets or its
  // tid is flagged (value 1) in config_spawned_targets.
  static __always_inline int target_enabled(u32 pid, u32 tid)
  {
    u8 *root_flag = config_root_targets.lookup(&pid);
    if (root_flag && *root_flag == 1) {
      return 1;
    }

    u8 *spawned_flag = config_spawned_targets.lookup(&tid);
    return (spawned_flag && *spawned_flag == 1) ? 1 : 0;
  }

  static __always_inline int monitored_capability(int cap)
  {
    switch (cap) {
      case 1:  /* CAP_DAC_OVERRIDE */
      case 2:  /* CAP_DAC_READ_SEARCH */
      case 6:  /* CAP_SETGID */
      case 7:  /* CAP_SETUID */
      case 12: /* CAP_NET_ADMIN */
      case 16: /* CAP_SYS_MODULE */
      case 17: /* CAP_SYS_RAWIO */
      case 19: /* CAP_SYS_PTRACE */
      case 21: /* CAP_SYS_ADMIN */
      case 22: /* CAP_SYS_BOOT */
      case 25: /* CAP_SYS_TIME */
      case 38: /* CAP_PERFMON */
      case 39: /* CAP_BPF */
      case 40: /* CAP_CHECKPOINT_RESTORE */
        return 1;
      default:
        return 0;
    }
  }

  // Publishes `src` on the events ring buffer.
  //
  // The caller fills pid, event_name and payload. This helper stamps the
  // record with the kernel time, tid, uid/gid, comm and, when the current
  // thread has an entry in otel_ctx, its trace/span identifiers.
  //
  // When no ring-buffer slot can be reserved the event is discarded and
  // drop_counter is incremented. The next event that does get a slot carries
  // the accumulated count in dropped_since_last and resets the counter.
  static __always_inline void submit_event(struct event_t *src)
  {
    u32 key = 0;
    u64 *cnt;

    struct event_t *ev = events.ringbuf_reserve(sizeof(struct event_t));
    if (!ev) {
      // Ring buffer full: remember the loss so userspace can be told later.
      cnt = drop_counter.lookup(&key);
      if (cnt) {
        __sync_fetch_and_add(cnt, 1);
      }
      return;
    }

    // Start from the caller's record, then overwrite the fields owned here.
    __builtin_memcpy(ev, src, sizeof(*ev));
    ev->ktime_ns = bpf_ktime_get_ns();
    ev->tid = (u32)bpf_get_current_pid_tgid();
    ev->dropped_since_last = 0;

    // Lower 32 bits hold the uid, upper 32 bits the gid.
    u64 uid_gid = bpf_get_current_uid_gid();
    ev->uid = (u32)uid_gid;
    ev->gid = (u32)(uid_gid >> 32);
    bpf_get_current_comm(&ev->comm, sizeof(ev->comm));

    // Attach the per-thread tracing context if one was registered.
    u32 ctid = (u32)bpf_get_current_pid_tgid();
    struct otel_ctx_t *octx = otel_ctx.lookup(&ctid);
    if (octx) {
      ev->trace_id_hi = octx->trace_id_hi;
      ev->trace_id_lo = octx->trace_id_lo;
      ev->span_id = octx->span_id;
      ev->parent_span_id = octx->parent_span_id;
    }

    // Atomically take the pending drop count and reset it to zero.
    cnt = drop_counter.lookup(&key);
    if (cnt && *cnt > 0) {
      ev->dropped_since_last = __sync_lock_test_and_set(cnt, 0);
    }

    events.ringbuf_submit(ev, 0);
  }

  // Emits an "env_caccess" event describing one environment-related call.
  //
  // op / op_len: operation label (for example "getenv") and its length without
  //   the NUL; copied to the start of the payload and clipped to
  //   ENV_PAYLOAD_OP_SIZE - 1 bytes.
  // name_ptr: user-space pointer to the variable name, or NULL; read as a
  //   NUL-terminated string into the payload at ENV_PAYLOAD_KEY_OFFSET.
  static __always_inline void submit_env_event(u32 pid, const char *op, u32 op_len, const char *name_ptr)
  {
    struct event_t ev = {};
    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "env_caccess", 12);

    if (op && op_len > 0) {
      // The payload is zero-initialised, so clipping keeps the label terminated.
      if (op_len > #{ENV_PAYLOAD_OP_SIZE} - 1) {
        op_len = #{ENV_PAYLOAD_OP_SIZE} - 1;
      }
      __builtin_memcpy(&ev.payload[0], op, op_len);
    }

    if (name_ptr) {
      bpf_probe_read_user_str(&ev.payload[#{ENV_PAYLOAD_KEY_OFFSET}], #{ENV_PAYLOAD_KEY_SIZE}, name_ptr);
    }

    submit_event(&ev);
  }

  // Returns non-zero when the user-space socket address at `addr` is an
  // AF_INET or AF_INET6 address whose port is 53, and 0 for anything else
  // (including addresses that cannot be read, which leave family at 0).
  static __always_inline int is_dns_destination(void *addr)
  {
    u16 family = 0;
    bpf_probe_read_user(&family, sizeof(family), addr);

    if (family == AF_INET) {
      struct sockaddr_in_t sin = {};
      bpf_probe_read_user(&sin, sizeof(sin), addr);
      // Ports are stored in network byte order, hence the htons on the constant.
      return sin.sin_port == __constant_htons(53);
    }

    if (family == AF_INET6) {
      struct sockaddr_in6_t sin6 = {};
      bpf_probe_read_user(&sin6, sizeof(sin6), addr);
      return sin6.sin6_port == __constant_htons(53);
    }

    return 0;
  }

  // Emits a "dns_req" event carrying part of an outgoing user-space buffer.
  //
  // The first 12 bytes of `payload` are skipped (presumably the fixed-size DNS
  // header) and up to 64 of the following bytes are copied into the event.
  // Buffers of 12 bytes or fewer produce no event.
  static __always_inline void submit_dns_req(u32 pid, unsigned char *payload, unsigned int payload_len)
  {
    unsigned int copy_len = payload_len;

    if (copy_len <= 12) {
      return;
    }

    copy_len -= 12;
    if (copy_len > 64) {
      copy_len = 64;
    }

    struct event_t ev = {};
    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "dns_req", 8);
    bpf_probe_read_user(&ev.payload[0], copy_len, payload + 12);
    submit_event(&ev);
  }

  // Copies the name of `dentry` (its d_name) into `buffer` as a NUL-terminated
  // string.
  //
  // buffer: destination slot.
  // max:    size of that slot in bytes, including room for the terminator.
  //
  // Returns the number of name bytes requested (at most max - 1), or -1 when
  // an argument is NULL, the slot has no room, or the dentry has no name.
  static __always_inline int read_dentry_name(struct dentry *dentry, char *buffer, size_t max)
  {
    struct dentry_base d = {};

    if (!dentry || !buffer || max == 0) {
      return -1;
    }

    // d stays zeroed if the read fails, which the NULL-name check then catches.
    bpf_probe_read_kernel(&d, sizeof(d), (void *)dentry);
    if (!d.d_name.name) {
      return -1;
    }

    // Reserve one byte for the terminator: the read below is given len + 1
    // bytes, so len must stay below max or the write would spill one byte past
    // the caller's slot.
    unsigned int len = d.d_name.len;
    if (len > max - 1) {
      len = max - 1;
    }

    bpf_probe_read_kernel_str(buffer, len + 1, (void *)d.d_name.name);
    return len;
  }

  // Fork tracepoint: propagates tracing to children of traced tasks.
  //
  // When the parent is a root target or an already-spawned target, the child
  // is added to config_spawned_targets, receives its own otel_ctx entry (same
  // trace id as the parent, fresh span id, parent_span_id pointing at the
  // parent's span) and a "proc_fork" event is emitted. Payload layout:
  // child id (u32) at offset 0, child span id (u64) at offset 8.
  TRACEPOINT_PROBE(sched, sched_process_fork)
  {
    u32 parent = args->parent_pid;
    u32 child = args->child_pid;
    u8 one = 1;
    int is_target = 0;

    u8 *enabled_root = config_root_targets.lookup(&parent);
    if (enabled_root && *enabled_root == 1) {
      is_target = 1;
      config_spawned_targets.update(&child, &one);
    } else {
      u8 *enabled_spawned = config_spawned_targets.lookup(&parent);
      if (enabled_spawned && *enabled_spawned == 1) {
        is_target = 1;
        config_spawned_targets.update(&child, &one);
      }
    }

    if (is_target) {
      u64 pid_tgid = bpf_get_current_pid_tgid();

      // Re-issue a fresh span_id for the child, inheriting the parent's
      // trace_id and linking the child's parent_span_id to the parent span.
      u32 parent_tid = (u32)pid_tgid;
      struct otel_ctx_t *pctx = otel_ctx.lookup(&parent_tid);
      struct otel_ctx_t cctx = {};
      u64 child_span = rand_span_id();
      if (pctx) {
        cctx.trace_id_hi = pctx->trace_id_hi;
        cctx.trace_id_lo = pctx->trace_id_lo;
        cctx.parent_span_id = pctx->span_id;
      }
      // Without a parent context the child keeps a zero trace id.
      cctx.span_id = child_span;
      otel_ctx.update(&child, &cctx);

      struct event_t ev = {};
      ev.pid = pid_tgid >> 32;
      __builtin_memcpy(ev.event_name, "proc_fork", 10);
      __builtin_memcpy(&ev.payload[0], &child, sizeof(child));
      __builtin_memcpy(&ev.payload[8], &child_span, sizeof(child_span));
      submit_event(&ev);
    }

    return 0;
  }

  // Exit tracepoint: emits "proc_exit" for traced tasks, then removes the
  // exiting tid from the per-thread maps. The cleanup runs for every task,
  // traced or not.
  TRACEPOINT_PROBE(sched, sched_process_exit)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;
    if (target_enabled(pid, tid)) {
      struct event_t ev = {};
      ev.pid = pid;
      __builtin_memcpy(ev.event_name, "proc_exit", 10);
      // Must be submitted before the deletes below so the event still picks up
      // the thread's otel_ctx entry.
      submit_event(&ev);
    }
    config_spawned_targets.delete(&tid);
    dns_connected_tids.delete(&tid);
    otel_ctx.delete(&tid);
    return 0;
  }

  // LSM file_open hook: emits a "path_open" event whose payload is the path of
  // the file being opened by a traced task. If the path cannot be resolved and
  // nothing was written to the payload, "<path_error>" is reported instead.
  // Returns 0 on every path.
  LSM_PROBE(file_open, struct file *file)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;
    if (!target_enabled(pid, tid)) {
      return 0;
    }

    struct event_t ev = {};
    int path_ret;
    ev.pid = pid;
    // Copy the terminating NUL as well (10 bytes), as every other handler
    // does, instead of relying on ev being zero-initialised.
    __builtin_memcpy(ev.event_name, "path_open", 10);

    path_ret = bpf_d_path(&file->f_path, ev.payload, sizeof(ev.payload));
    if (path_ret < 0) {
      if (ev.payload[0] == 0) {
        __builtin_memcpy(ev.payload, "<path_error>", 13);
      }
    }

    submit_event(&ev);

    return 0;
  }

  // LSM mmap_file hook: emits "mmap_exec" with the file path when a traced
  // task maps a file with execute permission, either requested or effective.
  // Anonymous mappings (no file) and non-executable mappings are ignored.
  LSM_PROBE(mmap_file, struct file *file, unsigned long reqprot,
            unsigned long prot, unsigned long flags)
  {
    if (!file) {
      return 0;
    }
    if (!((prot | reqprot) & 0x04)) {   /* PROT_EXEC */
      return 0;
    }

    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;
    if (!target_enabled(pid, tid)) {
      return 0;
    }

    struct event_t ev = {};
    int path_ret;
    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "mmap_exec", 10);

    path_ret = bpf_d_path(&file->f_path, ev.payload, sizeof(ev.payload));
    if (path_ret < 0) {
      // Only substitute the marker when nothing was written to the payload.
      if (ev.payload[0] == 0) {
        __builtin_memcpy(ev.payload, "<path_error>", 13);
      }
    }

    submit_event(&ev);
    return 0;
  }

  // LSM socket_create hook: emits "odd_socket" when a traced task creates a
  // socket that is not an IPv4/IPv6 stream or datagram socket.
  // Payload layout (u16 each): family at 0, type at 2, protocol at 4.
  LSM_PROBE(socket_create, int family, int type, int protocol, int kern)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    // Ordinary TCP/UDP sockets are not reported.
    if ((family == AF_INET || family == AF_INET6) && (type == SOCK_STREAM || type == SOCK_DGRAM)) {
      return 0;
    }

    struct event_t ev = {};
    u16 family16 = family;
    u16 type16 = type;
    u16 proto16 = protocol;

    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "odd_socket", 11);
    __builtin_memcpy(&ev.payload[0], &family16, sizeof(family16));
    __builtin_memcpy(&ev.payload[2], &type16, sizeof(type16));
    __builtin_memcpy(&ev.payload[4], &proto16, sizeof(proto16));
    submit_event(&ev);

    return 0;
  }

  // LSM socket_connect hook: emits "sock_connect" for traced tasks.
  // Payload layout: address family (u16) at 0; for AF_INET/AF_INET6 the port
  // (network byte order) at 2 and the raw address at 4 (4 or 16 bytes).
  // Connections to port 53 also mark the thread in dns_connected_tids so that
  // a later send without an explicit destination can be recognised as DNS.
  LSM_PROBE(socket_connect, struct socket *sock, struct sockaddr *address, int addrlen)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;
    u16 family = 0;
    u8 one = 1;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    if (!address) {
      return 0;
    }

    // The address is read with the kernel variant of the probe-read helper.
    bpf_probe_read_kernel(&family, sizeof(family), address);

    struct event_t ev = {};
    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "sock_connect", 13);
    __builtin_memcpy(&ev.payload[0], &family, sizeof(family));

    if (family == AF_INET) {
      struct sockaddr_in_t sin = {};
      bpf_probe_read_kernel(&sin, sizeof(sin), address);
      __builtin_memcpy(&ev.payload[2], &sin.sin_port, sizeof(sin.sin_port));
      __builtin_memcpy(&ev.payload[4], &sin.sin_addr, sizeof(sin.sin_addr));
      if (sin.sin_port == __constant_htons(53)) {
        dns_connected_tids.update(&tid, &one);
      }
    } else if (family == AF_INET6) {
      struct sockaddr_in6_t sin6 = {};
      bpf_probe_read_kernel(&sin6, sizeof(sin6), address);
      __builtin_memcpy(&ev.payload[2], &sin6.sin6_port, sizeof(sin6.sin6_port));
      __builtin_memcpy(&ev.payload[4], &sin6.sin6_addr, sizeof(sin6.sin6_addr));
      if (sin6.sin6_port == __constant_htons(53)) {
        dns_connected_tids.update(&tid, &one);
      }
    }

    submit_event(&ev);

    return 0;
  }

  TRACEPOINT_PROBE(syscalls, sys_enter_sendmsg)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;
    struct user_msghdr_t msg = {};
    struct iovec_t iov = {};

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    if (!args->msg) {
      return 0;
    }

    bpf_probe_read_user(&msg, sizeof(msg), args->msg);
    if (!msg.msg_iov || msg.msg_iovlen == 0) {
      return 0;
    }

    if (msg.msg_name && !is_dns_destination(msg.msg_name)) {
      return 0;
    }

    bpf_probe_read_user(&iov, sizeof(iov), msg.msg_iov);
    if (!iov.iov_base) {
      return 0;
    }

    submit_dns_req(pid, (unsigned char *)iov.iov_base, (unsigned int)iov.iov_len);

    return 0;
  }

  // sendto(2) entry: reports the buffer of a DNS-bound send from a traced task
  // as a "dns_req" event.
  //
  // With an explicit destination the port must be 53; without one the thread
  // must have been marked in dns_connected_tids by an earlier connect to port
  // 53. The mark is removed after the request has been reported.
  TRACEPOINT_PROBE(syscalls, sys_enter_sendto)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;
    unsigned char *buff = args->buff;
    int dns_match = 0;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    if (!buff) {
      return 0;
    }

    if (args->addr) {
      dns_match = is_dns_destination(args->addr);
    } else {
      u8 *connected = dns_connected_tids.lookup(&tid);
      dns_match = connected && *connected == 1;
    }

    if (!dns_match) {
      return 0;
    }

    submit_dns_req(pid, buff, args->len);
    dns_connected_tids.delete(&tid);

    return 0;
  }

  TRACEPOINT_PROBE(syscalls, sys_enter_sendmmsg)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;
    struct mmsghdr_t mmsg = {};
    struct iovec_t iov = {};

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    if (!args->mmsg) {
      return 0;
    }

    bpf_probe_read_user(&mmsg, sizeof(mmsg), args->mmsg);
    if (mmsg.msg_hdr.msg_name && !is_dns_destination(mmsg.msg_hdr.msg_name)) {
      return 0;
    }

    if (!mmsg.msg_hdr.msg_iov || mmsg.msg_hdr.msg_iovlen == 0) {
      return 0;
    }

    bpf_probe_read_user(&iov, sizeof(iov), mmsg.msg_hdr.msg_iov);
    if (!iov.iov_base) {
      return 0;
    }

    submit_dns_req(pid, (unsigned char *)iov.iov_base, (unsigned int)iov.iov_len);

    return 0;
  }

  // execve(2) entry: emits "proc_exec" for traced tasks.
  // The payload is split into four slots of PROC_EXEC_SLOT_SIZE bytes each:
  // the filename followed by argv[0], argv[1] and argv[2]. Missing arguments
  // leave their slot zeroed.
  TRACEPOINT_PROBE(syscalls, sys_enter_execve)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;
    const char *argv0 = 0;
    const char *argv1 = 0;
    const char *argv2 = 0;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    if (!args->filename) {
      return 0;
    }

    struct event_t ev = {};
    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "proc_exec", 10);
    bpf_probe_read_user_str(&ev.payload[0], #{PROC_EXEC_SLOT_SIZE}, args->filename);

    if (args->argv) {
      // First read the argument pointers themselves, then the strings.
      bpf_probe_read_user(&argv0, sizeof(argv0), &args->argv[0]);
      bpf_probe_read_user(&argv1, sizeof(argv1), &args->argv[1]);
      bpf_probe_read_user(&argv2, sizeof(argv2), &args->argv[2]);

      if (argv0) {
        bpf_probe_read_user_str(&ev.payload[#{PROC_EXEC_SLOT_SIZE}], #{PROC_EXEC_SLOT_SIZE}, argv0);
      }
      if (argv1) {
        bpf_probe_read_user_str(&ev.payload[#{PROC_EXEC_SLOT_SIZE * 2}], #{PROC_EXEC_SLOT_SIZE}, argv1);
      }
      if (argv2) {
        bpf_probe_read_user_str(&ev.payload[#{PROC_EXEC_SLOT_SIZE * 3}], #{PROC_EXEC_SLOT_SIZE}, argv2);
      }
    }

    submit_event(&ev);
    return 0;
  }

  // LSM ptrace_access_check hook: emits "ptrace_check" when a traced task
  // triggers the check. Payload: access mode (u32) at offset 0.
  LSM_PROBE(ptrace_access_check, struct task_struct *child, unsigned int mode)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    struct event_t ev = {};
    u32 mode32 = mode;

    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "ptrace_check", 13);
    __builtin_memcpy(&ev.payload[0], &mode32, sizeof(mode32));
    submit_event(&ev);

    return 0;
  }

  // LSM sb_mount hook: emits "sb_mount" when a traced task mounts a filesystem.
  // Payload layout: mount flags (u64) at 0, device name string at 8 (up to
  // 120 bytes), filesystem type string at 128 (up to 120 bytes).
  LSM_PROBE(sb_mount, const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    struct event_t ev = {};
    u64 flags64 = flags;

    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "sb_mount", 9);
    __builtin_memcpy(&ev.payload[0], &flags64, sizeof(flags64));

    // Both strings are read with the kernel variant of the string helper.
    if (dev_name) {
      bpf_probe_read_kernel_str(&ev.payload[8], 120, dev_name);
    }
    if (type) {
      bpf_probe_read_kernel_str(&ev.payload[128], 120, type);
    }

    submit_event(&ev);

    return 0;
  }

  // LSM kernel_read_file hook: emits "kernel_read_file" for traced tasks.
  // Payload layout (u32 each): id at 0, contents at 4.
  LSM_PROBE(kernel_read_file, struct file *file, int id, int contents)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    struct event_t ev = {};
    u32 id32 = id;
    u32 contents32 = contents;

    ev.pid = pid;
    // The name fills all 16 bytes of event_name, so it is stored without a
    // terminating NUL.
    __builtin_memcpy(ev.event_name, "kernel_read_file", 16);
    __builtin_memcpy(&ev.payload[0], &id32, sizeof(id32));
    __builtin_memcpy(&ev.payload[4], &contents32, sizeof(contents32));
    submit_event(&ev);

    return 0;
  }

  // LSM task_kill hook: emits "task_kill" when a traced task sends a signal.
  // Payload: signal number (int) at offset 0. The target task is not recorded.
  LSM_PROBE(task_kill, struct task_struct *p, struct kernel_siginfo *info, int sig, const struct cred *cred)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    struct event_t ev = {};

    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "task_kill", 10);
    __builtin_memcpy(&ev.payload[0], &sig, sizeof(sig));
    submit_event(&ev);

    return 0;
  }

  // LSM task_fix_setuid hook: emits "setid_change" for traced tasks.
  // Payload: the hook's flags argument (u32) at offset 0. The old and new
  // credentials are not inspected.
  LSM_PROBE(task_fix_setuid, struct cred *new, const struct cred *old, int flags)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    struct event_t ev = {};
    u32 flags32 = flags;

    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "setid_change", 13);
    __builtin_memcpy(&ev.payload[0], &flags32, sizeof(flags32));
    submit_event(&ev);

    return 0;
  }

  // LSM capable hook: emits "capable_check" when a traced task is checked for
  // one of the capabilities accepted by monitored_capability().
  // Payload layout (u32 each): capability number at 0, opts at 4.
  LSM_PROBE(capable, const struct cred *cred, struct user_namespace *targ_ns, int cap, unsigned int opts)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    if (!monitored_capability(cap)) {
      return 0;
    }

    struct event_t ev = {};
    u32 cap32 = cap;
    u32 opts32 = opts;

    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "capable_check", 14);
    __builtin_memcpy(&ev.payload[0], &cap32, sizeof(cap32));
    __builtin_memcpy(&ev.payload[4], &opts32, sizeof(opts32));
    submit_event(&ev);

    return 0;
  }

  // LSM bprm_creds_from_file hook: emits "bprm_creds" for traced tasks.
  // Payload layout: has_file flag (u8) at 0, followed from offset 1 by the
  // path of `file` when one is supplied.
  LSM_PROBE(bprm_creds_from_file, struct linux_binprm *bprm, struct file *file)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    struct event_t ev = {};
    u8 has_file = 0;

    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "bprm_creds", 11);

    if (file) {
      has_file = 1;
      // The result is not checked; a failed lookup leaves the path empty.
      bpf_d_path(&file->f_path, &ev.payload[1], sizeof(ev.payload) - 1);
    }

    __builtin_memcpy(&ev.payload[0], &has_file, sizeof(has_file));
    submit_event(&ev);

    return 0;
  }

  // LSM inode_symlink hook: emits "file_symlink" when a traced task creates a
  // symbolic link.
  // Payload layout: link target string (oldname) at 0, name of the new link's
  // dentry at 128; each slot is 128 bytes.
  LSM_PROBE(inode_symlink, struct inode *dir, struct dentry *dentry, const char *oldname)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    struct event_t ev = {};
    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "file_symlink", 13);

    if (oldname) {
      // By the time the LSM hook runs, the target string has already been
      // copied into kernel memory, so it must be read with the kernel variant
      // of the helper (as sb_mount does for its string arguments). The
      // user-space variant fails on kernel addresses and leaves the slot empty.
      bpf_probe_read_kernel_str(&ev.payload[0], 128, oldname);
    }

    if (dentry) {
      read_dentry_name(dentry, &ev.payload[128], 128);
    }

    submit_event(&ev);
    return 0;
  }

  // LSM inode_link hook: emits "file_hardlink" when a traced task creates a
  // hard link.
  // Payload layout: name of the existing dentry at 0, name of the new dentry
  // at 128. Only the final path component of each is recorded.
  LSM_PROBE(inode_link, struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    struct event_t ev = {};
    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "file_hardlink", 14);

    if (old_dentry) {
      read_dentry_name(old_dentry, &ev.payload[0], 128);
    }

    if (new_dentry) {
      read_dentry_name(new_dentry, &ev.payload[128], 128);
    }

    submit_event(&ev);
    return 0;
  }

  // LSM inode_rename hook: emits "file_rename" when a traced task renames a
  // file.
  // Payload layout: old dentry name at 0, new dentry name at 128. Only the
  // final path component of each is recorded; flags are not reported.
  LSM_PROBE(inode_rename, struct inode *old_dir, struct dentry *old_dentry, 
            struct inode *new_dir, struct dentry *new_dentry, unsigned int flags)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    struct event_t ev = {};
    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "file_rename", 12);

    if (old_dentry) {
      read_dentry_name(old_dentry, &ev.payload[0], 128);
    }

    if (new_dentry) {
      read_dentry_name(new_dentry, &ev.payload[128], 128);
    }

    submit_event(&ev);
    return 0;
  }

  // LSM inode_unlink hook: emits "file_unlink" when a traced task removes a
  // file.
  // Payload layout: name of the removed dentry at 0, name of its parent
  // directory's dentry at 128. The parent is skipped when the dentry is its
  // own parent.
  LSM_PROBE(inode_unlink, struct inode *dir, struct dentry *dentry)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    struct event_t ev = {};
    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "file_unlink", 12);

    if (dentry) {
       read_dentry_name(dentry, &ev.payload[0], 128);
     
       // d_parent is reached through the padded struct dentry shim.
       struct dentry *parent = dentry->d_parent;
       if (parent && parent != dentry) {
         read_dentry_name(parent, &ev.payload[128], 128);
       }
    }

    submit_event(&ev);
    return 0;
  }

  // LSM path_chmod hook: emits "file_chmod" when a traced task changes a
  // file's mode.
  // Payload layout: low 16 bits of the new mode (u16) at 0, path from offset 2.
  LSM_PROBE(path_chmod, struct path *path, umode_t mode)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    if (!path) {
      return 0;
    }

    struct event_t ev = {};
    u16 mode_short = mode & 0xFFFF;
    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "file_chmod", 11);
    __builtin_memcpy(&ev.payload[0], &mode_short, sizeof(mode_short));

    // The result is not checked; a failed lookup leaves the path empty.
    bpf_d_path(path, &ev.payload[2], sizeof(ev.payload) - 2);
    submit_event(&ev);
    return 0;
  }

  // getdents64(2) entry: emits "file_getdents" when a traced task reads
  // directory entries.
  // Payload layout (u32 each): file descriptor at 0, requested byte count at 4.
  TRACEPOINT_PROBE(syscalls, sys_enter_getdents64)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    struct event_t ev = {};
    u32 fd = args->fd;
    u32 count = args->count;

    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "file_getdents", 14);
    __builtin_memcpy(&ev.payload[0], &fd, sizeof(fd));
    __builtin_memcpy(&ev.payload[4], &count, sizeof(count));

    submit_event(&ev);
    return 0;
  }

  // Probe handler for an SSL write call: emits "ssl_write" with a prefix of
  // the buffer the traced task is about to write.
  //
  // The buffer pointer is taken from the second argument and the length from
  // the third. Payload layout: full requested length (u32) at
  // SSL_WRITE_PAYLOAD_DATA_LEN_OFFSET, captured length (u32) at
  // SSL_WRITE_PAYLOAD_CAP_LEN_OFFSET, captured bytes at
  // SSL_WRITE_PAYLOAD_DATA_OFFSET. At most SSL_WRITE_PAYLOAD_DATA_MAX bytes
  // are captured.
  int on_ssl_write(struct pt_regs *ctx)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    const char *buf = (const char *)PT_REGS_PARM2(ctx);
    int num = (int)PT_REGS_PARM3(ctx);
    if (!buf || num <= 0) {
      return 0;
    }

    struct event_t ev = {};
    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "ssl_write", 10);

    u32 data_len = (u32)num;
    u32 cap = data_len;
    if (cap > #{SSL_WRITE_PAYLOAD_DATA_MAX}) {
      cap = #{SSL_WRITE_PAYLOAD_DATA_MAX};
    }
    __builtin_memcpy(&ev.payload[#{SSL_WRITE_PAYLOAD_DATA_LEN_OFFSET}], &data_len, sizeof(data_len));
    __builtin_memcpy(&ev.payload[#{SSL_WRITE_PAYLOAD_CAP_LEN_OFFSET}], &cap, sizeof(cap));
    if (bpf_probe_read_user(&ev.payload[#{SSL_WRITE_PAYLOAD_DATA_OFFSET}], cap, buf) < 0) {
      // The read failed: report a captured length of zero so the consumer does
      // not interpret the data area.
      u32 zero = 0;
      __builtin_memcpy(&ev.payload[#{SSL_WRITE_PAYLOAD_CAP_LEN_OFFSET}], &zero, sizeof(zero));
    }

    submit_event(&ev);
    return 0;
  }

  // Probe handler for dlopen: emits "dlopen" with the library path passed as
  // the first argument by a traced task. Calls with a NULL filename are
  // ignored; an unreadable filename is reported as "<path_error>".
  int on_dlopen(struct pt_regs *ctx)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;
    if (!target_enabled(pid, tid)) {
      return 0;
    }

    const char *filename = (const char *)PT_REGS_PARM1(ctx);
    if (!filename) {
      return 0;
    }

    struct event_t ev = {};
    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "dlopen", 7);

    if (bpf_probe_read_user_str(ev.payload, sizeof(ev.payload), filename) < 0) {
      __builtin_memcpy(ev.payload, "<path_error>", 13);
    }

    submit_event(&ev);
    return 0;
  }

  // Probe handler for getenv: reports the variable name (first argument) read
  // by a traced task as an environment event with op "getenv". Calls with a
  // NULL name are ignored.
  int on_getenv(struct pt_regs *ctx)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;
    const char *name = (const char *)PT_REGS_PARM1(ctx);

    if (!target_enabled(pid, tid) || !name) {
      return 0;
    }

    submit_env_event(pid, "getenv", 6, name);
    return 0;
  }

  // Probe handler for setenv: reports the variable name (first argument) set
  // by a traced task as an environment event with op "setenv". The value being
  // assigned is not captured. Calls with a NULL name are ignored.
  int on_setenv(struct pt_regs *ctx)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;
    const char *name = (const char *)PT_REGS_PARM1(ctx);

    if (!target_enabled(pid, tid) || !name) {
      return 0;
    }

    submit_env_event(pid, "setenv", 6, name);
    return 0;
  }

  // Probe handler for unsetenv: reports the variable name (first argument)
  // removed by a traced task as an environment event with op "unsetenv".
  // Calls with a NULL name are ignored.
  int on_unsetenv(struct pt_regs *ctx)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;
    const char *name = (const char *)PT_REGS_PARM1(ctx);

    if (!target_enabled(pid, tid) || !name) {
      return 0;
    }

    submit_env_event(pid, "unsetenv", 8, name);
    return 0;
  }

  // Probe handler for putenv: reports the string passed as the first argument
  // by a traced task as an environment event with op "putenv". The whole
  // argument string is recorded in the key slot, subject to the slot's size
  // limit. Calls with a NULL string are ignored.
  int on_putenv(struct pt_regs *ctx)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;
    const char *string = (const char *)PT_REGS_PARM1(ctx);

    if (!target_enabled(pid, tid) || !string) {
      return 0;
    }

    submit_env_event(pid, "putenv", 6, string);
    return 0;
  }

  // Probe handler for clearenv: reports that a traced task cleared its
  // environment as an environment event with op "clearenv". There is no
  // variable name, so the key slot stays empty.
  int on_clearenv(struct pt_regs *ctx)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    submit_env_event(pid, "clearenv", 8, 0);
    return 0;
  }
CLANG
SPAN_PROBE_TEMPLATE =

USDT span handlers are generated once per attached .so so that each gets a unique fn_name. BCC emits a `_bpf_readarg_<fn_name>_<n>` helper per USDT context, so sharing one fn_name across contexts triggers a redefinition error.

<<~CLANG
  // USDT handler for a span start probe; __SUFFIX__ is a placeholder in the
  // function name. Emits "span_start" for traced tasks.
  // Probe arguments: 1 = method name string, 2 = file name string,
  // 3 = line number.
  // Payload layout: method at 0 (SPAN_METHOD_SIZE bytes), file right after it
  // (SPAN_FILE_SIZE bytes), line number (s64) at SPAN_LINENO_OFFSET.
  int on_span_start__SUFFIX__(struct pt_regs *ctx)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    u64 method_str_ptr = 0;
    u64 file_str_ptr = 0;
    s64 lineno = 0;
    bpf_usdt_readarg(1, ctx, &method_str_ptr);
    bpf_usdt_readarg(2, ctx, &file_str_ptr);
    bpf_usdt_readarg(3, ctx, &lineno);

    struct event_t ev = {};
    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "span_start", 11);
    bpf_probe_read_user_str(&ev.payload[0], #{SPAN_METHOD_SIZE}, (void*)method_str_ptr);
    bpf_probe_read_user_str(&ev.payload[#{SPAN_METHOD_SIZE}], #{SPAN_FILE_SIZE}, (void*)file_str_ptr);
    __builtin_memcpy(&ev.payload[#{SPAN_LINENO_OFFSET}], &lineno, sizeof(lineno));
    submit_event(&ev);
    return 0;
  }

  // USDT handler for a span stop probe; __SUFFIX__ is a placeholder in the
  // function name. Emits "span_stop" for traced tasks.
  // Probe arguments and payload layout are the same as for span_start:
  // method string, file string, line number.
  int on_span_stop__SUFFIX__(struct pt_regs *ctx)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    u64 method_str_ptr = 0;
    u64 file_str_ptr = 0;
    s64 lineno = 0;
    bpf_usdt_readarg(1, ctx, &method_str_ptr);
    bpf_usdt_readarg(2, ctx, &file_str_ptr);
    bpf_usdt_readarg(3, ctx, &lineno);

    struct event_t ev = {};
    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "span_stop", 10);
    bpf_probe_read_user_str(&ev.payload[0], #{SPAN_METHOD_SIZE}, (void*)method_str_ptr);
    bpf_probe_read_user_str(&ev.payload[#{SPAN_METHOD_SIZE}], #{SPAN_FILE_SIZE}, (void*)file_str_ptr);
    __builtin_memcpy(&ev.payload[#{SPAN_LINENO_OFFSET}], &lineno, sizeof(lineno));
    submit_event(&ev);
    return 0;
  }

  // USDT handler for a span raise probe; __SUFFIX__ is a placeholder in the
  // function name. Emits "span_raise" for traced tasks.
  // Probe arguments: 1 = error string, 2 = message string, 3 = file name
  // string, 4 = line number.
  // Payload layout: three consecutive string slots of SPAN_RAISE_SLOT_SIZE
  // bytes (error, message, file), then the line number (s64) at
  // SPAN_RAISE_LINENO_OFFSET.
  int on_span_raise__SUFFIX__(struct pt_regs *ctx)
  {
    u64 pid_tgid = bpf_get_current_pid_tgid();
    u32 pid = pid_tgid >> 32;
    u32 tid = (u32)pid_tgid;

    if (!target_enabled(pid, tid)) {
      return 0;
    }

    u64 error_str_ptr = 0;
    u64 message_str_ptr = 0;
    u64 file_str_ptr = 0;
    s64 lineno = 0;
    bpf_usdt_readarg(1, ctx, &error_str_ptr);
    bpf_usdt_readarg(2, ctx, &message_str_ptr);
    bpf_usdt_readarg(3, ctx, &file_str_ptr);
    bpf_usdt_readarg(4, ctx, &lineno);

    struct event_t ev = {};
    ev.pid = pid;
    __builtin_memcpy(ev.event_name, "span_raise", 11);
    bpf_probe_read_user_str(&ev.payload[0], #{SPAN_RAISE_SLOT_SIZE}, (void*)error_str_ptr);
    bpf_probe_read_user_str(&ev.payload[#{SPAN_RAISE_SLOT_SIZE}], #{SPAN_RAISE_SLOT_SIZE}, (void*)message_str_ptr);
    bpf_probe_read_user_str(&ev.payload[#{SPAN_RAISE_SLOT_SIZE * 2}], #{SPAN_RAISE_SLOT_SIZE}, (void*)file_str_ptr);
    __builtin_memcpy(&ev.payload[#{SPAN_RAISE_LINENO_OFFSET}], &lineno, sizeof(lineno));
    submit_event(&ev);
    return 0;
  }
CLANG

Instance Method Summary collapse

Constructor Details

#initialize(pin_dir: Vivarium.bpf_pin_dir, socket_path: Vivarium.socket_path, ssl_trace: true, libssl_path: nil, dlopen_trace: true, env_trace: true, libc_path: nil, usdt_so_paths: nil) ⇒ Daemon

Returns a new instance of Daemon.



1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
# File 'lib/vivarium.rb', line 1847

# Stores the daemon's configuration; nothing is started until {#run}.
#
# @param pin_dir [String] directory created by {#run} and used for pinned maps
# @param socket_path [String] Unix socket path handed to the API server
# @param ssl_trace [Boolean] attach the SSL write uprobe when truthy
# @param libssl_path [String, nil] presumably an explicit libssl location
#   (not used in the code visible here)
# @param dlopen_trace [Boolean] attach the dlopen uprobe when truthy
# @param env_trace [Boolean] attach the environment uprobes when truthy
# @param libc_path [String, nil] presumably an explicit libc location
#   (not used in the code visible here)
# @param usdt_so_paths [Object, nil] presumably shared objects to attach USDT
#   span probes to (not used in the code visible here)
def initialize(pin_dir: Vivarium.bpf_pin_dir, socket_path: Vivarium.socket_path,
               ssl_trace: true, libssl_path: nil,
               dlopen_trace: true, env_trace: true, libc_path: nil,
               usdt_so_paths: nil)
  # Pairs follow the order of the keyword list above.
  @pin_dir, @socket_path      = pin_dir, socket_path
  @ssl_trace, @libssl_path    = ssl_trace, libssl_path
  @dlopen_trace, @env_trace   = dlopen_trace, env_trace
  @libc_path, @usdt_so_paths  = libc_path, usdt_so_paths
end

Instance Method Details

#run ⇒ Object



1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
# File 'lib/vivarium.rb', line 1861

# Runs the daemon in the foreground until interrupted.
#
# Steps, in order: verify privileges, create the pin directory, fill the
# detected struct offsets into the BPF program, append the generated USDT span
# probes, compile/load the program, attach the optional uprobes, pin the maps,
# start the ring-buffer poller and the API server, then idle. An Interrupt
# ends the idle loop; the API server is stopped on the way out in every case.
def run
  ensure_root!
  FileUtils.mkdir_p(@pin_dir)

  f_path_offset = detect_f_path_offset
  d_name_offset = detect_dentry_d_name_offset
  d_parent_offset = detect_dentry_d_parent_offset

  # Placeholder token => detected offset, substituted into the C template.
  placeholders = {
    "__VIVARIUM_F_PATH_OFFSET__" => f_path_offset,
    "__VIVARIUM_DENTRY_D_NAME_OFFSET__" => d_name_offset,
    "__VIVARIUM_DENTRY_D_PARENT_OFFSET__" => d_parent_offset
  }
  program = placeholders.reduce(BPF_PROGRAM_TEMPLATE) do |source, (token, offset)|
    source.gsub(token, offset.to_s)
  end

  usdt_so_paths = resolve_usdt_so_paths
  usdt_contexts = build_usdt_contexts(usdt_so_paths)
  program += build_span_probe_sources(usdt_contexts)

  bpf = RbBCC::BCC.new(text: program, usdt_contexts: usdt_contexts.map { |entry| entry.last })

  attach_ssl_write_uprobe(bpf) if @ssl_trace
  attach_dlopen_uprobe(bpf) if @dlopen_trace
  attach_env_uprobes(bpf) if @env_trace

  config_root_targets = bpf["config_root_targets"]
  config_spawned_targets = bpf["config_spawned_targets"]
  otel_ctx = bpf["otel_ctx"]
  events_ringbuf = bpf["events"]

  # Start from empty per-thread state.
  config_spawned_targets.clear
  otel_ctx.clear

  # Pin file name => map, pinned in this order under the pin directory.
  pinned_maps = {
    "config_root_targets" => config_root_targets,
    "config_spawned_targets" => config_spawned_targets,
    "events" => events_ringbuf
  }
  pinned_maps.each do |pin_name, map|
    pin_map(map, File.join(@pin_dir, pin_name))
  end

  event_log = EventLog.new
  registry = Registry.new(config_root_targets, config_spawned_targets, otel_ctx)
  start_ringbuf_poller(bpf, events_ringbuf, event_log)

  @api_server = ApiServer.new(
    socket_path: @socket_path,
    event_log: event_log,
    registry: registry,
    daemon_pid: Process.pid
  )
  @api_server.start

  puts(
    "[vivariumd] started",
    "[vivariumd] pinned maps in #{@pin_dir}",
    "[vivariumd] watching LSM file_open (f_path offset=#{f_path_offset})",
    "[vivariumd] watching inode_unlink (d_parent offset=#{d_parent_offset}, d_name offset=#{d_name_offset})",
    "[vivariumd] API listening on unix:#{@socket_path}"
  )

  # Everything else runs elsewhere; just keep the process alive.
  loop { sleep 1 }
rescue Interrupt
  puts "\n[vivariumd] stopping"
ensure
  @api_server&.stop
end