1
0
mirror of https://github.com/containers/youki synced 2025-04-09 09:09:11 +02:00

Init a seccomp project (#2729)

* Init a seccomp project

Signed-off-by: utam0k <k0ma@utam0k.jp>

* Make gen_validation a static method

Signed-off-by: utam0k <k0ma@utam0k.jp>

---------

Signed-off-by: utam0k <k0ma@utam0k.jp>
This commit is contained in:
Toru Komatsu 2024-04-04 21:32:39 +09:00 committed by GitHub
parent c98c0ca518
commit 13d791b8e1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 644 additions and 1 deletions

@ -14,4 +14,4 @@ jobs:
with:
mode: exactly
count: 1
labels: "kind/feature, kind/bug, kind/documentation, kind/test, kind/cleanup, dependencies"
labels: "kind/feature, kind/bug, kind/documentation, kind/test, kind/cleanup, dependencies, kind/experimental"

@ -1,6 +1,7 @@
[workspace]
resolver = "2"
members = ["crates/*", "tests/contest/*", "tools/*"]
exclude = ["experiment/seccomp"]
[profile.release]
lto = true

147
experiment/seccomp/Cargo.lock generated Normal file

@ -0,0 +1,147 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "anyhow"
version = "1.0.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247"
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bitflags"
version = "2.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cfg_aliases"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e"
[[package]]
name = "libc"
version = "0.2.153"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
[[package]]
name = "memoffset"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
dependencies = [
"autocfg",
]
[[package]]
name = "nix"
version = "0.27.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053"
dependencies = [
"bitflags",
"cfg-if",
"libc",
"memoffset",
]
[[package]]
name = "nix"
version = "0.28.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4"
dependencies = [
"bitflags",
"cfg-if",
"cfg_aliases",
"libc",
]
[[package]]
name = "prctl"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "059a34f111a9dee2ce1ac2826a68b24601c4298cfeb1a587c3cb493d5ab46f52"
dependencies = [
"libc",
"nix 0.28.0",
]
[[package]]
name = "proc-macro2"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
dependencies = [
"proc-macro2",
]
[[package]]
name = "seccomp"
version = "0.0.0"
dependencies = [
"anyhow",
"nix 0.27.1",
"prctl",
"thiserror",
]
[[package]]
name = "syn"
version = "2.0.52"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "1.0.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"

@ -0,0 +1,24 @@
[package]
name = "seccomp"
version = "0.0.0"
description = "Library for seccomp"
license-file = "../../LICENSE"
repository = "https://github.com/containers/youki"
homepage = "https://containers.github.io/youki"
readme = "README.md"
authors = ["youki team"]
edition = "2021"
autoexamples = true
keywords = ["youki", "container", "seccomp"]
[dependencies]
nix = { version = "0.27.1", features = [
"ioctl",
"socket",
"sched",
"mount",
"dir",
] }
thiserror = "1.0.57"
prctl = "1.0.0"
anyhow = "1.0"

@ -0,0 +1,6 @@
This is an experimental project that aims to remove youki's dependency on libseccomp.
Ref: https://github.com/containers/youki/issues/2724
```console
$ cargo run
```

@ -0,0 +1,18 @@
use crate::instruction::Instruction;
use crate::instruction::*;
/// Target architecture whose audit identifier the generated filter validates.
pub enum Arch {
    /// Mapped to `AUDIT_ARCH_X86_64` by `gen_validate`, i.e. 64-bit x86 despite the short name.
    X86,
}
/// Builds the BPF prologue that checks the architecture of the calling syscall.
///
/// The program loads the `arch` field of `seccomp_data` and compares it with the audit
/// identifier selected by `arc`. On a match the comparison skips one instruction, so
/// execution continues with whatever the caller appends; otherwise the next instruction
/// returns `SECCOMP_RET_KILL_PROCESS`.
pub fn gen_validate(arc: &Arch) -> Vec<Instruction> {
    let expected_arch = match arc {
        Arch::X86 => AUDIT_ARCH_X86_64,
    };

    let load_arch = Instruction::stmt(
        BPF_LD | BPF_W | BPF_ABS,
        u32::from(SECCOMP_DATA_ARCH_OFFSET),
    );
    // jump_true = 1 steps over the kill instruction; jump_false = 0 falls into it.
    let skip_kill_on_match = Instruction::jump(BPF_JMP | BPF_JEQ | BPF_K, 1, 0, expected_arch);
    let kill_process = Instruction::stmt(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS);

    Vec::from([load_arch, skip_kill_on_match, kill_process])
}

@ -0,0 +1,63 @@
// BPF instruction classes.
// See /usr/include/linux/bpf_common.h .

/// Load operation.
pub const BPF_LD: u16 = 0x00;
/// ALU operation.
pub const BPF_ALU: u16 = 0x04;
/// Jump operation.
pub const BPF_JMP: u16 = 0x05;
/// Return operation.
pub const BPF_RET: u16 = 0x06;

// BPF ld/ldx fields.
// See /usr/include/linux/bpf_common.h .

/// Operand size is a word.
pub const BPF_W: u16 = 0x00;
/// Load from the data area (where `seccomp_data` is).
pub const BPF_ABS: u16 = 0x20;

// BPF alu fields.
// See /usr/include/linux/bpf_common.h .

/// Bitwise AND.
pub const BPF_AND: u16 = 0x50;

// BPF jmp fields.
// See /usr/include/linux/bpf_common.h .

/// Unconditional jump.
pub const BPF_JA: u16 = 0x00;
/// Jump if equal.
pub const BPF_JEQ: u16 = 0x10;
/// Jump if greater than.
pub const BPF_JGT: u16 = 0x20;
/// Jump if greater than or equal.
pub const BPF_JGE: u16 = 0x30;
/// Use the instruction's constant `k` as the operand.
pub const BPF_K: u16 = 0x00;

// Return codes for BPF programs.
// See /usr/include/linux/seccomp.h .

pub const SECCOMP_RET_ALLOW: u32 = 0x7fff_0000;
pub const SECCOMP_RET_ERRNO: u32 = 0x0005_0000;
pub const SECCOMP_RET_KILL_THREAD: u32 = 0x0000_0000;
pub const SECCOMP_RET_KILL_PROCESS: u32 = 0x8000_0000;
pub const SECCOMP_RET_LOG: u32 = 0x7ffc_0000;
pub const SECCOMP_RET_TRACE: u32 = 0x7ff0_0000;
pub const SECCOMP_RET_TRAP: u32 = 0x0003_0000;
/// Mask selecting the low 16 data bits of a return value.
pub const SECCOMP_RET_MASK: u32 = 0x0000_ffff;
pub const SECCOMP_RET_USER_NOTIF: u32 = 0x7fc0_0000;

// Architecture identifiers.
// See /usr/include/linux/audit.h .

/// ELF machine number for x86-64.
const EM_X86_64: u32 = 62;
/// ELF machine number for AArch64.
const EM_AARCH64: u32 = 183;
/// Audit flag marking a 64-bit architecture.
const AUDIT_ARCH_64BIT: u32 = 0x8000_0000;
/// Audit flag marking a little-endian architecture.
const AUDIT_ARCH_LE: u32 = 0x4000_0000;

pub const AUDIT_ARCH_X86_64: u32 = EM_X86_64 | AUDIT_ARCH_64BIT | AUDIT_ARCH_LE;
pub const AUDIT_ARCH_AARCH64: u32 = EM_AARCH64 | AUDIT_ARCH_64BIT | AUDIT_ARCH_LE;

// Layout of the data area the filter reads from:
//
// ```c
// struct seccomp_data {
//     int nr;
//     __u32 arch;
//     __u64 instruction_pointer;
//     __u64 args[6];
// };
// ```

/// Byte offset of `arch` inside `seccomp_data`.
pub const SECCOMP_DATA_ARCH_OFFSET: u8 = 4;
/// Byte offset of `args[0]` inside `seccomp_data`.
pub const SECCOMP_DATA_ARGS_OFFSET: u8 = 16;
/// Size in bytes of one entry of `args`.
pub const SECCOMP_DATA_ARG_SIZE: u8 = 8;

/// Magic byte used when building the seccomp notify ioctl request codes.
pub const SECCOMP_IOC_MAGIC: u8 = b'!';

@ -0,0 +1,69 @@
use std::os::raw::{c_uchar, c_uint, c_ushort};
/// One BPF instruction, laid out like `sock_filter` from `<linux/filter.h>`.
///
/// See <https://docs.kernel.org/networking/filter.html#structure>.
#[repr(C)]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Instruction {
    /// Opcode of the instruction.
    pub code: c_ushort,
    /// Number of instructions to skip when a jump condition holds.
    pub offset_jump_true: c_uchar,
    /// Number of instructions to skip when a jump condition does not hold.
    pub offset_jump_false: c_uchar,
    /// Generic operand whose meaning depends on `code`.
    pub multiuse_field: c_uint,
}

impl Instruction {
    /// Creates a jump instruction with explicit true/false offsets.
    ///
    /// Arguments are stored in the order `code`, `jump_true`, `jump_false`,
    /// `multiuse_field`.
    pub fn jump(
        code: c_ushort,
        jump_true: c_uchar,
        jump_false: c_uchar,
        multiuse_field: c_uint,
    ) -> Self {
        Self {
            code,
            offset_jump_true: jump_true,
            offset_jump_false: jump_false,
            multiuse_field,
        }
    }

    /// Creates a non-jump instruction: both jump offsets are zero and `k` is the operand.
    pub fn stmt(code: c_ushort, k: c_uint) -> Self {
        Self::jump(code, 0, 0, k)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::instruction::*;

    /// Checks that `stmt` and `jump` store each argument in the matching field.
    #[test]
    fn test_bpf_instructions() {
        assert_eq!(
            Instruction::stmt(BPF_LD | BPF_W | BPF_ABS, 16),
            Instruction {
                code: 0x20,
                offset_jump_true: 0,
                offset_jump_false: 0,
                multiuse_field: 16,
            }
        );
        // `jump` takes (code, jump_true, jump_false, multiuse_field), so the
        // expected struct must carry the arguments in exactly that order.
        assert_eq!(
            Instruction::jump(BPF_JMP | BPF_JEQ | BPF_K, 10, 2, 5),
            Instruction {
                code: 0x15,
                offset_jump_true: 10,
                offset_jump_false: 2,
                multiuse_field: 5,
            }
        );
    }
}

@ -0,0 +1,7 @@
mod arch;
mod consts;
mod inst;
pub use arch::{gen_validate, Arch};
pub use consts::*;
pub use inst::Instruction;

@ -0,0 +1,2 @@
pub mod instruction;
pub mod seccomp;

@ -0,0 +1,127 @@
use nix::unistd::mkdir;
use seccomp::{
instruction::{self, *},
seccomp::{NotifyFd, Seccomp},
};
use std::io::{IoSlice, IoSliceMut};
use std::os::fd::{IntoRawFd, OwnedFd};
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
use std::slice;
use anyhow::Result;
use nix::{
libc,
sys::{
socket::{
self, ControlMessage, ControlMessageOwned, MsgFlags, SockFlag, SockType, UnixAddr,
},
stat::Mode,
},
unistd::close,
};
/// Sends the descriptor behind `fd` over the Unix socket `sock` as an `SCM_RIGHTS`
/// control message.
///
/// A single placeholder byte (`b"x"`) is sent as the regular payload. `sock` is taken by
/// value and is closed when this function returns, whether or not sending succeeded.
///
/// # Errors
///
/// Returns the error reported by `sendmsg`.
fn send_fd<F: AsRawFd>(sock: OwnedFd, fd: &F) -> nix::Result<()> {
    let fd = fd.as_raw_fd();
    let cmsgs = [ControlMessage::ScmRights(slice::from_ref(&fd))];
    let iov = [IoSlice::new(b"x")];
    // Borrow the raw descriptor rather than calling `into_raw_fd()`: the latter gives up
    // ownership without closing, which leaked the socket. `sock` is dropped (and closed)
    // at the end of this function instead.
    socket::sendmsg::<()>(sock.as_raw_fd(), &iov, &cmsgs, MsgFlags::empty(), None)?;
    Ok(())
}
/// Receives one file descriptor passed as an `SCM_RIGHTS` control message on `sock`.
///
/// Only the first control message is inspected, and only its first descriptor is used.
/// Returns `Ok(None)` when that message is missing, is of another kind, or carries no
/// descriptor.
///
/// # Errors
///
/// Returns the error reported by `recvmsg`.
fn recv_fd<F: FromRawFd>(sock: RawFd) -> nix::Result<Option<F>> {
    let mut payload = [];
    let mut iov = [IoSliceMut::new(&mut payload)];
    let mut cmsg_space = nix::cmsg_space!(RawFd);
    let received =
        socket::recvmsg::<UnixAddr>(sock, &mut iov, Some(&mut cmsg_space), MsgFlags::empty())?;

    let first_fd = match received.cmsgs().next() {
        Some(ControlMessageOwned::ScmRights(fds)) => fds.first().copied(),
        _ => None,
    };

    // SAFETY: the descriptor was just received through `SCM_RIGHTS`, so nothing else in
    // this process owns it yet; ownership is handed to `F`.
    Ok(first_fd.map(|raw| unsafe { F::from_raw_fd(raw) }))
}
/// Receives seccomp notifications from `notify_fd` in an endless loop and prints each one.
///
/// Every notification is asserted to be for `mkdir`. The loop only ends when `recv` fails,
/// in which case that error is returned.
fn handle_notifications(notify_fd: NotifyFd) -> nix::Result<()> {
    loop {
        println!("Waiting on next");
        let notification = notify_fd.recv()?;
        let request = &notification.notif;
        assert_eq!(request.data.nr, libc::SYS_mkdir as i32);
        println!(
            "Got notification for mkdir(2): id={}, pid={}, nr={}",
            request.id, request.pid, request.data.nr
        );
    }
}
fn main() -> Result<()> {
let (sock_for_child, sock_for_parent) = socket::socketpair(
socket::AddressFamily::Unix,
SockType::Stream,
None,
SockFlag::empty(),
)?;
let _ = prctl::set_no_new_privileges(true);
let mut bpf_prog = instruction::gen_validate(&Arch::X86);
bpf_prog.append(&mut vec![
// A: Check if syscall is getcwd
Instruction::stmt(BPF_LD | BPF_W | BPF_ABS, 0),
Instruction::jump(BPF_JMP | BPF_JEQ | BPF_K, 0, 1, libc::SYS_getcwd as u32), // If false, go to B
Instruction::stmt(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
// B: Check if syscall is mkdir and if so, return seccomp notify
Instruction::stmt(BPF_LD | BPF_W | BPF_ABS, 0),
Instruction::jump(BPF_JMP | BPF_JEQ | BPF_K, 0, 1, libc::SYS_mkdir as u32), // If false, go to C
Instruction::stmt(BPF_RET | BPF_K, SECCOMP_RET_USER_NOTIF),
// C: Pass
Instruction::stmt(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
]);
let seccomp = Seccomp { filters: bpf_prog };
if let nix::unistd::ForkResult::Child = unsafe { nix::unistd::fork()? } {
// nix::unistd::ForkResult::Parent { child } => match wait::waitpid(child, None)? {
// wait::WaitStatus::Signaled(_, signal, _) => {
// if signal == Signal::SIGSYS {
// println!("Got SIGSYS, seccomp filter applied successfully!");
// return Ok(());
// }
// dbg!(signal);
// }
// wait_status => {
// dbg!("Unexpected wait status: {:?}", wait_status);
// }
// },
std::panic::catch_unwind(|| {
let notify_fd = seccomp.apply().unwrap();
println!(
"Seccomp applied successfully with notify fd: {:?}",
notify_fd
);
send_fd(sock_for_child, &notify_fd).unwrap();
if let Err(e) = mkdir("/tmp/test", Mode::S_IRUSR | Mode::S_IWUSR) {
eprintln!("Failed to mkdir: {}", e);
} else {
println!("mkdir succeeded");
}
})
.unwrap();
std::process::exit(0);
};
let notify_fd = recv_fd::<NotifyFd>(sock_for_parent.as_raw_fd())?.unwrap();
close(sock_for_child.as_raw_fd())?;
close(sock_for_parent.as_raw_fd())?;
handle_notifications(notify_fd)?;
Ok(())
}

@ -0,0 +1,179 @@
use core::fmt;
use std::{
mem::MaybeUninit,
os::{
raw::{c_long, c_uint, c_ulong, c_ushort, c_void},
unix::io::{AsRawFd, FromRawFd, IntoRawFd, RawFd},
},
};
use nix::{
errno::Errno,
ioctl_readwrite, ioctl_write_ptr, libc,
libc::{SECCOMP_FILTER_FLAG_NEW_LISTENER, SECCOMP_SET_MODE_FILTER},
unistd,
};
use crate::instruction::{Instruction, SECCOMP_IOC_MAGIC};
/// Errors returned by the seccomp operations in this module.
#[derive(Debug, thiserror::Error)]
pub enum SeccompError {
    /// Installing the filter failed; the payload is a human-readable description.
    #[error("Failed to apply seccomp rules: {0}")]
    Apply(String),
}
pub struct Seccomp {
pub filters: Vec<Instruction>,
}
impl Seccomp {
pub fn new() -> Self {
Seccomp {
filters: Vec::new(),
}
}
// apply applies the seccomp rules to the current process and return a fd for seccomp notify.
pub fn apply(&self) -> Result<NotifyFd, SeccompError> {
let mut prog = Filters {
len: self.filters.len() as _,
filter: self.filters.as_ptr(),
};
// TODO: Address the case where don't use seccomp notify.
let notify_fd = unsafe {
seccomp(
SECCOMP_SET_MODE_FILTER,
SECCOMP_FILTER_FLAG_NEW_LISTENER,
&mut prog as *mut _ as *mut c_void,
)
};
Errno::result(notify_fd).map_err(|e| SeccompError::Apply(e.to_string()))?;
Ok(unsafe { NotifyFd::from_raw_fd(notify_fd as RawFd) })
}
}
/// Owning wrapper around a seccomp notify file descriptor; the descriptor is closed on drop.
#[derive(Debug)]
pub struct NotifyFd {
    // Raw descriptor owned by this value.
    fd: RawFd,
}
impl Drop for NotifyFd {
    /// Closes the wrapped descriptor.
    fn drop(&mut self) {
        // A failed `close` cannot be recovered from here, and panicking inside `drop`
        // aborts the process if it happens during unwinding, so the result is ignored.
        let _ = unistd::close(self.fd);
    }
}
impl FromRawFd for NotifyFd {
    /// Wraps `fd` without any validation.
    ///
    /// # Safety
    ///
    /// The returned value closes `fd` when dropped, so the caller must pass a descriptor
    /// that is open and not owned by anything else.
    unsafe fn from_raw_fd(fd: RawFd) -> Self {
        NotifyFd { fd }
    }
}
impl IntoRawFd for NotifyFd {
    /// Releases ownership of the descriptor and returns it still open.
    ///
    /// The caller becomes responsible for closing it.
    fn into_raw_fd(self) -> RawFd {
        let fd = self.fd;
        // `RawFd` is `Copy`, so reading the field leaves `self` intact and its `Drop`
        // impl would close the descriptor we are about to return. Forget `self` to
        // skip that.
        std::mem::forget(self);
        fd
    }
}
impl AsRawFd for NotifyFd {
    /// Returns the wrapped descriptor; ownership stays with `self`.
    fn as_raw_fd(&self) -> RawFd {
        self.fd
    }
}
// TODO: Rename
/// Syscall description embedded in a notification (see `SeccompNotif::data`).
///
/// NOTE(review): presumably mirrors the kernel's `struct seccomp_data`
/// (`int nr; __u32 arch; __u64 instruction_pointer; __u64 args[6];`) — confirm against
/// `<linux/seccomp.h>`.
#[repr(C)]
#[derive(Debug)]
pub struct SeccompData {
    pub nr: libc::c_int,
    pub arch: u32,
    pub instruction_pointer: u64,
    pub args: [u64; 6],
}
/// Argument type of the `seccomp_notif_ioctl_recv` ioctl.
///
/// NOTE(review): presumably mirrors the kernel's `struct seccomp_notif` — confirm the
/// field order against `<linux/seccomp.h>`.
#[repr(C)]
#[derive(Debug)]
pub struct SeccompNotif {
    pub id: u64,
    pub pid: u32,
    pub flags: u32,
    pub data: SeccompData,
}
/// Argument type of the `seccomp_notif_ioctl_send` ioctl.
///
/// NOTE(review): presumably mirrors the kernel's `struct seccomp_notif_resp` — confirm
/// against `<linux/seccomp.h>`.
#[repr(C)]
#[derive(Debug)]
pub struct SeccompNotifResp {
    pub id: u64,
    pub val: i64,
    pub error: i32,
    pub flags: u32,
}
/// Three 16-bit size fields, one per notification-related structure.
///
/// NOTE(review): presumably mirrors the kernel's `struct seccomp_notif_sizes`; nothing
/// in this file uses it yet — confirm the intended use.
#[repr(C)]
#[derive(Debug)]
pub struct SeccompNotifSizes {
    pub seccomp_notif: u16,
    pub seccomp_notif_resp: u16,
    pub seccomp_data: u16,
}
/// Argument type of the `seccomp_notif_ioctl_addfd` ioctl.
///
/// NOTE(review): presumably mirrors the kernel's `struct seccomp_notif_addfd` — confirm
/// against `<linux/seccomp.h>`.
#[repr(C)]
#[derive(Debug)]
pub struct SeccompNotifAddfd {
    pub id: u64,
    pub flags: u32,
    pub srcfd: u32,
    pub newfd: u32,
    pub newfd_flags: u32,
}
// ioctl wrappers for the seccomp notify fd, generated by nix's ioctl macros. Each
// invocation defines an `unsafe fn` named by its first argument; the request code is built
// from `SECCOMP_IOC_MAGIC`, the sequence number, and the argument type.

// Sequence number 0: read/write a `SeccompNotif`.
ioctl_readwrite!(seccomp_notif_ioctl_recv, SECCOMP_IOC_MAGIC, 0, SeccompNotif);
// Sequence number 1: read/write a `SeccompNotifResp`.
ioctl_readwrite!(
    seccomp_notif_ioctl_send,
    SECCOMP_IOC_MAGIC,
    1,
    SeccompNotifResp
);
// Sequence number 2: write a `u64` (presumably a notification id — confirm).
ioctl_write_ptr!(seccomp_notif_ioctl_id_valid, SECCOMP_IOC_MAGIC, 2, u64);
// Sequence number 3: write a `SeccompNotifAddfd`.
ioctl_write_ptr!(
    seccomp_notif_ioctl_addfd,
    SECCOMP_IOC_MAGIC,
    3,
    SeccompNotifAddfd
);
/// A notification returned by `NotifyFd::recv`, paired with the fd it was read from.
pub struct Notification<'f> {
    /// The notification data filled in by the receive ioctl.
    pub notif: SeccompNotif,
    /// The notify fd this notification was received on.
    pub fd: &'f NotifyFd,
}

impl<'f> fmt::Debug for Notification<'f> {
    /// Formats only the `notif` payload; the `fd` reference is left out.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(&self.notif, f)
    }
}
impl NotifyFd {
pub fn recv(&self) -> nix::Result<Notification> {
let mut res = MaybeUninit::zeroed();
let notif = unsafe {
seccomp_notif_ioctl_recv(self.fd, res.as_mut_ptr())?;
res.assume_init()
};
Ok(Notification { notif, fd: &self })
}
}
/// Thin wrapper that invokes the raw `seccomp` syscall via `libc::syscall`.
///
/// Returns the raw syscall result unchanged; callers interpret it (see `Seccomp::apply`).
///
/// # Safety
///
/// `args` must point to whatever structure the kernel expects for the given `op` and
/// must stay valid for the duration of the call.
unsafe fn seccomp(op: c_uint, flags: c_ulong, args: *mut c_void) -> c_long {
    libc::syscall(libc::SYS_seccomp, op, flags, args)
}
/// Program descriptor passed to the `seccomp` syscall by `Seccomp::apply`.
///
/// NOTE(review): presumably mirrors the kernel's `struct sock_fprog` — confirm against
/// `<linux/filter.h>`.
#[repr(C)]
struct Filters {
    /// Number of instructions `filter` points to.
    pub len: c_ushort,
    /// Pointer to the first instruction; must outlive the syscall that reads it.
    pub filter: *const Instruction,
}