From bef32e083f5f0aa5a71f25decc2e08454462ae03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kris=20N=C3=B3va?= Date: Thu, 23 Mar 2023 10:57:51 -0700 Subject: [PATCH] Introduce seccomp feature for libcontainer with musl (#1484) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Introduce seccomp feature for libcontainer with musl Signed-off-by: Kris Nóva * remove mystery merge conflict Signed-off-by: Eric Fang --------- Signed-off-by: Kris Nóva Signed-off-by: Eric Fang Co-authored-by: Eric Fang --- crates/libcontainer/Cargo.toml | 5 ++-- crates/libcontainer/README.md | 21 +++++++++++++ crates/libcontainer/src/lib.rs | 4 ++- .../src/process/container_init_process.rs | 24 +++++++++++++-- .../src/process/container_main_process.rs | 28 +++++++++++------ crates/libcontainer/src/syscall/linux.rs | 30 +++++++++++-------- crates/libcontainer/src/utils.rs | 1 + scripts/features_test.sh | 15 +++++++++- 8 files changed, 101 insertions(+), 27 deletions(-) diff --git a/crates/libcontainer/Cargo.toml b/crates/libcontainer/Cargo.toml index 1c5b8529..a4aa96c9 100644 --- a/crates/libcontainer/Cargo.toml +++ b/crates/libcontainer/Cargo.toml @@ -12,7 +12,8 @@ rust-version = "1.58.1" keywords = ["youki", "container", "cgroups"] [features] -default = ["systemd", "v2", "v1"] +default = ["systemd", "v2", "v1", "libseccomp"] +libseccomp = ["dep:libseccomp"] systemd = ["libcgroups/systemd", "v2"] v2 = ["libcgroups/v2"] v1 = ["libcgroups/v1"] @@ -35,7 +36,7 @@ oci-spec = { version = "^0.6.0", features = ["runtime"] } procfs = "0.15.1" prctl = "1.0.0" libcgroups = { version = "0.0.4", path = "../libcgroups", default-features = false } -libseccomp = { version = "0.3.0" } +libseccomp = { version = "0.3.0", optional=true } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" syscalls = "0.6.8" diff --git a/crates/libcontainer/README.md b/crates/libcontainer/README.md index ec79d705..f31f8195 100644 --- a/crates/libcontainer/README.md +++ 
b/crates/libcontainer/README.md @@ -1 +1,22 @@ # libcontainer + +### Building with musl + +In order to build with musl you must first remove the libseccomp dependency as it will reference shared libraries (`libdbus` and `libseccomp`) which cannot be built with musl. + +Do this by adding flags to Cargo. Use the `--no-default-features` flag followed by `-F` and whatever features you intend to build with such as `v2` as defined in Cargo.toml under the features section. + +Next you will also need the `+nightly` flags when building with `rustup` and `cargo`. + +```bash +# Add rustup +nightly musl to toolchain +rustup +nightly target add $(uname -m)-unknown-linux-musl + +# Build rustup +nightly stdlib with musl +rustup +nightly toolchain install nightly-$(uname -m)-unknown-linux-musl + +# Build musl standard library +cargo +nightly build -Zbuild-std --target $(uname -m)-unknown-linux-musl --no-default-features -F v2 + +cargo +nightly build --target $(uname -m)-unknown-linux-musl --no-default-features -F v2 +``` \ No newline at end of file diff --git a/crates/libcontainer/src/lib.rs b/crates/libcontainer/src/lib.rs index 4d28afaa..4ab5fbd5 100644 --- a/crates/libcontainer/src/lib.rs +++ b/crates/libcontainer/src/lib.rs @@ -8,9 +8,11 @@ pub mod notify_socket; pub mod process; pub mod rootfs; pub mod rootless; -pub mod seccomp; pub mod signal; pub mod syscall; pub mod tty; pub mod utils; pub mod workload; + +#[cfg(feature = "libseccomp")] +pub mod seccomp; diff --git a/crates/libcontainer/src/process/container_init_process.rs b/crates/libcontainer/src/process/container_init_process.rs index 2ab7a986..b31cc470 100644 --- a/crates/libcontainer/src/process/container_init_process.rs +++ b/crates/libcontainer/src/process/container_init_process.rs @@ -3,14 +3,13 @@ use crate::apparmor; use crate::syscall::Syscall; use crate::{ capabilities, hooks, namespaces::Namespaces, process::channel, rootfs::RootFS, - rootless::Rootless, seccomp, tty, utils, + rootless::Rootless, tty, 
utils, }; use anyhow::{bail, Context, Ok, Result}; use nix::mount::MsFlags; use nix::sched::CloneFlags; use nix::sys::stat::Mode; use nix::unistd::setsid; - use nix::unistd::{self, Gid, Uid}; use oci_spec::runtime::{LinuxNamespaceType, Spec, User}; use std::collections::HashMap; @@ -20,6 +19,12 @@ use std::{ path::{Path, PathBuf}, }; +#[cfg(feature = "libseccomp")] +use crate::seccomp; + +#[cfg(not(feature = "libseccomp"))] +use log::warn; + fn sysctl(kernel_params: &HashMap) -> Result<()> { let sys = PathBuf::from("/proc/sys"); for (kernel_param, value) in kernel_params { @@ -158,6 +163,7 @@ fn reopen_dev_null() -> Result<()> { Ok(()) } +#[allow(unused_variables)] pub fn container_init_process( args: &ContainerArgs, main_sender: &mut channel::MainSender, @@ -348,6 +354,7 @@ pub fn container_init_process( // Without no new privileges, seccomp is a privileged operation. We have to // do this before dropping capabilities. Otherwise, we should do it later, // as close to exec as possible. + #[cfg(feature = "libseccomp")] if let Some(seccomp) = linux.seccomp() { if proc.no_new_privileges().is_none() { let notify_fd = @@ -356,6 +363,10 @@ pub fn container_init_process( .context("failed to sync seccomp")?; } } + #[cfg(not(feature = "libseccomp"))] + if proc.no_new_privileges().is_none() { + warn!("seccomp not available, unable to enforce no_new_privileges!") + } capabilities::reset_effective(syscall).context("Failed to reset effective capabilities")?; if let Some(caps) = proc.capabilities() { @@ -384,6 +395,7 @@ pub fn container_init_process( // Initialize seccomp profile right before we are ready to execute the // payload so as few syscalls will happen between here and payload exec. The // notify socket will still need network related syscalls. 
+ #[cfg(feature = "libseccomp")] if let Some(seccomp) = linux.seccomp() { if proc.no_new_privileges().is_some() { let notify_fd = @@ -392,6 +404,10 @@ pub fn container_init_process( .context("failed to sync seccomp")?; } } + #[cfg(not(feature = "libseccomp"))] + if proc.no_new_privileges().is_some() { + warn!("seccomp not available, unable to set seccomp privileges!") + } // this checks if the binary to run actually exists and if we have permissions to run it. // Taken from https://github.com/opencontainers/runc/blob/25c9e888686773e7e06429133578038a9abc091d/libcontainer/standard_init_linux.go#L195-L206 @@ -514,6 +530,7 @@ fn set_supplementary_gids( Ok(()) } +#[cfg(feature = "libseccomp")] fn sync_seccomp( fd: Option, main_sender: &mut channel::MainSender, @@ -539,8 +556,10 @@ mod tests { syscall::create_syscall, test::{ArgName, MountArgs, TestHelperSyscall}, }; + #[cfg(feature = "libseccomp")] use nix::unistd; use oci_spec::runtime::{LinuxNamespaceBuilder, SpecBuilder, UserBuilder}; + #[cfg(feature = "libseccomp")] use serial_test::serial; use std::fs; @@ -675,6 +694,7 @@ mod tests { #[test] #[serial] + #[cfg(feature = "libseccomp")] fn test_sync_seccomp() -> Result<()> { use std::os::unix::io::IntoRawFd; use std::thread; diff --git a/crates/libcontainer/src/process/container_main_process.rs b/crates/libcontainer/src/process/container_main_process.rs index a2b89d9d..5dc2b71e 100644 --- a/crates/libcontainer/src/process/container_main_process.rs +++ b/crates/libcontainer/src/process/container_main_process.rs @@ -2,17 +2,22 @@ use crate::{ container::ContainerProcessState, process::{args::ContainerArgs, channel, container_intermediate_process, fork}, rootless::Rootless, - seccomp, utils, + utils, }; use anyhow::{Context, Result}; +use nix::sys::wait::{waitpid, WaitStatus}; +use nix::unistd::Pid; + +#[cfg(feature = "libseccomp")] +use crate::seccomp; +#[cfg(feature = "libseccomp")] use nix::{ - sys::{ - socket::{self, UnixAddr}, - wait::{waitpid, WaitStatus}, - }, - 
unistd::{self, Pid}, + sys::socket::{self, UnixAddr}, + unistd::{self}, }; +#[cfg(feature = "libseccomp")] use oci_spec::runtime; +#[cfg(feature = "libseccomp")] use std::{io::IoSlice, path::Path}; pub fn container_main_process(container_args: &ContainerArgs) -> Result { @@ -66,6 +71,7 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result { if let Some(linux) = container_args.spec.linux() { if let Some(seccomp) = linux.seccomp() { + #[allow(unused_variables)] let state = ContainerProcessState { oci_version: container_args.spec.version().to_string(), // runc hardcode the `seccompFd` name for fds. @@ -79,6 +85,7 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result { .state .clone(), }; + #[cfg(feature = "libseccomp")] sync_seccomp(seccomp, &state, init_sender, main_receiver) .context("failed to sync seccomp with init")?; } @@ -114,6 +121,7 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result { Ok(init_pid) } +#[cfg(feature = "libseccomp")] fn sync_seccomp( seccomp: &runtime::LinuxSeccomp, state: &ContainerProcessState, @@ -141,6 +149,7 @@ fn sync_seccomp( Ok(()) } +#[cfg(feature = "libseccomp")] fn sync_seccomp_send_msg(listener_path: &Path, msg: &[u8], fd: i32) -> Result<()> { // The seccomp listener has specific instructions on how to transmit the // information through seccomp listener. 
Therefore, we have to use @@ -197,9 +206,9 @@ mod tests { sched::{unshare, CloneFlags}, unistd::{self, getgid, getuid}, }; - use oci_spec::runtime::{ - LinuxIdMappingBuilder, LinuxSeccompAction, LinuxSeccompBuilder, LinuxSyscallBuilder, - }; + use oci_spec::runtime::LinuxIdMappingBuilder; + #[cfg(feature = "libseccomp")] + use oci_spec::runtime::{LinuxSeccompAction, LinuxSeccompBuilder, LinuxSyscallBuilder}; use serial_test::serial; use std::fs; @@ -320,6 +329,7 @@ mod tests { #[test] #[serial] + #[cfg(feature = "libseccomp")] fn test_sync_seccomp() -> Result<()> { use std::io::Read; use std::os::unix::io::IntoRawFd; diff --git a/crates/libcontainer/src/syscall/linux.rs b/crates/libcontainer/src/syscall/linux.rs index ef332c98..84209be1 100644 --- a/crates/libcontainer/src/syscall/linux.rs +++ b/crates/libcontainer/src/syscall/linux.rs @@ -1,13 +1,4 @@ //! Implements Command trait for Linux systems -use std::ffi::{CStr, CString, OsStr}; -use std::fs; -use std::os::unix::ffi::OsStrExt; -use std::os::unix::fs::symlink; -use std::os::unix::io::RawFd; -use std::str::FromStr; -use std::sync::Arc; -use std::{any::Any, mem, path::Path, ptr}; - use anyhow::{anyhow, bail, Context, Error, Result}; use caps::{CapSet, CapsHashSet}; use libc::{c_char, setdomainname, uid_t}; @@ -21,9 +12,16 @@ use nix::{ unistd, unistd::{chown, fchdir, pivot_root, setgroups, sethostname, Gid, Uid}, }; -use syscalls::{syscall, Sysno, Sysno::close_range}; - use oci_spec::runtime::LinuxRlimit; +use std::ffi::{CStr, CString, OsStr}; +use std::fs; +use std::os::unix::ffi::OsStrExt; +use std::os::unix::fs::symlink; +use std::os::unix::io::RawFd; +use std::str::FromStr; +use std::sync::Arc; +use std::{any::Any, mem, path::Path, ptr}; +use syscalls::{syscall, Sysno, Sysno::close_range}; use super::Syscall; use crate::syscall::syscall::CloseRange; @@ -374,7 +372,13 @@ impl Syscall for LinuxSyscall { rlim_cur: rlimit.soft(), rlim_max: rlimit.hard(), }; + + // Change for musl libc based on seccomp needs + 
#[cfg(not(target_env = "musl"))] let res = unsafe { libc::setrlimit(rlimit.typ() as u32, rlim) }; + #[cfg(target_env = "musl")] + let res = unsafe { libc::setrlimit(rlimit.typ() as i32, rlim) }; + if let Err(e) = Errno::result(res).map(drop) { bail!("Failed to set {:?}. {:?}", rlimit.typ(), e) } @@ -472,7 +476,6 @@ impl Syscall for LinuxSyscall { CloseRange::CLOEXEC.bits() ) }; - match result { Ok(_) => Ok(()), Err(e) if e == syscalls::Errno::ENOSYS || e == syscalls::Errno::EINVAL => { @@ -500,6 +503,9 @@ impl Syscall for LinuxSyscall { }; let result = unsafe { // TODO: nix/libc crate hasn't supported mount_setattr system call yet. + // TODO: @krisnova migrate all youki to libc::SYS_mount_setattr + // https://docs.rs/libc/0.2.139/libc/constant.SYS_mount_setattr.html + // https://docs.rs/libc/0.2.139/libc/fn.syscall.html syscall!( Sysno::mount_setattr, dirfd, diff --git a/crates/libcontainer/src/utils.rs b/crates/libcontainer/src/utils.rs index efc6335e..09a5fa7d 100644 --- a/crates/libcontainer/src/utils.rs +++ b/crates/libcontainer/src/utils.rs @@ -371,6 +371,7 @@ pub(crate) mod test_utils { message: String, } + #[allow(dead_code)] pub fn test_in_child_process Result<()>>(cb: F) -> Result<()> { let (mut sender, mut receiver) = channel::channel::()?; match unsafe { nix::unistd::fork()? 
} { diff --git a/scripts/features_test.sh b/scripts/features_test.sh index 14bc3de2..62cdf606 100755 --- a/scripts/features_test.sh +++ b/scripts/features_test.sh @@ -1,5 +1,4 @@ #!/bin/bash - set -eu # Build the different features individually @@ -16,4 +15,18 @@ cargo test --no-default-features -F systemd cargo test --no-default-features -F v2 -F cgroupsv2_devices cargo test --no-default-features -F systemd -F cgroupsv2_devices +# Build with musl: libcontainer +cargo +nightly build \ + -Zbuild-std \ + --target $(uname -m)-unknown-linux-musl \ + --package libcontainer \ + --no-default-features -F v2 + +# Test with musl: libcontainer +cargo +nightly test \ + -Zbuild-std \ + --target $(uname -m)-unknown-linux-musl \ + --package libcontainer \ + --no-default-features -F v2 + exit 0 \ No newline at end of file