mirror of
https://github.com/containers/youki
synced 2024-05-08 00:26:14 +02:00
Introduce seccomp feature for libcontainer with musl (#1484)
* Introduce seccomp feature for libcontainer with musl Signed-off-by: Kris Nóva <kris@nivenly.com> * remove mystery merge conflict Signed-off-by: Eric Fang <yihuaf@unkies.org> --------- Signed-off-by: Kris Nóva <kris@nivenly.com> Signed-off-by: Eric Fang <yihuaf@unkies.org> Co-authored-by: Eric Fang <yihuaf@unkies.org>
This commit is contained in:
parent
55d8368e8e
commit
bef32e083f
|
@ -12,7 +12,8 @@ rust-version = "1.58.1"
|
|||
keywords = ["youki", "container", "cgroups"]
|
||||
|
||||
[features]
|
||||
default = ["systemd", "v2", "v1"]
|
||||
default = ["systemd", "v2", "v1", "libseccomp"]
|
||||
libseccomp = ["dep:libseccomp"]
|
||||
systemd = ["libcgroups/systemd", "v2"]
|
||||
v2 = ["libcgroups/v2"]
|
||||
v1 = ["libcgroups/v1"]
|
||||
|
@ -35,7 +36,7 @@ oci-spec = { version = "^0.6.0", features = ["runtime"] }
|
|||
procfs = "0.15.1"
|
||||
prctl = "1.0.0"
|
||||
libcgroups = { version = "0.0.4", path = "../libcgroups", default-features = false }
|
||||
libseccomp = { version = "0.3.0" }
|
||||
libseccomp = { version = "0.3.0", optional=true }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
syscalls = "0.6.8"
|
||||
|
|
|
@ -1 +1,22 @@
|
|||
# libcontainer
|
||||
|
||||
### Building with musl
|
||||
|
||||
In order to build with musl you must first remove the libseccomp dependency as it will reference shared libraries (`libdbus` and `libseccomp`) which cannot be built with musl.
|
||||
|
||||
Do this by adding flags to Cargo: use the `--no-default-features` flag followed by `-F` and whichever features you intend to build with, such as `v2`, as defined in the `[features]` section of Cargo.toml.
|
||||
|
||||
Next, you will also need the `+nightly` flag when invoking `rustup` and `cargo`.
|
||||
|
||||
```bash
|
||||
# Add rustup +nightly musl to toolchain
|
||||
rustup +nightly target add $(uname -m)-unknown-linux-musl
|
||||
|
||||
# Build rustup +nightly stdlib with musl
|
||||
rustup +nightly toolchain install nightly-$(uname -m)-unknown-linux-musl
|
||||
|
||||
# Build musl standard library
|
||||
cargo +nightly build -Zbuild-std --target $(uname -m)-unknown-linux-musl --no-default-features -F v2
|
||||
|
||||
cargo +nightly build --target $(uname -m)-unknown-linux-musl --no-default-features -F v2
|
||||
```
|
|
@ -8,9 +8,11 @@ pub mod notify_socket;
|
|||
pub mod process;
|
||||
pub mod rootfs;
|
||||
pub mod rootless;
|
||||
pub mod seccomp;
|
||||
pub mod signal;
|
||||
pub mod syscall;
|
||||
pub mod tty;
|
||||
pub mod utils;
|
||||
pub mod workload;
|
||||
|
||||
#[cfg(feature = "libseccomp")]
|
||||
pub mod seccomp;
|
||||
|
|
|
@ -3,14 +3,13 @@ use crate::apparmor;
|
|||
use crate::syscall::Syscall;
|
||||
use crate::{
|
||||
capabilities, hooks, namespaces::Namespaces, process::channel, rootfs::RootFS,
|
||||
rootless::Rootless, seccomp, tty, utils,
|
||||
rootless::Rootless, tty, utils,
|
||||
};
|
||||
use anyhow::{bail, Context, Ok, Result};
|
||||
use nix::mount::MsFlags;
|
||||
use nix::sched::CloneFlags;
|
||||
use nix::sys::stat::Mode;
|
||||
use nix::unistd::setsid;
|
||||
|
||||
use nix::unistd::{self, Gid, Uid};
|
||||
use oci_spec::runtime::{LinuxNamespaceType, Spec, User};
|
||||
use std::collections::HashMap;
|
||||
|
@ -20,6 +19,12 @@ use std::{
|
|||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
#[cfg(feature = "libseccomp")]
|
||||
use crate::seccomp;
|
||||
|
||||
#[cfg(not(feature = "libseccomp"))]
|
||||
use log::warn;
|
||||
|
||||
fn sysctl(kernel_params: &HashMap<String, String>) -> Result<()> {
|
||||
let sys = PathBuf::from("/proc/sys");
|
||||
for (kernel_param, value) in kernel_params {
|
||||
|
@ -158,6 +163,7 @@ fn reopen_dev_null() -> Result<()> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[allow(unused_variables)]
|
||||
pub fn container_init_process(
|
||||
args: &ContainerArgs,
|
||||
main_sender: &mut channel::MainSender,
|
||||
|
@ -348,6 +354,7 @@ pub fn container_init_process(
|
|||
// Without no new privileges, seccomp is a privileged operation. We have to
|
||||
// do this before dropping capabilities. Otherwise, we should do it later,
|
||||
// as close to exec as possible.
|
||||
#[cfg(feature = "libseccomp")]
|
||||
if let Some(seccomp) = linux.seccomp() {
|
||||
if proc.no_new_privileges().is_none() {
|
||||
let notify_fd =
|
||||
|
@ -356,6 +363,10 @@ pub fn container_init_process(
|
|||
.context("failed to sync seccomp")?;
|
||||
}
|
||||
}
|
||||
#[cfg(not(feature = "libseccomp"))]
|
||||
if proc.no_new_privileges().is_none() {
|
||||
warn!("seccomp not available, unable to enforce no_new_privileges!")
|
||||
}
|
||||
|
||||
capabilities::reset_effective(syscall).context("Failed to reset effective capabilities")?;
|
||||
if let Some(caps) = proc.capabilities() {
|
||||
|
@ -384,6 +395,7 @@ pub fn container_init_process(
|
|||
// Initialize seccomp profile right before we are ready to execute the
|
||||
// payload so as few syscalls will happen between here and payload exec. The
|
||||
// notify socket will still need network related syscalls.
|
||||
#[cfg(feature = "libseccomp")]
|
||||
if let Some(seccomp) = linux.seccomp() {
|
||||
if proc.no_new_privileges().is_some() {
|
||||
let notify_fd =
|
||||
|
@ -392,6 +404,10 @@ pub fn container_init_process(
|
|||
.context("failed to sync seccomp")?;
|
||||
}
|
||||
}
|
||||
#[cfg(not(feature = "libseccomp"))]
|
||||
if proc.no_new_privileges().is_some() {
|
||||
warn!("seccomp not available, unable to set seccomp privileges!")
|
||||
}
|
||||
|
||||
// this checks if the binary to run actually exists and if we have permissions to run it.
|
||||
// Taken from https://github.com/opencontainers/runc/blob/25c9e888686773e7e06429133578038a9abc091d/libcontainer/standard_init_linux.go#L195-L206
|
||||
|
@ -514,6 +530,7 @@ fn set_supplementary_gids(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(feature = "libseccomp")]
|
||||
fn sync_seccomp(
|
||||
fd: Option<i32>,
|
||||
main_sender: &mut channel::MainSender,
|
||||
|
@ -539,8 +556,10 @@ mod tests {
|
|||
syscall::create_syscall,
|
||||
test::{ArgName, MountArgs, TestHelperSyscall},
|
||||
};
|
||||
#[cfg(feature = "libseccomp")]
|
||||
use nix::unistd;
|
||||
use oci_spec::runtime::{LinuxNamespaceBuilder, SpecBuilder, UserBuilder};
|
||||
#[cfg(feature = "libseccomp")]
|
||||
use serial_test::serial;
|
||||
use std::fs;
|
||||
|
||||
|
@ -675,6 +694,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
#[serial]
|
||||
#[cfg(feature = "libseccomp")]
|
||||
fn test_sync_seccomp() -> Result<()> {
|
||||
use std::os::unix::io::IntoRawFd;
|
||||
use std::thread;
|
||||
|
|
|
@ -2,17 +2,22 @@ use crate::{
|
|||
container::ContainerProcessState,
|
||||
process::{args::ContainerArgs, channel, container_intermediate_process, fork},
|
||||
rootless::Rootless,
|
||||
seccomp, utils,
|
||||
utils,
|
||||
};
|
||||
use anyhow::{Context, Result};
|
||||
use nix::sys::wait::{waitpid, WaitStatus};
|
||||
use nix::unistd::Pid;
|
||||
|
||||
#[cfg(feature = "libseccomp")]
|
||||
use crate::seccomp;
|
||||
#[cfg(feature = "libseccomp")]
|
||||
use nix::{
|
||||
sys::{
|
||||
socket::{self, UnixAddr},
|
||||
wait::{waitpid, WaitStatus},
|
||||
},
|
||||
unistd::{self, Pid},
|
||||
sys::socket::{self, UnixAddr},
|
||||
unistd::{self},
|
||||
};
|
||||
#[cfg(feature = "libseccomp")]
|
||||
use oci_spec::runtime;
|
||||
#[cfg(feature = "libseccomp")]
|
||||
use std::{io::IoSlice, path::Path};
|
||||
|
||||
pub fn container_main_process(container_args: &ContainerArgs) -> Result<Pid> {
|
||||
|
@ -66,6 +71,7 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<Pid> {
|
|||
|
||||
if let Some(linux) = container_args.spec.linux() {
|
||||
if let Some(seccomp) = linux.seccomp() {
|
||||
#[allow(unused_variables)]
|
||||
let state = ContainerProcessState {
|
||||
oci_version: container_args.spec.version().to_string(),
|
||||
// runc hardcodes the `seccompFd` name for fds.
|
||||
|
@ -79,6 +85,7 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<Pid> {
|
|||
.state
|
||||
.clone(),
|
||||
};
|
||||
#[cfg(feature = "libseccomp")]
|
||||
sync_seccomp(seccomp, &state, init_sender, main_receiver)
|
||||
.context("failed to sync seccomp with init")?;
|
||||
}
|
||||
|
@ -114,6 +121,7 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<Pid> {
|
|||
Ok(init_pid)
|
||||
}
|
||||
|
||||
#[cfg(feature = "libseccomp")]
|
||||
fn sync_seccomp(
|
||||
seccomp: &runtime::LinuxSeccomp,
|
||||
state: &ContainerProcessState,
|
||||
|
@ -141,6 +149,7 @@ fn sync_seccomp(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(feature = "libseccomp")]
|
||||
fn sync_seccomp_send_msg(listener_path: &Path, msg: &[u8], fd: i32) -> Result<()> {
|
||||
// The seccomp listener has specific instructions on how to transmit the
|
||||
// information through seccomp listener. Therefore, we have to use
|
||||
|
@ -197,9 +206,9 @@ mod tests {
|
|||
sched::{unshare, CloneFlags},
|
||||
unistd::{self, getgid, getuid},
|
||||
};
|
||||
use oci_spec::runtime::{
|
||||
LinuxIdMappingBuilder, LinuxSeccompAction, LinuxSeccompBuilder, LinuxSyscallBuilder,
|
||||
};
|
||||
use oci_spec::runtime::LinuxIdMappingBuilder;
|
||||
#[cfg(feature = "libseccomp")]
|
||||
use oci_spec::runtime::{LinuxSeccompAction, LinuxSeccompBuilder, LinuxSyscallBuilder};
|
||||
use serial_test::serial;
|
||||
use std::fs;
|
||||
|
||||
|
@ -320,6 +329,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
#[serial]
|
||||
#[cfg(feature = "libseccomp")]
|
||||
fn test_sync_seccomp() -> Result<()> {
|
||||
use std::io::Read;
|
||||
use std::os::unix::io::IntoRawFd;
|
||||
|
|
|
@ -1,13 +1,4 @@
|
|||
//! Implements Command trait for Linux systems
|
||||
use std::ffi::{CStr, CString, OsStr};
|
||||
use std::fs;
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use std::os::unix::fs::symlink;
|
||||
use std::os::unix::io::RawFd;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::{any::Any, mem, path::Path, ptr};
|
||||
|
||||
use anyhow::{anyhow, bail, Context, Error, Result};
|
||||
use caps::{CapSet, CapsHashSet};
|
||||
use libc::{c_char, setdomainname, uid_t};
|
||||
|
@ -21,9 +12,16 @@ use nix::{
|
|||
unistd,
|
||||
unistd::{chown, fchdir, pivot_root, setgroups, sethostname, Gid, Uid},
|
||||
};
|
||||
use syscalls::{syscall, Sysno, Sysno::close_range};
|
||||
|
||||
use oci_spec::runtime::LinuxRlimit;
|
||||
use std::ffi::{CStr, CString, OsStr};
|
||||
use std::fs;
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use std::os::unix::fs::symlink;
|
||||
use std::os::unix::io::RawFd;
|
||||
use std::str::FromStr;
|
||||
use std::sync::Arc;
|
||||
use std::{any::Any, mem, path::Path, ptr};
|
||||
use syscalls::{syscall, Sysno, Sysno::close_range};
|
||||
|
||||
use super::Syscall;
|
||||
use crate::syscall::syscall::CloseRange;
|
||||
|
@ -374,7 +372,13 @@ impl Syscall for LinuxSyscall {
|
|||
rlim_cur: rlimit.soft(),
|
||||
rlim_max: rlimit.hard(),
|
||||
};
|
||||
|
||||
// Change for musl libc based on seccomp needs
|
||||
#[cfg(not(target_env = "musl"))]
|
||||
let res = unsafe { libc::setrlimit(rlimit.typ() as u32, rlim) };
|
||||
#[cfg(target_env = "musl")]
|
||||
let res = unsafe { libc::setrlimit(rlimit.typ() as i32, rlim) };
|
||||
|
||||
if let Err(e) = Errno::result(res).map(drop) {
|
||||
bail!("Failed to set {:?}. {:?}", rlimit.typ(), e)
|
||||
}
|
||||
|
@ -472,7 +476,6 @@ impl Syscall for LinuxSyscall {
|
|||
CloseRange::CLOEXEC.bits()
|
||||
)
|
||||
};
|
||||
|
||||
match result {
|
||||
Ok(_) => Ok(()),
|
||||
Err(e) if e == syscalls::Errno::ENOSYS || e == syscalls::Errno::EINVAL => {
|
||||
|
@ -500,6 +503,9 @@ impl Syscall for LinuxSyscall {
|
|||
};
|
||||
let result = unsafe {
|
||||
// TODO: nix/libc crate hasn't supported mount_setattr system call yet.
|
||||
// TODO: @krisnova migrate all youki to libc::SYS_mount_setattr
|
||||
// https://docs.rs/libc/0.2.139/libc/constant.SYS_mount_setattr.html
|
||||
// https://docs.rs/libc/0.2.139/libc/fn.syscall.html
|
||||
syscall!(
|
||||
Sysno::mount_setattr,
|
||||
dirfd,
|
||||
|
|
|
@ -371,6 +371,7 @@ pub(crate) mod test_utils {
|
|||
message: String,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn test_in_child_process<F: FnOnce() -> Result<()>>(cb: F) -> Result<()> {
|
||||
let (mut sender, mut receiver) = channel::channel::<TestResult>()?;
|
||||
match unsafe { nix::unistd::fork()? } {
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -eu
|
||||
|
||||
# Build the different features individually
|
||||
|
@ -16,4 +15,18 @@ cargo test --no-default-features -F systemd
|
|||
cargo test --no-default-features -F v2 -F cgroupsv2_devices
|
||||
cargo test --no-default-features -F systemd -F cgroupsv2_devices
|
||||
|
||||
# Build with musl: libcontainer
|
||||
cargo +nightly build \
|
||||
-Zbuild-std \
|
||||
--target $(uname -m)-unknown-linux-musl \
|
||||
--package libcontainer \
|
||||
--no-default-features -F v2
|
||||
|
||||
# Test with musl: libcontainer
|
||||
cargo +nightly test \
|
||||
-Zbuild-std \
|
||||
--target $(uname -m)-unknown-linux-musl \
|
||||
--package libcontainer \
|
||||
--no-default-features -F v2
|
||||
|
||||
exit 0
|
Loading…
Reference in New Issue