1
0
mirror of https://github.com/containers/youki synced 2024-11-22 17:02:00 +01:00

fix(libcontainer) no_pivot args is not used (#2923)

* Support setting no_pivot_root for create and run command

Signed-off-by: Vanient <xiadanni1@huawei.com>

* fix: mount move before choot

Move the rootfs to the root of the host filesystem before chrooting,
this is equivalent to pivot_root, if don't move mount first, we will
not see the new rootfs when exec into the container

Signed-off-by: xujihui1985 <xujihui1985@gmail.com>

* fix(chroot): ensure mount occurs before chroot to mimic pivot_root behavior

Move the mount operation to occur before calling chroot to better simulate the effect of pivot_root.
Add a check to confirm if the current process is running inside an isolated mount namespace, ensuring proper mount handling.

Signed-off-by: xujihui1985 <xujihui1985@gmail.com>

* implement intergration test for no-pivot

Signed-off-by: xujihui1985 <xujihui1985@gmail.com>

* fix: add comments to no-pivot related code

Signed-off-by: xujihui1985 <xujihui1985@gmail.com>

* fix(lint): fix format

Signed-off-by: xujihui1985 <xujihui1985@gmail.com>

---------

Signed-off-by: Vanient <xiadanni1@huawei.com>
Signed-off-by: xujihui1985 <xujihui1985@gmail.com>
Co-authored-by: Vanient <xiadanni1@huawei.com>
This commit is contained in:
AngrySean 2024-10-29 12:23:05 +08:00 committed by GitHub
parent 6cee44685a
commit d07159691d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 304 additions and 21 deletions

@ -49,6 +49,8 @@ pub(super) struct ContainerBuilderImpl {
pub detached: bool,
/// Default executes the specified execution of a generic command
pub executor: Box<dyn Executor>,
/// If do not use pivot root to jail process inside rootfs
pub no_pivot: bool,
}
impl ContainerBuilderImpl {
@ -154,6 +156,7 @@ impl ContainerBuilderImpl {
cgroup_config,
detached: self.detached,
executor: self.executor.clone(),
no_pivot: self.no_pivot,
};
let (init_pid, need_to_clean_up_intel_rdt_dir) =

@ -20,6 +20,7 @@ pub struct InitContainerBuilder {
bundle: PathBuf,
use_systemd: bool,
detached: bool,
no_pivot: bool,
}
impl InitContainerBuilder {
@ -31,6 +32,7 @@ impl InitContainerBuilder {
bundle,
use_systemd: true,
detached: true,
no_pivot: false,
}
}
@ -45,6 +47,11 @@ impl InitContainerBuilder {
self
}
pub fn with_no_pivot(mut self, no_pivot: bool) -> Self {
self.no_pivot = no_pivot;
self
}
/// Creates a new container
pub fn build(self) -> Result<Container, LibcontainerError> {
let spec = self.load_spec()?;
@ -95,6 +102,7 @@ impl InitContainerBuilder {
preserve_fds: self.base.preserve_fds,
detached: self.detached,
executor: self.base.executor,
no_pivot: self.no_pivot,
};
builder_impl.create()?;

@ -142,6 +142,7 @@ impl TenantContainerBuilder {
preserve_fds: self.base.preserve_fds,
detached: self.detached,
executor: self.base.executor,
no_pivot: false,
};
let pid = builder_impl.create()?;

@ -42,4 +42,6 @@ pub struct ContainerArgs {
pub detached: bool,
/// Manage the functions that actually run on the container
pub executor: Box<dyn Executor>,
/// If do not use pivot root to jail process inside rootfs
pub no_pivot: bool,
}

@ -4,7 +4,7 @@ use std::path::{Path, PathBuf};
use std::{env, fs, mem};
use nc;
use nix::mount::MsFlags;
use nix::mount::{MntFlags, MsFlags};
use nix::sched::CloneFlags;
use nix::sys::stat::Mode;
use nix::unistd::{self, setsid, Gid, Uid};
@ -270,6 +270,76 @@ fn reopen_dev_null() -> Result<()> {
Ok(())
}
// umount or hide the target path. If the target path is mounted
// try to unmount it first if the unmount operation fails with EINVAL
// then mount a tmpfs with size 0k to hide the target path.
fn unmount_or_hide(syscall: &dyn Syscall, target: impl AsRef<Path>) -> Result<()> {
let target_path = target.as_ref();
match syscall.umount2(target_path, MntFlags::MNT_DETACH) {
Ok(_) => Ok(()),
Err(SyscallError::Nix(nix::errno::Errno::EINVAL)) => syscall
.mount(
None,
target_path,
Some("tmpfs"),
MsFlags::MS_RDONLY,
Some("size=0k"),
)
.map_err(InitProcessError::SyscallOther),
Err(err) => Err(InitProcessError::SyscallOther(err)),
}
}
fn move_root(syscall: &dyn Syscall, rootfs: &Path) -> Result<()> {
unistd::chdir(rootfs).map_err(InitProcessError::NixOther)?;
// umount /sys and /proc if they are mounted, the purpose is to
// unmount or hide the /sys and /proc filesystems before the process changes its
// root to the new rootfs. thus ensure that the /sys and /proc filesystems are not
// accessible in the new rootfs. the logic is borrowed from crun
// https://github.com/containers/crun/blob/53cd1c1c697d7351d0cad23708d29bf4a7980a3a/src/libcrun/linux.c#L2780
unmount_or_hide(syscall, "/sys")?;
unmount_or_hide(syscall, "/proc")?;
syscall
.mount(Some(rootfs), Path::new("/"), None, MsFlags::MS_MOVE, None)
.map_err(|err| {
tracing::error!(?err, ?rootfs, "failed to mount ms_move");
InitProcessError::SyscallOther(err)
})?;
syscall.chroot(Path::new(".")).map_err(|err| {
tracing::error!(?err, ?rootfs, "failed to chroot");
InitProcessError::SyscallOther(err)
})?;
unistd::chdir("/").map_err(InitProcessError::NixOther)?;
Ok(())
}
fn do_pivot_root(
syscall: &dyn Syscall,
namespaces: &Namespaces,
no_pivot: bool,
rootfs: impl AsRef<Path>,
) -> Result<()> {
let rootfs_path = rootfs.as_ref();
let handle_error = |err: SyscallError, msg: &str| -> InitProcessError {
tracing::error!(?err, ?rootfs_path, msg);
InitProcessError::SyscallOther(err)
};
match namespaces.get(LinuxNamespaceType::Mount)? {
Some(_) if no_pivot => move_root(syscall, rootfs_path),
Some(_) => syscall
.pivot_rootfs(rootfs.as_ref())
.map_err(|err| handle_error(err, "failed to pivot root")),
None => syscall
.chroot(rootfs_path)
.map_err(|err| handle_error(err, "failed to chroot")),
}
}
// Some variables are unused in the case where libseccomp feature is not enabled.
#[allow(unused_variables)]
pub fn container_init_process(
@ -343,18 +413,7 @@ pub fn container_init_process(
// we use pivot_root, but if we are on the host mount namespace, we will
// use simple chroot. Scary things will happen if you try to pivot_root
// in the host mount namespace...
if namespaces.get(LinuxNamespaceType::Mount)?.is_some() {
// change the root of filesystem of the process to the rootfs
syscall.pivot_rootfs(rootfs_path).map_err(|err| {
tracing::error!(?err, ?rootfs_path, "failed to pivot root");
InitProcessError::SyscallOther(err)
})?;
} else {
syscall.chroot(rootfs_path).map_err(|err| {
tracing::error!(?err, ?rootfs_path, "failed to chroot");
InitProcessError::SyscallOther(err)
})?;
}
do_pivot_root(syscall.as_ref(), &namespaces, args.no_pivot, rootfs_path)?;
// As we have changed the root mount, from here on
// logs are no longer visible in journalctl

@ -574,6 +574,11 @@ impl Syscall for LinuxSyscall {
}?;
Ok(())
}
fn umount2(&self, target: &Path, flags: MntFlags) -> Result<()> {
umount2(target, flags)?;
Ok(())
}
}
#[cfg(test)]

@ -8,7 +8,7 @@ use std::sync::Arc;
use caps::{CapSet, CapsHashSet};
use libc;
use nix::mount::MsFlags;
use nix::mount::{MntFlags, MsFlags};
use nix::sched::CloneFlags;
use nix::sys::stat::{Mode, SFlag};
use nix::unistd::{Gid, Uid};
@ -54,6 +54,7 @@ pub trait Syscall {
size: libc::size_t,
) -> Result<()>;
fn set_io_priority(&self, class: i64, priority: i64) -> Result<()>;
fn umount2(&self, target: &Path, flags: MntFlags) -> Result<()>;
}
#[derive(Clone, Copy)]

@ -6,7 +6,7 @@ use std::path::{Path, PathBuf};
use std::sync::Arc;
use caps::{CapSet, CapsHashSet};
use nix::mount::MsFlags;
use nix::mount::{MntFlags, MsFlags};
use nix::sched::CloneFlags;
use nix::sys::stat::{Mode, SFlag};
use nix::unistd::{Gid, Uid};
@ -44,6 +44,12 @@ pub struct IoPriorityArgs {
pub priority: i64,
}
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct UMount2Args {
pub target: PathBuf,
pub flags: MntFlags,
}
#[derive(Default)]
struct Mock {
values: Vec<Box<dyn Any>>,
@ -64,6 +70,7 @@ pub enum ArgName {
Groups,
Capability,
IoPriority,
UMount2,
}
impl ArgName {
@ -259,6 +266,16 @@ impl Syscall for TestHelperSyscall {
Box::new(IoPriorityArgs { class, priority }),
)
}
fn umount2(&self, target: &Path, flags: MntFlags) -> Result<()> {
self.mocks.act(
ArgName::UMount2,
Box::new(UMount2Args {
target: target.to_owned(),
flags,
}),
)
}
}
impl TestHelperSyscall {
@ -369,4 +386,13 @@ impl TestHelperSyscall {
.map(|x| x.downcast_ref::<IoPriorityArgs>().unwrap().clone())
.collect::<Vec<IoPriorityArgs>>()
}
pub fn get_umount_args(&self) -> Vec<UMount2Args> {
self.mocks
.fetch(ArgName::UMount2)
.values
.iter()
.map(|x| x.downcast_ref::<UMount2Args>().unwrap().clone())
.collect::<Vec<UMount2Args>>()
}
}

@ -24,6 +24,7 @@ pub fn create(args: Create, root_path: PathBuf, systemd_cgroup: bool) -> Result<
.as_init(&args.bundle)
.with_systemd(systemd_cgroup)
.with_detach(true)
.with_no_pivot(args.no_pivot)
.build()?;
Ok(())

@ -22,6 +22,7 @@ pub fn run(args: Run, root_path: PathBuf, systemd_cgroup: bool) -> Result<i32> {
.as_init(&args.bundle)
.with_systemd(systemd_cgroup)
.with_detach(args.detach)
.with_no_pivot(args.no_pivot)
.build()?;
container

@ -19,6 +19,7 @@ use crate::tests::io_priority::get_io_priority_test;
use crate::tests::lifecycle::{ContainerCreate, ContainerLifecycle};
use crate::tests::linux_ns_itype::get_ns_itype_tests;
use crate::tests::mounts_recursive::get_mounts_recursive_test;
use crate::tests::no_pivot::get_no_pivot_test;
use crate::tests::pidfile::get_pidfile_test;
use crate::tests::readonly_paths::get_ro_paths_test;
use crate::tests::scheduler::get_scheduler_test;
@ -113,6 +114,7 @@ fn main() -> Result<()> {
let scheduler = get_scheduler_test();
let io_priority_test = get_io_priority_test();
let devices = get_devices_test();
let no_pivot = get_no_pivot_test();
tm.add_test_group(Box::new(cl));
tm.add_test_group(Box::new(cc));
@ -136,6 +138,7 @@ fn main() -> Result<()> {
tm.add_test_group(Box::new(sysctl));
tm.add_test_group(Box::new(scheduler));
tm.add_test_group(Box::new(devices));
tm.add_test_group(Box::new(no_pivot));
tm.add_test_group(Box::new(io_priority_test));
tm.add_cleanup(Box::new(cgroups::cleanup_v1));

@ -9,6 +9,7 @@ pub mod io_priority;
pub mod lifecycle;
pub mod linux_ns_itype;
pub mod mounts_recursive;
pub mod no_pivot;
pub mod pidfile;
pub mod readonly_paths;
pub mod scheduler;

@ -0,0 +1,29 @@
use anyhow::{Context, Result};
use oci_spec::runtime::{ProcessBuilder, Spec, SpecBuilder};
use test_framework::{test_result, Test, TestGroup, TestResult};
use crate::utils::test_utils::test_inside_container_with_no_pivot;
fn create_spec() -> Result<Spec> {
SpecBuilder::default()
.process(
ProcessBuilder::default()
.args(vec!["runtimetest".to_string(), "no_pivot".to_string()])
.build()?,
)
.build()
.context("failed to create spec")
}
fn no_pivot_test() -> TestResult {
let spec = test_result!(create_spec());
test_inside_container_with_no_pivot(spec, &|_| Ok(()))
}
pub fn get_no_pivot_test() -> TestGroup {
let mut test_group = TestGroup::new("no_pivot");
let no_pivot_test = Test::new("no_pivot_test", Box::new(no_pivot_test));
test_group.add(vec![Box::new(no_pivot_test)]);
test_group
}

@ -42,11 +42,9 @@ pub struct ContainerData {
pub create_result: std::io::Result<ExitStatus>,
}
/// Starts the runtime with given directory as root directory
pub fn create_container<P: AsRef<Path>>(id: &str, dir: P) -> Result<Child> {
let res = Command::new(get_runtime_path())
// set stdio so that we can get o/p of runtimetest
// in test_inside_container function
fn create_container_command<P: AsRef<Path>>(id: &str, dir: P, with_pivot_root: bool) -> Command {
let mut command = Command::new(get_runtime_path());
command
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.arg("--root")
@ -54,7 +52,23 @@ pub fn create_container<P: AsRef<Path>>(id: &str, dir: P) -> Result<Child> {
.arg("create")
.arg(id)
.arg("--bundle")
.arg(dir.as_ref().join("bundle"))
.arg(dir.as_ref().join("bundle"));
if with_pivot_root {
command.arg("--no-pivot");
}
command
}
/// Starts the runtime with given directory as root directory
pub fn create_container<P: AsRef<Path>>(id: &str, dir: P) -> Result<Child> {
let res = create_container_command(id, dir, false)
.spawn()
.context("could not create container")?;
Ok(res)
}
pub fn create_container_no_pivot<P: AsRef<Path>>(id: &str, dir: P) -> Result<Child> {
let res = create_container_command(id, dir, true)
.spawn()
.context("could not create container")?;
Ok(res)
@ -232,6 +246,100 @@ pub fn test_inside_container(
TestResult::Passed
}
// just copy-pasted from test_inside_container for now, but with no pivot root
// need to refactor this to avoid duplication
pub fn test_inside_container_with_no_pivot(
spec: Spec,
setup_for_test: &dyn Fn(&Path) -> Result<()>,
) -> TestResult {
let id = generate_uuid();
let id_str = id.to_string();
let bundle = prepare_bundle().unwrap();
// This will do the required setup for the test
test_result!(setup_for_test(
&bundle.as_ref().join("bundle").join("rootfs")
));
set_config(&bundle, &spec).unwrap();
// as we have to run runtimetest inside the container, and is expects
// the config.json to be at path /config.json we save it there
let path = bundle
.as_ref()
.join("bundle")
.join("rootfs")
.join("config.json");
spec.save(path).unwrap();
let runtimetest_path = get_runtimetest_path();
// The config will directly use runtime as the command to be run, so we have to
// save the runtimetest binary at its /bin
std::fs::copy(
runtimetest_path,
bundle
.as_ref()
.join("bundle")
.join("rootfs")
.join("bin")
.join("runtimetest"),
)
.unwrap();
let create_process = create_container_no_pivot(&id_str, &bundle).unwrap();
// here we do not wait for the process by calling wait() as in the test_outside_container
// function because we need the output of the runtimetest. If we call wait, it will return
// and we won't have an easy way of getting the stdio of the runtimetest.
// Thus to make sure the container is created, we just wait for sometime, and
// assume that the create command was successful. If it wasn't we can catch that error
// in the start_container, as we can not start a non-created container anyways
std::thread::sleep(std::time::Duration::from_millis(1000));
match start_container(&id_str, &bundle)
.unwrap()
.wait_with_output()
{
Ok(c) => c,
Err(e) => return TestResult::Failed(anyhow!("container start failed : {:?}", e)),
};
let create_output = create_process
.wait_with_output()
.context("getting output after starting the container failed")
.unwrap();
let stdout = String::from_utf8_lossy(&create_output.stdout);
if !stdout.is_empty() {
println!(
"{:?}",
anyhow!("container stdout was not empty, found : {}", stdout)
)
}
let stderr = String::from_utf8_lossy(&create_output.stderr);
if !stderr.is_empty() {
return TestResult::Failed(anyhow!(
"container stderr was not empty, found : {}",
stderr
));
}
let (out, err) = get_state(&id_str, &bundle).unwrap();
if !err.is_empty() {
return TestResult::Failed(anyhow!(
"error in getting state after starting the container : {}",
err
));
}
let state: State = match serde_json::from_str(&out) {
Ok(v) => v,
Err(e) => return TestResult::Failed(anyhow!("error in parsing state of container after start in test_inside_container : stdout : {}, parse error : {}",out,e)),
};
if state.status != "stopped" {
return TestResult::Failed(anyhow!("error : unexpected container status in test_inside_runtime : expected stopped, got {}, container state : {:?}",state.status,state));
}
kill_container(&id_str, &bundle).unwrap().wait().unwrap();
delete_container(&id_str, &bundle).unwrap().wait().unwrap();
TestResult::Passed
}
pub fn check_container_created(data: &ContainerData) -> Result<()> {
match &data.create_result {
Ok(exit_status) => {

@ -44,6 +44,7 @@ fn main() {
"io_priority_class_be" => tests::test_io_priority_class(&spec, IoprioClassBe),
"io_priority_class_idle" => tests::test_io_priority_class(&spec, IoprioClassIdle),
"devices" => tests::validate_devices(&spec),
"no_pivot" => tests::validate_rootfs(),
_ => eprintln!("error due to unexpected execute test name: {execute_test}"),
}
}

@ -545,3 +545,37 @@ pub fn test_io_priority_class(spec: &Spec, io_priority_class: IOPriorityClass) {
eprintln!("error ioprio_get expected priority {expected_priority:?}, got {priority}")
}
}
// the validate_rootfs function is used to validate the rootfs of the container is
// as expected. This function is used in the no_pivot test to validate the rootfs
pub fn validate_rootfs() {
// list the first level directories in the rootfs
let mut entries = fs::read_dir("/")
.unwrap()
.filter_map(|entry| {
entry.ok().and_then(|e| {
let path = e.path();
if path.is_dir() {
path.file_name()
.and_then(|name| name.to_str().map(|s| s.to_owned()))
} else {
None
}
})
})
.collect::<Vec<String>>();
// sort the entries to make the test deterministic
entries.sort();
// this is the list of directories that we expect to find in the rootfs
let mut expected = vec![
"bin", "dev", "etc", "home", "proc", "root", "sys", "tmp", "usr", "var",
];
// sort the expected entries to make the test deterministic
expected.sort();
// compare the expected entries with the actual entries
if entries != expected {
eprintln!("error due to rootfs want {expected:?}, got {entries:?}");
}
}