From 601df9ecd301db3ebf37b399d1058717c1e1bf34 Mon Sep 17 00:00:00 2001 From: Yashodhan <54112038+YJDoc2@users.noreply.github.com> Date: Sat, 27 Apr 2024 18:19:58 +0530 Subject: [PATCH] Fix cgroups determination in exec implementation (#2720) * Set cgroups path for tenant containers from main container Signed-off-by: Yashodhan Joshi * Ignore new_user_ns for creating cgroups path Signed-off-by: Yashodhan Joshi * Remove user_ns param completely Signed-off-by: Yashodhan Joshi * Add tests in podman rootless for exec Signed-off-by: Yashodhan Joshi * Fix add_task implementation for cgroups v2 and systemd Signed-off-by: Yashodhan Joshi * minor refactor in tenant builder Signed-off-by: Yashodhan Joshi * Add unit test for systemd add_task function Signed-off-by: Yashodhan Joshi * Fix task addition to properly add tasks via dbus api Signed-off-by: Yashodhan Joshi * Fix cross cotainers for tests running Signed-off-by: Yashodhan Joshi --------- Signed-off-by: Yashodhan Joshi --- Cross.toml | 1 + .../src/systemd/dbus_native/client.rs | 7 +++ .../src/systemd/dbus_native/dbus.rs | 4 ++ .../src/systemd/dbus_native/proxy.rs | 7 +++ crates/libcgroups/src/systemd/manager.rs | 54 ++++++++++++++++++- crates/libcgroups/src/v2/manager.rs | 4 ++ crates/libcontainer/src/config.rs | 12 +++-- .../src/container/builder_impl.rs | 12 +---- .../libcontainer/src/container/container.rs | 3 +- .../src/container/init_builder.rs | 2 +- .../src/container/tenant_builder.rs | 10 +++- crates/libcontainer/src/utils.rs | 17 ++---- cross/Dockerfile.gnu | 3 ++ cross/Dockerfile.musl | 3 ++ hack/busctl.sh | 14 +++++ scripts/cargo.sh | 6 ++- tests/rootless-tests/run.sh | 30 ++++++++++- 17 files changed, 153 insertions(+), 36 deletions(-) create mode 100644 hack/busctl.sh diff --git a/Cross.toml b/Cross.toml index 18e0f3d5..a61adb93 100644 --- a/Cross.toml +++ b/Cross.toml @@ -1,5 +1,6 @@ [build] default-target = "x86_64-unknown-linux-gnu" +env.passthrough = ["XDG_RUNTIME_DIR"] [target.aarch64-unknown-linux-gnu] dockerfile = "cross/Dockerfile.gnu" diff --git a/crates/libcgroups/src/systemd/dbus_native/client.rs b/crates/libcgroups/src/systemd/dbus_native/client.rs index 2eca6b5c..a1a28d15 100644 --- a/crates/libcgroups/src/systemd/dbus_native/client.rs +++ b/crates/libcgroups/src/systemd/dbus_native/client.rs @@ -26,4 +26,11 @@ pub trait SystemdClient { fn systemd_version(&self) -> Result; fn control_cgroup_root(&self) -> Result; + + fn add_process_to_unit( + &self, + unit_name: &str, + subcgroup: &str, + pid: u32, + ) -> Result<(), SystemdClientError>; } diff --git a/crates/libcgroups/src/systemd/dbus_native/dbus.rs b/crates/libcgroups/src/systemd/dbus_native/dbus.rs index fba1d991..fa989686 100644 --- a/crates/libcgroups/src/systemd/dbus_native/dbus.rs +++ b/crates/libcgroups/src/systemd/dbus_native/dbus.rs @@ -453,6 +453,10 @@ impl SystemdClient for DbusConnection { let cgroup_root = proxy.control_group()?; Ok(PathBuf::from(&cgroup_root)) } + fn add_process_to_unit(&self, unit_name: &str, subcgroup: &str, pid: u32) -> Result<()> { + let proxy = self.create_proxy(); + proxy.attach_process(unit_name, subcgroup, pid) + } } #[cfg(test)] diff --git a/crates/libcgroups/src/systemd/dbus_native/proxy.rs b/crates/libcgroups/src/systemd/dbus_native/proxy.rs index c69c0db0..567f4fca 100644 --- a/crates/libcgroups/src/systemd/dbus_native/proxy.rs +++ b/crates/libcgroups/src/systemd/dbus_native/proxy.rs @@ -239,4 +239,11 @@ impl<'conn> Proxy<'conn> { v => panic!("control group expected string variant, got {:?} instead", v), } } + pub fn attach_process(&self, name: &str, cgroup: &str, pid: u32) -> Result<()> { + self.method_call::<_, ()>( + "org.freedesktop.systemd1.Manager", + "AttachProcessesToUnit", + Some((name, cgroup, vec![pid])), + ) + } } diff --git a/crates/libcgroups/src/systemd/manager.rs b/crates/libcgroups/src/systemd/manager.rs index ca4e2fe6..bba4c3f2 100644 --- a/crates/libcgroups/src/systemd/manager.rs +++ b/crates/libcgroups/src/systemd/manager.rs @@ -353,6 +353,12 @@ impl CgroupManager for Manager { if pid.as_raw() == -1 { return Ok(()); } + if self.client.transient_unit_exists(&self.unit_name) { + tracing::debug!("Transient unit {:?} already exists", self.unit_name); + self.client + .add_process_to_unit(&self.unit_name, "", pid.as_raw() as u32)?; + return Ok(()); + } tracing::debug!("Starting {:?}", self.unit_name); self.client.start_transient_unit( @@ -430,8 +436,11 @@ mod tests { use anyhow::{Context, Result}; use super::*; - use crate::systemd::dbus_native::{ - client::SystemdClient, serialize::Variant, utils::SystemdClientError, + use crate::{ + common::DEFAULT_CGROUP_ROOT, + systemd::dbus_native::{ + client::SystemdClient, serialize::Variant, utils::SystemdClientError, + }, }; struct TestSystemdClient {} @@ -474,6 +483,15 @@ mod tests { fn control_cgroup_root(&self) -> Result { Ok(PathBuf::from("/")) } + + fn add_process_to_unit( + &self, + _unit_name: &str, + _subcgroup: &str, + _pid: u32, + ) -> Result<(), SystemdClientError> { + Ok(()) + } } #[test] @@ -528,4 +546,36 @@ mod tests { Ok(()) } + #[test] + fn test_task_addition() { + let manager = Manager::new( + DEFAULT_CGROUP_ROOT.into(), + ":youki:test".into(), + "youki_test_container".into(), + false, + ) + .unwrap(); + let mut p1 = std::process::Command::new("sleep") + .arg("1s") + .spawn() + .unwrap(); + let p1_id = nix::unistd::Pid::from_raw(p1.id() as i32); + let mut p2 = std::process::Command::new("sleep") + .arg("1s") + .spawn() + .unwrap(); + let p2_id = nix::unistd::Pid::from_raw(p2.id() as i32); + manager.add_task(p1_id).unwrap(); + manager.add_task(p2_id).unwrap(); + let all_pids = manager.get_all_pids().unwrap(); + assert!(all_pids.contains(&p1_id)); + assert!(all_pids.contains(&p2_id)); + // wait till both processes are finished so we can cleanup the cgroup + let _ = p1.wait(); + let _ = p2.wait(); + manager.remove().unwrap(); + // the remove call above should remove the dir, we just do this again + // for contingency, and thus ignore the result + let _ = fs::remove_dir(&manager.full_path); + } } diff --git a/crates/libcgroups/src/v2/manager.rs b/crates/libcgroups/src/v2/manager.rs index d6e354a9..e816d9b2 100644 --- a/crates/libcgroups/src/v2/manager.rs +++ b/crates/libcgroups/src/v2/manager.rs @@ -151,6 +151,10 @@ impl CgroupManager for Manager { type Error = V2ManagerError; fn add_task(&self, pid: Pid) -> Result<(), Self::Error> { + if self.full_path.exists() { + common::write_cgroup_file(self.full_path.join(CGROUP_PROCS), pid)?; + return Ok(()); + } self.create_unified_cgroup(pid)?; Ok(()) } diff --git a/crates/libcontainer/src/config.rs b/crates/libcontainer/src/config.rs index ec307140..6ec30d41 100644 --- a/crates/libcontainer/src/config.rs +++ b/crates/libcontainer/src/config.rs @@ -47,7 +47,7 @@ pub struct YoukiConfig { } impl<'a> YoukiConfig { - pub fn from_spec(spec: &'a Spec, container_id: &str, new_user_ns: bool) -> Result { + pub fn from_spec(spec: &'a Spec, container_id: &str) -> Result { Ok(YoukiConfig { hooks: spec.hooks().clone(), cgroup_path: utils::get_cgroup_path( @@ -56,7 +56,6 @@ impl<'a> YoukiConfig { .ok_or(ConfigError::MissingLinux)? .cgroups_path(), container_id, - new_user_ns, ), }) } @@ -106,10 +105,13 @@ mod tests { fn test_config_from_spec() -> Result<()> { let container_id = "sample"; let spec = Spec::default(); - let config = YoukiConfig::from_spec(&spec, container_id, false)?; + let config = YoukiConfig::from_spec(&spec, container_id)?; assert_eq!(&config.hooks, spec.hooks()); dbg!(&config.cgroup_path); - assert_eq!(config.cgroup_path, PathBuf::from(container_id)); + assert_eq!( + config.cgroup_path, + PathBuf::from(format!(":youki:{container_id}")) + ); Ok(()) } @@ -118,7 +120,7 @@ mod tests { let container_id = "sample"; let tmp = tempfile::tempdir().expect("create temp dir"); let spec = Spec::default(); - let config = YoukiConfig::from_spec(&spec, container_id, false)?; + let config = YoukiConfig::from_spec(&spec, container_id)?; config.save(&tmp)?; let act = YoukiConfig::load(&tmp)?; assert_eq!(act, config); diff --git a/crates/libcontainer/src/container/builder_impl.rs b/crates/libcontainer/src/container/builder_impl.rs index 38f2f1cc..92ab4f97 100644 --- a/crates/libcontainer/src/container/builder_impl.rs +++ b/crates/libcontainer/src/container/builder_impl.rs @@ -68,11 +68,7 @@ impl ContainerBuilderImpl { fn run_container(&mut self) -> Result { let linux = self.spec.linux().as_ref().ok_or(MissingSpecError::Linux)?; - let cgroups_path = utils::get_cgroup_path( - linux.cgroups_path(), - &self.container_id, - self.user_ns_config.is_some(), - ); + let cgroups_path = utils::get_cgroup_path(linux.cgroups_path(), &self.container_id); let cgroup_config = libcgroups::common::CgroupConfig { cgroup_path: cgroups_path, systemd_cgroup: self.use_systemd || self.user_ns_config.is_some(), @@ -186,11 +182,7 @@ impl ContainerBuilderImpl { fn cleanup_container(&self) -> Result<(), LibcontainerError> { let linux = self.spec.linux().as_ref().ok_or(MissingSpecError::Linux)?; - let cgroups_path = utils::get_cgroup_path( - linux.cgroups_path(), - &self.container_id, - self.user_ns_config.is_some(), - ); + let cgroups_path = utils::get_cgroup_path(linux.cgroups_path(), &self.container_id); let cmanager = libcgroups::common::create_cgroup_manager(libcgroups::common::CgroupConfig { cgroup_path: cgroups_path, diff --git a/crates/libcontainer/src/container/container.rs b/crates/libcontainer/src/container/container.rs index 24111465..faedbd3c 100644 --- a/crates/libcontainer/src/container/container.rs +++ b/crates/libcontainer/src/container/container.rs @@ -332,8 +332,7 @@ mod tests { let tmp_dir = tempfile::tempdir().unwrap(); use oci_spec::runtime::Spec; let spec = Spec::default(); - let config = - YoukiConfig::from_spec(&spec, "123", false).context("convert spec to config")?; + let config = YoukiConfig::from_spec(&spec, "123").context("convert spec to config")?; config.save(tmp_dir.path()).context("save config")?; let container = Container { diff --git a/crates/libcontainer/src/container/init_builder.rs b/crates/libcontainer/src/container/init_builder.rs index f56e0010..f9878196 100644 --- a/crates/libcontainer/src/container/init_builder.rs +++ b/crates/libcontainer/src/container/init_builder.rs @@ -88,7 +88,7 @@ impl InitContainerBuilder { let user_ns_config = UserNamespaceConfig::new(&spec)?; - let config = YoukiConfig::from_spec(&spec, container.id(), user_ns_config.is_some())?; + let config = YoukiConfig::from_spec(&spec, container.id())?; config.save(&container_dir).map_err(|err| { tracing::error!(?container_dir, "failed to save config: {}", err); err diff --git a/crates/libcontainer/src/container/tenant_builder.rs b/crates/libcontainer/src/container/tenant_builder.rs index 0d805a0d..adc1cdf1 100644 --- a/crates/libcontainer/src/container/tenant_builder.rs +++ b/crates/libcontainer/src/container/tenant_builder.rs @@ -327,9 +327,17 @@ impl TenantContainerBuilder { let init_process = procfs::process::Process::new(container_pid.as_raw())?; let ns = self.get_namespaces(init_process.namespaces()?.0)?; - let linux = LinuxBuilder::default().namespaces(ns).build()?; + // it should never be the case that linux is not present in spec + let spec_linux = spec.linux().as_ref().unwrap(); + let mut linux_builder = LinuxBuilder::default().namespaces(ns); + + if let Some(ref cgroup_path) = spec_linux.cgroups_path() { + linux_builder = linux_builder.cgroups_path(cgroup_path.clone()); + } + let linux = linux_builder.build()?; spec.set_process(Some(process)).set_linux(Some(linux)); + Ok(()) } diff --git a/crates/libcontainer/src/utils.rs b/crates/libcontainer/src/utils.rs index 024b970f..4e21224a 100644 --- a/crates/libcontainer/src/utils.rs +++ b/crates/libcontainer/src/utils.rs @@ -147,17 +147,10 @@ pub fn get_user_home(uid: u32) -> Option { } /// If None, it will generate a default path for cgroups. -pub fn get_cgroup_path( - cgroups_path: &Option, - container_id: &str, - new_user_ns: bool, -) -> PathBuf { +pub fn get_cgroup_path(cgroups_path: &Option, container_id: &str) -> PathBuf { match cgroups_path { Some(cpath) => cpath.clone(), - None => match new_user_ns { - false => PathBuf::from(container_id), - true => PathBuf::from(format!(":youki:{container_id}")), - }, + None => PathBuf::from(format!(":youki:{container_id}")), } } @@ -323,11 +316,11 @@ mod tests { fn test_get_cgroup_path() { let cid = "sample_container_id"; assert_eq!( - get_cgroup_path(&None, cid, false), - PathBuf::from("sample_container_id") + get_cgroup_path(&None, cid), + PathBuf::from(":youki:sample_container_id") ); assert_eq!( - get_cgroup_path(&Some(PathBuf::from("/youki")), cid, false), + get_cgroup_path(&Some(PathBuf::from("/youki")), cid), PathBuf::from("/youki") ); } diff --git a/cross/Dockerfile.gnu b/cross/Dockerfile.gnu index 100f44a1..93b42a53 100644 --- a/cross/Dockerfile.gnu +++ b/cross/Dockerfile.gnu @@ -13,3 +13,6 @@ RUN dpkg --add-architecture ${CROSS_DEB_ARCH} && \ zlib1g-dev:${CROSS_DEB_ARCH} \ # dependencies to build wasmedge-sys libzstd-dev:${CROSS_DEB_ARCH} + +COPY hack/busctl.sh /bin/busctl +RUN chmod +x /bin/busctl diff --git a/cross/Dockerfile.musl b/cross/Dockerfile.musl index 81f32084..acac8a0f 100644 --- a/cross/Dockerfile.musl +++ b/cross/Dockerfile.musl @@ -22,6 +22,9 @@ ENV LIBSECCOMP_LIB_PATH="${CROSS_SYSROOT}/lib" ENV WASMEDGE_DEP_STDCXX_LINK_TYPE="static" ENV WASMEDGE_DEP_STDCXX_LIB_PATH="${CROSS_SYSROOT}/lib" +COPY hack/busctl.sh /bin/busctl +RUN chmod +x /bin/busctl + # wasmedge-sys (through llvm) needs some symbols defined in libgcc RUN mkdir /.cargo && cat <<'EOF' > /.cargo/config.toml [target.'cfg(target_env = "musl")'] diff --git a/hack/busctl.sh b/hack/busctl.sh new file mode 100644 index 00000000..7223f0a3 --- /dev/null +++ b/hack/busctl.sh @@ -0,0 +1,14 @@ +#!/bin/sh + +# This hack script is the dummy busctl command used when running tests with cross containers. + +# The issue is that we cannot run systemd or dbus inside the test container without a lot +# of hacks. For one specific test - test_task_addition, we need to check that the task +# addition via systemd manager works. We mount the host dbus socket in the test container, so +# dbus calls work, but for the initial authentication, we use busctl which needs dbus and systemd +# to be present and running. So instead of doing all that, we simply run the container with the +# actual test running user's uid/gid and here we echo the only relevant line from busctl's +# output, using id to get the uid. This is a hack, but less complex than actually setting up +# and running the systemd+dbus inside the container. + +echo "OwnerUID=$(id -u)" diff --git a/scripts/cargo.sh b/scripts/cargo.sh index a5369094..1e78bcfa 100755 --- a/scripts/cargo.sh +++ b/scripts/cargo.sh @@ -56,7 +56,11 @@ if [ "$CARGO" == "cross" ]; then # mount run to have access to dbus socket. # mount /tmp so as shared for test_make_parent_mount_private - export CROSS_CONTAINER_OPTS="--privileged -v/run:/run --mount=type=bind,source=/tmp,destination=/tmp,bind-propagation=shared" + # Then there are few "hacks" specificallt for test_task_addition + # run with user same as the invoking user, so that the dbus is connected with correct user + # we want pid ns of host, because we will be connecting to the host dbus, and it needs task pid from host + # finally we need to mount the cgroup as read-only, as we need that to check if the tasks are correctly added + export CROSS_CONTAINER_OPTS="--privileged --user `id -u`:`id -g` --pid=host -v /sys/fs/cgroup:/sys/fs/cgroup:ro -v/run:/run --mount=type=bind,source=/tmp,destination=/tmp,bind-propagation=shared" fi if [ "$1" == "--print-target-dir" ]; then diff --git a/tests/rootless-tests/run.sh b/tests/rootless-tests/run.sh index 02095915..74149771 100755 --- a/tests/rootless-tests/run.sh +++ b/tests/rootless-tests/run.sh @@ -10,9 +10,35 @@ podman rm --force --ignore create-test # remove if existing podman create --runtime $runtime --name create-test hello-world log=$(podman start -a create-test) echo $log | grep "This message shows that your installation appears to be working correctly" -podman rm create-test +podman rm --force --ignore create-test rand=$(head -c 10 /dev/random | base64) log=$(podman run --runtime $runtime fedora echo "$rand") -echo $log | grep $rand \ No newline at end of file +echo $log | grep $rand + +podman kill exec-test || true # ignore failure for killing +podman rm --force --ignore exec-test +podman run -d --runtime $runtime --name exec-test busybox sleep 10m + +rand=$(head -c 10 /dev/random | base64) + +log=$(podman exec --runtime $runtime exec-test echo "$rand") +echo $log | grep $rand + +CGROUP_SUB_PATH=$(podman inspect exec-test | jq .[0].State.CgroupPath | tr -d "\"") +CGROUP_PATH="/sys/fs/cgroup$CGROUP_SUB_PATH/cgroup.procs" + +# assert we have exactly one process in the cgroup +test $(cat $CGROUP_PATH | wc -l) -eq 1 +# assert pid match +test $(cat $CGROUP_PATH) -eq $(podman inspect exec-test | jq .[0].State.Pid) + +podman exec -d --runtime $runtime exec-test sleep 5m + +# we cannot exactly check the pid of tenant here, instead just check that there are +# two processes in the same cgroup now +test $(cat $CGROUP_PATH | wc -l) -eq 2 + +podman kill exec-test +podman rm --force --ignore exec-test