1
0
mirror of https://github.com/containers/youki synced 2024-11-26 06:08:07 +01:00

Fix cgroups determination in exec implementation (#2720)

* Set cgroups path for tenant containers from main container

Signed-off-by: Yashodhan Joshi <yjdoc2@gmail.com>

* Ignore new_user_ns for creating cgroups path

Signed-off-by: Yashodhan Joshi <yjdoc2@gmail.com>

* Remove user_ns param completely

Signed-off-by: Yashodhan Joshi <yjdoc2@gmail.com>

* Add tests in podman rootless for exec

Signed-off-by: Yashodhan Joshi <yjdoc2@gmail.com>

* Fix add_task implementation for cgroups v2 and systemd

Signed-off-by: Yashodhan Joshi <yjdoc2@gmail.com>

* minor refactor in tenant builder

Signed-off-by: Yashodhan Joshi <yjdoc2@gmail.com>

* Add unit test for systemd add_task function

Signed-off-by: Yashodhan Joshi <yjdoc2@gmail.com>

* Fix task addition to properly add tasks via dbus api

Signed-off-by: Yashodhan Joshi <yjdoc2@gmail.com>

* Fix cross cotainers for tests running

Signed-off-by: Yashodhan Joshi <yjdoc2@gmail.com>

---------

Signed-off-by: Yashodhan Joshi <yjdoc2@gmail.com>
This commit is contained in:
Yashodhan 2024-04-27 18:19:58 +05:30 committed by GitHub
parent cd9bfd8d79
commit 601df9ecd3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 153 additions and 36 deletions

@ -1,5 +1,6 @@
[build] [build]
default-target = "x86_64-unknown-linux-gnu" default-target = "x86_64-unknown-linux-gnu"
env.passthrough = ["XDG_RUNTIME_DIR"]
[target.aarch64-unknown-linux-gnu] [target.aarch64-unknown-linux-gnu]
dockerfile = "cross/Dockerfile.gnu" dockerfile = "cross/Dockerfile.gnu"

@ -26,4 +26,11 @@ pub trait SystemdClient {
fn systemd_version(&self) -> Result<u32, SystemdClientError>; fn systemd_version(&self) -> Result<u32, SystemdClientError>;
fn control_cgroup_root(&self) -> Result<PathBuf, SystemdClientError>; fn control_cgroup_root(&self) -> Result<PathBuf, SystemdClientError>;
fn add_process_to_unit(
&self,
unit_name: &str,
subcgroup: &str,
pid: u32,
) -> Result<(), SystemdClientError>;
} }

@ -453,6 +453,10 @@ impl SystemdClient for DbusConnection {
let cgroup_root = proxy.control_group()?; let cgroup_root = proxy.control_group()?;
Ok(PathBuf::from(&cgroup_root)) Ok(PathBuf::from(&cgroup_root))
} }
fn add_process_to_unit(&self, unit_name: &str, subcgroup: &str, pid: u32) -> Result<()> {
let proxy = self.create_proxy();
proxy.attach_process(unit_name, subcgroup, pid)
}
} }
#[cfg(test)] #[cfg(test)]

@ -239,4 +239,11 @@ impl<'conn> Proxy<'conn> {
v => panic!("control group expected string variant, got {:?} instead", v), v => panic!("control group expected string variant, got {:?} instead", v),
} }
} }
pub fn attach_process(&self, name: &str, cgroup: &str, pid: u32) -> Result<()> {
self.method_call::<_, ()>(
"org.freedesktop.systemd1.Manager",
"AttachProcessesToUnit",
Some((name, cgroup, vec![pid])),
)
}
} }

@ -353,6 +353,12 @@ impl CgroupManager for Manager {
if pid.as_raw() == -1 { if pid.as_raw() == -1 {
return Ok(()); return Ok(());
} }
if self.client.transient_unit_exists(&self.unit_name) {
tracing::debug!("Transient unit {:?} already exists", self.unit_name);
self.client
.add_process_to_unit(&self.unit_name, "", pid.as_raw() as u32)?;
return Ok(());
}
tracing::debug!("Starting {:?}", self.unit_name); tracing::debug!("Starting {:?}", self.unit_name);
self.client.start_transient_unit( self.client.start_transient_unit(
@ -430,8 +436,11 @@ mod tests {
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use super::*; use super::*;
use crate::systemd::dbus_native::{ use crate::{
common::DEFAULT_CGROUP_ROOT,
systemd::dbus_native::{
client::SystemdClient, serialize::Variant, utils::SystemdClientError, client::SystemdClient, serialize::Variant, utils::SystemdClientError,
},
}; };
struct TestSystemdClient {} struct TestSystemdClient {}
@ -474,6 +483,15 @@ mod tests {
fn control_cgroup_root(&self) -> Result<PathBuf, SystemdClientError> { fn control_cgroup_root(&self) -> Result<PathBuf, SystemdClientError> {
Ok(PathBuf::from("/")) Ok(PathBuf::from("/"))
} }
fn add_process_to_unit(
&self,
_unit_name: &str,
_subcgroup: &str,
_pid: u32,
) -> Result<(), SystemdClientError> {
Ok(())
}
} }
#[test] #[test]
@ -528,4 +546,36 @@ mod tests {
Ok(()) Ok(())
} }
#[test]
fn test_task_addition() {
let manager = Manager::new(
DEFAULT_CGROUP_ROOT.into(),
":youki:test".into(),
"youki_test_container".into(),
false,
)
.unwrap();
let mut p1 = std::process::Command::new("sleep")
.arg("1s")
.spawn()
.unwrap();
let p1_id = nix::unistd::Pid::from_raw(p1.id() as i32);
let mut p2 = std::process::Command::new("sleep")
.arg("1s")
.spawn()
.unwrap();
let p2_id = nix::unistd::Pid::from_raw(p2.id() as i32);
manager.add_task(p1_id).unwrap();
manager.add_task(p2_id).unwrap();
let all_pids = manager.get_all_pids().unwrap();
assert!(all_pids.contains(&p1_id));
assert!(all_pids.contains(&p2_id));
// wait till both processes are finished so we can cleanup the cgroup
let _ = p1.wait();
let _ = p2.wait();
manager.remove().unwrap();
// the remove call above should remove the dir, we just do this again
// for contingency, and thus ignore the result
let _ = fs::remove_dir(&manager.full_path);
}
} }

@ -151,6 +151,10 @@ impl CgroupManager for Manager {
type Error = V2ManagerError; type Error = V2ManagerError;
fn add_task(&self, pid: Pid) -> Result<(), Self::Error> { fn add_task(&self, pid: Pid) -> Result<(), Self::Error> {
if self.full_path.exists() {
common::write_cgroup_file(self.full_path.join(CGROUP_PROCS), pid)?;
return Ok(());
}
self.create_unified_cgroup(pid)?; self.create_unified_cgroup(pid)?;
Ok(()) Ok(())
} }

@ -47,7 +47,7 @@ pub struct YoukiConfig {
} }
impl<'a> YoukiConfig { impl<'a> YoukiConfig {
pub fn from_spec(spec: &'a Spec, container_id: &str, new_user_ns: bool) -> Result<Self> { pub fn from_spec(spec: &'a Spec, container_id: &str) -> Result<Self> {
Ok(YoukiConfig { Ok(YoukiConfig {
hooks: spec.hooks().clone(), hooks: spec.hooks().clone(),
cgroup_path: utils::get_cgroup_path( cgroup_path: utils::get_cgroup_path(
@ -56,7 +56,6 @@ impl<'a> YoukiConfig {
.ok_or(ConfigError::MissingLinux)? .ok_or(ConfigError::MissingLinux)?
.cgroups_path(), .cgroups_path(),
container_id, container_id,
new_user_ns,
), ),
}) })
} }
@ -106,10 +105,13 @@ mod tests {
fn test_config_from_spec() -> Result<()> { fn test_config_from_spec() -> Result<()> {
let container_id = "sample"; let container_id = "sample";
let spec = Spec::default(); let spec = Spec::default();
let config = YoukiConfig::from_spec(&spec, container_id, false)?; let config = YoukiConfig::from_spec(&spec, container_id)?;
assert_eq!(&config.hooks, spec.hooks()); assert_eq!(&config.hooks, spec.hooks());
dbg!(&config.cgroup_path); dbg!(&config.cgroup_path);
assert_eq!(config.cgroup_path, PathBuf::from(container_id)); assert_eq!(
config.cgroup_path,
PathBuf::from(format!(":youki:{container_id}"))
);
Ok(()) Ok(())
} }
@ -118,7 +120,7 @@ mod tests {
let container_id = "sample"; let container_id = "sample";
let tmp = tempfile::tempdir().expect("create temp dir"); let tmp = tempfile::tempdir().expect("create temp dir");
let spec = Spec::default(); let spec = Spec::default();
let config = YoukiConfig::from_spec(&spec, container_id, false)?; let config = YoukiConfig::from_spec(&spec, container_id)?;
config.save(&tmp)?; config.save(&tmp)?;
let act = YoukiConfig::load(&tmp)?; let act = YoukiConfig::load(&tmp)?;
assert_eq!(act, config); assert_eq!(act, config);

@ -68,11 +68,7 @@ impl ContainerBuilderImpl {
fn run_container(&mut self) -> Result<Pid, LibcontainerError> { fn run_container(&mut self) -> Result<Pid, LibcontainerError> {
let linux = self.spec.linux().as_ref().ok_or(MissingSpecError::Linux)?; let linux = self.spec.linux().as_ref().ok_or(MissingSpecError::Linux)?;
let cgroups_path = utils::get_cgroup_path( let cgroups_path = utils::get_cgroup_path(linux.cgroups_path(), &self.container_id);
linux.cgroups_path(),
&self.container_id,
self.user_ns_config.is_some(),
);
let cgroup_config = libcgroups::common::CgroupConfig { let cgroup_config = libcgroups::common::CgroupConfig {
cgroup_path: cgroups_path, cgroup_path: cgroups_path,
systemd_cgroup: self.use_systemd || self.user_ns_config.is_some(), systemd_cgroup: self.use_systemd || self.user_ns_config.is_some(),
@ -186,11 +182,7 @@ impl ContainerBuilderImpl {
fn cleanup_container(&self) -> Result<(), LibcontainerError> { fn cleanup_container(&self) -> Result<(), LibcontainerError> {
let linux = self.spec.linux().as_ref().ok_or(MissingSpecError::Linux)?; let linux = self.spec.linux().as_ref().ok_or(MissingSpecError::Linux)?;
let cgroups_path = utils::get_cgroup_path( let cgroups_path = utils::get_cgroup_path(linux.cgroups_path(), &self.container_id);
linux.cgroups_path(),
&self.container_id,
self.user_ns_config.is_some(),
);
let cmanager = let cmanager =
libcgroups::common::create_cgroup_manager(libcgroups::common::CgroupConfig { libcgroups::common::create_cgroup_manager(libcgroups::common::CgroupConfig {
cgroup_path: cgroups_path, cgroup_path: cgroups_path,

@ -332,8 +332,7 @@ mod tests {
let tmp_dir = tempfile::tempdir().unwrap(); let tmp_dir = tempfile::tempdir().unwrap();
use oci_spec::runtime::Spec; use oci_spec::runtime::Spec;
let spec = Spec::default(); let spec = Spec::default();
let config = let config = YoukiConfig::from_spec(&spec, "123").context("convert spec to config")?;
YoukiConfig::from_spec(&spec, "123", false).context("convert spec to config")?;
config.save(tmp_dir.path()).context("save config")?; config.save(tmp_dir.path()).context("save config")?;
let container = Container { let container = Container {

@ -88,7 +88,7 @@ impl InitContainerBuilder {
let user_ns_config = UserNamespaceConfig::new(&spec)?; let user_ns_config = UserNamespaceConfig::new(&spec)?;
let config = YoukiConfig::from_spec(&spec, container.id(), user_ns_config.is_some())?; let config = YoukiConfig::from_spec(&spec, container.id())?;
config.save(&container_dir).map_err(|err| { config.save(&container_dir).map_err(|err| {
tracing::error!(?container_dir, "failed to save config: {}", err); tracing::error!(?container_dir, "failed to save config: {}", err);
err err

@ -327,9 +327,17 @@ impl TenantContainerBuilder {
let init_process = procfs::process::Process::new(container_pid.as_raw())?; let init_process = procfs::process::Process::new(container_pid.as_raw())?;
let ns = self.get_namespaces(init_process.namespaces()?.0)?; let ns = self.get_namespaces(init_process.namespaces()?.0)?;
let linux = LinuxBuilder::default().namespaces(ns).build()?;
// it should never be the case that linux is not present in spec
let spec_linux = spec.linux().as_ref().unwrap();
let mut linux_builder = LinuxBuilder::default().namespaces(ns);
if let Some(ref cgroup_path) = spec_linux.cgroups_path() {
linux_builder = linux_builder.cgroups_path(cgroup_path.clone());
}
let linux = linux_builder.build()?;
spec.set_process(Some(process)).set_linux(Some(linux)); spec.set_process(Some(process)).set_linux(Some(linux));
Ok(()) Ok(())
} }

@ -147,17 +147,10 @@ pub fn get_user_home(uid: u32) -> Option<PathBuf> {
} }
/// If None, it will generate a default path for cgroups. /// If None, it will generate a default path for cgroups.
pub fn get_cgroup_path( pub fn get_cgroup_path(cgroups_path: &Option<PathBuf>, container_id: &str) -> PathBuf {
cgroups_path: &Option<PathBuf>,
container_id: &str,
new_user_ns: bool,
) -> PathBuf {
match cgroups_path { match cgroups_path {
Some(cpath) => cpath.clone(), Some(cpath) => cpath.clone(),
None => match new_user_ns { None => PathBuf::from(format!(":youki:{container_id}")),
false => PathBuf::from(container_id),
true => PathBuf::from(format!(":youki:{container_id}")),
},
} }
} }
@ -323,11 +316,11 @@ mod tests {
fn test_get_cgroup_path() { fn test_get_cgroup_path() {
let cid = "sample_container_id"; let cid = "sample_container_id";
assert_eq!( assert_eq!(
get_cgroup_path(&None, cid, false), get_cgroup_path(&None, cid),
PathBuf::from("sample_container_id") PathBuf::from(":youki:sample_container_id")
); );
assert_eq!( assert_eq!(
get_cgroup_path(&Some(PathBuf::from("/youki")), cid, false), get_cgroup_path(&Some(PathBuf::from("/youki")), cid),
PathBuf::from("/youki") PathBuf::from("/youki")
); );
} }

@ -13,3 +13,6 @@ RUN dpkg --add-architecture ${CROSS_DEB_ARCH} && \
zlib1g-dev:${CROSS_DEB_ARCH} \ zlib1g-dev:${CROSS_DEB_ARCH} \
# dependencies to build wasmedge-sys # dependencies to build wasmedge-sys
libzstd-dev:${CROSS_DEB_ARCH} libzstd-dev:${CROSS_DEB_ARCH}
COPY hack/busctl.sh /bin/busctl
RUN chmod +x /bin/busctl

@ -22,6 +22,9 @@ ENV LIBSECCOMP_LIB_PATH="${CROSS_SYSROOT}/lib"
ENV WASMEDGE_DEP_STDCXX_LINK_TYPE="static" ENV WASMEDGE_DEP_STDCXX_LINK_TYPE="static"
ENV WASMEDGE_DEP_STDCXX_LIB_PATH="${CROSS_SYSROOT}/lib" ENV WASMEDGE_DEP_STDCXX_LIB_PATH="${CROSS_SYSROOT}/lib"
COPY hack/busctl.sh /bin/busctl
RUN chmod +x /bin/busctl
# wasmedge-sys (through llvm) needs some symbols defined in libgcc # wasmedge-sys (through llvm) needs some symbols defined in libgcc
RUN mkdir /.cargo && cat <<'EOF' > /.cargo/config.toml RUN mkdir /.cargo && cat <<'EOF' > /.cargo/config.toml
[target.'cfg(target_env = "musl")'] [target.'cfg(target_env = "musl")']

14
hack/busctl.sh Normal file

@ -0,0 +1,14 @@
#!/bin/sh
# This hack script is the dummy busctl command used when running tests with cross containers.
# The issue is that we cannot run systemd or dbus inside the test container without a lot
# of hacks. For one specific test - test_task_addition, we need to check that the task
# addition via systemd manager works. We mount the host dbus socket in the test container, so
# dbus calls work, but for the initial authentication, we use busctl which needs dbus and systemd
# to be present and running. So instead of doing all that, we simply run the container with the
# actual test running user's uid/gid and here we echo the only relevant line from busctl's
# output, using id to get the uid. This is a hack, but less complex than actually setting up
# and running the systemd+dbus inside the container.
echo "OwnerUID=$(id -u)"

@ -56,7 +56,11 @@ if [ "$CARGO" == "cross" ]; then
# mount run to have access to dbus socket. # mount run to have access to dbus socket.
# mount /tmp so as shared for test_make_parent_mount_private # mount /tmp so as shared for test_make_parent_mount_private
export CROSS_CONTAINER_OPTS="--privileged -v/run:/run --mount=type=bind,source=/tmp,destination=/tmp,bind-propagation=shared" # Then there are few "hacks" specificallt for test_task_addition
# run with user same as the invoking user, so that the dbus is connected with correct user
# we want pid ns of host, because we will be connecting to the host dbus, and it needs task pid from host
# finally we need to mount the cgroup as read-only, as we need that to check if the tasks are correctly added
export CROSS_CONTAINER_OPTS="--privileged --user `id -u`:`id -g` --pid=host -v /sys/fs/cgroup:/sys/fs/cgroup:ro -v/run:/run --mount=type=bind,source=/tmp,destination=/tmp,bind-propagation=shared"
fi fi
if [ "$1" == "--print-target-dir" ]; then if [ "$1" == "--print-target-dir" ]; then

@ -10,9 +10,35 @@ podman rm --force --ignore create-test # remove if existing
podman create --runtime $runtime --name create-test hello-world podman create --runtime $runtime --name create-test hello-world
log=$(podman start -a create-test) log=$(podman start -a create-test)
echo $log | grep "This message shows that your installation appears to be working correctly" echo $log | grep "This message shows that your installation appears to be working correctly"
podman rm create-test podman rm --force --ignore create-test
rand=$(head -c 10 /dev/random | base64) rand=$(head -c 10 /dev/random | base64)
log=$(podman run --runtime $runtime fedora echo "$rand") log=$(podman run --runtime $runtime fedora echo "$rand")
echo $log | grep $rand echo $log | grep $rand
podman kill exec-test || true # ignore failure for killing
podman rm --force --ignore exec-test
podman run -d --runtime $runtime --name exec-test busybox sleep 10m
rand=$(head -c 10 /dev/random | base64)
log=$(podman exec --runtime $runtime exec-test echo "$rand")
echo $log | grep $rand
CGROUP_SUB_PATH=$(podman inspect exec-test | jq .[0].State.CgroupPath | tr -d "\"")
CGROUP_PATH="/sys/fs/cgroup$CGROUP_SUB_PATH/cgroup.procs"
# assert we have exactly one process in the cgroup
test $(cat $CGROUP_PATH | wc -l) -eq 1
# assert pid match
test $(cat $CGROUP_PATH) -eq $(podman inspect exec-test | jq .[0].State.Pid)
podman exec -d --runtime $runtime exec-test sleep 5m
# we cannot exactly check the pid of tenant here, instead just check that there are
# two processes in the same cgroup now
test $(cat $CGROUP_PATH | wc -l) -eq 2
podman kill exec-test
podman rm --force --ignore exec-test