1
0
mirror of https://github.com/containers/youki synced 2024-11-23 17:32:15 +01:00

Emulate cgroup namespaces with bind mounts

This commit is contained in:
Furisto 2021-10-06 23:09:02 +02:00
parent b23cfc820f
commit 96d13fb5b1
No known key found for this signature in database
GPG Key ID: 40C5F0E00523478B
2 changed files with 101 additions and 45 deletions

@ -5,11 +5,11 @@ use super::{
use crate::syscall::{syscall::create_syscall, Syscall};
use crate::utils::PathBufExt;
use anyhow::{bail, Context, Result};
use cgroups::common::CgroupSetup::{Hybrid, Legacy, Unified};
use cgroups::common::{CgroupSetup::{Hybrid, Legacy, Unified}, DEFAULT_CGROUP_ROOT};
use nix::{errno::Errno, mount::MsFlags};
use oci_spec::runtime::{Mount as SpecMount, MountBuilder as SpecMountBuilder};
use procfs::process::{MountOptFields, Process};
use std::fs::{canonicalize, create_dir_all, OpenOptions};
use std::{collections::HashMap, fs::{canonicalize, create_dir_all, OpenOptions}};
use std::path::{Path, PathBuf};
#[derive(Debug)]
@ -73,12 +73,12 @@ impl Mount {
Ok(())
}
fn mount_cgroup_v1(&self, mount: &SpecMount, options: &MountOptions) -> Result<()> {
fn mount_cgroup_v1(&self, cgroup_mount: &SpecMount, options: &MountOptions) -> Result<()> {
// create tmpfs into which the cgroup subsystems will be mounted
let tmpfs = SpecMountBuilder::default()
.source("tmpfs")
.typ("tmpfs")
.destination(mount.destination())
.destination(cgroup_mount.destination())
.options(
["noexec", "nosuid", "nodev", "mode=755"]
.iter()
@ -87,73 +87,128 @@ impl Mount {
)
.build()
.context("failed to build tmpfs for cgroup")?;
self.setup_mount(&tmpfs, options)
.context("failed to mount tmpfs for cgroup")?;
self.setup_mount(&tmpfs, options).context("failed to mount tmpfs for cgroup")?;
// get all cgroup mounts on the host system
let mount_points: Vec<PathBuf> = cgroups::v1::util::list_subsystem_mount_points()
let host_mounts: Vec<PathBuf> = cgroups::v1::util::list_subsystem_mount_points()
.context("failed to get subsystem mount points")?
.into_iter()
.filter(|p| p.as_path().starts_with("/sys/fs"))
.filter(|p| p.as_path().starts_with(DEFAULT_CGROUP_ROOT))
.collect();
log::debug!("{:?}", mount_points);
// setup cgroup mounts for container
let cgroup_root = options
.root
.join_safely(mount.destination())
.context("could not join rootfs with cgroup destination")?;
for mount_point in mount_points {
if let Some(subsystem_name) = mount_point.file_name().and_then(|n| n.to_str()) {
let cgroup_mount = SpecMountBuilder::default()
.source("cgroup")
.typ("cgroup")
.destination(mount.destination().join(subsystem_name))
.options(
["noexec", "nosuid", "nodev"]
.iter()
.map(|o| o.to_string())
.collect::<Vec<String>>(),
)
.build()
.with_context(|| format!("failed to build {}", subsystem_name))?;
.join_safely(cgroup_mount.destination())
.context("could not join rootfs path with cgroup mount destination")?;
let symlink = Symlink::new();
// setup cgroup mounts for container
for host_mount in &host_mounts {
if let Some(subsystem_name) = host_mount.file_name().and_then(|n| n.to_str()) {
if subsystem_name == "systemd" {
continue;
}
if options.cgroup_ns {
self.setup_namespaced_hierarchy(&cgroup_mount, options, subsystem_name)?;
Symlink::new().setup_comount_symlinks(&cgroup_root, subsystem_name)?;
self.setup_namespaced_subsystem(cgroup_mount, options, subsystem_name)?;
} else {
log::warn!("cgroup mounts are currently only suported with cgroup namespaces")
self.setup_emulated_subsystem(cgroup_mount, options, host_mount, subsystem_name)?;
}
symlink.setup_comount_symlinks(&cgroup_root, subsystem_name)?;
} else {
log::warn!("could not get subsystem name from {:?}", mount_point);
log::warn!("could not get subsystem name from {:?}", host_mount);
}
}
Ok(())
}
// On some distros cgroup subsystems are comounted, e.g. cpu,cpuacct or net_cls,net_prio. These subsystems
// have to be comounted in the container as well, as the kernel will reject attempts to mount them separately.
fn setup_namespaced_hierarchy(
&self,
fn setup_namespaced_subsystem(&self,
cgroup_mount: &SpecMount,
options: &MountOptions,
subsystem_name: &str,
) -> Result<()> {
let subsystem_mount = SpecMountBuilder::default()
.source("cgroup")
.typ("cgroup")
.destination(cgroup_mount.destination().join(subsystem_name))
.options(
["noexec", "nosuid", "nodev"]
.iter()
.map(|o| o.to_string())
.collect::<Vec<String>>(),
)
.build()
.with_context(|| format!("failed to build {}", subsystem_name))?;
log::debug!("Mounting cgroup subsystem: {:?}", subsystem_name);
self.mount_to_container(
cgroup_mount,
&subsystem_mount,
options.root,
MsFlags::MS_NOEXEC | MsFlags::MS_NOSUID | MsFlags::MS_NODEV,
subsystem_name,
options.label,
)
.with_context(|| format!("failed to mount {:?}", cgroup_mount))
.with_context(|| format!("failed to mount {:?}", subsystem_mount))
}
/// Emulates a cgroup v1 subsystem mount by bind mounting the part of the host
/// hierarchy that the current process belongs to into the container.
///
/// The cgroups of the current process are collected into a map keyed by the
/// comma-joined controller list (e.g. `cpu,cpuacct`), with the cgroup pathname
/// as the value. That pathname is joined onto `host_mount` to form the bind
/// mount source, and `subsystem_name` is joined onto the destination of `mount`
/// to form the bind mount destination.
///
/// # Arguments
/// * `mount` - the cgroup mount from the spec; its destination is the parent
///   directory of the subsystem mount
/// * `options` - mount options forwarded unchanged to `setup_mount`
/// * `host_mount` - mount point of the subsystem hierarchy on the host
/// * `subsystem_name` - key used to look up the cgroup of the current process
///
/// # Errors
/// Returns an error if the process information or its cgroups cannot be read,
/// if the source or destination path cannot be joined, if the mount cannot be
/// built, or if `setup_mount` fails. If the current process has no entry for
/// `subsystem_name`, the subsystem is skipped with a warning and `Ok(())` is
/// returned.
fn setup_emulated_subsystem(
    &self,
    mount: &SpecMount,
    options: &MountOptions,
    host_mount: &Path,
    subsystem_name: &str,
) -> Result<()> {
    // Key: comma-joined controllers of one hierarchy, value: cgroup pathname.
    let process_cgroups: HashMap<String, String> = Process::myself()
        .context("failed to get information about the current process")?
        .cgroups()
        .context("failed to get process cgroups")?
        .into_iter()
        .map(|c| (c.controllers.join(","), c.pathname))
        .collect();
    log::debug!("{:?}", process_cgroups);

    let proc_path = match process_cgroups.get(subsystem_name) {
        Some(path) => path,
        None => {
            // Best effort: a missing entry is not fatal, but it should be visible
            // because the subsystem will be absent inside the container.
            log::warn!(
                "could not mount {:?} cgroup subsystem: current process has no cgroup entry for it",
                subsystem_name
            );
            return Ok(());
        }
    };

    let source = host_mount
        .join_safely(proc_path.as_str())
        .with_context(|| {
            format!(
                "failed to join mount source for {} subsystem",
                subsystem_name
            )
        })?;

    let destination = mount
        .destination()
        .join_safely(subsystem_name)
        .with_context(|| {
            format!(
                "failed to join mount destination for {} subsystem",
                subsystem_name
            )
        })?;

    let emulated = SpecMountBuilder::default()
        .source(source)
        .destination(destination)
        .typ("bind")
        .options(
            ["rw", "rbind"]
                .iter()
                .map(|o| o.to_string())
                .collect::<Vec<String>>(),
        )
        .build()
        .with_context(|| {
            format!(
                "failed to build emulated mount for {} subsystem",
                subsystem_name
            )
        })?;

    log::debug!("Mounting emulated cgroup subsystem: {:?}", emulated);
    self.setup_mount(&emulated, options)
        .with_context(|| format!("failed to mount {} cgroup hierarchy", subsystem_name))?;

    Ok(())
}
fn mount_cgroup_v2(&self, _: &SpecMount, _: &MountOptions, _: MsFlags, _: &str) -> Result<()> {

@ -18,7 +18,7 @@ use std::path::{Path, PathBuf};
pub trait PathBufExt {
fn as_in_container(&self) -> Result<PathBuf>;
fn join_absolute_path(&self, p: &Path) -> Result<PathBuf>;
fn join_safely(&self, p: &Path) -> Result<PathBuf>;
fn join_safely<P: AsRef<Path>>(&self, p: P) -> Result<PathBuf>;
}
impl PathBufExt for Path {
@ -41,14 +41,15 @@ impl PathBufExt for Path {
Ok(PathBuf::from(format!("{}{}", self.display(), p.display())))
}
fn join_safely(&self, p: &Path) -> Result<PathBuf> {
if p.is_relative() {
return Ok(self.join(p));
fn join_safely<P: AsRef<Path>>(&self, path: P) -> Result<PathBuf> {
let path = path.as_ref();
if path.is_relative() {
return Ok(self.join(path));
}
let stripped = p
let stripped = path
.strip_prefix("/")
.with_context(|| format!("failed to strip prefix from {}", p.display()))?;
.with_context(|| format!("failed to strip prefix from {}", path.display()))?;
Ok(self.join(stripped))
}
}