mirror of
https://github.com/containers/youki
synced 2024-11-23 17:32:15 +01:00
Emulate cgroup namespaces with bind mounts
This commit is contained in:
parent
b23cfc820f
commit
96d13fb5b1
@ -5,11 +5,11 @@ use super::{
|
||||
use crate::syscall::{syscall::create_syscall, Syscall};
|
||||
use crate::utils::PathBufExt;
|
||||
use anyhow::{bail, Context, Result};
|
||||
use cgroups::common::CgroupSetup::{Hybrid, Legacy, Unified};
|
||||
use cgroups::common::{CgroupSetup::{Hybrid, Legacy, Unified}, DEFAULT_CGROUP_ROOT};
|
||||
use nix::{errno::Errno, mount::MsFlags};
|
||||
use oci_spec::runtime::{Mount as SpecMount, MountBuilder as SpecMountBuilder};
|
||||
use procfs::process::{MountOptFields, Process};
|
||||
use std::fs::{canonicalize, create_dir_all, OpenOptions};
|
||||
use std::{collections::HashMap, fs::{canonicalize, create_dir_all, OpenOptions}};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
#[derive(Debug)]
|
||||
@ -73,12 +73,12 @@ impl Mount {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn mount_cgroup_v1(&self, mount: &SpecMount, options: &MountOptions) -> Result<()> {
|
||||
fn mount_cgroup_v1(&self, cgroup_mount: &SpecMount, options: &MountOptions) -> Result<()> {
|
||||
// create tmpfs into which the cgroup subsystems will be mounted
|
||||
let tmpfs = SpecMountBuilder::default()
|
||||
.source("tmpfs")
|
||||
.typ("tmpfs")
|
||||
.destination(mount.destination())
|
||||
.destination(cgroup_mount.destination())
|
||||
.options(
|
||||
["noexec", "nosuid", "nodev", "mode=755"]
|
||||
.iter()
|
||||
@ -87,73 +87,128 @@ impl Mount {
|
||||
)
|
||||
.build()
|
||||
.context("failed to build tmpfs for cgroup")?;
|
||||
|
||||
self.setup_mount(&tmpfs, options)
|
||||
.context("failed to mount tmpfs for cgroup")?;
|
||||
|
||||
self.setup_mount(&tmpfs, options).context("failed to mount tmpfs for cgroup")?;
|
||||
|
||||
// get all cgroup mounts on the host system
|
||||
let mount_points: Vec<PathBuf> = cgroups::v1::util::list_subsystem_mount_points()
|
||||
let host_mounts: Vec<PathBuf> = cgroups::v1::util::list_subsystem_mount_points()
|
||||
.context("failed to get subsystem mount points")?
|
||||
.into_iter()
|
||||
.filter(|p| p.as_path().starts_with("/sys/fs"))
|
||||
.filter(|p| p.as_path().starts_with(DEFAULT_CGROUP_ROOT))
|
||||
.collect();
|
||||
log::debug!("{:?}", mount_points);
|
||||
|
||||
// setup cgroup mounts for container
|
||||
|
||||
let cgroup_root = options
|
||||
.root
|
||||
.join_safely(mount.destination())
|
||||
.context("could not join rootfs with cgroup destination")?;
|
||||
for mount_point in mount_points {
|
||||
if let Some(subsystem_name) = mount_point.file_name().and_then(|n| n.to_str()) {
|
||||
let cgroup_mount = SpecMountBuilder::default()
|
||||
.source("cgroup")
|
||||
.typ("cgroup")
|
||||
.destination(mount.destination().join(subsystem_name))
|
||||
.options(
|
||||
["noexec", "nosuid", "nodev"]
|
||||
.iter()
|
||||
.map(|o| o.to_string())
|
||||
.collect::<Vec<String>>(),
|
||||
)
|
||||
.build()
|
||||
.with_context(|| format!("failed to build {}", subsystem_name))?;
|
||||
.join_safely(cgroup_mount.destination())
|
||||
.context("could not join rootfs path with cgroup mount destination")?;
|
||||
|
||||
let symlink = Symlink::new();
|
||||
|
||||
// setup cgroup mounts for container
|
||||
for host_mount in &host_mounts {
|
||||
if let Some(subsystem_name) = host_mount.file_name().and_then(|n| n.to_str()) {
|
||||
if subsystem_name == "systemd" {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if options.cgroup_ns {
|
||||
self.setup_namespaced_hierarchy(&cgroup_mount, options, subsystem_name)?;
|
||||
Symlink::new().setup_comount_symlinks(&cgroup_root, subsystem_name)?;
|
||||
self.setup_namespaced_subsystem(cgroup_mount, options, subsystem_name)?;
|
||||
} else {
|
||||
log::warn!("cgroup mounts are currently only suported with cgroup namespaces")
|
||||
self.setup_emulated_subsystem(cgroup_mount, options, host_mount, subsystem_name)?;
|
||||
}
|
||||
|
||||
symlink.setup_comount_symlinks(&cgroup_root, subsystem_name)?;
|
||||
} else {
|
||||
log::warn!("could not get subsystem name from {:?}", mount_point);
|
||||
log::warn!("could not get subsystem name from {:?}", host_mount);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// On some distros cgroup subsystems are comounted e.g. cpu,cpuacct or net_cls,net_prio. These systems
|
||||
// have to be comounted in the container as well as the kernel will reject trying to mount them separately.
|
||||
fn setup_namespaced_hierarchy(
|
||||
&self,
|
||||
fn setup_namespaced_subsystem(&self,
|
||||
cgroup_mount: &SpecMount,
|
||||
options: &MountOptions,
|
||||
subsystem_name: &str,
|
||||
) -> Result<()> {
|
||||
let subsystem_mount = SpecMountBuilder::default()
|
||||
.source("cgroup")
|
||||
.typ("cgroup")
|
||||
.destination(cgroup_mount.destination().join(subsystem_name))
|
||||
.options(
|
||||
["noexec", "nosuid", "nodev"]
|
||||
.iter()
|
||||
.map(|o| o.to_string())
|
||||
.collect::<Vec<String>>(),
|
||||
)
|
||||
.build()
|
||||
.with_context(|| format!("failed to build {}", subsystem_name))?;
|
||||
|
||||
log::debug!("Mounting cgroup subsystem: {:?}", subsystem_name);
|
||||
self.mount_to_container(
|
||||
cgroup_mount,
|
||||
&subsystem_mount,
|
||||
options.root,
|
||||
MsFlags::MS_NOEXEC | MsFlags::MS_NOSUID | MsFlags::MS_NODEV,
|
||||
subsystem_name,
|
||||
options.label,
|
||||
)
|
||||
.with_context(|| format!("failed to mount {:?}", cgroup_mount))
|
||||
.with_context(|| format!("failed to mount {:?}", subsystem_mount))
|
||||
}
|
||||
|
||||
fn setup_emulated_subsystem(
|
||||
&self,
|
||||
mount: &SpecMount,
|
||||
options: &MountOptions,
|
||||
host_mount: &Path,
|
||||
subsystem_name: &str,
|
||||
) -> Result<()> {
|
||||
let process_cgroups: HashMap<String, String> = Process::myself()?
|
||||
.cgroups()
|
||||
.context("failed to get process cgroups")?
|
||||
.into_iter()
|
||||
.map(|c| (c.controllers.join(","), c.pathname))
|
||||
.collect();
|
||||
|
||||
log::debug!("{:?}", process_cgroups);
|
||||
if let Some(proc_path) = process_cgroups.get(subsystem_name) {
|
||||
let emulated = SpecMountBuilder::default()
|
||||
.source(
|
||||
host_mount
|
||||
.join_safely(proc_path.as_str())
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"failed to join mount source for {} subsystem",
|
||||
subsystem_name
|
||||
)
|
||||
})?,
|
||||
)
|
||||
.destination(
|
||||
mount
|
||||
.destination()
|
||||
.join_safely(subsystem_name)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"failed to join mount destination for {} subsystem",
|
||||
subsystem_name
|
||||
)
|
||||
})?,
|
||||
)
|
||||
.typ("bind")
|
||||
.options(
|
||||
["rw", "rbind"]
|
||||
.iter()
|
||||
.map(|o| o.to_string())
|
||||
.collect::<Vec<String>>(),
|
||||
)
|
||||
.build()?;
|
||||
log::debug!("Mounting emulated cgroup subsystem: {:?}", emulated);
|
||||
|
||||
self.setup_mount(&emulated, options)
|
||||
.with_context(|| format!("failed to mount {} cgroup hierarchy", subsystem_name))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn mount_cgroup_v2(&self, _: &SpecMount, _: &MountOptions, _: MsFlags, _: &str) -> Result<()> {
|
||||
|
13
src/utils.rs
13
src/utils.rs
@ -18,7 +18,7 @@ use std::path::{Path, PathBuf};
|
||||
pub trait PathBufExt {
|
||||
fn as_in_container(&self) -> Result<PathBuf>;
|
||||
fn join_absolute_path(&self, p: &Path) -> Result<PathBuf>;
|
||||
fn join_safely(&self, p: &Path) -> Result<PathBuf>;
|
||||
fn join_safely<P: AsRef<Path>>(&self, p: P) -> Result<PathBuf>;
|
||||
}
|
||||
|
||||
impl PathBufExt for Path {
|
||||
@ -41,14 +41,15 @@ impl PathBufExt for Path {
|
||||
Ok(PathBuf::from(format!("{}{}", self.display(), p.display())))
|
||||
}
|
||||
|
||||
fn join_safely(&self, p: &Path) -> Result<PathBuf> {
|
||||
if p.is_relative() {
|
||||
return Ok(self.join(p));
|
||||
fn join_safely<P: AsRef<Path>>(&self, path: P) -> Result<PathBuf> {
|
||||
let path = path.as_ref();
|
||||
if path.is_relative() {
|
||||
return Ok(self.join(path));
|
||||
}
|
||||
|
||||
let stripped = p
|
||||
let stripped = path
|
||||
.strip_prefix("/")
|
||||
.with_context(|| format!("failed to strip prefix from {}", p.display()))?;
|
||||
.with_context(|| format!("failed to strip prefix from {}", path.display()))?;
|
||||
Ok(self.join(stripped))
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user