mirror of https://github.com/containers/youki synced 2024-11-23 09:21:57 +01:00

Merge pull request #513 from Furisto/systemd-part4

Improvements to cgroup support
Travis Sturzl authored 2021-12-04 20:15:50 -07:00, committed by GitHub
commit eb083a162a
Signed by: GitHub (GPG Key ID: 4AEE18F83AFDEB23)
11 changed files with 144 additions and 107 deletions

@@ -5,6 +5,10 @@ edition = "2021"
autoexamples = false
[features]
default = ["v1", "v2", "systemd"]
v1 = []
v2 = []
systemd = ["v2", "dbus"]
cgroupsv2_devices = ["rbpf", "libbpf-sys", "errno", "libc"]
[dependencies]
@@ -13,7 +17,7 @@ procfs = "0.11.1"
log = "0.4"
anyhow = "1.0"
oci-spec = { git = "https://github.com/containers/oci-spec-rs", rev = "54c5e386f01ab37c9305cc4a83404eb157e42440" }
dbus = "0.9.5"
dbus = { version = "0.9.5", optional = true }
fixedbitset = "0.4.0"
serde = { version = "1.0", features = ["derive"] }
rbpf = {version = "0.1.0", optional = true }
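With `dbus` now optional and pulled in only through the `systemd` feature, anything that touches the D-Bus API has to be compiled conditionally. A minimal sketch of that pattern, assuming the dbus crate's blocking system-bus constructor; `systemd_available` is a hypothetical helper, not youki code:

```rust
#[cfg(feature = "systemd")]
fn systemd_available() -> bool {
    // Only compiled when the `systemd` feature (and therefore `dbus`) is enabled.
    dbus::blocking::Connection::new_system().is_ok()
}

#[cfg(not(feature = "systemd"))]
fn systemd_available() -> bool {
    // A build with `--no-default-features --features v1,v2` never links dbus.
    false
}
```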

@@ -16,8 +16,11 @@ use oci_spec::runtime::{
LinuxResources,
};
#[cfg(feature = "systemd")]
use super::systemd;
#[cfg(feature = "v1")]
use super::v1;
#[cfg(feature = "v2")]
use super::v2;
use super::stats::Stats;
@@ -173,40 +176,78 @@ pub fn create_cgroup_manager<P: Into<PathBuf>>(
container_name: &str,
) -> Result<Box<dyn CgroupManager>> {
let cgroup_setup = get_cgroup_setup()?;
let cgroup_path = cgroup_path.into();
match cgroup_setup {
CgroupSetup::Legacy | CgroupSetup::Hybrid => {
log::info!("cgroup manager V1 will be used");
Ok(Box::new(v1::manager::Manager::new(cgroup_path.into())?))
}
CgroupSetup::Legacy | CgroupSetup::Hybrid => create_v1_cgroup_manager(cgroup_path),
CgroupSetup::Unified => {
if systemd_cgroup {
if !systemd::booted() {
bail!("systemd cgroup flag passed, but systemd support for managing cgroups is not available");
}
let use_system = nix::unistd::geteuid().is_root();
log::info!(
"systemd cgroup manager with system bus {} will be used",
use_system
);
return Ok(Box::new(systemd::manager::Manager::new(
DEFAULT_CGROUP_ROOT.into(),
cgroup_path.into(),
container_name.into(),
use_system,
)?));
return create_systemd_cgroup_manager(cgroup_path, container_name);
}
log::info!("cgroup manager V2 will be used");
Ok(Box::new(v2::manager::Manager::new(
DEFAULT_CGROUP_ROOT.into(),
cgroup_path.into(),
)?))
create_v2_cgroup_manager(cgroup_path)
}
}
}
#[cfg(feature = "v1")]
fn create_v1_cgroup_manager(cgroup_path: PathBuf) -> Result<Box<dyn CgroupManager>> {
log::info!("cgroup manager V1 will be used");
Ok(Box::new(v1::manager::Manager::new(cgroup_path)?))
}
#[cfg(not(feature = "v1"))]
fn create_v1_cgroup_manager(_cgroup_path: PathBuf) -> Result<Box<dyn CgroupManager>> {
bail!("cgroup v1 feature is required, but was not enabled during compile time");
}
#[cfg(feature = "v2")]
fn create_v2_cgroup_manager(cgroup_path: PathBuf) -> Result<Box<dyn CgroupManager>> {
log::info!("cgroup manager V2 will be used");
Ok(Box::new(v2::manager::Manager::new(
DEFAULT_CGROUP_ROOT.into(),
cgroup_path,
)?))
}
#[cfg(not(feature = "v2"))]
fn create_v2_cgroup_manager(_cgroup_path: PathBuf) -> Result<Box<dyn CgroupManager>> {
bail!("cgroup v2 feature is required, but was not enabled during compile time");
}
#[cfg(feature = "systemd")]
fn create_systemd_cgroup_manager(
cgroup_path: PathBuf,
container_name: &str,
) -> Result<Box<dyn CgroupManager>> {
if !systemd::booted() {
bail!(
"systemd cgroup flag passed, but systemd support for managing cgroups is not available"
);
}
let use_system = nix::unistd::geteuid().is_root();
log::info!(
"systemd cgroup manager with system bus {} will be used",
use_system
);
Ok(Box::new(systemd::manager::Manager::new(
DEFAULT_CGROUP_ROOT.into(),
cgroup_path,
container_name.into(),
use_system,
)?))
}
#[cfg(not(feature = "systemd"))]
fn create_systemd_cgroup_manager(
_cgroup_path: PathBuf,
_container_name: &str,
) -> Result<Box<dyn CgroupManager>> {
bail!("systemd cgroup feature is required, but was not enabled during compile time");
}
pub fn get_all_pids(path: &Path) -> Result<Vec<Pid>> {
log::debug!("scan pids in folder: {:?}", path);
let mut result = vec![];
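For context, here is a hedged usage sketch of the dispatcher above; the path and container name are invented, and which concrete manager comes back depends on the host's cgroup setup plus the features compiled in:

```rust
use anyhow::Result;
use libcgroups::common::{create_cgroup_manager, CgroupManager};

// Sketch: obtain whichever manager (v1, v2 or systemd) fits the current host.
fn example_manager(use_systemd: bool) -> Result<Box<dyn CgroupManager>> {
    create_cgroup_manager(
        "/youki/example", // cgroups path taken from the spec (hypothetical value)
        use_systemd,      // true when the user asked for --systemd-cgroup
        "example",        // container name (hypothetical value)
    )
}
```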

@@ -8,7 +8,10 @@ mod test;
pub mod common;
pub mod stats;
#[cfg(feature = "systemd")]
pub mod systemd;
pub mod test_manager;
#[cfg(feature = "v1")]
pub mod v1;
#[cfg(feature = "v2")]
pub mod v2;

@@ -5,7 +5,7 @@ pub enum ControllerType {
CpuSet,
Io,
Memory,
Tasks,
Pids,
}
impl Display for ControllerType {
@@ -15,7 +15,7 @@ impl Display for ControllerType {
ControllerType::CpuSet => "cpuset",
ControllerType::Io => "io",
ControllerType::Memory => "memory",
ControllerType::Tasks => "tasks",
ControllerType::Pids => "pids",
};
write!(f, "{}", print)
@@ -29,7 +29,7 @@ impl AsRef<str> for ControllerType {
ControllerType::CpuSet => "cpuset",
ControllerType::Io => "io",
ControllerType::Memory => "memory",
ControllerType::Tasks => "tasks",
ControllerType::Pids => "pids",
}
}
}
@@ -39,5 +39,5 @@ pub const CONTROLLER_TYPES: &[ControllerType] = &[
ControllerType::CpuSet,
ControllerType::Io,
ControllerType::Memory,
ControllerType::Tasks,
ControllerType::Pids,
];
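The rename from `Tasks` to `Pids` matches the controller's real name in the cgroup v2 interface. A hypothetical sanity test for the two impls shown above:

```rust
#[test]
fn pids_controller_is_named_pids() {
    // Both conversions should now report "pids" rather than the old "tasks".
    assert_eq!(ControllerType::Pids.to_string(), "pids");
    let name: &str = ControllerType::Pids.as_ref();
    assert_eq!(name, "pids");
}
```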

@@ -1,10 +1,7 @@
#![allow(dead_code)]
#![allow(unused_variables)]
use std::{
collections::HashMap,
fmt::{Debug, Display},
fs::{self},
os::unix::fs::PermissionsExt,
path::Component::RootDir,
};
@@ -22,13 +19,12 @@ use super::{
memory::Memory,
pids::Pids,
};
use crate::stats::Stats;
use crate::{
common::{self, CgroupManager, ControllerOpt, FreezerState, PathBufExt},
systemd::unified::Unified,
};
use crate::{stats::Stats, v2::manager::Manager as FsManager};
const CGROUP_PROCS: &str = "cgroup.procs";
const CGROUP_CONTROLLERS: &str = "cgroup.controllers";
const CGROUP_SUBTREE_CONTROL: &str = "cgroup.subtree_control";
@@ -48,6 +44,10 @@ pub struct Manager {
unit_name: String,
/// Client for communicating with systemd
client: Client,
/// Cgroup manager for the created transient unit
fs_manager: FsManager,
/// Last control group which is managed by systemd, e.g. /user.slice/user-1000/user@1000.service
delegation_boundary: PathBuf,
}
/// Represents the systemd cgroups path:
@@ -134,9 +134,11 @@ impl Manager {
false => Client::new_session().context("failed to create session dbus client")?,
};
let (cgroups_path, parent) = Self::construct_cgroups_path(&destructured_path, &client)
.context("failed to construct cgroups path")?;
let (cgroups_path, delegation_boundary) =
Self::construct_cgroups_path(&destructured_path, &client)
.context("failed to construct cgroups path")?;
let full_path = root_path.join_safely(&cgroups_path)?;
let fs_manager = FsManager::new(root_path.clone(), cgroups_path.clone())?;
Ok(Manager {
root_path,
@ -146,6 +148,8 @@ impl Manager {
unit_name: Self::get_unit_name(&destructured_path),
destructured_path,
client,
fs_manager,
delegation_boundary,
})
}
@@ -160,7 +164,8 @@ impl Manager {
}
// get_cgroups_path generates a cgroups path from the one provided by the user via cgroupsPath.
// an example of the final path: "/system.slice/docker-foo.scope"
// an example of the final path: "/system.slice/youki-569d5ce3afe1074769f67.scope" or if we are
// not running as root /user.slice/user-1000/user@1000.service/youki-569d5ce3afe1074769f67.scope
fn construct_cgroups_path(
cgroups_path: &CgroupsPath,
client: &dyn SystemdClient,
@@ -178,12 +183,13 @@
let systemd_root = client.control_cgroup_root()?;
let unit_name = Self::get_unit_name(cgroups_path);
let cgroups_path = systemd_root
.join_safely(&parent)
.with_context(|| format!("failed to join {:?} with {:?}", systemd_root, parent))?
.join_safely(&unit_name)
.with_context(|| format!("failed to join {:?} with {:?}", parent, unit_name))?;
Ok((cgroups_path, parent))
Ok((cgroups_path, systemd_root))
}
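To make the new return value concrete, a worked illustration of the `(cgroups_path, delegation_boundary)` pair for the rootless case from the doc comment above (the exact slice layout depends on the host's systemd configuration):

```rust
// Illustrative values only, for a rootless session on the user bus:
//   client.control_cgroup_root() -> /user.slice/user-1000/user@1000.service
//   parent (expanded slice)      -> ""   (no explicit slice in cgroupsPath)
//   unit_name                    -> youki-569d5ce3afe1074769f67.scope
//
// cgroups_path        = /user.slice/user-1000/user@1000.service/youki-569d5ce3afe1074769f67.scope
// delegation_boundary = /user.slice/user-1000/user@1000.service   (the systemd root itself)
```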
// systemd represents slice hierarchy using `-`, so we need to follow suit when
@@ -215,22 +221,23 @@ impl Manager {
Ok(Path::new(&path).to_path_buf())
}
/// create_unified_cgroup verifies sure that *each level* in the downward path from the root cgroup
/// down to the cgroup_path provided by the user is a valid cgroup hierarchy,
/// containing the attached controllers and that it contains the container pid.
fn create_unified_cgroup(&self, pid: Pid) -> Result<()> {
/// ensures that each level in the downward path from the delegation boundary down to
/// the scope or slice of the transient unit has all available controllers enabled
fn ensure_controllers_attached(&self) -> Result<()> {
let full_boundary_path = self.root_path.join_safely(&self.delegation_boundary)?;
let controllers: Vec<String> = self
.get_available_controllers(&self.root_path)?
.get_available_controllers(&full_boundary_path)?
.into_iter()
.map(|c| format!("{}{}", "+", c.to_string()))
.collect();
// Write the controllers to the root_path.
Self::write_controllers(&self.root_path, &controllers)?;
Self::write_controllers(&full_boundary_path, &controllers)?;
let mut current_path = self.root_path.clone();
let mut current_path = full_boundary_path;
let mut components = self
.cgroups_path
.strip_prefix(&self.delegation_boundary)?
.components()
.filter(|c| c.ne(&RootDir))
.peekable();
@@ -240,8 +247,11 @@ impl Manager {
while let Some(component) = components.next() {
current_path = current_path.join(component);
if !current_path.exists() {
fs::create_dir(&current_path)?;
fs::metadata(&current_path)?.permissions().set_mode(0o755);
log::warn!(
"{:?} does not exist. Resource restrictions might not work correctly",
current_path
);
return Ok(());
}
// last component cannot have subtree_control enabled due to internal process constraint
@@ -251,7 +261,7 @@ impl Manager {
}
}
common::write_cgroup_file(self.full_path.join(CGROUP_PROCS), pid)
Ok(())
}
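Continuing that rootless example, a sketch of what `ensure_controllers_attached` ends up doing on disk (controller names come from `get_available_controllers` below; the paths are illustrative):

```rust
// full_boundary_path = /sys/fs/cgroup/user.slice/user-1000/user@1000.service
//
// 1. read  .../user@1000.service/cgroup.controllers      (e.g. "cpu io memory pids")
// 2. write "+cpu", "+io", "+memory", "+pids" into .../user@1000.service/cgroup.subtree_control
// 3. repeat the write for every directory between the boundary and the transient
//    unit, skipping the final .scope (internal process constraint) and merely
//    logging a warning and stopping early if a directory does not exist yet.
```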
fn get_available_controllers<P: AsRef<Path>>(
@@ -270,9 +280,8 @@ impl Manager {
for controller in fs::read_to_string(&controllers_path)?.split_whitespace() {
match controller {
"cpu" => controllers.push(ControllerType::Cpu),
"io" => controllers.push(ControllerType::Io),
"memory" => controllers.push(ControllerType::Memory),
"pids" => controllers.push(ControllerType::Tasks),
"pids" => controllers.push(ControllerType::Pids),
_ => continue,
}
}
@@ -331,7 +340,7 @@ impl CgroupManager for Manager {
CpuSet::apply(controller_opt, systemd_version, &mut properties)?
}
ControllerType::Tasks => {
ControllerType::Pids => {
Pids::apply(controller_opt, systemd_version, &mut properties)?
}
ControllerType::Memory => {
@@ -344,9 +353,14 @@ impl CgroupManager for Manager {
Unified::apply(controller_opt, systemd_version, &mut properties)?;
log::debug!("{:?}", properties);
self.client
.set_unit_properties(&self.unit_name, &properties)
.context("could not apply resource restrictions")?;
if !properties.is_empty() {
self.ensure_controllers_attached()
.context("failed to attach controllers")?;
self.client
.set_unit_properties(&self.unit_name, &properties)
.context("could not apply resource restrictions")?;
}
Ok(())
}
@@ -363,11 +377,11 @@ impl CgroupManager for Manager {
}
fn freeze(&self, state: FreezerState) -> Result<()> {
todo!();
self.fs_manager.freeze(state)
}
fn stats(&self) -> Result<Stats> {
Ok(Stats::default())
self.fs_manager.stats()
}
fn get_all_pids(&self) -> Result<Vec<Pid>> {
@@ -390,22 +404,22 @@ mod tests {
fn start_transient_unit(
&self,
container_name: &str,
pid: u32,
parent: &str,
unit_name: &str,
_container_name: &str,
_pid: u32,
_parent: &str,
_unit_name: &str,
) -> Result<()> {
Ok(())
}
fn stop_transient_unit(&self, unit_name: &str) -> Result<()> {
fn stop_transient_unit(&self, _unit_name: &str) -> Result<()> {
Ok(())
}
fn set_unit_properties(
&self,
unit_name: &str,
properties: &HashMap<&str, Box<dyn RefArg>>,
_unit_name: &str,
_properties: &HashMap<&str, Box<dyn RefArg>>,
) -> Result<()> {
Ok(())
}
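Stepping back from the test mock: because the manager now embeds a cgroup v2 fs manager for the transient unit, `freeze` and `stats` do real work through the `CgroupManager` trait instead of hitting a `todo!()` or returning an empty default. A hedged usage sketch (the helper is hypothetical):

```rust
use anyhow::Result;
use libcgroups::common::{CgroupManager, FreezerState};

// Sketch: pause a container, read its stats, then thaw it again.
fn freeze_and_inspect(manager: &dyn CgroupManager) -> Result<()> {
    manager.freeze(FreezerState::Frozen)?; // forwarded to the embedded v2 fs manager
    let stats = manager.stats()?;          // real values instead of Stats::default()
    log::debug!("stats while frozen: {:?}", stats);
    manager.freeze(FreezerState::Thawed)?;
    Ok(())
}
```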

@@ -42,8 +42,11 @@ impl<'a> YoukiConfig {
}
pub fn load<P: AsRef<Path>>(path: P) -> Result<Self> {
let file = fs::File::open(path.as_ref().join(YOUKI_CONFIG_NAME))?;
Ok(serde_json::from_reader(&file)?)
let path = path.as_ref();
let file = fs::File::open(path.join(YOUKI_CONFIG_NAME))?;
let config = serde_json::from_reader(&file)
.with_context(|| format!("failed to load config from {:?}", path))?;
Ok(config)
}
}
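A short, hypothetical call site for the reworked loader; the added context means a missing or corrupt file now reports which directory was being read:

```rust
use anyhow::{Context, Result};
use std::path::Path;

use crate::config::YoukiConfig;

// Sketch: read back the config that `youki create` stored in the container root.
fn read_config(container_root: &Path) -> Result<YoukiConfig> {
    YoukiConfig::load(container_root)
        .with_context(|| format!("no usable youki config under {:?}", container_root))
}
```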

@@ -8,9 +8,9 @@ use chrono::DateTime;
use nix::unistd::Pid;
use chrono::Utc;
use oci_spec::runtime::Spec;
use procfs::process::Process;
use crate::config::YoukiConfig;
use crate::syscall::syscall::create_syscall;
use crate::container::{ContainerStatus, State};
@@ -192,8 +192,8 @@ impl Container {
self.state.save(&self.root)
}
pub fn spec(&self) -> Result<Spec> {
let spec = Spec::load(self.root.join("config.json"))?;
pub fn spec(&self) -> Result<YoukiConfig> {
let spec = YoukiConfig::load(&self.root)?;
Ok(spec)
}
}
@@ -202,6 +202,7 @@ impl Container {
mod tests {
use super::*;
use crate::utils::create_temp_dir;
use anyhow::Context;
use serial_test::serial;
#[test]
@@ -300,13 +301,14 @@ mod tests {
let tmp_dir = create_temp_dir("test_get_spec")?;
use oci_spec::runtime::Spec;
let spec = Spec::default();
spec.save(tmp_dir.path().join("config.json"))?;
let config = YoukiConfig::from_spec(&spec, "123").context("convert spec to config")?;
config.save(tmp_dir.path()).context("save config")?;
let container = Container {
root: tmp_dir.path().to_path_buf(),
..Default::default()
};
container.spec()?;
container.spec().context("get config")?;
Ok(())
}
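Returning `YoukiConfig` from `spec()` is what lets the lifecycle commands below drop their `get_cgroup_path` plumbing: the processed cgroups path is stored at create time and read back directly. A sketch of the new pattern (the helper is invented, and `cgroup_path` is assumed to be a `PathBuf` field):

```rust
use anyhow::Result;
use std::path::PathBuf;

// Sketch: the cgroups path now comes straight from the saved YoukiConfig.
fn cgroup_path_of(container: &Container) -> Result<PathBuf> {
    let config = container.spec()?; // YoukiConfig, no longer the full OCI Spec
    Ok(config.cgroup_path)
}
```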

@@ -1,7 +1,5 @@
use std::{thread, time::Duration};
use crate::utils;
use super::{Container, ContainerStatus};
use anyhow::{bail, Context, Result};
@@ -30,17 +28,10 @@ impl Container {
bail!("{} is not in running state", self.id());
}
let cgroups_path = utils::get_cgroup_path(
self.spec()?
.linux()
.as_ref()
.context("no linux in spec")?
.cgroups_path(),
self.id(),
);
let cgroups_path = self.spec()?.cgroup_path;
let use_systemd = self
.systemd()
.context("Could not determine cgroup manager")?;
.context("could not determine cgroup manager")?;
let cgroup_manager =
libcgroups::common::create_cgroup_manager(cgroups_path, use_systemd, self.id())?;

@@ -1,5 +1,3 @@
use crate::utils;
use super::{Container, ContainerStatus};
use anyhow::{bail, Context, Result};
use libcgroups::common::FreezerState;
@@ -34,15 +32,7 @@ impl Container {
);
}
let spec = self.spec()?;
let cgroups_path = utils::get_cgroup_path(
spec.linux()
.as_ref()
.context("no linux in spec")?
.cgroups_path(),
self.id(),
);
let cgroups_path = self.spec()?.cgroup_path;
let use_systemd = self
.systemd()
.context("container state does not contain cgroup manager")?;

@@ -1,5 +1,3 @@
use crate::utils;
use super::{Container, ContainerStatus};
use anyhow::{bail, Context, Result};
@@ -36,16 +34,7 @@ impl Container {
);
}
let spec = self.spec()?;
let cgroups_path = utils::get_cgroup_path(
spec.linux()
.as_ref()
.context("no linux in spec")?
.cgroups_path(),
self.id(),
);
// create cgroup manager structure from the config at the path
let cgroups_path = self.spec()?.cgroup_path;
let use_systemd = self
.systemd()
.context("container state does not contain cgroup manager")?;

@@ -25,7 +25,7 @@ fn load_container<P: AsRef<Path>>(root_path: P, container_id: &str) -> Result<Co
// the state of the container is stored in a directory named after the container id
let container_root = root_path.join(container_id);
if !container_root.exists() {
bail!("{} does not exist.", container_id)
bail!("container {} does not exist.", container_id)
}
Container::load(container_root)