mirror of
https://github.com/containers/youki
synced 2024-11-23 09:21:57 +01:00
Merge pull request #513 from Furisto/systemd-part4
Improvements to cgroup support
This commit is contained in:
commit
eb083a162a
@ -5,6 +5,10 @@ edition = "2021"
|
||||
autoexamples = false
|
||||
|
||||
[features]
|
||||
default = ["v1", "v2", "systemd"]
|
||||
v1 = []
|
||||
v2 = []
|
||||
systemd = ["v2", "dbus"]
|
||||
cgroupsv2_devices = ["rbpf", "libbpf-sys", "errno", "libc"]
|
||||
|
||||
[dependencies]
|
||||
@ -13,7 +17,7 @@ procfs = "0.11.1"
|
||||
log = "0.4"
|
||||
anyhow = "1.0"
|
||||
oci-spec = { git = "https://github.com/containers/oci-spec-rs", rev = "54c5e386f01ab37c9305cc4a83404eb157e42440" }
|
||||
dbus = "0.9.5"
|
||||
dbus = { version = "0.9.5", optional = true }
|
||||
fixedbitset = "0.4.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
rbpf = {version = "0.1.0", optional = true }
|
||||
|
@ -16,8 +16,11 @@ use oci_spec::runtime::{
|
||||
LinuxResources,
|
||||
};
|
||||
|
||||
#[cfg(feature = "systemd")]
|
||||
use super::systemd;
|
||||
#[cfg(feature = "v1")]
|
||||
use super::v1;
|
||||
#[cfg(feature = "v2")]
|
||||
use super::v2;
|
||||
|
||||
use super::stats::Stats;
|
||||
@ -173,40 +176,78 @@ pub fn create_cgroup_manager<P: Into<PathBuf>>(
|
||||
container_name: &str,
|
||||
) -> Result<Box<dyn CgroupManager>> {
|
||||
let cgroup_setup = get_cgroup_setup()?;
|
||||
let cgroup_path = cgroup_path.into();
|
||||
|
||||
match cgroup_setup {
|
||||
CgroupSetup::Legacy | CgroupSetup::Hybrid => {
|
||||
log::info!("cgroup manager V1 will be used");
|
||||
Ok(Box::new(v1::manager::Manager::new(cgroup_path.into())?))
|
||||
}
|
||||
CgroupSetup::Legacy | CgroupSetup::Hybrid => create_v1_cgroup_manager(cgroup_path),
|
||||
CgroupSetup::Unified => {
|
||||
if systemd_cgroup {
|
||||
if !systemd::booted() {
|
||||
bail!("systemd cgroup flag passed, but systemd support for managing cgroups is not available");
|
||||
}
|
||||
|
||||
let use_system = nix::unistd::geteuid().is_root();
|
||||
|
||||
log::info!(
|
||||
"systemd cgroup manager with system bus {} will be used",
|
||||
use_system
|
||||
);
|
||||
return Ok(Box::new(systemd::manager::Manager::new(
|
||||
DEFAULT_CGROUP_ROOT.into(),
|
||||
cgroup_path.into(),
|
||||
container_name.into(),
|
||||
use_system,
|
||||
)?));
|
||||
return create_systemd_cgroup_manager(cgroup_path, container_name);
|
||||
}
|
||||
log::info!("cgroup manager V2 will be used");
|
||||
Ok(Box::new(v2::manager::Manager::new(
|
||||
DEFAULT_CGROUP_ROOT.into(),
|
||||
cgroup_path.into(),
|
||||
)?))
|
||||
|
||||
create_v2_cgroup_manager(cgroup_path)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates the cgroup v1 manager for the given cgroup path.
///
/// Logs that the V1 manager was selected, then constructs
/// `v1::manager::Manager` and returns it boxed as a `CgroupManager`.
///
/// # Errors
///
/// Propagates any error returned by `v1::manager::Manager::new`.
#[cfg(feature = "v1")]
fn create_v1_cgroup_manager(cgroup_path: PathBuf) -> Result<Box<dyn CgroupManager>> {
    // Announce the selection before construction, so the message is emitted
    // even when building the manager fails.
    log::info!("cgroup manager V1 will be used");

    let manager = v1::manager::Manager::new(cgroup_path)?;
    Ok(Box::new(manager))
}
|
||||
|
||||
#[cfg(not(feature = "v1"))]
|
||||
fn create_v1_cgroup_manager(_cgroup_path: PathBuf) -> Result<Box<dyn CgroupManager>> {
|
||||
bail!("cgroup v1 feature is required, but was not enabled during compile time");
|
||||
}
|
||||
|
||||
/// Creates the cgroup v2 manager for the given cgroup path.
///
/// Logs that the V2 manager was selected, then constructs
/// `v2::manager::Manager` from `DEFAULT_CGROUP_ROOT` and `cgroup_path` and
/// returns it boxed as a `CgroupManager`.
///
/// # Errors
///
/// Propagates any error returned by `v2::manager::Manager::new`.
#[cfg(feature = "v2")]
fn create_v2_cgroup_manager(cgroup_path: PathBuf) -> Result<Box<dyn CgroupManager>> {
    // Announce the selection before construction, so the message is emitted
    // even when building the manager fails.
    log::info!("cgroup manager V2 will be used");

    let manager = v2::manager::Manager::new(DEFAULT_CGROUP_ROOT.into(), cgroup_path)?;
    Ok(Box::new(manager))
}
|
||||
|
||||
#[cfg(not(feature = "v2"))]
|
||||
fn create_v2_cgroup_manager(_cgroup_path: PathBuf) -> Result<Box<dyn CgroupManager>> {
|
||||
bail!("cgroup v2 feature is required, but was not enabled during compile time");
|
||||
}
|
||||
|
||||
/// Creates the systemd-backed cgroup manager for a container.
///
/// The steps are, in order:
/// 1. refuse to continue unless `systemd::booted()` reports true,
/// 2. determine whether the effective user is root,
/// 3. log that value, and
/// 4. construct `systemd::manager::Manager` from `DEFAULT_CGROUP_ROOT`, the
///    cgroup path, the container name and the root flag.
///
/// # Errors
///
/// Returns an error when systemd is not booted, and propagates any error
/// returned by `systemd::manager::Manager::new`.
#[cfg(feature = "systemd")]
fn create_systemd_cgroup_manager(
    cgroup_path: PathBuf,
    container_name: &str,
) -> Result<Box<dyn CgroupManager>> {
    let systemd_available = systemd::booted();
    if !systemd_available {
        bail!(
            "systemd cgroup flag passed, but systemd support for managing cgroups is not available"
        );
    }

    // The flag is true only for an effective uid of root; going by the log
    // message it selects the system bus (presumably the session bus otherwise).
    let running_as_root = nix::unistd::geteuid().is_root();
    log::info!(
        "systemd cgroup manager with system bus {} will be used",
        running_as_root
    );

    let manager = systemd::manager::Manager::new(
        DEFAULT_CGROUP_ROOT.into(),
        cgroup_path,
        container_name.into(),
        running_as_root,
    )?;
    Ok(Box::new(manager))
}
|
||||
|
||||
#[cfg(not(feature = "systemd"))]
|
||||
fn create_systemd_cgroup_manager(
|
||||
_cgroup_path: PathBuf,
|
||||
_container_name: &str,
|
||||
) -> Result<Box<dyn CgroupManager>> {
|
||||
bail!("systemd cgroup feature is required, but was not enabled during compile time");
|
||||
}
|
||||
|
||||
pub fn get_all_pids(path: &Path) -> Result<Vec<Pid>> {
|
||||
log::debug!("scan pids in folder: {:?}", path);
|
||||
let mut result = vec![];
|
||||
|
@ -8,7 +8,10 @@ mod test;
|
||||
|
||||
pub mod common;
|
||||
pub mod stats;
|
||||
#[cfg(feature = "systemd")]
|
||||
pub mod systemd;
|
||||
pub mod test_manager;
|
||||
#[cfg(feature = "v1")]
|
||||
pub mod v1;
|
||||
#[cfg(feature = "v2")]
|
||||
pub mod v2;
|
||||
|
@ -5,7 +5,7 @@ pub enum ControllerType {
|
||||
CpuSet,
|
||||
Io,
|
||||
Memory,
|
||||
Tasks,
|
||||
Pids,
|
||||
}
|
||||
|
||||
impl Display for ControllerType {
|
||||
@ -15,7 +15,7 @@ impl Display for ControllerType {
|
||||
ControllerType::CpuSet => "cpuset",
|
||||
ControllerType::Io => "io",
|
||||
ControllerType::Memory => "memory",
|
||||
ControllerType::Tasks => "tasks",
|
||||
ControllerType::Pids => "pids",
|
||||
};
|
||||
|
||||
write!(f, "{}", print)
|
||||
@ -29,7 +29,7 @@ impl AsRef<str> for ControllerType {
|
||||
ControllerType::CpuSet => "cpuset",
|
||||
ControllerType::Io => "io",
|
||||
ControllerType::Memory => "memory",
|
||||
ControllerType::Tasks => "tasks",
|
||||
ControllerType::Pids => "pids",
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -39,5 +39,5 @@ pub const CONTROLLER_TYPES: &[ControllerType] = &[
|
||||
ControllerType::CpuSet,
|
||||
ControllerType::Io,
|
||||
ControllerType::Memory,
|
||||
ControllerType::Tasks,
|
||||
ControllerType::Pids,
|
||||
];
|
||||
|
@ -1,10 +1,7 @@
|
||||
#![allow(dead_code)]
|
||||
#![allow(unused_variables)]
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
fmt::{Debug, Display},
|
||||
fs::{self},
|
||||
os::unix::fs::PermissionsExt,
|
||||
path::Component::RootDir,
|
||||
};
|
||||
|
||||
@ -22,13 +19,12 @@ use super::{
|
||||
memory::Memory,
|
||||
pids::Pids,
|
||||
};
|
||||
use crate::stats::Stats;
|
||||
use crate::{
|
||||
common::{self, CgroupManager, ControllerOpt, FreezerState, PathBufExt},
|
||||
systemd::unified::Unified,
|
||||
};
|
||||
use crate::{stats::Stats, v2::manager::Manager as FsManager};
|
||||
|
||||
const CGROUP_PROCS: &str = "cgroup.procs";
|
||||
const CGROUP_CONTROLLERS: &str = "cgroup.controllers";
|
||||
const CGROUP_SUBTREE_CONTROL: &str = "cgroup.subtree_control";
|
||||
|
||||
@ -48,6 +44,10 @@ pub struct Manager {
|
||||
unit_name: String,
|
||||
/// Client for communicating with systemd
|
||||
client: Client,
|
||||
/// Cgroup manager for the created transient unit
|
||||
fs_manager: FsManager,
|
||||
/// Last control group which is managed by systemd, e.g. /user.slice/user-1000/user@1000.service
|
||||
delegation_boundary: PathBuf,
|
||||
}
|
||||
|
||||
/// Represents the systemd cgroups path:
|
||||
@ -134,9 +134,11 @@ impl Manager {
|
||||
false => Client::new_session().context("failed to create session dbus client")?,
|
||||
};
|
||||
|
||||
let (cgroups_path, parent) = Self::construct_cgroups_path(&destructured_path, &client)
|
||||
.context("failed to construct cgroups path")?;
|
||||
let (cgroups_path, delegation_boundary) =
|
||||
Self::construct_cgroups_path(&destructured_path, &client)
|
||||
.context("failed to construct cgroups path")?;
|
||||
let full_path = root_path.join_safely(&cgroups_path)?;
|
||||
let fs_manager = FsManager::new(root_path.clone(), cgroups_path.clone())?;
|
||||
|
||||
Ok(Manager {
|
||||
root_path,
|
||||
@ -146,6 +148,8 @@ impl Manager {
|
||||
unit_name: Self::get_unit_name(&destructured_path),
|
||||
destructured_path,
|
||||
client,
|
||||
fs_manager,
|
||||
delegation_boundary,
|
||||
})
|
||||
}
|
||||
|
||||
@ -160,7 +164,8 @@ impl Manager {
|
||||
}
|
||||
|
||||
// get_cgroups_path generates a cgroups path from the one provided by the user via cgroupsPath.
|
||||
// an example of the final path: "/system.slice/docker-foo.scope"
|
||||
// an example of the final path: "/system.slice/youki-569d5ce3afe1074769f67.scope" or if we are
|
||||
// not running as root /user.slice/user-1000/user@1000.service/youki-569d5ce3afe1074769f67.scope
|
||||
fn construct_cgroups_path(
|
||||
cgroups_path: &CgroupsPath,
|
||||
client: &dyn SystemdClient,
|
||||
@ -178,12 +183,13 @@ impl Manager {
|
||||
|
||||
let systemd_root = client.control_cgroup_root()?;
|
||||
let unit_name = Self::get_unit_name(cgroups_path);
|
||||
|
||||
let cgroups_path = systemd_root
|
||||
.join_safely(&parent)
|
||||
.with_context(|| format!("failed to join {:?} with {:?}", systemd_root, parent))?
|
||||
.join_safely(&unit_name)
|
||||
.with_context(|| format!("failed to join {:?} with {:?}", parent, unit_name))?;
|
||||
Ok((cgroups_path, parent))
|
||||
Ok((cgroups_path, systemd_root))
|
||||
}
|
||||
|
||||
// systemd represents slice hierarchy using `-`, so we need to follow suit when
|
||||
@ -215,22 +221,23 @@ impl Manager {
|
||||
Ok(Path::new(&path).to_path_buf())
|
||||
}
|
||||
|
||||
/// create_unified_cgroup verifies sure that *each level* in the downward path from the root cgroup
|
||||
/// down to the cgroup_path provided by the user is a valid cgroup hierarchy,
|
||||
/// containing the attached controllers and that it contains the container pid.
|
||||
fn create_unified_cgroup(&self, pid: Pid) -> Result<()> {
|
||||
/// ensures that each level in the downward path from the delegation boundary down to
|
||||
/// the scope or slice of the transient unit has all available controllers enabled
|
||||
fn ensure_controllers_attached(&self) -> Result<()> {
|
||||
let full_boundary_path = self.root_path.join_safely(&self.delegation_boundary)?;
|
||||
|
||||
let controllers: Vec<String> = self
|
||||
.get_available_controllers(&self.root_path)?
|
||||
.get_available_controllers(&full_boundary_path)?
|
||||
.into_iter()
|
||||
.map(|c| format!("{}{}", "+", c.to_string()))
|
||||
.collect();
|
||||
|
||||
// Write the controllers to the root_path.
|
||||
Self::write_controllers(&self.root_path, &controllers)?;
|
||||
Self::write_controllers(&full_boundary_path, &controllers)?;
|
||||
|
||||
let mut current_path = self.root_path.clone();
|
||||
let mut current_path = full_boundary_path;
|
||||
let mut components = self
|
||||
.cgroups_path
|
||||
.strip_prefix(&self.delegation_boundary)?
|
||||
.components()
|
||||
.filter(|c| c.ne(&RootDir))
|
||||
.peekable();
|
||||
@ -240,8 +247,11 @@ impl Manager {
|
||||
while let Some(component) = components.next() {
|
||||
current_path = current_path.join(component);
|
||||
if !current_path.exists() {
|
||||
fs::create_dir(&current_path)?;
|
||||
fs::metadata(&current_path)?.permissions().set_mode(0o755);
|
||||
log::warn!(
|
||||
"{:?} does not exist. Resource restrictions might not work correctly",
|
||||
current_path
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// last component cannot have subtree_control enabled due to internal process constraint
|
||||
@ -251,7 +261,7 @@ impl Manager {
|
||||
}
|
||||
}
|
||||
|
||||
common::write_cgroup_file(self.full_path.join(CGROUP_PROCS), pid)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_available_controllers<P: AsRef<Path>>(
|
||||
@ -270,9 +280,8 @@ impl Manager {
|
||||
for controller in fs::read_to_string(&controllers_path)?.split_whitespace() {
|
||||
match controller {
|
||||
"cpu" => controllers.push(ControllerType::Cpu),
|
||||
"io" => controllers.push(ControllerType::Io),
|
||||
"memory" => controllers.push(ControllerType::Memory),
|
||||
"pids" => controllers.push(ControllerType::Tasks),
|
||||
"pids" => controllers.push(ControllerType::Pids),
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
@ -331,7 +340,7 @@ impl CgroupManager for Manager {
|
||||
CpuSet::apply(controller_opt, systemd_version, &mut properties)?
|
||||
}
|
||||
|
||||
ControllerType::Tasks => {
|
||||
ControllerType::Pids => {
|
||||
Pids::apply(controller_opt, systemd_version, &mut properties)?
|
||||
}
|
||||
ControllerType::Memory => {
|
||||
@ -344,9 +353,14 @@ impl CgroupManager for Manager {
|
||||
Unified::apply(controller_opt, systemd_version, &mut properties)?;
|
||||
log::debug!("{:?}", properties);
|
||||
|
||||
self.client
|
||||
.set_unit_properties(&self.unit_name, &properties)
|
||||
.context("could not apply resource restrictions")?;
|
||||
if !properties.is_empty() {
|
||||
self.ensure_controllers_attached()
|
||||
.context("failed to attach controllers")?;
|
||||
|
||||
self.client
|
||||
.set_unit_properties(&self.unit_name, &properties)
|
||||
.context("could not apply resource restrictions")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@ -363,11 +377,11 @@ impl CgroupManager for Manager {
|
||||
}
|
||||
|
||||
fn freeze(&self, state: FreezerState) -> Result<()> {
|
||||
todo!();
|
||||
self.fs_manager.freeze(state)
|
||||
}
|
||||
|
||||
fn stats(&self) -> Result<Stats> {
|
||||
Ok(Stats::default())
|
||||
self.fs_manager.stats()
|
||||
}
|
||||
|
||||
fn get_all_pids(&self) -> Result<Vec<Pid>> {
|
||||
@ -390,22 +404,22 @@ mod tests {
|
||||
|
||||
fn start_transient_unit(
|
||||
&self,
|
||||
container_name: &str,
|
||||
pid: u32,
|
||||
parent: &str,
|
||||
unit_name: &str,
|
||||
_container_name: &str,
|
||||
_pid: u32,
|
||||
_parent: &str,
|
||||
_unit_name: &str,
|
||||
) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn stop_transient_unit(&self, unit_name: &str) -> Result<()> {
|
||||
fn stop_transient_unit(&self, _unit_name: &str) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn set_unit_properties(
|
||||
&self,
|
||||
unit_name: &str,
|
||||
properties: &HashMap<&str, Box<dyn RefArg>>,
|
||||
_unit_name: &str,
|
||||
_properties: &HashMap<&str, Box<dyn RefArg>>,
|
||||
) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
@ -42,8 +42,11 @@ impl<'a> YoukiConfig {
|
||||
}
|
||||
|
||||
pub fn load<P: AsRef<Path>>(path: P) -> Result<Self> {
|
||||
let file = fs::File::open(path.as_ref().join(YOUKI_CONFIG_NAME))?;
|
||||
Ok(serde_json::from_reader(&file)?)
|
||||
let path = path.as_ref();
|
||||
let file = fs::File::open(path.join(YOUKI_CONFIG_NAME))?;
|
||||
let config = serde_json::from_reader(&file)
|
||||
.with_context(|| format!("failed to load config from {:?}", path))?;
|
||||
Ok(config)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -8,9 +8,9 @@ use chrono::DateTime;
|
||||
use nix::unistd::Pid;
|
||||
|
||||
use chrono::Utc;
|
||||
use oci_spec::runtime::Spec;
|
||||
use procfs::process::Process;
|
||||
|
||||
use crate::config::YoukiConfig;
|
||||
use crate::syscall::syscall::create_syscall;
|
||||
|
||||
use crate::container::{ContainerStatus, State};
|
||||
@ -192,8 +192,8 @@ impl Container {
|
||||
self.state.save(&self.root)
|
||||
}
|
||||
|
||||
pub fn spec(&self) -> Result<Spec> {
|
||||
let spec = Spec::load(self.root.join("config.json"))?;
|
||||
pub fn spec(&self) -> Result<YoukiConfig> {
|
||||
let spec = YoukiConfig::load(&self.root)?;
|
||||
Ok(spec)
|
||||
}
|
||||
}
|
||||
@ -202,6 +202,7 @@ impl Container {
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::utils::create_temp_dir;
|
||||
use anyhow::Context;
|
||||
use serial_test::serial;
|
||||
|
||||
#[test]
|
||||
@ -300,13 +301,14 @@ mod tests {
|
||||
let tmp_dir = create_temp_dir("test_get_spec")?;
|
||||
use oci_spec::runtime::Spec;
|
||||
let spec = Spec::default();
|
||||
spec.save(tmp_dir.path().join("config.json"))?;
|
||||
let config = YoukiConfig::from_spec(&spec, "123").context("convert spec to config")?;
|
||||
config.save(tmp_dir.path()).context("save config")?;
|
||||
|
||||
let container = Container {
|
||||
root: tmp_dir.path().to_path_buf(),
|
||||
..Default::default()
|
||||
};
|
||||
container.spec()?;
|
||||
container.spec().context("get config")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1,7 +1,5 @@
|
||||
use std::{thread, time::Duration};
|
||||
|
||||
use crate::utils;
|
||||
|
||||
use super::{Container, ContainerStatus};
|
||||
use anyhow::{bail, Context, Result};
|
||||
|
||||
@ -30,17 +28,10 @@ impl Container {
|
||||
bail!("{} is not in running state", self.id());
|
||||
}
|
||||
|
||||
let cgroups_path = utils::get_cgroup_path(
|
||||
self.spec()?
|
||||
.linux()
|
||||
.as_ref()
|
||||
.context("no linux in spec")?
|
||||
.cgroups_path(),
|
||||
self.id(),
|
||||
);
|
||||
let cgroups_path = self.spec()?.cgroup_path;
|
||||
let use_systemd = self
|
||||
.systemd()
|
||||
.context("Could not determine cgroup manager")?;
|
||||
.context("could not determine cgroup manager")?;
|
||||
|
||||
let cgroup_manager =
|
||||
libcgroups::common::create_cgroup_manager(cgroups_path, use_systemd, self.id())?;
|
||||
|
@ -1,5 +1,3 @@
|
||||
use crate::utils;
|
||||
|
||||
use super::{Container, ContainerStatus};
|
||||
use anyhow::{bail, Context, Result};
|
||||
use libcgroups::common::FreezerState;
|
||||
@ -34,15 +32,7 @@ impl Container {
|
||||
);
|
||||
}
|
||||
|
||||
let spec = self.spec()?;
|
||||
let cgroups_path = utils::get_cgroup_path(
|
||||
spec.linux()
|
||||
.as_ref()
|
||||
.context("no linux in spec")?
|
||||
.cgroups_path(),
|
||||
self.id(),
|
||||
);
|
||||
|
||||
let cgroups_path = self.spec()?.cgroup_path;
|
||||
let use_systemd = self
|
||||
.systemd()
|
||||
.context("container state does not contain cgroup manager")?;
|
||||
|
@ -1,5 +1,3 @@
|
||||
use crate::utils;
|
||||
|
||||
use super::{Container, ContainerStatus};
|
||||
|
||||
use anyhow::{bail, Context, Result};
|
||||
@ -36,16 +34,7 @@ impl Container {
|
||||
);
|
||||
}
|
||||
|
||||
let spec = self.spec()?;
|
||||
let cgroups_path = utils::get_cgroup_path(
|
||||
spec.linux()
|
||||
.as_ref()
|
||||
.context("no linux in spec")?
|
||||
.cgroups_path(),
|
||||
self.id(),
|
||||
);
|
||||
|
||||
// create cgroup manager structure from the config at the path
|
||||
let cgroups_path = self.spec()?.cgroup_path;
|
||||
let use_systemd = self
|
||||
.systemd()
|
||||
.context("container state does not contain cgroup manager")?;
|
||||
|
@ -25,7 +25,7 @@ fn load_container<P: AsRef<Path>>(root_path: P, container_id: &str) -> Result<Co
|
||||
// the state of the container is stored in a directory named after the container id
|
||||
let container_root = root_path.join(container_id);
|
||||
if !container_root.exists() {
|
||||
bail!("{} does not exist.", container_id)
|
||||
bail!("container {} does not exist.", container_id)
|
||||
}
|
||||
|
||||
Container::load(container_root)
|
||||
|
Loading…
Reference in New Issue
Block a user