1
0
mirror of https://github.com/containers/youki synced 2024-09-24 04:50:54 +02:00
youki/crates/libcontainer/src/container/builder_impl.rs

226 lines
8.8 KiB
Rust
Raw Normal View History

2021-10-08 21:09:48 +02:00
use super::{Container, ContainerStatus};
use crate::{
error::{LibcontainerError, MissingSpecError},
2021-08-14 02:58:52 +02:00
hooks,
notify_socket::NotifyListener,
process::{
self,
args::{ContainerArgs, ContainerType},
intel_rdt::delete_resctrl_subdirectory,
},
2021-09-20 09:33:20 +02:00
rootless::Rootless,
syscall::Syscall,
utils,
workload::ExecutorManager,
};
Adopt `thiserror` for libcgroups (#1872) * convert systemd controllers to thiserror Signed-off-by: squili <74628488+squili@users.noreply.github.com> * finish systemd manager except for the CgroupManager impl Signed-off-by: squili <74628488+squili@users.noreply.github.com> * refactor controller traits to be internal Signed-off-by: squili <74628488+squili@users.noreply.github.com> * convert v1 controllers to thiserror Signed-off-by: squili <74628488+squili@users.noreply.github.com> * convert v2 controllers to thiserror Signed-off-by: squili <74628488+squili@users.noreply.github.com> * make StatsProvider an internal trait Signed-off-by: squili <74628488+squili@users.noreply.github.com> * replace v1 stats provider anyhow with thiserror Signed-off-by: squili <74628488+squili@users.noreply.github.com> * remove anyhow from v2 stats providers Signed-off-by: squili <74628488+squili@users.noreply.github.com> * fix lints Signed-off-by: squili <74628488+squili@users.noreply.github.com> * get rid of doc test error Signed-off-by: squili <74628488+squili@users.noreply.github.com> * convert managers from anyhow to thiserror Signed-off-by: squili <74628488+squili@users.noreply.github.com> * fix lints Signed-off-by: squili <74628488+squili@users.noreply.github.com> * get rid of the rest of the references to anyhow Signed-off-by: squili <74628488+squili@users.noreply.github.com> * AnyManager -> AnyCgroupManager Signed-off-by: squili <74628488+squili@users.noreply.github.com> * fix formatting Signed-off-by: squili <74628488+squili@users.noreply.github.com> * fix lint in libcgroups devices Signed-off-by: squili <74628488+squili@users.noreply.github.com> --------- Signed-off-by: squili <74628488+squili@users.noreply.github.com>
2023-05-03 07:59:55 +02:00
use libcgroups::common::CgroupManager;
use nix::unistd::Pid;
use oci_spec::runtime::Spec;
2021-08-31 07:45:45 +02:00
use std::{fs, io::Write, os::unix::prelude::RawFd, path::PathBuf};
2021-07-03 00:45:38 +02:00
pub(super) struct ContainerBuilderImpl<'a> {
2021-07-17 15:55:38 +02:00
/// Flag indicating if an init or a tenant container should be created
pub container_type: ContainerType,
2021-07-17 15:55:38 +02:00
/// Interface to operating system primitives
pub syscall: &'a dyn Syscall,
2021-07-17 15:55:38 +02:00
/// Flag indicating if systemd should be used for cgroup management
2021-07-03 00:45:38 +02:00
pub use_systemd: bool,
2021-07-17 15:55:38 +02:00
/// Id of the container
2021-07-03 00:45:38 +02:00
pub container_id: String,
/// OCI compliant runtime spec
pub spec: &'a Spec,
2021-07-17 15:55:38 +02:00
/// Root filesystem of the container
2021-07-03 00:45:38 +02:00
pub rootfs: PathBuf,
2021-07-17 15:55:38 +02:00
/// File which will be used to communicate the pid of the
/// container process to the higher level runtime
2021-07-03 00:45:38 +02:00
pub pid_file: Option<PathBuf>,
2021-07-17 15:55:38 +02:00
/// Socket to communicate the file descriptor of the ptty
pub console_socket: Option<RawFd>,
2021-07-17 15:55:38 +02:00
/// Options for rootless containers
pub rootless: Option<Rootless<'a>>,
2021-07-19 08:22:47 +02:00
/// Path to the Unix Domain Socket to communicate container start
pub notify_path: PathBuf,
2021-07-17 15:55:38 +02:00
/// Container state
2021-07-03 00:45:38 +02:00
pub container: Option<Container>,
2021-08-02 02:23:56 +02:00
/// File descriptos preserved/passed to the container init process.
pub preserve_fds: i32,
/// If the container is to be run in detached mode
pub detached: bool,
/// Default executes the specified execution of a generic command
pub executor_manager: ExecutorManager,
2021-07-03 00:45:38 +02:00
}
impl<'a> ContainerBuilderImpl<'a> {
pub(super) fn create(&mut self) -> Result<Pid, LibcontainerError> {
match self.run_container() {
Ok(pid) => Ok(pid),
Err(outer) => {
// Only the init container should be cleaned up in the case of
// an error.
if matches!(self.container_type, ContainerType::InitContainer) {
self.cleanup_container()?;
}
Err(outer)
}
}
2021-07-03 00:45:38 +02:00
}
fn run_container(&mut self) -> Result<Pid, LibcontainerError> {
let linux = self.spec.linux().as_ref().ok_or(MissingSpecError::Linux)?;
2022-01-06 22:51:18 +01:00
let cgroups_path = utils::get_cgroup_path(
linux.cgroups_path(),
&self.container_id,
self.rootless.is_some(),
);
2021-10-28 21:53:58 +02:00
let cmanager = libcgroups::common::create_cgroup_manager(
Adopt `thiserror` for libcgroups (#1872) * convert systemd controllers to thiserror Signed-off-by: squili <74628488+squili@users.noreply.github.com> * finish systemd manager except for the CgroupManager impl Signed-off-by: squili <74628488+squili@users.noreply.github.com> * refactor controller traits to be internal Signed-off-by: squili <74628488+squili@users.noreply.github.com> * convert v1 controllers to thiserror Signed-off-by: squili <74628488+squili@users.noreply.github.com> * convert v2 controllers to thiserror Signed-off-by: squili <74628488+squili@users.noreply.github.com> * make StatsProvider an internal trait Signed-off-by: squili <74628488+squili@users.noreply.github.com> * replace v1 stats provider anyhow with thiserror Signed-off-by: squili <74628488+squili@users.noreply.github.com> * remove anyhow from v2 stats providers Signed-off-by: squili <74628488+squili@users.noreply.github.com> * fix lints Signed-off-by: squili <74628488+squili@users.noreply.github.com> * get rid of doc test error Signed-off-by: squili <74628488+squili@users.noreply.github.com> * convert managers from anyhow to thiserror Signed-off-by: squili <74628488+squili@users.noreply.github.com> * fix lints Signed-off-by: squili <74628488+squili@users.noreply.github.com> * get rid of the rest of the references to anyhow Signed-off-by: squili <74628488+squili@users.noreply.github.com> * AnyManager -> AnyCgroupManager Signed-off-by: squili <74628488+squili@users.noreply.github.com> * fix formatting Signed-off-by: squili <74628488+squili@users.noreply.github.com> * fix lint in libcgroups devices Signed-off-by: squili <74628488+squili@users.noreply.github.com> --------- Signed-off-by: squili <74628488+squili@users.noreply.github.com>
2023-05-03 07:59:55 +02:00
cgroups_path,
self.use_systemd || self.rootless.is_some(),
2021-10-28 21:53:58 +02:00
&self.container_id,
)?;
let process = self
.spec
.process()
.as_ref()
.ok_or(MissingSpecError::Process)?;
if matches!(self.container_type, ContainerType::InitContainer) {
2021-09-28 00:46:57 +02:00
if let Some(hooks) = self.spec.hooks() {
hooks::run_hooks(hooks.create_runtime().as_ref(), self.container.as_ref())?
2021-08-05 10:21:15 +02:00
}
}
// Need to create the notify socket before we pivot root, since the unix
// domain socket used here is outside of the rootfs of container. During
2021-09-05 17:41:02 +02:00
// exec, need to create the socket before we enter into existing mount
// namespace.
let notify_socket: NotifyListener = NotifyListener::new(&self.notify_path)?;
2021-08-31 07:45:45 +02:00
// If Out-of-memory score adjustment is set in specification. set the score
// value for the current process check
// https://dev.to/rrampage/surviving-the-linux-oom-killer-2ki9 for some more
// information.
//
// This has to be done before !dumpable because /proc/self/oom_score_adj
// is not writeable unless you're an privileged user (if !dumpable is
// set). All children inherit their parent's oom_score_adj value on
// fork(2) so this will always be propagated properly.
if let Some(oom_score_adj) = process.oom_score_adj() {
tracing::debug!("Set OOM score to {}", oom_score_adj);
let mut f = fs::File::create("/proc/self/oom_score_adj").map_err(|err| {
tracing::error!("failed to open /proc/self/oom_score_adj: {}", err);
LibcontainerError::OtherIO(err)
})?;
f.write_all(oom_score_adj.to_string().as_bytes())
.map_err(|err| {
tracing::error!("failed to write to /proc/self/oom_score_adj: {}", err);
LibcontainerError::OtherIO(err)
})?;
2021-08-31 07:45:45 +02:00
}
// Make the process non-dumpable, to avoid various race conditions that
// could cause processes in namespaces we're joining to access host
// resources (or potentially execute code).
//
// However, if the number of namespaces we are joining is 0, we are not
// going to be switching to a different security context. Thus setting
// ourselves to be non-dumpable only breaks things (like rootless
// containers), which is the recommendation from the kernel folks.
if linux.namespaces().is_some() {
2021-08-31 07:45:45 +02:00
prctl::set_dumpable(false).unwrap();
}
// This container_args will be passed to the container processes,
// therefore we will have to move all the variable by value. Since self
// is a shared reference, we have to clone these variables here.
2021-10-08 21:09:48 +02:00
let container_args = ContainerArgs {
container_type: self.container_type,
syscall: self.syscall,
spec: self.spec,
rootfs: &self.rootfs,
console_socket: self.console_socket,
notify_socket,
2021-08-02 02:23:56 +02:00
preserve_fds: self.preserve_fds,
container: &self.container,
rootless: &self.rootless,
cgroup_manager: cmanager,
detached: self.detached,
executor_manager: &self.executor_manager,
};
let (init_pid, need_to_clean_up_intel_rdt_dir) =
process::container_main_process::container_main_process(&container_args).map_err(
|err| {
tracing::error!(?err, "failed to run container process");
LibcontainerError::MainProcess(err)
},
)?;
// if file to write the pid to is specified, write pid of the child
if let Some(pid_file) = &self.pid_file {
fs::write(pid_file, format!("{init_pid}")).map_err(|err| {
tracing::error!("failed to write pid to file: {}", err);
LibcontainerError::OtherIO(err)
})?;
}
2021-09-23 23:05:35 +02:00
if let Some(container) = &mut self.container {
// update status and pid of the container process
container
2021-09-23 23:05:35 +02:00
.set_status(ContainerStatus::Created)
.set_creator(nix::unistd::geteuid().as_raw())
.set_pid(init_pid.as_raw())
.set_clean_up_intel_rdt_directory(need_to_clean_up_intel_rdt_dir)
.save()?;
}
Ok(init_pid)
}
fn cleanup_container(&self) -> Result<(), LibcontainerError> {
let linux = self.spec.linux().as_ref().ok_or(MissingSpecError::Linux)?;
2022-01-06 22:51:18 +01:00
let cgroups_path = utils::get_cgroup_path(
linux.cgroups_path(),
&self.container_id,
self.rootless.is_some(),
);
2021-10-28 21:53:58 +02:00
let cmanager = libcgroups::common::create_cgroup_manager(
Adopt `thiserror` for libcgroups (#1872) * convert systemd controllers to thiserror Signed-off-by: squili <74628488+squili@users.noreply.github.com> * finish systemd manager except for the CgroupManager impl Signed-off-by: squili <74628488+squili@users.noreply.github.com> * refactor controller traits to be internal Signed-off-by: squili <74628488+squili@users.noreply.github.com> * convert v1 controllers to thiserror Signed-off-by: squili <74628488+squili@users.noreply.github.com> * convert v2 controllers to thiserror Signed-off-by: squili <74628488+squili@users.noreply.github.com> * make StatsProvider an internal trait Signed-off-by: squili <74628488+squili@users.noreply.github.com> * replace v1 stats provider anyhow with thiserror Signed-off-by: squili <74628488+squili@users.noreply.github.com> * remove anyhow from v2 stats providers Signed-off-by: squili <74628488+squili@users.noreply.github.com> * fix lints Signed-off-by: squili <74628488+squili@users.noreply.github.com> * get rid of doc test error Signed-off-by: squili <74628488+squili@users.noreply.github.com> * convert managers from anyhow to thiserror Signed-off-by: squili <74628488+squili@users.noreply.github.com> * fix lints Signed-off-by: squili <74628488+squili@users.noreply.github.com> * get rid of the rest of the references to anyhow Signed-off-by: squili <74628488+squili@users.noreply.github.com> * AnyManager -> AnyCgroupManager Signed-off-by: squili <74628488+squili@users.noreply.github.com> * fix formatting Signed-off-by: squili <74628488+squili@users.noreply.github.com> * fix lint in libcgroups devices Signed-off-by: squili <74628488+squili@users.noreply.github.com> --------- Signed-off-by: squili <74628488+squili@users.noreply.github.com>
2023-05-03 07:59:55 +02:00
cgroups_path,
self.use_systemd || self.rootless.is_some(),
2021-10-28 21:53:58 +02:00
&self.container_id,
)?;
let mut errors = Vec::new();
if let Err(e) = cmanager.remove() {
tracing::error!(error = ?e, "failed to remove cgroup manager");
errors.push(e.to_string());
}
if let Some(container) = &self.container {
if let Some(true) = container.clean_up_intel_rdt_subdirectory() {
if let Err(e) = delete_resctrl_subdirectory(container.id()) {
tracing::error!(id = ?container.id(), error = ?e, "failed to delete resctrl subdirectory");
errors.push(e.to_string());
}
}
if container.root.exists() {
if let Err(e) = fs::remove_dir_all(&container.root) {
tracing::error!(container_root = ?container.root, error = ?e, "failed to delete container root");
errors.push(e.to_string());
}
}
}
if !errors.is_empty() {
return Err(LibcontainerError::Other(format!(
"failed to cleanup container: {}",
errors.join(";")
)));
}
Ok(())
}
}