1
0
Fork 0
mirror of https://github.com/containers/youki synced 2024-06-02 04:46:13 +02:00
youki/src/container/tenant_builder.rs

376 lines
12 KiB
Rust
Raw Normal View History

2021-07-15 22:12:21 +02:00
use anyhow::{bail, Context, Result};
2021-07-15 01:09:04 +02:00
use caps::Capability;
use oci_spec::{LinuxCapabilities, LinuxNamespace, LinuxNamespaceType, Process, Spec};
2021-07-15 22:12:21 +02:00
use std::{
collections::HashMap,
2021-07-15 01:09:04 +02:00
convert::TryFrom,
ffi::{CString, OsString},
2021-07-15 22:12:21 +02:00
fs,
2021-07-15 01:09:04 +02:00
os::unix::prelude::OsStrExt,
path::{Path, PathBuf},
2021-07-15 01:09:04 +02:00
str::FromStr,
};
use crate::{
2021-07-19 19:25:01 +02:00
notify_socket::NotifySocket, rootless::detect_rootless, stdio::FileDescriptor, tty, utils,
};
2021-07-15 01:09:04 +02:00
use super::{builder::ContainerBuilder, builder_impl::ContainerBuilderImpl, Container};
2021-07-15 01:09:04 +02:00
/// Names of the namespace entries (as they appear under `/proc/<pid>/ns`) that a
/// tenant process is configured to join. Entries of the init process whose name is
/// not listed here are ignored when the tenant namespaces are assembled.
const NAMESPACE_TYPES: &[&str] = &["ipc", "uts", "net", "pid", "mnt", "cgroup"];
2021-07-15 22:12:21 +02:00
/// File name prefix used when generating the name of a tenant's notify socket.
const TENANT_NOTIFY: &str = "tenant-notify-";
/// File name prefix used when generating the name of a tenant's tty socket.
const TENANT_TTY: &str = "tenant-tty-";
2021-07-05 19:40:35 +02:00
/// Builder that can be used to configure the properties of a process
/// that will join an existing container sandbox
pub struct TenantContainerBuilder {
base: ContainerBuilder,
env: HashMap<String, String>,
cwd: Option<PathBuf>,
2021-07-15 01:09:04 +02:00
args: Vec<String>,
no_new_privs: Option<bool>,
capabilities: Vec<String>,
process: Option<PathBuf>,
}
impl TenantContainerBuilder {
2021-07-05 19:40:35 +02:00
/// Generates the base configuration for a process that will join
/// an existing container sandbox from which configuration methods
/// can be chained
pub(super) fn new(builder: ContainerBuilder) -> Self {
Self {
base: builder,
env: HashMap::new(),
cwd: None,
2021-07-15 01:09:04 +02:00
args: Vec::new(),
no_new_privs: None,
capabilities: Vec::new(),
process: None,
}
}
2021-07-05 19:40:35 +02:00
/// Sets environment variables for the container
pub fn with_env(mut self, env: HashMap<String, String>) -> Self {
self.env = env;
self
}
2021-07-05 19:40:35 +02:00
/// Sets the working directory of the container
pub fn with_cwd<P: Into<PathBuf>>(mut self, path: P) -> Self {
self.cwd = Some(path.into());
self
}
2021-07-05 19:40:35 +02:00
/// Sets the command the container will be started with
2021-07-15 01:09:04 +02:00
pub fn with_container_args(mut self, args: Vec<String>) -> Self {
self.args = args;
self
}
pub fn with_no_new_privs(mut self, no_new_privs: bool) -> Self {
self.no_new_privs = Some(no_new_privs);
self
}
pub fn with_capabilities(mut self, capabilities: Vec<String>) -> Self {
self.capabilities = capabilities;
self
}
pub fn with_process<P: Into<PathBuf>>(mut self, path: P) -> Self {
self.process = Some(path.into());
self
}
2021-07-05 19:40:35 +02:00
/// Joins an existing container
pub fn build(self) -> Result<()> {
let container_dir = self.lookup_container_dir()?;
2021-07-15 01:09:04 +02:00
let container = self.load_container_state(container_dir.clone())?;
let mut spec = self.load_init_spec(&container_dir)?;
self.adapt_spec_for_tenant(&mut spec, &container)?;
log::debug!("{:#?}", spec);
2021-07-19 08:22:47 +02:00
let notify_path = Self::setup_notify_listener(&container_dir)?;
// convert path of root file system of the container to absolute path
let rootfs = fs::canonicalize(&spec.root.path)?;
// if socket file path is given in commandline options,
// get file descriptors of console socket
2021-07-15 22:12:21 +02:00
let csocketfd = self.setup_tty_socket(&container_dir)?;
2021-07-15 01:09:04 +02:00
let use_systemd = self.should_use_systemd(&container);
let rootless = detect_rootless(&spec)?;
let mut builder_impl = ContainerBuilderImpl {
init: false,
syscall: self.base.syscall,
container_id: self.base.container_id,
pid_file: self.base.pid_file,
console_socket: csocketfd,
2021-07-15 22:12:21 +02:00
use_systemd,
container_dir,
spec,
rootfs,
rootless,
2021-07-19 08:22:47 +02:00
notify_path: notify_path.clone(),
container: None,
};
builder_impl.create()?;
2021-07-15 01:09:04 +02:00
2021-07-15 22:12:21 +02:00
let mut notify_socket = NotifySocket::new(notify_path);
2021-07-15 01:09:04 +02:00
notify_socket.notify_container_start()?;
Ok(())
}
/// Returns the state directory of the container (`<root_path>/<container_id>`).
///
/// # Errors
/// Fails if that directory does not exist.
fn lookup_container_dir(&self) -> Result<PathBuf> {
    let dir = self.base.root_path.join(&self.base.container_id);
    if dir.exists() {
        Ok(dir)
    } else {
        bail!("container {} does not exist", self.base.container_id)
    }
}
fn load_init_spec(&self, container_dir: &Path) -> Result<Spec> {
let spec_path = container_dir.join("config.json");
2021-07-16 00:41:09 +02:00
let spec = oci_spec::Spec::load(spec_path).context("failed to load spec")?;
Ok(spec)
}
2021-07-15 01:09:04 +02:00
/// Loads the container stored in `container_dir` and refreshes its status.
///
/// # Errors
/// Fails if the container cannot be loaded or refreshed, or if its current
/// status does not permit executing an additional process.
fn load_container_state(&self, container_dir: PathBuf) -> Result<Container> {
    let container = Container::load(container_dir)?.refresh_status()?;
    if container.can_exec() {
        return Ok(container);
    }

    bail!(
        "Cannot exec as container is in state {}",
        container.status()
    );
}
/// Rewrites the spec of the init process so that it describes the tenant process.
///
/// If a process file was configured, it replaces the process section wholesale.
/// Otherwise working directory, arguments, environment, `no_new_privileges` and
/// capabilities are applied individually. Afterwards the namespaces of the spec
/// are replaced with the ones of the container's init process.
///
/// # Errors
/// Fails if any of the individual settings is invalid, the init pid of the
/// container is unknown or the namespaces of the init process cannot be read.
fn adapt_spec_for_tenant(&self, spec: &mut Spec, container: &Container) -> Result<()> {
    match self.process {
        Some(ref process) => self.set_process(spec, process)?,
        None => {
            self.set_working_dir(spec)?;
            self.set_args(spec)?;
            self.set_environment(spec)?;
            self.set_no_new_privileges(spec);
            self.set_capabilities(spec)?;
        }
    }

    let init_pid = match container.pid() {
        Some(pid) => pid,
        None => bail!("Could not retrieve container init pid"),
    };

    let init_process = procfs::process::Process::new(init_pid.as_raw())?;
    let init_namespaces = init_process.namespaces()?;
    self.set_namespaces(spec, init_namespaces)
}
fn set_process(&self, spec: &mut Spec, process: &Path) -> Result<()> {
if !process.exists() {
bail!(
"Process.json file does not exist at specified path {}",
process.display()
)
}
2021-07-15 22:12:21 +02:00
let process = utils::open(process)?;
2021-07-15 01:09:04 +02:00
let process_spec: Process = serde_json::from_reader(process)?;
spec.process = process_spec;
2021-07-15 22:12:21 +02:00
Ok(())
2021-07-15 01:09:04 +02:00
}
/// Applies the configured working directory to the spec; without a configured
/// directory the spec is left untouched.
///
/// # Errors
/// Fails if the configured working directory is a relative path.
fn set_working_dir(&self, spec: &mut Spec) -> Result<()> {
    let cwd = match self.cwd.as_deref() {
        Some(cwd) => cwd,
        None => return Ok(()),
    };

    if cwd.is_relative() {
        bail!(
            "Current working directory must be an absolute path, but is {}",
            cwd.display()
        );
    }

    spec.process.cwd = cwd.to_string_lossy().into_owned();
    Ok(())
}
/// Replaces the arguments of the spec's process with the configured command.
///
/// # Errors
/// Fails if no command was configured.
fn set_args(&self, spec: &mut Spec) -> Result<()> {
    if !self.args.is_empty() {
        spec.process.args = self.args.to_vec();
        return Ok(());
    }

    bail!("Container command was not specified")
}
/// Adds the configured environment variables as `KEY=VALUE` entries after the
/// entries that are already present in the spec.
fn set_environment(&self, spec: &mut Spec) -> Result<()> {
    let assignments = self
        .env
        .iter()
        .map(|(key, value)| format!("{}={}", key, value));
    spec.process.env.extend(assignments);
    Ok(())
}
2021-07-15 22:12:21 +02:00
/// Overrides the `no_new_privileges` flag of the spec if a value was configured.
fn set_no_new_privileges(&self, spec: &mut Spec) {
    // falls back to the value already in the spec, i.e. no change when unset
    spec.process.no_new_privileges = self
        .no_new_privs
        .unwrap_or(spec.process.no_new_privileges);
}
fn set_capabilities(&self, spec: &mut Spec) -> Result<()> {
if !self.capabilities.is_empty() {
let mut caps: Vec<Capability> = Vec::with_capacity(self.capabilities.len());
2021-07-15 01:09:04 +02:00
for cap in &self.capabilities {
caps.push(Capability::from_str(cap)?);
2021-07-15 01:09:04 +02:00
}
if let Some(ref mut spec_caps) = spec.process.capabilities {
spec_caps.ambient.append(&mut caps.clone());
spec_caps.bounding.append(&mut caps.clone());
spec_caps.effective.append(&mut caps.clone());
spec_caps.inheritable.append(&mut caps.clone());
2021-07-15 22:12:21 +02:00
spec_caps.permitted.append(&mut caps);
2021-07-15 01:09:04 +02:00
} else {
spec.process.capabilities = Some(LinuxCapabilities {
ambient: caps.clone(),
bounding: caps.clone(),
effective: caps.clone(),
inheritable: caps.clone(),
2021-07-15 22:12:21 +02:00
permitted: caps,
2021-07-15 01:09:04 +02:00
})
}
}
Ok(())
}
/// Replaces the namespaces of the spec with the namespaces of the init process.
///
/// For every name in `NAMESPACE_TYPES` that the init process has an entry for, a
/// namespace with the path of that entry is added, so that the tenant joins the
/// existing namespace. Entries with other names are ignored.
///
/// # Errors
/// Fails if a namespace name cannot be converted into a `LinuxNamespaceType` or
/// if the spec does not contain a `linux` section.
fn set_namespaces(&self, spec: &mut Spec, init_namespaces: Vec<Namespace>) -> Result<()> {
    let mut tenant_namespaces = Vec::with_capacity(init_namespaces.len());

    for ns_type in NAMESPACE_TYPES.iter().copied() {
        if let Some(init_ns) = init_namespaces.iter().find(|n| n.ns_type.eq(ns_type)) {
            tenant_namespaces.push(LinuxNamespace {
                typ: LinuxNamespaceType::try_from(ns_type)?,
                path: Some(init_ns.path.to_string_lossy().to_string()),
            });
        }
    }

    // A spec without a linux section is reported as an error instead of
    // panicking, as the spec is loaded from disk and not under our control.
    let linux = spec
        .linux
        .as_mut()
        .context("container spec has no linux section, cannot configure namespaces")?;
    linux.namespaces = tenant_namespaces;
    Ok(())
}
/// Returns the systemd setting recorded for the container, or `false` if
/// the container has none.
fn should_use_systemd(&self, container: &Container) -> bool {
    container.systemd().unwrap_or(false)
}
2021-07-15 22:12:21 +02:00
2021-07-19 08:22:47 +02:00
fn setup_notify_listener(container_dir: &Path) -> Result<PathBuf> {
2021-07-15 22:12:21 +02:00
let notify_name = Self::generate_name(&container_dir, TENANT_NOTIFY);
let socket_path = container_dir.join(&notify_name);
2021-07-19 08:22:47 +02:00
Ok(socket_path)
2021-07-15 22:12:21 +02:00
}
/// Sets up the console socket of the tenant under a freshly generated name.
///
/// Returns `None` if no console socket was configured on the base builder.
///
/// # Errors
/// Fails if the console socket cannot be set up.
fn setup_tty_socket(&self, container_dir: &Path) -> Result<Option<FileDescriptor>> {
    let tty_name = Self::generate_name(container_dir, TENANT_TTY);

    match self.base.console_socket {
        Some(ref console_socket) => {
            let fd = tty::setup_console_socket(container_dir, console_socket, &tty_name)?;
            Ok(Some(fd))
        }
        None => Ok(None),
    }
}
/// Generates a file name of the form `<prefix><random hex>.sock` for which no
/// entry exists in `dir` yet. New random values are tried until a free name
/// is found.
fn generate_name(dir: &Path, prefix: &str) -> String {
    loop {
        let candidate = format!("{}{:x}.sock", prefix, fastrand::i32(..));
        if dir.join(&candidate).exists() {
            continue;
        }
        break candidate;
    }
}
2021-07-15 01:09:04 +02:00
}
// Can be removed once https://github.com/eminence/procfs/pull/135 is available
/// Extension trait for looking up the namespaces a process belongs to.
trait GetNamespace {
/// Returns the namespaces of the process, or an error if they cannot be read.
fn namespaces(&self) -> Result<Vec<Namespace>>;
}
impl GetNamespace for procfs::process::Process {
    /// Describes namespaces to which the process with the corresponding PID belongs.
    /// Doc reference: https://man7.org/linux/man-pages/man7/namespaces.7.html
    ///
    /// One [`Namespace`] is returned per entry of `/proc/<pid>/ns`; its identifier
    /// and device id are taken from a `stat` of that entry.
    ///
    /// # Errors
    /// Fails if the directory cannot be read or one of its entries cannot be
    /// stat'ed; in the latter case the OS error is part of the message.
    fn namespaces(&self) -> Result<Vec<Namespace>> {
        let proc_path = PathBuf::from(format!("/proc/{}", self.pid()));
        let ns = proc_path.join("ns");
        let mut namespaces = Vec::new();

        for entry in fs::read_dir(ns)? {
            let entry = entry?;
            let path = entry.path();
            let ns_type = entry.file_name();

            // An interior NUL byte is reported as an error rather than a panic.
            let cstr = CString::new(path.as_os_str().as_bytes())
                .with_context(|| format!("Namespace path {:?} contains a NUL byte", path))?;

            // SAFETY: `libc::stat` is a plain C struct made up of integers, for
            // which the all-zero bit pattern is a valid value.
            let mut stat: libc::stat = unsafe { std::mem::zeroed() };

            // SAFETY: `cstr` is a valid NUL-terminated string that outlives the
            // call and `stat` is a valid, writable `libc::stat`.
            if unsafe { libc::stat(cstr.as_ptr(), &mut stat) } != 0 {
                // read errno right away, before anything else can overwrite it
                let os_error = std::io::Error::last_os_error();
                bail!("Unable to stat {:?}: {}", path, os_error);
            }

            namespaces.push(Namespace {
                ns_type,
                path,
                identifier: stat.st_ino,
                device_id: stat.st_dev,
            })
        }

        Ok(namespaces)
    }
}
/// Information about a namespace
///
/// Values of this type are produced by `GetNamespace::namespaces`.
///
/// Two values compare equal when both `identifier` and `device_id` match;
/// `ns_type` and `path` do not take part in the comparison, so the same
/// namespace reached through the `/proc` entries of different processes
/// compares equal.
#[derive(Debug, Clone)]
pub struct Namespace {
    /// Namespace type
    pub ns_type: OsString,
    /// Handle to the namespace
    pub path: PathBuf,
    /// Namespace identifier (inode number)
    pub identifier: u64,
    /// Device id of the namespace
    pub device_id: u64,
}

impl PartialEq for Namespace {
    fn eq(&self, other: &Self) -> bool {
        // see https://lore.kernel.org/lkml/87poky5ca9.fsf@xmission.com/
        self.identifier == other.identifier && self.device_id == other.device_id
    }
}

impl Eq for Namespace {}