diff --git a/.gitignore b/.gitignore index f3ea879a..b41834af 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -youki + /tutorial .idea/ diff --git a/crates/youki/Cargo.toml b/crates/youki/Cargo.toml new file mode 100644 index 00000000..0f5e9ae0 --- /dev/null +++ b/crates/youki/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "youki" +version = "0.0.1" +authors = ["youki team"] +edition = "2018" +description = "A container runtime written in Rust" + +[dependencies.clap] +version = "3.0.0-beta.2" +default-features = false +features = ["std", "suggestions", "derive", "cargo"] + + +[dependencies] +anyhow = "1.0" +chrono = { version="0.4", features = ["serde"] } +libcgroups = { path = "../libcgroups" } +libcontainer = { path = "../libcontainer" } +log = "0.4" +nix = "0.22.0" +oci-spec = { git = "https://github.com/containers/oci-spec-rs", rev = "d6fb1e91742313cd0d0085937e2d6df5d4669720" } +once_cell = "1.6.0" +pentacle = "1.0.0" +procfs = "0.11.0" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +tabwriter = "1" + +[dev-dependencies] +serial_test = "0.5.1" + + diff --git a/crates/youki/src/commands/create.rs b/crates/youki/src/commands/create.rs new file mode 100644 index 00000000..f8dc76e2 --- /dev/null +++ b/crates/youki/src/commands/create.rs @@ -0,0 +1,48 @@ +//! 
Handles the creation of a new container +use anyhow::Result; +use clap::Clap; +use std::path::PathBuf; + +use libcontainer::{container::builder::ContainerBuilder, syscall::syscall::create_syscall}; + +/// Create a container +#[derive(Clap, Debug)] +pub struct Create { + /// File to write pid of the container created + // note that in the end, container is just another process + #[clap(short, long)] + pid_file: Option, + /// path to the bundle directory, containing config.json and root filesystem + #[clap(short, long, default_value = ".")] + bundle: PathBuf, + /// Unix socket (file) path , which will receive file descriptor of the writing end of the pseudoterminal + #[clap(short, long)] + console_socket: Option, + /// Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total) + #[clap(long, default_value = "0")] + preserve_fds: i32, + /// name of the container instance to be started + #[clap(required = true)] + pub container_id: String, +} + +// One thing to note is that in the end, container is just another process in Linux +// it has specific/different control group, namespace, using which program executing in it +// can be given impression that is is running on a complete system, but on the system which +// it is running, it is just another process, and has attributes such as pid, file descriptors, etc. +// associated with it like any other process. 
+impl Create { + pub fn exec(&self, root_path: PathBuf, systemd_cgroup: bool) -> Result<()> { + let syscall = create_syscall(); + ContainerBuilder::new(self.container_id.clone(), syscall.as_ref()) + .with_pid_file(self.pid_file.as_ref()) + .with_console_socket(self.console_socket.as_ref()) + .with_root_path(root_path) + .with_preserved_fds(self.preserve_fds) + .as_init(&self.bundle) + .with_systemd(systemd_cgroup) + .build()?; + + Ok(()) + } +} diff --git a/crates/youki/src/commands/delete.rs b/crates/youki/src/commands/delete.rs new file mode 100644 index 00000000..1ad46605 --- /dev/null +++ b/crates/youki/src/commands/delete.rs @@ -0,0 +1,24 @@ +use crate::commands::load_container; +use anyhow::{Context, Result}; +use clap::Clap; +use std::path::PathBuf; + +/// Release any resources held by the container +#[derive(Clap, Debug)] +pub struct Delete { + #[clap(required = true)] + container_id: String, + /// forces deletion of the container if it is still running (using SIGKILL) + #[clap(short, long)] + force: bool, +} + +impl Delete { + pub fn exec(&self, root_path: PathBuf) -> Result<()> { + log::debug!("start deleting {}", self.container_id); + let mut container = load_container(root_path, &self.container_id)?; + container + .delete(self.force) + .with_context(|| format!("failed to delete container {}", self.container_id)) + } +} diff --git a/crates/youki/src/commands/events.rs b/crates/youki/src/commands/events.rs new file mode 100644 index 00000000..8fde6788 --- /dev/null +++ b/crates/youki/src/commands/events.rs @@ -0,0 +1,29 @@ +use clap::Clap; +use std::path::PathBuf; + +use anyhow::{Context, Result}; + +use crate::commands::load_container; + +/// Show resource statistics for the container +#[derive(Clap, Debug)] +pub struct Events { + /// Sets the stats collection interval in seconds (default: 5s) + #[clap(long, default_value = "5")] + pub interval: u32, + /// Display the container stats only once + #[clap(long)] + pub stats: bool, + /// Name of the 
container instance + #[clap(required = true)] + pub container_id: String, +} + +impl Events { + pub fn exec(&self, root_path: PathBuf) -> Result<()> { + let mut container = load_container(root_path, &self.container_id)?; + container + .events(self.interval, self.stats) + .with_context(|| format!("failed to get events from container {}", self.container_id)) + } +} diff --git a/crates/youki/src/commands/exec.rs b/crates/youki/src/commands/exec.rs new file mode 100644 index 00000000..d8860ead --- /dev/null +++ b/crates/youki/src/commands/exec.rs @@ -0,0 +1,70 @@ +use anyhow::Result; +use clap::Clap; +use std::{error::Error, path::PathBuf}; + +use libcontainer::{container::builder::ContainerBuilder, syscall::syscall::create_syscall}; + +/// Execute a process within an existing container +#[derive(Clap, Debug)] +pub struct Exec { + /// Unix socket (file) path , which will receive file descriptor of the writing end of the pseudoterminal + #[clap(long)] + pub console_socket: Option, + #[clap(short, long)] + pub tty: bool, + #[clap(long)] + /// Current working directory of the container + pub cwd: Option, + #[clap(long)] + /// The file to which the pid of the container process should be written to + pub pid_file: Option, + /// Environment variables that should be set in the container + #[clap(short, long, parse(try_from_str = parse_key_val), number_of_values = 1)] + pub env: Vec<(String, String)>, + /// Prevent the process from gaining additional privileges + #[clap(long)] + pub no_new_privs: bool, + /// Path to process.json + #[clap(short, long)] + pub process: Option, + /// Detach from the container process + #[clap(short, long)] + pub detach: bool, + /// Identifier of the container + #[clap(required = true)] + pub container_id: String, + /// Command that should be executed in the container + #[clap(required = false)] + pub command: Vec, +} + +impl Exec { + pub fn exec(&self, root_path: PathBuf) -> Result<()> { + let syscall = create_syscall(); + 
ContainerBuilder::new(self.container_id.clone(), syscall.as_ref()) + .with_root_path(root_path) + .with_console_socket(self.console_socket.as_ref()) + .with_pid_file(self.pid_file.as_ref()) + .as_tenant() + .with_cwd(self.cwd.as_ref()) + .with_env(self.env.clone().into_iter().collect()) + .with_process(self.process.as_ref()) + .with_no_new_privs(self.no_new_privs) + .with_process(self.process.as_ref()) + .with_container_args(self.command.clone()) + .build() + } +} + +fn parse_key_val(s: &str) -> Result<(T, U), Box> +where + T: std::str::FromStr, + T::Err: Error + Send + Sync + 'static, + U: std::str::FromStr, + U::Err: Error + Send + Sync + 'static, +{ + let pos = s + .find('=') + .ok_or_else(|| format!("invalid KEY=value: no `=` found in `{}`", s))?; + Ok((s[..pos].parse()?, s[pos + 1..].parse()?)) +} diff --git a/crates/youki/src/commands/info.rs b/crates/youki/src/commands/info.rs new file mode 100644 index 00000000..542464e9 --- /dev/null +++ b/crates/youki/src/commands/info.rs @@ -0,0 +1,224 @@ +//! 
Contains functions related to printing information about system running Youki +use std::{collections::HashSet, fs, path::Path}; + +use anyhow::Result; +use clap::Clap; +use procfs::{CpuInfo, Meminfo}; + +use libcgroups::{common::CgroupSetup, v2::controller_type::ControllerType}; + +/// Show information about the system +#[derive(Clap, Debug)] +pub struct Info {} + +impl Info { + pub fn exec(&self) -> Result<()> { + print_youki(); + print_kernel(); + print_os(); + print_hardware(); + print_cgroups(); + print_namespaces(); + + Ok(()) + } +} + +/// print Version of Youki +pub fn print_youki() { + println!("{:<18}{}", "Version", env!("CARGO_PKG_VERSION")); +} + +/// Print Kernel Release, Version and Architecture +pub fn print_kernel() { + let uname = nix::sys::utsname::uname(); + println!("{:<18}{}", "Kernel-Release", uname.release()); + println!("{:<18}{}", "Kernel-Version", uname.version()); + println!("{:<18}{}", "Architecture", uname.machine()); +} + +/// Prints OS Distribution information +// see https://www.freedesktop.org/software/systemd/man/os-release.html +pub fn print_os() { + if let Some(os) = try_read_os_from("/etc/os-release") { + println!("{:<18}{}", "Operating System", os); + } else if let Some(os) = try_read_os_from("/usr/lib/os-release") { + println!("{:<18}{}", "Operating System", os); + } +} + +/// Helper function to read the OS Distribution info +fn try_read_os_from>(path: P) -> Option { + let os_release = path.as_ref(); + if !os_release.exists() { + return None; + } + + if let Ok(release_content) = fs::read_to_string(path) { + let pretty = find_parameter(&release_content, "PRETTY_NAME"); + + if let Some(pretty) = pretty { + return Some(pretty.trim_matches('"').to_owned()); + } + + let name = find_parameter(&release_content, "NAME"); + let version = find_parameter(&release_content, "VERSION"); + + if let Some((name, version)) = name.zip(version) { + return Some(format!( + "{} {}", + name.trim_matches('"'), + version.trim_matches('"') + )); + } + } 
/// Helper function to find keyword values in `KEY=value` formatted text
///
/// Returns the text after the first `=` of the first line whose key is
/// exactly `param_name`, or `None` when no such line exists.
///
/// The key must match in full: looking up `VERSION` does not match a
/// `VERSION_ID=...` line. Everything after the first `=` is returned as-is,
/// so values that contain `=` stay intact and `KEY=` yields an empty string.
fn find_parameter<'a>(content: &'a str, param_name: &str) -> Option<&'a str> {
    content
        .lines()
        // strip the key, then require `=` right after it so that keys which
        // merely share a prefix are skipped instead of ending the search
        .find_map(|line| line.strip_prefix(param_name)?.strip_prefix('='))
}
"attached", "detached"); + print_feature_status(&config, "CONFIG_CGROUP_BPF", display); + } + } + } + } +} + +fn read_kernel_config() -> Option { + let uname = nix::sys::utsname::uname(); + let kernel_config = Path::new("/boot").join(format!("config-{}", uname.release())); + if !kernel_config.exists() { + return None; + } + + fs::read_to_string(kernel_config).ok() +} + +pub fn print_namespaces() { + if let Some(content) = read_kernel_config() { + if let Some(ns_enabled) = find_parameter(&content, "CONFIG_NAMESPACES") { + if ns_enabled == "y" { + println!("{:<18}enabled", "Namespaces"); + } else { + println!("{:<18}disabled", "Namespaces"); + return; + } + } + + // mount namespace is always enabled if namespaces are enabled + println!(" {:<16}enabled", "mount"); + print_feature_status(&content, "CONFIG_UTS_NS", FeatureDisplay::new("uts")); + print_feature_status(&content, "CONFIG_IPC_NS", FeatureDisplay::new("ipc")); + print_feature_status(&content, "CONFIG_USER_NS", FeatureDisplay::new("user")); + print_feature_status(&content, "CONFIG_PID_NS", FeatureDisplay::new("pid")); + print_feature_status(&content, "CONFIG_NET_NS", FeatureDisplay::new("network")); + // While the CONFIG_CGROUP_NS kernel feature exists, it is obsolete and should not be used. CGroup namespaces + // are instead enabled with CONFIG_CGROUPS. 
/// Labels used when printing the status of one kernel feature: the feature's
/// display name and the text shown for its enabled and disabled states.
struct FeatureDisplay<'a> {
    name: &'a str,
    enabled: &'a str,
    disabled: &'a str,
}

impl<'a> FeatureDisplay<'a> {
    /// Creates a display that reports the states as "enabled" / "disabled".
    fn new(name: &'a str) -> Self {
        Self::with_status(name, "enabled", "disabled")
    }

    /// Creates a display with caller-supplied texts for both states.
    fn with_status(name: &'a str, enabled: &'a str, disabled: &'a str) -> Self {
        FeatureDisplay {
            name,
            enabled,
            disabled,
        }
    }
}
Contains Functionality of list container command +use std::fs; +use std::io; +use std::io::Write; +use std::path::PathBuf; + +use anyhow::Result; +use chrono::{DateTime, Local}; +use clap::Clap; +use tabwriter::TabWriter; + +use libcontainer::container::{state::State, Container}; + +/// List created containers +#[derive(Clap, Debug)] +pub struct List {} + +impl List { + /// lists all existing containers + pub fn exec(&self, root_path: PathBuf) -> Result<()> { + let root_path = fs::canonicalize(root_path)?; + let mut content = String::new(); + // all containers' data is stored in their respective dir in root directory + // so we iterate through each and print the various info + for container_dir in fs::read_dir(root_path)? { + let container_dir = container_dir?.path(); + let state_file = State::file_path(&container_dir); + if !state_file.exists() { + continue; + } + + let container = Container::load(container_dir)?; + let pid = if let Some(pid) = container.pid() { + pid.to_string() + } else { + "".to_owned() + }; + + let user_name = container.creator().unwrap_or_default(); + + let created = if let Some(utc) = container.created() { + let local: DateTime = DateTime::from(utc); + local.to_rfc3339_opts(chrono::SecondsFormat::Secs, false) + } else { + "".to_owned() + }; + + content.push_str(&format!( + "{}\t{}\t{}\t{}\t{}\t{}\n", + container.id(), + pid, + container.status(), + container.bundle().display(), + created, + user_name.to_string_lossy() + )); + } + + let mut tab_writer = TabWriter::new(io::stdout()); + writeln!(&mut tab_writer, "ID\tPID\tSTATUS\tBUNDLE\tCREATED\tCREATOR")?; + write!(&mut tab_writer, "{}", content)?; + tab_writer.flush()?; + + Ok(()) + } +} diff --git a/crates/youki/src/commands/mod.rs b/crates/youki/src/commands/mod.rs new file mode 100644 index 00000000..96661b61 --- /dev/null +++ b/crates/youki/src/commands/mod.rs @@ -0,0 +1,33 @@ +use anyhow::{bail, Context, Result}; +use std::{fs, path::Path}; + +use libcontainer::container::Container; + 
+pub mod create; +pub mod delete; +pub mod events; +pub mod exec; +pub mod info; +pub mod kill; +pub mod list; +pub mod pause; +pub mod ps; +pub mod resume; +pub mod run; +pub mod spec_json; +pub mod start; +pub mod state; + +fn load_container>(root_path: P, container_id: &str) -> Result { + // resolves relative paths, symbolic links etc. and get complete path + let root_path = fs::canonicalize(&root_path) + .with_context(|| format!("failed to canonicalize {}", root_path.as_ref().display()))?; + // the state of the container is stored in a directory named after the container id + let container_root = root_path.join(container_id); + if !container_root.exists() { + bail!("{} does not exist.", container_id) + } + + Container::load(container_root) + .with_context(|| format!("could not load state for container {}", container_id)) +} diff --git a/crates/youki/src/commands/pause.rs b/crates/youki/src/commands/pause.rs new file mode 100644 index 00000000..48e39d93 --- /dev/null +++ b/crates/youki/src/commands/pause.rs @@ -0,0 +1,28 @@ +//! 
Contains functionality of pause container command +use crate::commands::load_container; +use std::path::PathBuf; + +use anyhow::{Context, Result}; +use clap::Clap; + +/// Suspend the processes within the container +#[derive(Clap, Debug)] +pub struct Pause { + #[clap(required = true)] + pub container_id: String, +} + +// Pausing a container indicates suspending all processes in given container +// This uses Freezer cgroup to suspend and resume processes +// For more information see : +// https://man7.org/linux/man-pages/man7/cgroups.7.html +// https://www.kernel.org/doc/Documentation/cgroup-v1/freezer-subsystem.txt +impl Pause { + pub fn exec(&self, root_path: PathBuf) -> Result<()> { + log::debug!("start pausing container {}", self.container_id); + let mut container = load_container(root_path, &self.container_id)?; + container + .pause() + .with_context(|| format!("failed to pause container {}", self.container_id)) + } +} diff --git a/crates/youki/src/commands/ps.rs b/crates/youki/src/commands/ps.rs new file mode 100644 index 00000000..0f26858a --- /dev/null +++ b/crates/youki/src/commands/ps.rs @@ -0,0 +1,91 @@ +use anyhow::{bail, Context, Result}; +use clap::{self, Clap}; +use libcgroups; +use libcontainer::{container::Container, utils}; +use std::{path::PathBuf, process::Command}; + +/// Display the processes inside the container +#[derive(Clap, Debug)] +pub struct Ps { + /// format to display processes: table or json (default: "table") + #[clap(short, long, default_value = "table")] + format: String, + #[clap(required = true)] + pub container_id: String, + /// options will be passed to the ps utility + #[clap(setting = clap::ArgSettings::Last)] + ps_options: Vec, +} +impl Ps { + pub fn exec(&self, root_path: PathBuf) -> Result<()> { + let container_root = root_path.join(&self.container_id); + if !container_root.exists() { + bail!("{} doesn't exist.", self.container_id) + } + let container = Container::load(container_root)?; + if container.root.exists() { + let 
config_absolute_path = container.root.join("config.json"); + log::debug!("load spec from {:?}", config_absolute_path); + let spec = oci_spec::runtime::Spec::load(config_absolute_path)?; + log::debug!("spec: {:?}", spec); + let cgroups_path = utils::get_cgroup_path( + spec.linux() + .as_ref() + .context("no linux in spec")? + .cgroups_path(), + container.id(), + ); + let systemd_cgroup = container + .systemd() + .context("could not determine cgroup manager")?; + let cmanager = libcgroups::common::create_cgroup_manager(cgroups_path, systemd_cgroup)?; + let pids: Vec = cmanager + .get_all_pids()? + .iter() + .map(|pid| pid.as_raw()) + .collect(); + + if self.format == "json" { + println!("{}", serde_json::to_string(&pids)?); + } else if self.format == "table" { + let default_ps_options = vec![String::from("-ef")]; + let ps_options = if self.ps_options.is_empty() { + &default_ps_options + } else { + &self.ps_options + }; + let output = Command::new("ps").args(ps_options).output()?; + if !output.status.success() { + println!("{}", std::str::from_utf8(&output.stderr)?); + } else { + let lines = std::str::from_utf8(&output.stdout)?; + let lines: Vec<&str> = lines.split('\n').collect(); + let pid_index = get_pid_index(lines[0])?; + println!("{}", &lines[0]); + for line in &lines[1..] { + if line.is_empty() { + continue; + } + let fields: Vec<&str> = line.split_whitespace().collect(); + let pid: i32 = fields[pid_index].parse()?; + if pids.contains(&pid) { + println!("{}", line); + } + } + } + } + } + Ok(()) + } +} + +fn get_pid_index(title: &str) -> Result { + let titles = title.split_whitespace(); + + for (index, name) in titles.enumerate() { + if name == "PID" { + return Ok(index); + } + } + bail!("could't find PID field in ps output"); +} diff --git a/crates/youki/src/commands/resume.rs b/crates/youki/src/commands/resume.rs new file mode 100644 index 00000000..b36573b4 --- /dev/null +++ b/crates/youki/src/commands/resume.rs @@ -0,0 +1,29 @@ +//! 
Contains functionality of resume container command +use std::path::PathBuf; + +use anyhow::{Context, Result}; +use clap::Clap; + +use crate::commands::load_container; + +/// Resume the processes within the container +#[derive(Clap, Debug)] +pub struct Resume { + #[clap(required = true)] + pub container_id: String, +} + +// Resuming a container indicates resuming all processes in given container from paused state +// This uses Freezer cgroup to suspend and resume processes +// For more information see : +// https://man7.org/linux/man-pages/man7/cgroups.7.html +// https://www.kernel.org/doc/Documentation/cgroup-v1/freezer-subsystem.txt +impl Resume { + pub fn exec(&self, root_path: PathBuf) -> Result<()> { + log::debug!("start resuming container {}", self.container_id); + let mut container = load_container(root_path, &self.container_id)?; + container + .resume() + .with_context(|| format!("failed to resume container {}", self.container_id)) + } +} diff --git a/crates/youki/src/commands/run.rs b/crates/youki/src/commands/run.rs new file mode 100644 index 00000000..5213d8c4 --- /dev/null +++ b/crates/youki/src/commands/run.rs @@ -0,0 +1,44 @@ +use std::path::PathBuf; + +use anyhow::{Context, Result}; +use clap::Clap; +use libcontainer::{container::builder::ContainerBuilder, syscall::syscall::create_syscall}; + +/// Create a container and immediately start it +#[derive(Clap, Debug)] +pub struct Run { + /// File to write pid of the container created + // note that in the end, container is just another process + #[clap(short, long)] + pid_file: Option, + /// path to the bundle directory, containing config.json and root filesystem + #[clap(short, long, default_value = ".")] + bundle: PathBuf, + /// Unix socket (file) path , which will receive file descriptor of the writing end of the pseudoterminal + #[clap(short, long)] + console_socket: Option, + /// Pass N additional file descriptors to the container (stdio + $LISTEN_FDS + N in total) + #[clap(long, default_value = 
"0")] + preserve_fds: i32, + /// name of the container instance to be started + #[clap(required = true)] + pub container_id: String, +} + +impl Run { + pub fn exec(&self, root_path: PathBuf, systemd_cgroup: bool) -> Result<()> { + let syscall = create_syscall(); + let mut container = ContainerBuilder::new(self.container_id.clone(), syscall.as_ref()) + .with_pid_file(self.pid_file.as_ref()) + .with_console_socket(self.console_socket.as_ref()) + .with_root_path(root_path) + .with_preserved_fds(self.preserve_fds) + .as_init(&self.bundle) + .with_systemd(systemd_cgroup) + .build()?; + + container + .start() + .with_context(|| format!("failed to start container {}", self.container_id)) + } +} diff --git a/crates/youki/src/commands/spec_json.rs b/crates/youki/src/commands/spec_json.rs new file mode 100644 index 00000000..c26d8187 --- /dev/null +++ b/crates/youki/src/commands/spec_json.rs @@ -0,0 +1,122 @@ +use anyhow::Result; +use clap::Clap; +use nix; +use oci_spec::runtime::Mount; +use oci_spec::runtime::{ + LinuxBuilder, LinuxIdMappingBuilder, LinuxNamespace, LinuxNamespaceBuilder, LinuxNamespaceType, + Spec, +}; +use serde_json::to_writer_pretty; +use std::fs::File; +use std::path::Path; +use std::path::PathBuf; +/// Command generates a config.json +#[derive(Clap, Debug)] +pub struct SpecJson { + /// Generate a configuration for a rootless container + #[clap(long)] + pub rootless: bool, +} + +pub fn get_default() -> Result { + Ok(Spec::default()) +} + +pub fn get_rootless() -> Result { + // Remove network and user namespace from the default spec + let mut namespaces: Vec = oci_spec::runtime::get_default_namespaces() + .into_iter() + .filter(|ns| { + ns.typ() != LinuxNamespaceType::Network && ns.typ() != LinuxNamespaceType::User + }) + .collect(); + + // Add user namespace + namespaces.push( + LinuxNamespaceBuilder::default() + .typ(LinuxNamespaceType::User) + .build()?, + ); + + let uid = nix::unistd::geteuid().as_raw(); + let gid = nix::unistd::getegid().as_raw(); + 
+ let linux = LinuxBuilder::default() + .namespaces(namespaces) + .uid_mappings(vec![LinuxIdMappingBuilder::default() + .host_id(uid) + .container_id(0_u32) + .size(1_u32) + .build()?]) + .gid_mappings(vec![LinuxIdMappingBuilder::default() + .host_id(gid) + .container_id(0_u32) + .size(1_u32) + .build()?]) + .build()?; + + // Prepare the mounts + + let mut mounts: Vec = oci_spec::runtime::get_default_mounts(); + for mount in &mut mounts { + if mount.destination().eq(Path::new("/sys")) { + mount + .set_source(Some(PathBuf::from("/sys"))) + .set_typ(Some(String::from("none"))) + .set_options(Some(vec![ + "rbind".to_string(), + "nosuid".to_string(), + "noexec".to_string(), + "nodev".to_string(), + "ro".to_string(), + ])); + } else { + let options: Vec = mount + .options() + .as_ref() + .unwrap_or(&vec![]) + .iter() + .filter(|&o| !o.starts_with("gid=") && !o.starts_with("uid=")) + .map(|o| o.to_string()) + .collect(); + mount.set_options(Some(options)); + } + } + + let mut spec = get_default()?; + spec.set_linux(Some(linux)).set_mounts(Some(mounts)); + Ok(spec) +} + +/// spec Cli command +impl SpecJson { + pub fn exec(&self) -> Result<()> { + let spec = if self.rootless { + get_rootless()? + } else { + get_default()? + }; + + // write data to config.json + to_writer_pretty(&File::create("config.json")?, &spec)?; + Ok(()) + } +} + +#[cfg(test)] +// Tests become unstable if not serial. The cause is not known. 
+mod tests { + use super::*; + use crate::utils::create_temp_dir; + use serial_test::serial; + + #[test] + #[serial] + fn test_spec_json() -> Result<()> { + let spec = get_rootless()?; + let tmpdir = create_temp_dir("test_spec_json").expect("failed to create temp dir"); + let path = tmpdir.path().join("config.json"); + to_writer_pretty(&File::create(path)?, &spec)?; + Ok(()) + } +} diff --git a/crates/youki/src/commands/start.rs b/crates/youki/src/commands/start.rs new file mode 100644 index 00000000..74b631ec --- /dev/null +++ b/crates/youki/src/commands/start.rs @@ -0,0 +1,24 @@ +//! Starts execution of the container + +use std::path::PathBuf; + +use anyhow::{Context, Result}; +use clap::Clap; + +use crate::commands::load_container; + +/// Start a previously created container +#[derive(Clap, Debug)] +pub struct Start { + #[clap(required = true)] + pub container_id: String, +} + +impl Start { + pub fn exec(&self, root_path: PathBuf) -> Result<()> { + let mut container = load_container(root_path, &self.container_id)?; + container + .start() + .with_context(|| format!("failed to start container {}", self.container_id)) + } +} diff --git a/crates/youki/src/commands/state.rs b/crates/youki/src/commands/state.rs new file mode 100644 index 00000000..c0883627 --- /dev/null +++ b/crates/youki/src/commands/state.rs @@ -0,0 +1,24 @@ +use std::fs; +use std::path::PathBuf; + +use anyhow::Result; +use clap::Clap; + +use libcontainer::container::Container; + +/// Show the container state +#[derive(Clap, Debug)] +pub struct State { + #[clap(required = true)] + pub container_id: String, +} + +impl State { + pub fn exec(&self, root_path: PathBuf) -> Result<()> { + let root_path = fs::canonicalize(root_path)?; + let container_root = root_path.join(&self.container_id); + let container = Container::load(container_root)?; + println!("{}", serde_json::to_string_pretty(&container.state)?); + std::process::exit(0); + } +} diff --git a/crates/youki/src/logger.rs 
b/crates/youki/src/logger.rs new file mode 100644 index 00000000..239b8c41 --- /dev/null +++ b/crates/youki/src/logger.rs @@ -0,0 +1,121 @@ +//! Default Youki Logger + +use std::env; +use std::io::{stderr, Write}; +use std::path::PathBuf; +use std::{ + fs::{File, OpenOptions}, + str::FromStr, +}; + +use anyhow::Result; +use log::{LevelFilter, Log, Metadata, Record}; +use once_cell::sync::OnceCell; + +/// Public global variables to access logger and logfile +pub static YOUKI_LOGGER: OnceCell = OnceCell::new(); +pub static LOG_FILE: OnceCell> = OnceCell::new(); + +/// If in debug mode, default level is debug to get maximum logging +#[cfg(debug_assertions)] +const DEFAULT_LOG_LEVEL: LevelFilter = LevelFilter::Debug; + +/// If not in debug mode, default level is warn to get important logs +#[cfg(not(debug_assertions))] +const DEFAULT_LOG_LEVEL: LevelFilter = LevelFilter::Warn; + +/// Initialize the logger, must be called before accessing the logger +/// Multiple parts might call this at once, but the actual initialization +/// is done only once due to use of OnceCell +pub fn init(log_file: Option) -> Result<()> { + // If file exists, ignore, else create and open the file + let _log_file = LOG_FILE.get_or_init(|| -> Option { + // set the log level if specified in env variable or set to default + let level_filter = if let Ok(log_level_str) = env::var("YOUKI_LOG_LEVEL") { + LevelFilter::from_str(&log_level_str).unwrap_or(DEFAULT_LOG_LEVEL) + } else { + DEFAULT_LOG_LEVEL + }; + + // Create a new logger, or get existing if already created + let logger = YOUKI_LOGGER.get_or_init(|| YoukiLogger::new(level_filter.to_level())); + + log::set_logger(logger) + .map(|()| log::set_max_level(level_filter)) + .expect("set logger failed"); + + // Create and open log file + log_file.as_ref().map(|log_file_path| { + OpenOptions::new() + .create(true) + .write(true) + .truncate(false) + .open(log_file_path) + .expect("failed opening log file ") + }) + }); + Ok(()) +} + +/// Youki's custom 
Logger +pub struct YoukiLogger { + /// Indicates level up to which logs are to be printed + level: Option, +} + +impl YoukiLogger { + /// Create new logger + pub fn new(level: Option) -> Self { + Self { level } + } +} + +/// Implements Log interface given by log crate, so we can use its functionality +impl Log for YoukiLogger { + /// Check if level of given log is enabled or not + fn enabled(&self, metadata: &Metadata) -> bool { + if let Some(level) = self.level { + metadata.level() <= level + } else { + false + } + } + + /// Function to carry out logging + fn log(&self, record: &Record) { + if self.enabled(record.metadata()) { + let log_msg = match (record.file(), record.line()) { + (Some(file), Some(line)) => format!( + "[{} {}:{}] {} {}\r", + record.level(), + file, + line, + chrono::Local::now().to_rfc3339(), + record.args() + ), + (_, _) => format!( + "[{}] {} {}\r", + record.level(), + chrono::Local::now().to_rfc3339(), + record.args() + ), + }; + + // if log file is set, write to it, else write to stderr + if let Some(mut log_file) = LOG_FILE.get().unwrap().as_ref() { + let _ = writeln!(log_file, "{}", log_msg); + } else { + let _ = writeln!(stderr(), "{}", log_msg); + } + } + } + + /// Flush logs to file + fn flush(&self) { + if let Some(mut log_file) = LOG_FILE.get().unwrap().as_ref() { + log_file.flush().expect("Failed to flush"); + } else { + stderr().flush().expect("Failed to flush"); + } + } +} diff --git a/crates/youki/src/main.rs b/crates/youki/src/main.rs new file mode 100644 index 00000000..3ba68d4f --- /dev/null +++ b/crates/youki/src/main.rs @@ -0,0 +1,170 @@ +//! # Youki +//! Container Runtime written in Rust, inspired by [railcar](https://github.com/oracle/railcar) +//! This crate provides a container runtime which can be used by a high-level container runtime to run containers. 
+mod commands;
+mod logger;
+
+use std::fs;
+use std::path::PathBuf;
+
+use anyhow::{bail, Context, Result};
+use clap::{crate_version, Clap};
+
+use crate::commands::create;
+use crate::commands::delete;
+use crate::commands::events;
+use crate::commands::exec;
+use crate::commands::info;
+use crate::commands::kill;
+use crate::commands::list;
+use crate::commands::pause;
+use crate::commands::ps;
+use crate::commands::resume;
+use crate::commands::run;
+use crate::commands::spec_json;
+use crate::commands::start;
+use crate::commands::state;
+use libcontainer::rootless::rootless_required;
+use libcontainer::utils;
+use libcontainer::utils::create_dir_all_with_mode;
+use nix::sys::stat::Mode;
+use nix::unistd::getuid;
+
+// High-level commandline option definition
+// This takes global options as well as individual commands as specified in [OCI runtime-spec](https://github.com/opencontainers/runtime-spec/blob/master/runtime.md)
+// Also check [runc commandline documentation](https://github.com/opencontainers/runc/blob/master/man/runc.8.md) for more explanation
+#[derive(Clap, Debug)]
+#[clap(version = crate_version!(), author = "youki team")]
+struct Opts {
+    /// root directory to store container state
+    #[clap(short, long)]
+    root: Option<PathBuf>,
+    /// file to write the logs to, logs go to stderr when it is not given
+    #[clap(short, long)]
+    log: Option<PathBuf>,
+    /// format of the logs (accepted, but not used at the moment)
+    #[clap(long)]
+    log_format: Option<String>,
+    /// Enable systemd cgroup manager, rather than use the cgroupfs directly.
+    #[clap(short, long)]
+    systemd_cgroup: bool,
+    /// command to actually manage container
+    #[clap(subcommand)]
+    subcmd: SubCommand,
+}
+
+// Subcommands accepted by Youki, conforming with [OCI runtime-spec](https://github.com/opencontainers/runtime-spec/blob/master/runtime.md)
+// Also for a short information, check [runc commandline documentation](https://github.com/opencontainers/runc/blob/master/man/runc.8.md)
+#[derive(Clap, Debug)]
+enum SubCommand {
+    #[clap(version = crate_version!(), author = "youki team")]
+    Create(create::Create),
+    #[clap(version = crate_version!(), author = "youki team")]
+    Start(start::Start),
+    #[clap(version = crate_version!(), author = "youki team")]
+    Run(run::Run),
+    #[clap(version = crate_version!(), author = "youki team")]
+    Exec(exec::Exec),
+    #[clap(version = crate_version!(), author = "youki team")]
+    Kill(kill::Kill),
+    #[clap(version = crate_version!(), author = "youki team")]
+    Delete(delete::Delete),
+    #[clap(version = crate_version!(), author = "youki team")]
+    State(state::State),
+    #[clap(version = crate_version!(), author = "youki team")]
+    Info(info::Info),
+    #[clap(version = crate_version!(), author = "youki team")]
+    Spec(spec_json::SpecJson),
+    #[clap(version = crate_version!(), author = "youki team")]
+    List(list::List),
+    #[clap(version = crate_version!(), author = "youki team")]
+    Pause(pause::Pause),
+    #[clap(version = crate_version!(), author = "youki team")]
+    Resume(resume::Resume),
+    #[clap(version = crate_version!(), author = "youki team")]
+    Events(events::Events),
+    #[clap(version = crate_version!(), author = "youki team", setting=clap::AppSettings::AllowLeadingHyphen)]
+    Ps(ps::Ps),
+}
+
+/// This is the entry point in the container runtime. The binary is run by a high-level container runtime,
+/// with various flags passed. This parses the flags, creates and manages appropriate resources.
+fn main() -> Result<()> {
+    // A malicious container can gain access to the host machine by modifying youki's host
+    // binary and infect it with malicious code. This vulnerability was first discovered
+    // in runc and was assigned as CVE-2019-5736, but it also affects youki.
+    //
+    // The fix is to copy /proc/self/exe in an anonymous file descriptor (created via memfd_create),
+    // seal it and re-execute it. Because the final step is re-execution, this needs to be done at
+    // the beginning of this process.
+    //
+    // Ref: https://github.com/opencontainers/runc/commit/0a8e4117e7f715d5fbeef398405813ce8e88558b
+    // Ref: https://github.com/lxc/lxc/commit/6400238d08cdf1ca20d49bafb85f4e224348bf9d
+    pentacle::ensure_sealed().context("Failed to seal /proc/self/exe")?;
+
+    let opts = Opts::parse();
+
+    if let Err(e) = crate::logger::init(opts.log) {
+        eprintln!("log init failed: {:?}", e);
+    }
+
+    let root_path = determine_root_path(opts.root)?;
+    let systemd_cgroup = opts.systemd_cgroup;
+
+    match opts.subcmd {
+        SubCommand::Create(create) => create.exec(root_path, systemd_cgroup),
+        SubCommand::Start(start) => start.exec(root_path),
+        SubCommand::Run(run) => run.exec(root_path, systemd_cgroup),
+        SubCommand::Exec(exec) => exec.exec(root_path),
+        SubCommand::Kill(kill) => kill.exec(root_path),
+        SubCommand::Delete(delete) => delete.exec(root_path),
+        SubCommand::State(state) => state.exec(root_path),
+        SubCommand::Info(info) => info.exec(),
+        SubCommand::List(list) => list.exec(root_path),
+        SubCommand::Spec(spec) => spec.exec(),
+        SubCommand::Pause(pause) => pause.exec(root_path),
+        SubCommand::Resume(resume) => resume.exec(root_path),
+        SubCommand::Events(events) => events.exec(root_path),
+        SubCommand::Ps(ps) => ps.exec(root_path),
+    }
+}
+
+/// Picks the directory under which youki keeps container state.
+///
+/// An explicitly given `root_path` is returned untouched. Otherwise `/run/youki` is created
+/// and used unless rootless mode is required, in which case `$XDG_RUNTIME_DIR`,
+/// `/run/user/<uid>`, `$HOME/.youki/run` and `/tmp/youki/<uid>` are tried in this order.
+/// Fails when `/run/youki` cannot be created or when no rootless location is usable.
+fn determine_root_path(root_path: Option<PathBuf>) -> Result<PathBuf> {
+    if let Some(path) = root_path {
+        return Ok(path);
+    }
+
+    if !rootless_required() {
+        let default = PathBuf::from("/run/youki");
+        utils::create_dir_all(&default)?;
+        return Ok(default);
+    }
+
+    // see https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html
+    // an empty value counts as unset, it would otherwise give an empty (relative) root path
+    if let Some(path) = std::env::var("XDG_RUNTIME_DIR").ok().filter(|dir| !dir.is_empty()) {
+        return Ok(PathBuf::from(path));
+    }
+
+    // XDG_RUNTIME_DIR is not set, try the usual locations, most preferred first
+    let uid = getuid().as_raw();
+    let home = std::env::var("HOME").ok().and_then(|home| fs::canonicalize(home).ok());
+    let candidates = [
+        Some(PathBuf::from(format!("/run/user/{}", uid))),
+        home.map(|home| home.join(".youki/run")),
+        Some(PathBuf::from(format!("/tmp/youki/{}", uid))),
+    ];
+    for dir in candidates.iter().flatten() {
+        if create_dir_all_with_mode(dir, uid, Mode::S_IRWXU).is_ok() {
+            return Ok(dir.clone());
+        }
+    }
+
+    bail!("could not find a storage location with suitable permissions for the current user");
+}
diff --git a/youki_integration_test/src/tests/linux_ns_itype/mod.rs b/youki_integration_test/src/tests/linux_ns_itype/mod.rs deleted file mode 100644 index f2d29d71..00000000 --- a/youki_integration_test/src/tests/linux_ns_itype/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -mod ns_itype_test; -pub use ns_itype_test::get_ns_itype_tests; diff --git a/youki_integration_test/src/tests/linux_ns_itype/ns_itype_test.rs b/youki_integration_test/src/tests/linux_ns_itype/ns_itype_test.rs deleted file mode 100644 index d2a7b4cd..00000000 --- a/youki_integration_test/src/tests/linux_ns_itype/ns_itype_test.rs +++ /dev/null @@ -1,73 +0,0 @@ -use crate::utils::test_outside_container; -use anyhow::anyhow; -use oci_spec::runtime::LinuxBuilder; -use oci_spec::runtime::{Spec, SpecBuilder}; -use procfs::process::Process; -use test_framework::{Test, TestGroup, TestResult}; - -// get spec for the test -fn get_spec() -> Spec { - let mut r = SpecBuilder::default() - .linux( - LinuxBuilder::default() - .namespaces( - // we have to remove all namespaces, so
we directly - // provide an empty vec here - vec![], - ) - // if these both are not empty, we cannot set a inherited - // mnt namespace, as these both require a private mnt namespace - .masked_paths(vec![]) - .readonly_paths(vec![]) - .build() - .expect("could not build spec"), - ) - .build() - .unwrap(); - // We need to remove hostname to avoid test failures when not creating UTS namespace - r.set_hostname(None); - r -} - -fn get_test<'a>(test_name: &'static str) -> Test<'a> { - Test::new( - test_name, - Box::new(move || { - let host_proc = Process::myself().expect("error in getting /proc/self"); - let host_namespaces = match host_proc.namespaces() { - Ok(n) => n, - Err(e) => { - return TestResult::Failed(anyhow!( - "error in resolving host namespaces : {}", - e - )) - } - }; - let spec = get_spec(); - test_outside_container(spec, &move |data| { - let pid = match data.state { - Some(s) => s.pid.unwrap(), - None => return TestResult::Failed(anyhow!("state command returned error")), - }; - let container_process = - Process::new(pid).expect("error in getting /proc for container process"); - let container_namespaces = container_process - .namespaces() - .expect("error in getting namespaces of container process"); - if container_namespaces != host_namespaces { - return TestResult::Failed(anyhow!( - "error : namespaces are not correctly inherited" - )); - } - TestResult::Passed - }) - }), - ) -} - -pub fn get_ns_itype_tests<'a>() -> TestGroup<'a> { - let mut tg = TestGroup::new("ns_itype"); - let tests: Vec<_> = vec![Box::new(get_test("ns_itype"))]; - tg.add(tests); - tg -}