1
0
Fork 0
mirror of https://github.com/containers/youki synced 2024-05-06 15:46:17 +02:00
youki/src/syscall/linux.rs
Takashi IIGUNI 97848f1ce6
Updated cgroups oci-spec-rs to 0.5.1 or later (#303)
* Updated cgroup oci-spec-rs to 0.5.1

Signed-off-by: Takashi IIGUNI <iiguni.tks@gmail.com>
2021-09-26 18:08:56 -07:00

215 lines
7.4 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Implements the `Syscall` trait for Linux systems
use std::ffi::{CStr, OsStr};
use std::os::unix::ffi::OsStrExt;
use std::sync::Arc;
use std::{any::Any, mem, path::Path, ptr};
use anyhow::{bail, Result};
use caps::{errors::CapsError, CapSet, Capability, CapsHashSet};
use libc::{c_char, uid_t};
use nix::{
errno::Errno,
unistd::{fchdir, pivot_root, sethostname},
};
use nix::{fcntl::open, sched::CloneFlags};
use nix::{
fcntl::OFlag,
unistd::{Gid, Uid},
};
use nix::{
mount::{mount, umount2, MntFlags, MsFlags},
unistd,
};
use nix::{sched::unshare, sys::stat::Mode};
use oci_spec::runtime::LinuxRlimit;
use super::Syscall;
use crate::capabilities;
/// Empty structure that implements the `Syscall` trait for Linux systems
#[derive(Clone)]
pub struct LinuxSyscall;
impl LinuxSyscall {
    /// Converts a NUL-terminated C string into any type that can be built
    /// from a borrowed `OsStr`.
    ///
    /// # Safety
    /// `p` is handed straight to `CStr::from_ptr`, so the caller must uphold
    /// that function's requirements (valid, NUL-terminated, live for `'a`).
    unsafe fn from_raw_buf<'a, T>(p: *const c_char) -> T
    where
        T: From<&'a OsStr>,
    {
        let raw_bytes = CStr::from_ptr(p).to_bytes();
        OsStr::from_bytes(raw_bytes).into()
    }

    /// Extracts the `pw_name` field of a `libc::passwd` entry as an owned,
    /// reference-counted `OsStr`.
    ///
    /// # Safety
    /// `passwd.pw_name` is read through `from_raw_buf`, so it must satisfy
    /// that function's safety contract.
    unsafe fn passwd_to_user(passwd: libc::passwd) -> Arc<OsStr> {
        Self::from_raw_buf(passwd.pw_name)
    }
}
impl Syscall for LinuxSyscall {
/// Returns this value as a `&dyn Any` to enable dynamic typing.
/// See https://doc.rust-lang.org/std/any/index.html for more information.
fn as_any(&self) -> &dyn Any {
    let erased: &dyn Any = self;
    erased
}
/// Function to set given path as root path inside process
fn pivot_rootfs(&self, path: &Path) -> Result<()> {
// open the path as directory and read only
let newroot = open(path, OFlag::O_DIRECTORY | OFlag::O_RDONLY, Mode::empty())?;
// make the given path as the root directory for the container
// see https://man7.org/linux/man-pages/man2/pivot_root.2.html, specially the notes
// pivot root usually changes the root directory to first argument, and then mounts the original root
// directory at second argument. Giving same path for both stacks mapping of the original root directory
// above the new directory at the same path, then the call to umount unmounts the original root directory from
// this path. This is done, as otherwise, we will need to create a separate temporary directory under the new root path
// so we can move the original root there, and then unmount that. This way saves the creation of the temporary
// directory to put original root directory.
pivot_root(path, path)?;
// Make the original root directory rslave to avoid propagating unmount event to the host mount namespace.
// We should use MS_SLAVE not MS_PRIVATE according to https://github.com/opencontainers/runc/pull/1500.
mount(
None::<&str>,
"/",
None::<&str>,
MsFlags::MS_SLAVE | MsFlags::MS_REC,
None::<&str>,
)?;
// Unmount the original root directory which was stacked on top of new root directory
// MNT_DETACH makes the mount point unavailable to new accesses, but waits till the original mount point
// to be free of activity to actually unmount
// see https://man7.org/linux/man-pages/man2/umount2.2.html for more information
umount2("/", MntFlags::MNT_DETACH)?;
// Change directory to root
fchdir(newroot)?;
Ok(())
}
/// Set namespace for process
fn set_ns(&self, rawfd: i32, nstype: CloneFlags) -> Result<()> {
nix::sched::setns(rawfd, nstype)?;
Ok(())
}
/// Sets the uid and gid of the process.
///
/// The keep-capabilities flag is switched on before the ids are changed and
/// switched off again afterwards. For a non-root `uid` the effective
/// capabilities are reset in between.
fn set_id(&self, uid: Uid, gid: Gid) -> Result<()> {
    match prctl::set_keep_capabilities(true) {
        Ok(_) => {}
        Err(code) => bail!("set keep capabilities returned {}", code),
    }

    // Arguments are the real, effective and saved-set ids, in that order.
    // The group is changed first, then the user.
    unistd::setresgid(gid, gid, gid)?;
    unistd::setresuid(uid, uid, uid)?;

    // If not the root user, reset capabilities to effective capabilities,
    // which are the ones the kernel uses to perform checks.
    // See https://man7.org/linux/man-pages/man7/capabilities.7.html for more information.
    let root = Uid::from_raw(0);
    if uid != root {
        capabilities::reset_effective(self)?;
    }

    match prctl::set_keep_capabilities(false) {
        Ok(_) => Ok(()),
        Err(code) => bail!("set keep capabilities returned {}", code),
    }
}
/// Disassociate parts of execution context
// see https://man7.org/linux/man-pages/man2/unshare.2.html for more information
fn unshare(&self, flags: CloneFlags) -> Result<()> {
unshare(flags)?;
Ok(())
}
/// Sets capabilities for the container process.
///
/// Every set except the bounding set is handed to `caps::set` directly.
/// For the bounding set, each capability that is *not* in `value` is
/// dropped one by one, so that only the requested ones remain.
fn set_capability(&self, cset: CapSet, value: &CapsHashSet) -> Result<(), CapsError> {
    // caps::set cannot set capabilities in the bounding set, so only the
    // other sets take the direct route.
    if !matches!(cset, CapSet::Bounding) {
        return caps::set(None, cset, value);
    }

    // Everything known minus what was requested is what has to be dropped.
    let every_cap = caps::all();
    for unwanted in every_cap.difference(value) {
        let unsupported = matches!(
            unwanted,
            Capability::CAP_PERFMON | Capability::CAP_CHECKPOINT_RESTORE | Capability::CAP_BPF
        );
        if unsupported {
            // These are skipped with a warning instead of being dropped.
            log::warn!("{:?} is not supported.", unwanted);
        } else {
            caps::drop(None, CapSet::Bounding, *unwanted)?;
        }
    }
    Ok(())
}
/// Sets the hostname for the process.
///
/// # Errors
/// Fails with a message naming the hostname when `sethostname` reports an error.
fn set_hostname(&self, hostname: &str) -> Result<()> {
    match sethostname(hostname) {
        Ok(_) => Ok(()),
        Err(e) => bail!("Failed to set {} as hostname. {:?}", hostname, e),
    }
}
/// Sets a resource limit for the process.
///
/// The soft and hard values of `rlimit` are passed to `setrlimit(2)` for the
/// resource identified by `rlimit.typ()`.
///
/// # Errors
/// Fails with a message naming the resource type when `setrlimit` reports an error.
fn set_rlimit(&self, rlimit: &LinuxRlimit) -> Result<()> {
    let rlim = libc::rlimit {
        rlim_cur: rlimit.soft(),
        rlim_max: rlimit.hard(),
    };

    // The integer type of the `resource` parameter differs between libc
    // implementations (unsigned on glibc, signed on musl). `as _` lets the
    // cast follow whatever the target's `libc::setrlimit` declares instead of
    // hard-coding `u32`, which only compiles against glibc.
    //
    // SAFETY: `rlim` is a fully initialised local that outlives the call, and
    // the pointer derived from it is only used for the duration of the call.
    let res = unsafe { libc::setrlimit(rlimit.typ() as _, &rlim) };

    if let Err(e) = Errno::result(res) {
        bail!("Failed to set {:?}. {:?}", rlimit.typ(), e)
    }
    Ok(())
}
/// Looks up the passwd entry for `uid` via `getpwuid_r` and returns its
/// `pw_name` field.
///
/// Returns `None` when no entry is reported (no such user, or an error
/// occurred), when doubling the scratch buffer size would overflow, or when
/// the result pointer is not the entry that was passed in.
///
/// Taken from https://crates.io/crates/users
fn get_pwuid(&self, uid: uid_t) -> Option<Arc<OsStr>> {
    let mut entry = unsafe { mem::zeroed::<libc::passwd>() };
    let mut scratch = vec![0; 2048];
    let mut found = ptr::null_mut::<libc::passwd>();

    // Retry with a buffer twice as large for as long as the call reports
    // that the supplied buffer was too small.
    while unsafe {
        libc::getpwuid_r(
            uid,
            &mut entry,
            scratch.as_mut_ptr(),
            scratch.len(),
            &mut found,
        )
    } == libc::ERANGE
    {
        let doubled = scratch.len().checked_mul(2)?;
        scratch.resize(doubled, 0);
    }

    // A null result means there is no such user or an error has occurred
    // (errno is set in the error case).
    if found.is_null() {
        return None;
    }

    // On success the result of getpwuid_r should be its input passwd.
    if found != &mut entry {
        return None;
    }

    // `scratch` is still alive here; it is the buffer that was supplied to
    // getpwuid_r for the entry's data.
    Some(unsafe { Self::passwd_to_user(found.read()) })
}
fn chroot(&self, path: &Path) -> Result<()> {
unistd::chroot(path)?;
Ok(())
}
}