mirror of
https://github.com/containers/youki
synced 2024-06-08 15:56:16 +02:00
Fix user namespace for integration tests (#233)
* Fix entering into the user namespace correctly. 1. Allow entering a user namespace when the calling process is root. Previously, the rootless condition was triggered only when the calling process was non-root. 2. Move the creation of the NotifyListener into the main process. Once the container init process enters the user namespace, it can't bind to a unix domain socket whose parent directory is owned by root. * Now we pass 2 more namespace tests.
This commit is contained in:
parent
f855a1d935
commit
ebf4708f46
|
@ -40,8 +40,8 @@ test_cases=(
|
|||
# "linux_masked_paths/linux_masked_paths.t"
|
||||
"linux_mount_label/linux_mount_label.t"
|
||||
# "linux_ns_itype/linux_ns_itype.t"
|
||||
# "linux_ns_nopath/linux_ns_nopath.t"
|
||||
# "linux_ns_path/linux_ns_path.t"
|
||||
"linux_ns_nopath/linux_ns_nopath.t"
|
||||
"linux_ns_path/linux_ns_path.t"
|
||||
# "linux_ns_path_type/linux_ns_path_type.t"
|
||||
# "linux_process_apparmor_profile/linux_process_apparmor_profile.t"
|
||||
"linux_readonly_paths/linux_readonly_paths.t"
|
||||
|
|
|
@ -1,16 +1,14 @@
|
|||
use crate::{
|
||||
hooks,
|
||||
notify_socket::NotifyListener,
|
||||
process::{channel, fork, init},
|
||||
rootless::Rootless,
|
||||
rootless::{self, Rootless},
|
||||
syscall::linux::LinuxSyscall,
|
||||
utils,
|
||||
};
|
||||
use anyhow::{Context, Result};
|
||||
use cgroups;
|
||||
use nix::unistd::Pid;
|
||||
use oci_spec::Spec;
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
use std::{fs, os::unix::prelude::RawFd, path::PathBuf};
|
||||
|
||||
use super::{Container, ContainerStatus};
|
||||
|
@ -67,6 +65,12 @@ impl<'a> ContainerBuilderImpl<'a> {
|
|||
let parent_to_child = &mut channel::Channel::new()?;
|
||||
let child_to_parent = &mut channel::Channel::new()?;
|
||||
|
||||
// Need to create the notify socket before we pivot root, since the unix
|
||||
// domain socket used here is outside of the rootfs of container. During
|
||||
// exec, need to create the socket before we enter into existing mount
|
||||
// namespace.
|
||||
let notify_socket: NotifyListener = NotifyListener::new(&self.notify_path)?;
|
||||
|
||||
// This init_args will be passed to the container init process,
|
||||
// therefore we will have to move all the variable by value. Since self
|
||||
// is a shared reference, we have to clone these variables here.
|
||||
|
@ -76,8 +80,7 @@ impl<'a> ContainerBuilderImpl<'a> {
|
|||
spec: self.spec.clone(),
|
||||
rootfs: self.rootfs.clone(),
|
||||
console_socket: self.console_socket,
|
||||
is_rootless: self.rootless.is_some(),
|
||||
notify_path: self.notify_path.clone(),
|
||||
notify_socket,
|
||||
preserve_fds: self.preserve_fds,
|
||||
container: self.container.clone(),
|
||||
};
|
||||
|
@ -91,22 +94,26 @@ impl<'a> ContainerBuilderImpl<'a> {
|
|||
child_to_parent.wait_for_mapping_request()?;
|
||||
log::debug!("write mapping for pid {:?}", intermediate_pid);
|
||||
utils::write_file(format!("/proc/{}/setgroups", intermediate_pid), "deny")?;
|
||||
write_uid_mapping(intermediate_pid, self.rootless.as_ref())?;
|
||||
write_gid_mapping(intermediate_pid, self.rootless.as_ref())?;
|
||||
rootless::write_uid_mapping(intermediate_pid, self.rootless.as_ref())?;
|
||||
rootless::write_gid_mapping(intermediate_pid, self.rootless.as_ref())?;
|
||||
parent_to_child.send_mapping_written()?;
|
||||
}
|
||||
|
||||
let init_pid = child_to_parent.wait_for_child_ready()?;
|
||||
log::debug!("init pid is {:?}", init_pid);
|
||||
|
||||
cmanager.add_task(init_pid)?;
|
||||
cmanager
|
||||
.add_task(init_pid)
|
||||
.context("Failed to add tasks to cgroup manager")?;
|
||||
if self.rootless.is_none() && linux.resources.is_some() && self.init {
|
||||
cmanager.apply(linux.resources.as_ref().unwrap())?;
|
||||
cmanager
|
||||
.apply(linux.resources.as_ref().unwrap())
|
||||
.context("Failed to apply resource limits through cgroup")?;
|
||||
}
|
||||
|
||||
// if file to write the pid to is specified, write pid of the child
|
||||
if let Some(pid_file) = &self.pid_file {
|
||||
fs::write(&pid_file, format!("{}", init_pid))?;
|
||||
fs::write(&pid_file, format!("{}", init_pid)).context("Failed to write pid file")?;
|
||||
}
|
||||
|
||||
if let Some(container) = &self.container {
|
||||
|
@ -115,58 +122,10 @@ impl<'a> ContainerBuilderImpl<'a> {
|
|||
.update_status(ContainerStatus::Created)
|
||||
.set_creator(nix::unistd::geteuid().as_raw())
|
||||
.set_pid(init_pid.as_raw())
|
||||
.save()?;
|
||||
.save()
|
||||
.context("Failed to save container state")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn write_uid_mapping(target_pid: Pid, rootless: Option<&Rootless>) -> Result<()> {
|
||||
if let Some(rootless) = rootless {
|
||||
if let Some(uid_mappings) = rootless.gid_mappings {
|
||||
return write_id_mapping(
|
||||
&format!("/proc/{}/uid_map", target_pid),
|
||||
uid_mappings,
|
||||
rootless.newuidmap.as_deref(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_gid_mapping(target_pid: Pid, rootless: Option<&Rootless>) -> Result<()> {
|
||||
if let Some(rootless) = rootless {
|
||||
if let Some(gid_mappings) = rootless.gid_mappings {
|
||||
return write_id_mapping(
|
||||
&format!("/proc/{}/gid_map", target_pid),
|
||||
gid_mappings,
|
||||
rootless.newgidmap.as_deref(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_id_mapping(
|
||||
map_file: &str,
|
||||
mappings: &[oci_spec::LinuxIdMapping],
|
||||
map_binary: Option<&Path>,
|
||||
) -> Result<()> {
|
||||
let mappings: Vec<String> = mappings
|
||||
.iter()
|
||||
.map(|m| format!("{} {} {}", m.container_id, m.host_id, m.size))
|
||||
.collect();
|
||||
if mappings.len() == 1 {
|
||||
utils::write_file(map_file, mappings.first().unwrap())?;
|
||||
} else {
|
||||
Command::new(map_binary.unwrap())
|
||||
.args(mappings)
|
||||
.output()
|
||||
.with_context(|| format!("failed to execute {:?}", map_binary))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
@ -66,7 +66,6 @@ impl InitContainerBuilder {
|
|||
};
|
||||
|
||||
let rootless = detect_rootless(&spec)?;
|
||||
|
||||
let mut builder_impl = ContainerBuilderImpl {
|
||||
init: true,
|
||||
syscall: self.base.syscall,
|
||||
|
|
|
@ -1,12 +1,11 @@
|
|||
use anyhow::{bail, Context, Result};
|
||||
use nix::unistd::{self, close};
|
||||
use std::env;
|
||||
use std::io::prelude::*;
|
||||
use std::os::unix::io::AsRawFd;
|
||||
use std::os::unix::net::{UnixListener, UnixStream};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use nix::unistd::{self, close};
|
||||
|
||||
pub const NOTIFY_FILE: &str = "notify.sock";
|
||||
|
||||
pub struct NotifyListener {
|
||||
|
@ -36,15 +35,16 @@ impl NotifyListener {
|
|||
Ok(Self { socket: stream })
|
||||
}
|
||||
|
||||
pub fn wait_for_container_start(&mut self) -> Result<()> {
|
||||
pub fn wait_for_container_start(&self) -> Result<()> {
|
||||
match self.socket.accept() {
|
||||
Ok((mut socket, _addr)) => {
|
||||
Ok((mut socket, _)) => {
|
||||
let mut response = String::new();
|
||||
socket.read_to_string(&mut response)?;
|
||||
log::debug!("received: {}", response);
|
||||
}
|
||||
Err(e) => println!("accept function failed: {:?}", e),
|
||||
Err(e) => bail!("accept function failed: {:?}", e),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@ use nix::mount::mount as nix_mount;
|
|||
use nix::mount::MsFlags;
|
||||
use nix::sched::CloneFlags;
|
||||
use nix::{
|
||||
fcntl, sched,
|
||||
fcntl,
|
||||
sys::statfs,
|
||||
unistd::{self, Gid, Pid, Uid},
|
||||
};
|
||||
|
@ -157,10 +157,8 @@ pub struct ContainerInitArgs {
|
|||
pub rootfs: PathBuf,
|
||||
/// Socket to communicate the file descriptor of the ptty
|
||||
pub console_socket: Option<RawFd>,
|
||||
/// Options for rootless containers
|
||||
pub is_rootless: bool,
|
||||
/// Path to the Unix Domain Socket to communicate container start
|
||||
pub notify_path: PathBuf,
|
||||
/// The Unix Domain Socket to communicate container start
|
||||
pub notify_socket: NotifyListener,
|
||||
/// File descriptors preserved/passed to the container init process.
|
||||
pub preserve_fds: i32,
|
||||
/// Container state
|
||||
|
@ -175,6 +173,7 @@ pub fn container_intermidiate(
|
|||
let command = &args.syscall;
|
||||
let spec = &args.spec;
|
||||
let linux = spec.linux.as_ref().context("no linux in spec")?;
|
||||
let namespaces = Namespaces::from(linux.namespaces.as_ref());
|
||||
|
||||
// if Out-of-memory score adjustment is set in specification. set the score
|
||||
// value for the current process check
|
||||
|
@ -191,15 +190,29 @@ pub fn container_intermidiate(
|
|||
// namespace will be created, check
|
||||
// https://man7.org/linux/man-pages/man7/user_namespaces.7.html for more
|
||||
// information
|
||||
if args.is_rootless {
|
||||
log::debug!("creating new user namespace");
|
||||
sched::unshare(sched::CloneFlags::CLONE_NEWUSER)?;
|
||||
// child needs to be dumpable, otherwise the non root parent is not
|
||||
// allowed to write the uid/gid maps
|
||||
prctl::set_dumpable(true).unwrap();
|
||||
intermediate_to_main.send_identifier_mapping_request()?;
|
||||
main_to_intermediate.wait_for_mapping_ack()?;
|
||||
prctl::set_dumpable(false).unwrap();
|
||||
if let Some(user_namespace) = namespaces.get(LinuxNamespaceType::User) {
|
||||
namespaces
|
||||
.unshare_or_setns(user_namespace)
|
||||
.with_context(|| format!("Failed to enter pid namespace: {:?}", user_namespace))?;
|
||||
if user_namespace.path.is_none() {
|
||||
log::debug!("creating new user namespace");
|
||||
// child needs to be dumpable, otherwise the non root parent is not
|
||||
// allowed to write the uid/gid maps
|
||||
prctl::set_dumpable(true).unwrap();
|
||||
intermediate_to_main.send_identifier_mapping_request()?;
|
||||
main_to_intermediate.wait_for_mapping_ack()?;
|
||||
prctl::set_dumpable(false).unwrap();
|
||||
}
|
||||
|
||||
// After UID and GID mapping is configured correctly in the Youki main
|
||||
// process, We want to make sure continue as the root user inside the
|
||||
// new user namespace. This is required because the process of
|
||||
// configuring the container process will require root, even though the
|
||||
// root in the user namespace likely is mapped to a non-privileged user
|
||||
// on the parent user namespace.
|
||||
command.set_id(Uid::from_raw(0), Gid::from_raw(0)).context(
|
||||
"Failed to configure uid and gid root in the beginning of a new user namespace",
|
||||
)?;
|
||||
}
|
||||
|
||||
// set limits and namespaces to the process
|
||||
|
@ -210,12 +223,7 @@ pub fn container_intermidiate(
|
|||
}
|
||||
}
|
||||
|
||||
command
|
||||
.set_id(Uid::from_raw(0), Gid::from_raw(0))
|
||||
.context("failed to become root")?;
|
||||
|
||||
// Pid namespace requires an extra fork to enter, so we enter pid namespace now.
|
||||
let namespaces = Namespaces::from(linux.namespaces.as_ref());
|
||||
if let Some(pid_namespace) = namespaces.get(LinuxNamespaceType::Pid) {
|
||||
namespaces
|
||||
.unshare_or_setns(pid_namespace)
|
||||
|
@ -247,11 +255,6 @@ pub fn container_init(
|
|||
let command = &args.syscall;
|
||||
let spec = &args.spec;
|
||||
let linux = spec.linux.as_ref().context("no linux in spec")?;
|
||||
// Need to create the notify socket before we pivot root, since the unix
|
||||
// domain socket used here is outside of the rootfs of container. During
|
||||
// exec, need to create the socket before we enter into existing mount
|
||||
// namespace.
|
||||
let mut notify_socket: NotifyListener = NotifyListener::new(&args.notify_path)?;
|
||||
let proc = spec.process.as_ref().context("no process in spec")?;
|
||||
let mut envs: Vec<String> = proc.env.as_ref().unwrap_or(&vec![]).clone();
|
||||
let rootfs = &args.rootfs;
|
||||
|
@ -293,7 +296,8 @@ pub fn container_init(
|
|||
// create_container hook needs to be called after the namespace setup, but
|
||||
// before pivot_root is called. This runs in the container namespaces.
|
||||
if let Some(hooks) = hooks {
|
||||
hooks::run_hooks(hooks.create_container.as_ref(), container)?
|
||||
hooks::run_hooks(hooks.create_container.as_ref(), container)
|
||||
.context("Failed to run create container hooks")?;
|
||||
}
|
||||
|
||||
let bind_service = namespaces.get(LinuxNamespaceType::User).is_some();
|
||||
|
@ -324,7 +328,7 @@ pub fn container_init(
|
|||
if let Some(paths) = &linux.readonly_paths {
|
||||
// mount readonly path
|
||||
for path in paths {
|
||||
readonly_path(path)?;
|
||||
readonly_path(path).context("Failed to set read only path")?;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -341,10 +345,13 @@ pub fn container_init(
|
|||
}
|
||||
};
|
||||
|
||||
command.set_id(Uid::from_raw(proc.user.uid), Gid::from_raw(proc.user.gid))?;
|
||||
capabilities::reset_effective(command)?;
|
||||
command
|
||||
.set_id(Uid::from_raw(proc.user.uid), Gid::from_raw(proc.user.gid))
|
||||
.context("Failed to configure uid and gid")?;
|
||||
|
||||
capabilities::reset_effective(command).context("Failed to reset effective capabilities")?;
|
||||
if let Some(caps) = &proc.capabilities {
|
||||
capabilities::drop_privileges(caps, command)?;
|
||||
capabilities::drop_privileges(caps, command).context("Failed to drop capabilities")?;
|
||||
}
|
||||
|
||||
// Take care of LISTEN_FDS used for systemd-active-socket. If the value is
|
||||
|
@ -408,6 +415,7 @@ pub fn container_init(
|
|||
init_to_intermediate.send_child_ready(Pid::from_raw(-1))?;
|
||||
|
||||
// listening on the notify socket for the container start command
|
||||
let notify_socket = args.notify_socket;
|
||||
notify_socket.wait_for_container_start()?;
|
||||
|
||||
// create_container hook needs to be called after the namespace setup, but
|
||||
|
|
|
@ -1,11 +1,7 @@
|
|||
//! During kernel initialization, a minimal replica of the ramfs filesystem is loaded, called rootfs.
|
||||
//! Most systems mount another filesystem over it
|
||||
|
||||
use std::fs::OpenOptions;
|
||||
use std::fs::{canonicalize, create_dir_all, remove_file};
|
||||
use std::os::unix::fs::symlink;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use crate::utils::PathBufExt;
|
||||
use anyhow::{bail, Context, Result};
|
||||
use nix::errno::Errno;
|
||||
use nix::fcntl::{open, OFlag};
|
||||
|
@ -15,13 +11,15 @@ use nix::sys::stat::Mode;
|
|||
use nix::sys::stat::{mknod, umask};
|
||||
use nix::unistd::{chdir, chown, close, getcwd};
|
||||
use nix::unistd::{Gid, Uid};
|
||||
|
||||
use crate::utils::PathBufExt;
|
||||
use oci_spec::{LinuxDevice, LinuxDeviceType, Mount, Spec};
|
||||
use std::fs::OpenOptions;
|
||||
use std::fs::{canonicalize, create_dir_all, remove_file};
|
||||
use std::os::unix::fs::symlink;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
pub fn prepare_rootfs(spec: &Spec, rootfs: &Path, bind_devices: bool) -> Result<()> {
|
||||
log::debug!("Prepare rootfs: {:?}", rootfs);
|
||||
let mut flags = MsFlags::MS_REC;
|
||||
|
||||
let linux = spec.linux.as_ref().context("no linux in spec")?;
|
||||
if let Some(roofs_propagation) = linux.rootfs_propagation.as_ref() {
|
||||
match roofs_propagation.as_str() {
|
||||
|
@ -34,7 +32,8 @@ pub fn prepare_rootfs(spec: &Spec, rootfs: &Path, bind_devices: bool) -> Result<
|
|||
flags |= MsFlags::MS_SLAVE;
|
||||
}
|
||||
|
||||
nix_mount(None::<&str>, "/", None::<&str>, flags, None::<&str>)?;
|
||||
nix_mount(None::<&str>, "/", None::<&str>, flags, None::<&str>)
|
||||
.context("Failed to mount rootfs")?;
|
||||
|
||||
log::debug!("mount root fs {:?}", rootfs);
|
||||
nix_mount::<Path, Path, str, str>(
|
||||
|
@ -46,31 +45,38 @@ pub fn prepare_rootfs(spec: &Spec, rootfs: &Path, bind_devices: bool) -> Result<
|
|||
)?;
|
||||
|
||||
if let Some(mounts) = spec.mounts.as_ref() {
|
||||
for m in mounts.iter() {
|
||||
let (flags, data) = parse_mount(m);
|
||||
let ml = linux.mount_label.as_ref();
|
||||
if m.typ.as_ref().context("no type in mount spec")? == "cgroup" {
|
||||
for mount in mounts.iter() {
|
||||
log::debug!("Mount... {:?}", mount);
|
||||
let (flags, data) = parse_mount(mount);
|
||||
let mount_label = linux.mount_label.as_ref();
|
||||
if mount.typ.as_ref().context("no type in mount spec")? == "cgroup" {
|
||||
// skip
|
||||
log::warn!("A feature of cgroup is unimplemented.");
|
||||
} else if m.destination == PathBuf::from("/dev") {
|
||||
mount_to_container(m, rootfs, flags & !MsFlags::MS_RDONLY, &data, ml)?;
|
||||
} else if mount.destination == PathBuf::from("/dev") {
|
||||
mount_to_container(
|
||||
mount,
|
||||
rootfs,
|
||||
flags & !MsFlags::MS_RDONLY,
|
||||
&data,
|
||||
mount_label,
|
||||
)
|
||||
.with_context(|| format!("Failed to mount /dev: {:?}", mount))?;
|
||||
} else {
|
||||
mount_to_container(m, rootfs, flags, &data, ml)?;
|
||||
mount_to_container(mount, rootfs, flags, &data, mount_label)
|
||||
.with_context(|| format!("Failed to mount: {:?}", mount))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let olddir = getcwd()?;
|
||||
chdir(rootfs)?;
|
||||
|
||||
setup_default_symlinks(rootfs)?;
|
||||
setup_default_symlinks(rootfs).context("Failed to setup default symlinks")?;
|
||||
if let Some(added_devices) = linux.devices.as_ref() {
|
||||
create_devices(default_devices().iter().chain(added_devices), bind_devices)
|
||||
} else {
|
||||
create_devices(default_devices().iter(), bind_devices)
|
||||
}?;
|
||||
setup_ptmx(rootfs)?;
|
||||
|
||||
chdir(&olddir)?;
|
||||
|
||||
Ok(())
|
||||
|
@ -82,13 +88,15 @@ fn setup_ptmx(rootfs: &Path) -> Result<()> {
|
|||
bail!("could not delete /dev/ptmx")
|
||||
}
|
||||
}
|
||||
symlink("pts/ptmx", rootfs.join("dev/ptmx"))?;
|
||||
|
||||
symlink("pts/ptmx", "dev/ptmx").context("Failed to symlink ptmx")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn setup_default_symlinks(rootfs: &Path) -> Result<()> {
|
||||
if Path::new("/proc/kcore").exists() {
|
||||
symlink("/proc/kcore", "dev/kcore")?;
|
||||
symlink("/proc/kcore", rootfs.join("dev/kcore")).context("Failed to symlink kcore")?;
|
||||
}
|
||||
|
||||
let defaults = [
|
||||
|
@ -98,8 +106,9 @@ fn setup_default_symlinks(rootfs: &Path) -> Result<()> {
|
|||
("/proc/self/fd/2", "dev/stderr"),
|
||||
];
|
||||
for &(src, dst) in defaults.iter() {
|
||||
symlink(src, rootfs.join(dst))?;
|
||||
symlink(src, rootfs.join(dst)).context("Fail to symlink defaults")?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -173,6 +182,7 @@ where
|
|||
if !dev.path.starts_with("/dev") {
|
||||
panic!("{} is not a valid device path", dev.path.display());
|
||||
}
|
||||
|
||||
bind_dev(dev)
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
@ -182,11 +192,13 @@ where
|
|||
if !dev.path.starts_with("/dev") {
|
||||
panic!("{} is not a valid device path", dev.path.display());
|
||||
}
|
||||
|
||||
mknod_dev(dev)
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
}
|
||||
umask(old_mode);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -204,6 +216,7 @@ fn bind_dev(dev: &LinuxDevice) -> Result<()> {
|
|||
MsFlags::MS_BIND,
|
||||
None::<&str>,
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -214,7 +227,6 @@ fn mknod_dev(dev: &LinuxDevice) -> Result<()> {
|
|||
| ((minor & !0xff) << 12)
|
||||
| ((major & !0xfff) << 32)) as u64
|
||||
}
|
||||
|
||||
mknod(
|
||||
&dev.path.as_in_container()?,
|
||||
dev.typ.to_sflag()?,
|
||||
|
@ -226,6 +238,7 @@ fn mknod_dev(dev: &LinuxDevice) -> Result<()> {
|
|||
dev.uid.map(Uid::from_raw),
|
||||
dev.gid.map(Gid::from_raw),
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -250,15 +263,13 @@ fn mount_to_container(
|
|||
} else {
|
||||
data.to_string()
|
||||
};
|
||||
|
||||
let dest_for_host = format!(
|
||||
"{}{}",
|
||||
rootfs.to_string_lossy().into_owned(),
|
||||
m.destination.display()
|
||||
);
|
||||
let dest = Path::new(&dest_for_host);
|
||||
|
||||
let source = &m.source.as_ref().context("no source in mount spec")?;
|
||||
let source = m.source.as_ref().context("no source in mount spec")?;
|
||||
let src = if typ == "bind" {
|
||||
let src = canonicalize(source)?;
|
||||
let dir = if src.is_file() {
|
||||
|
@ -266,7 +277,8 @@ fn mount_to_container(
|
|||
} else {
|
||||
Path::new(&dest)
|
||||
};
|
||||
create_dir_all(&dir).unwrap();
|
||||
create_dir_all(&dir)
|
||||
.with_context(|| format!("Failed to create dir for bind mount: {:?}", dir))?;
|
||||
if src.is_file() {
|
||||
OpenOptions::new()
|
||||
.create(true)
|
||||
|
@ -274,18 +286,21 @@ fn mount_to_container(
|
|||
.open(&dest)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
src
|
||||
} else {
|
||||
create_dir_all(&dest).unwrap();
|
||||
create_dir_all(&dest).with_context(|| format!("Failed to create device: {:?}", dest))?;
|
||||
PathBuf::from(source)
|
||||
};
|
||||
|
||||
if let Err(errno) = nix_mount(Some(&*src), dest, Some(&*typ.as_str()), flags, Some(&*d)) {
|
||||
if !matches!(errno, Errno::EINVAL) {
|
||||
bail!("mount of {} failed", m.destination.display());
|
||||
bail!("mount of {:?} failed", m.destination);
|
||||
}
|
||||
|
||||
nix_mount(Some(&*src), dest, Some(&*typ.as_str()), flags, Some(data))?;
|
||||
}
|
||||
|
||||
if flags.contains(MsFlags::MS_BIND)
|
||||
&& flags.intersects(
|
||||
!(MsFlags::MS_REC
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
use std::{env, path::PathBuf};
|
||||
|
||||
use crate::{namespaces::Namespaces, utils};
|
||||
use anyhow::{bail, Context, Result};
|
||||
use oci_spec::{Linux, LinuxIdMapping, LinuxNamespaceType, Mount, Spec};
|
||||
|
||||
use crate::namespaces::Namespaces;
|
||||
use nix::unistd::Pid;
|
||||
use oci_spec::{Linux, LinuxIdMapping, LinuxNamespace, LinuxNamespaceType, Mount, Spec};
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
use std::{env, path::PathBuf};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Rootless<'a> {
|
||||
|
@ -15,34 +16,53 @@ pub struct Rootless<'a> {
|
|||
pub uid_mappings: Option<&'a Vec<LinuxIdMapping>>,
|
||||
/// Mappings for group ids
|
||||
pub gid_mappings: Option<&'a Vec<LinuxIdMapping>>,
|
||||
/// Info on the user namespaces
|
||||
user_namespace: Option<LinuxNamespace>,
|
||||
}
|
||||
|
||||
impl<'a> From<&'a Linux> for Rootless<'a> {
|
||||
fn from(linux: &'a Linux) -> Self {
|
||||
let namespaces = Namespaces::from(linux.namespaces.as_ref());
|
||||
let user_namespace = namespaces.get(LinuxNamespaceType::User);
|
||||
Self {
|
||||
newuidmap: None,
|
||||
newgidmap: None,
|
||||
uid_mappings: linux.uid_mappings.as_ref(),
|
||||
gid_mappings: linux.gid_mappings.as_ref(),
|
||||
user_namespace: user_namespace.cloned(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If user namespace is detected, then we are going into rootless.
|
||||
// If we are not root, check if we are user namespace.
|
||||
pub fn detect_rootless(spec: &Spec) -> Result<Option<Rootless>> {
|
||||
let rootless = if should_use_rootless() {
|
||||
let linux = spec.linux.as_ref().context("no linux in spec")?;
|
||||
let namespaces = Namespaces::from(linux.namespaces.as_ref());
|
||||
let user_namespace = namespaces.get(LinuxNamespaceType::User);
|
||||
// If conditions requires us to use rootless, we must either create a new
|
||||
// user namespace or enter an existing one.
|
||||
if should_use_rootless() && user_namespace.is_none() {
|
||||
bail!("Rootless container requires valid user namespace definition");
|
||||
}
|
||||
|
||||
// Go through rootless procedure only when entering into a new user namespace
|
||||
let rootless = if user_namespace.is_some() && user_namespace.unwrap().path.is_none() {
|
||||
log::debug!("rootless container should be created");
|
||||
log::warn!(
|
||||
"resource constraints and multi id mapping is unimplemented for rootless containers"
|
||||
);
|
||||
validate(spec)?;
|
||||
validate(spec).context("The spec failed to comply to rootless requirement")?;
|
||||
let linux = spec.linux.as_ref().context("no linux in spec")?;
|
||||
let mut rootless = Rootless::from(linux);
|
||||
if let Some((uid_binary, gid_binary)) = lookup_map_binaries(linux)? {
|
||||
rootless.newuidmap = Some(uid_binary);
|
||||
rootless.newgidmap = Some(gid_binary);
|
||||
}
|
||||
|
||||
Some(rootless)
|
||||
} else {
|
||||
log::debug!("This is NOT a rootless container");
|
||||
None
|
||||
};
|
||||
|
||||
|
@ -64,8 +84,13 @@ pub fn should_use_rootless() -> bool {
|
|||
|
||||
/// Validates that the spec contains the required information for
|
||||
/// running in rootless mode
|
||||
pub fn validate(spec: &Spec) -> Result<()> {
|
||||
fn validate(spec: &Spec) -> Result<()> {
|
||||
let linux = spec.linux.as_ref().context("no linux in spec")?;
|
||||
let namespaces = Namespaces::from(linux.namespaces.as_ref());
|
||||
if namespaces.get(LinuxNamespaceType::User).is_none() {
|
||||
bail!("rootless containers require the specification of a user namespace");
|
||||
}
|
||||
|
||||
let gid_mappings = linux
|
||||
.gid_mappings
|
||||
.as_ref()
|
||||
|
@ -83,11 +108,6 @@ pub fn validate(spec: &Spec) -> Result<()> {
|
|||
bail!("rootless containers require at least one gid mapping")
|
||||
}
|
||||
|
||||
let namespaces = Namespaces::from(linux.namespaces.as_ref());
|
||||
if namespaces.get(LinuxNamespaceType::User).is_none() {
|
||||
bail!("rootless containers require the specification of a user namespace");
|
||||
}
|
||||
|
||||
validate_mounts(
|
||||
spec.mounts.as_ref().context("no mounts in spec")?,
|
||||
uid_mappings,
|
||||
|
@ -153,3 +173,55 @@ fn lookup_map_binary(binary: &str) -> Result<Option<PathBuf>> {
|
|||
.find(|p| PathBuf::from(p).join(binary).exists())
|
||||
.map(PathBuf::from))
|
||||
}
|
||||
|
||||
pub fn write_uid_mapping(target_pid: Pid, rootless: Option<&Rootless>) -> Result<()> {
|
||||
log::debug!("Write UID mapping for {:?}", target_pid);
|
||||
if let Some(rootless) = rootless {
|
||||
if let Some(uid_mappings) = rootless.gid_mappings {
|
||||
return write_id_mapping(
|
||||
&format!("/proc/{}/uid_map", target_pid),
|
||||
uid_mappings,
|
||||
rootless.newuidmap.as_deref(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write_gid_mapping(target_pid: Pid, rootless: Option<&Rootless>) -> Result<()> {
|
||||
log::debug!("Write GID mapping for {:?}", target_pid);
|
||||
if let Some(rootless) = rootless {
|
||||
if let Some(gid_mappings) = rootless.gid_mappings {
|
||||
return write_id_mapping(
|
||||
&format!("/proc/{}/gid_map", target_pid),
|
||||
gid_mappings,
|
||||
rootless.newgidmap.as_deref(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_id_mapping(
|
||||
map_file: &str,
|
||||
mappings: &[oci_spec::LinuxIdMapping],
|
||||
map_binary: Option<&Path>,
|
||||
) -> Result<()> {
|
||||
let mappings: Vec<String> = mappings
|
||||
.iter()
|
||||
.map(|m| format!("{} {} {}", m.container_id, m.host_id, m.size))
|
||||
.collect();
|
||||
log::debug!("Write ID mapping: {:?}", mappings);
|
||||
if mappings.len() == 1 {
|
||||
utils::write_file(map_file, mappings.first().unwrap())?;
|
||||
} else {
|
||||
Command::new(map_binary.unwrap())
|
||||
.args(mappings)
|
||||
.output()
|
||||
.with_context(|| format!("failed to execute {:?}", map_binary))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue