1
0
mirror of https://github.com/containers/youki synced 2024-09-26 22:20:57 +02:00
youki/src/rootfs.rs
2021-06-06 14:33:51 +03:00

345 lines
10 KiB
Rust

//! During kernel initialization, a minimal ramfs-based filesystem called rootfs is loaded.
//! Most systems then mount another filesystem over it.
use std::fs::OpenOptions;
use std::fs::{canonicalize, create_dir_all, remove_file};
use std::os::unix::fs::symlink;
use std::path::{Path, PathBuf};
use anyhow::{bail, Result};
use nix::errno::Errno;
use nix::fcntl::{open, OFlag};
use nix::mount::mount as nix_mount;
use nix::mount::MsFlags;
use nix::sys::stat::Mode;
use nix::sys::stat::{mknod, umask};
use nix::unistd::{chdir, chown, close, getcwd};
use nix::unistd::{Gid, Uid};
use crate::utils::PathBufExt;
use oci_spec::{LinuxDevice, LinuxDeviceType, Mount, Spec};
pub fn prepare_rootfs(spec: &Spec, rootfs: &Path, bind_devices: bool) -> Result<()> {
let mut flags = MsFlags::MS_REC;
match spec.linux {
Some(ref linux) => match linux.rootfs_propagation.as_ref() {
"shared" => flags |= MsFlags::MS_SHARED,
"private" => flags |= MsFlags::MS_PRIVATE,
"slave" | "" => flags |= MsFlags::MS_SLAVE,
_ => panic!(),
},
None => flags |= MsFlags::MS_SLAVE,
};
nix_mount(None::<&str>, "/", None::<&str>, flags, None::<&str>)?;
log::debug!("mount root fs {:?}", rootfs);
nix_mount::<Path, Path, str, str>(
Some(&rootfs),
&rootfs,
None::<&str>,
MsFlags::MS_BIND | MsFlags::MS_REC,
None::<&str>,
)?;
for m in spec.mounts.iter() {
let (flags, data) = parse_mount(&m);
let ml = &spec.linux.as_ref().unwrap().mount_label;
if m.typ == "cgroup" {
// skip
log::warn!("A feature of cgroup is unimplemented.");
} else if m.destination == PathBuf::from("/dev") {
mount_to_container(&m, rootfs, flags & !MsFlags::MS_RDONLY, &data, &ml)?;
} else {
mount_to_container(&m, rootfs, flags, &data, &ml)?;
}
}
let olddir = getcwd()?;
chdir(rootfs)?;
setup_default_symlinks(rootfs)?;
create_devices(&spec.linux.as_ref().unwrap().devices, bind_devices)?;
setup_ptmx(rootfs)?;
chdir(&olddir)?;
Ok(())
}
/// Replaces `<rootfs>/dev/ptmx` with a symlink pointing at `pts/ptmx`.
///
/// An already missing `dev/ptmx` is fine; any other failure to remove it is
/// reported as an error, as is a failure to create the symlink.
fn setup_ptmx(rootfs: &Path) -> Result<()> {
    let ptmx = rootfs.join("dev/ptmx");
    match remove_file(&ptmx) {
        Err(err) if err.kind() != ::std::io::ErrorKind::NotFound => {
            bail!("could not delete /dev/ptmx")
        }
        // Removed successfully, or there was nothing to remove.
        _ => {}
    }
    symlink("pts/ptmx", &ptmx)?;
    Ok(())
}
/// Creates the conventional `/dev` symlinks inside `rootfs`.
///
/// * `dev/kcore` -> `/proc/kcore` (only when `/proc/kcore` exists on the host)
/// * `dev/fd` -> `/proc/self/fd`
/// * `dev/stdin`, `dev/stdout`, `dev/stderr` -> `/proc/self/fd/{0,1,2}`
///
/// All link locations are resolved against `rootfs`, so the function does not
/// depend on the current working directory.
///
/// # Errors
/// Returns the first error reported while creating a symlink.
fn setup_default_symlinks(rootfs: &Path) -> Result<()> {
    if Path::new("/proc/kcore").exists() {
        symlink("/proc/kcore", rootfs.join("dev/kcore"))?;
    }

    const DEFAULTS: [(&str, &str); 4] = [
        ("/proc/self/fd", "dev/fd"),
        ("/proc/self/fd/0", "dev/stdin"),
        ("/proc/self/fd/1", "dev/stdout"),
        ("/proc/self/fd/2", "dev/stderr"),
    ];
    for (src, dst) in DEFAULTS.iter() {
        symlink(src, rootfs.join(dst))?;
    }
    Ok(())
}
/// Returns the character devices that are created in every container, in
/// this order: `/dev/null` (1:3), `/dev/zero` (1:5), `/dev/full` (1:7),
/// `/dev/tty` (5:0), `/dev/urandom` (1:9) and `/dev/random` (1:8).
///
/// Every device is world read/writable (`0o666`) and carries no explicit
/// owner (`uid`/`gid` are `None`).
pub fn default_devices() -> Vec<LinuxDevice> {
    // (path, major, minor)
    const DEFAULTS: [(&str, u64, u64); 6] = [
        ("/dev/null", 1, 3),
        ("/dev/zero", 1, 5),
        ("/dev/full", 1, 7),
        ("/dev/tty", 5, 0),
        ("/dev/urandom", 1, 9),
        ("/dev/random", 1, 8),
    ];

    DEFAULTS
        .iter()
        .map(|&(path, major, minor)| LinuxDevice {
            path: PathBuf::from(path),
            typ: LinuxDeviceType::C,
            major,
            minor,
            // rw for owner, group and others; 0o066 would lock out the owner.
            file_mode: Some(0o666),
            uid: None,
            gid: None,
        })
        .collect()
}
fn create_devices(devices: &[LinuxDevice], bind: bool) -> Result<()> {
let old_mode = umask(Mode::from_bits_truncate(0o000));
if bind {
let _ = default_devices()
.iter()
.chain(devices)
.map(|dev| {
if !dev.path.starts_with("/dev") {
panic!("{} is not a valid device path", dev.path.display());
}
bind_dev(dev)
})
.collect::<Result<Vec<_>>>()?;
} else {
default_devices()
.iter()
.chain(devices)
.map(|dev| {
if !dev.path.starts_with("/dev") {
panic!("{} is not a valid device path", dev.path.display());
}
mknod_dev(dev)
})
.collect::<Result<Vec<_>>>()?;
}
umask(old_mode);
Ok(())
}
/// Makes the device `dev` available by bind-mounting `dev.path` onto the
/// path returned by `dev.path.as_in_container()`.
///
/// A regular file is first created at the target path (if it does not exist
/// yet) so that the bind mount has something to attach to.
///
/// NOTE(review): this assumes `as_in_container()` yields the location of the
/// device inside the container root, as its use in `mknod_dev` suggests —
/// confirm against `crate::utils::PathBufExt`.
///
/// # Errors
/// Returns an error if the target path cannot be computed or created, or if
/// the mount fails.
fn bind_dev(dev: &LinuxDevice) -> Result<()> {
    let container_path = dev.path.as_in_container()?;

    // Create the mount point; the file's content is irrelevant once it is mounted over.
    let fd = open(
        &container_path,
        OFlag::O_RDWR | OFlag::O_CREAT,
        Mode::from_bits_truncate(0o644),
    )?;
    close(fd)?;

    // Source is the existing device, target is the placeholder created above.
    nix_mount(
        Some(&*dev.path),
        &*container_path,
        None::<&str>,
        MsFlags::MS_BIND,
        None::<&str>,
    )?;
    Ok(())
}
/// Creates the device node for `dev` with `mknod` at the path returned by
/// `dev.path.as_in_container()`, then applies the requested owner and group.
///
/// A missing `file_mode` is treated as mode `0`; a missing `uid`/`gid` leaves
/// the respective ownership unchanged.
fn mknod_dev(dev: &LinuxDevice) -> Result<()> {
    /// Packs a major/minor pair into a single device number.
    fn makedev(major: u64, minor: u64) -> u64 {
        let minor_low = minor & 0xff;
        let major_low = (major & 0xfff) << 8;
        let minor_high = (minor & !0xff) << 12;
        let major_high = (major & !0xfff) << 32;
        minor_low | major_low | minor_high | major_high
    }

    let node_path = dev.path.as_in_container()?;
    let kind = dev.typ.to_sflag()?;
    let permissions = Mode::from_bits_truncate(dev.file_mode.unwrap_or(0));
    mknod(&node_path, kind, permissions, makedev(dev.major, dev.minor))?;

    let owner = dev.uid.map(Uid::from_raw);
    let group = dev.gid.map(Gid::from_raw);
    chown(&node_path, owner, group)?;
    Ok(())
}
fn mount_to_container(
m: &Mount,
rootfs: &Path,
flags: MsFlags,
data: &str,
label: &str,
) -> Result<()> {
let d = if !label.is_empty() && m.typ != "proc" && m.typ != "sysfs" {
if data.is_empty() {
format!("context=\"{}\"", label)
} else {
format!("{},context=\"{}\"", data, label)
}
} else {
data.to_string()
};
let dest_for_host = format!(
"{}{}",
rootfs.to_string_lossy().into_owned(),
m.destination.display()
);
let dest = Path::new(&dest_for_host);
let src = if m.typ == "bind" {
let src = canonicalize(&m.source)?;
let dir = if src.is_file() {
Path::new(&dest).parent().unwrap()
} else {
Path::new(&dest)
};
create_dir_all(&dir).unwrap();
if src.is_file() {
OpenOptions::new()
.create(true)
.write(true)
.open(&dest)
.unwrap();
}
src
} else {
create_dir_all(&dest).unwrap();
PathBuf::from(&m.source)
};
if let Err(::nix::Error::Sys(errno)) =
nix_mount(Some(&*src), dest, Some(&*m.typ), flags, Some(&*d))
{
if errno != Errno::EINVAL {
bail!("mount of {} failed", m.destination.display());
}
nix_mount(Some(&*src), dest, Some(&*m.typ), flags, Some(data))?;
}
if flags.contains(MsFlags::MS_BIND)
&& flags.intersects(
!(MsFlags::MS_REC
| MsFlags::MS_REMOUNT
| MsFlags::MS_BIND
| MsFlags::MS_PRIVATE
| MsFlags::MS_SHARED
| MsFlags::MS_SLAVE),
)
{
nix_mount(
Some(&*dest),
&*dest,
None::<&str>,
flags | MsFlags::MS_REMOUNT,
None::<&str>,
)?;
}
Ok(())
}
/// Splits the options of `m` into mount flags and filesystem specific data.
///
/// Options that name a known mount flag either set or clear that flag, in the
/// order in which they appear. Every other option is kept verbatim, and the
/// kept options are joined with `,` to form the returned data string.
fn parse_mount(m: &Mount) -> (MsFlags, String) {
    /// What a recognised option does to the accumulated flags.
    enum Action {
        Set(MsFlags),
        Clear(MsFlags),
    }

    /// Maps an option name to its action, or `None` for an unknown option.
    fn classify(option: &str) -> Option<Action> {
        use Action::{Clear, Set};
        let action = match option {
            "defaults" => Set(MsFlags::empty()),
            "ro" => Set(MsFlags::MS_RDONLY),
            "rw" => Clear(MsFlags::MS_RDONLY),
            "suid" => Clear(MsFlags::MS_NOSUID),
            "nosuid" => Set(MsFlags::MS_NOSUID),
            "dev" => Clear(MsFlags::MS_NODEV),
            "nodev" => Set(MsFlags::MS_NODEV),
            "exec" => Clear(MsFlags::MS_NOEXEC),
            "noexec" => Set(MsFlags::MS_NOEXEC),
            "sync" => Set(MsFlags::MS_SYNCHRONOUS),
            "async" => Clear(MsFlags::MS_SYNCHRONOUS),
            "dirsync" => Set(MsFlags::MS_DIRSYNC),
            "remount" => Set(MsFlags::MS_REMOUNT),
            "mand" => Set(MsFlags::MS_MANDLOCK),
            "nomand" => Clear(MsFlags::MS_MANDLOCK),
            "atime" => Clear(MsFlags::MS_NOATIME),
            "noatime" => Set(MsFlags::MS_NOATIME),
            "diratime" => Clear(MsFlags::MS_NODIRATIME),
            "nodiratime" => Set(MsFlags::MS_NODIRATIME),
            "bind" => Set(MsFlags::MS_BIND),
            "rbind" => Set(MsFlags::MS_BIND | MsFlags::MS_REC),
            "unbindable" => Set(MsFlags::MS_UNBINDABLE),
            "runbindable" => Set(MsFlags::MS_UNBINDABLE | MsFlags::MS_REC),
            "private" => Set(MsFlags::MS_PRIVATE),
            "rprivate" => Set(MsFlags::MS_PRIVATE | MsFlags::MS_REC),
            "shared" => Set(MsFlags::MS_SHARED),
            "rshared" => Set(MsFlags::MS_SHARED | MsFlags::MS_REC),
            "slave" => Set(MsFlags::MS_SLAVE),
            "rslave" => Set(MsFlags::MS_SLAVE | MsFlags::MS_REC),
            "relatime" => Set(MsFlags::MS_RELATIME),
            "norelatime" => Clear(MsFlags::MS_RELATIME),
            "strictatime" => Set(MsFlags::MS_STRICTATIME),
            "nostrictatime" => Clear(MsFlags::MS_STRICTATIME),
            _ => return None,
        };
        Some(action)
    }

    let mut flags = MsFlags::empty();
    let mut passthrough: Vec<&str> = Vec::new();
    for option in &m.options {
        match classify(option.as_str()) {
            Some(Action::Set(flag)) => flags |= flag,
            Some(Action::Clear(flag)) => flags &= !flag,
            None => passthrough.push(option.as_str()),
        }
    }
    (flags, passthrough.join(","))
}