1
0
mirror of https://github.com/containers/youki synced 2024-09-28 15:01:29 +02:00
youki/src/rootfs.rs

361 lines
11 KiB
Rust
Raw Normal View History

2021-06-06 13:33:51 +02:00
//! During kernel initialization, a minimal replica of the ramfs filesystem, called rootfs, is loaded.
//! Most systems then mount another filesystem over it.
2021-03-27 12:08:13 +01:00
use std::fs::OpenOptions;
use std::fs::{canonicalize, create_dir_all, remove_file};
use std::os::unix::fs::symlink;
use std::path::{Path, PathBuf};
use anyhow::{bail, Context, Result};
2021-03-27 12:08:13 +01:00
use nix::errno::Errno;
use nix::fcntl::{open, OFlag};
2021-03-31 15:53:23 +02:00
use nix::mount::mount as nix_mount;
use nix::mount::MsFlags;
2021-04-07 14:28:19 +02:00
use nix::sys::stat::Mode;
2021-03-27 12:08:13 +01:00
use nix::sys::stat::{mknod, umask};
use nix::unistd::{chdir, chown, close, getcwd};
2021-03-27 12:08:13 +01:00
use nix::unistd::{Gid, Uid};
use crate::utils::PathBufExt;
2021-05-29 17:15:16 +02:00
use oci_spec::{LinuxDevice, LinuxDeviceType, Mount, Spec};
2021-03-27 12:08:13 +01:00
2021-05-28 01:37:24 +02:00
pub fn prepare_rootfs(spec: &Spec, rootfs: &Path, bind_devices: bool) -> Result<()> {
2021-03-27 12:08:13 +01:00
let mut flags = MsFlags::MS_REC;
let linux = spec.linux.as_ref().context("no linux in spec")?;
match linux
.rootfs_propagation
.as_ref()
.context("no rootfs_propagation in spec")?
.as_str()
{
"shared" => flags |= MsFlags::MS_SHARED,
"private" => flags |= MsFlags::MS_PRIVATE,
"slave" | "" => flags |= MsFlags::MS_SLAVE,
_ => panic!(),
}
2021-03-31 15:53:23 +02:00
nix_mount(None::<&str>, "/", None::<&str>, flags, None::<&str>)?;
2021-03-27 12:08:13 +01:00
log::debug!("mount root fs {:?}", rootfs);
2021-05-28 01:37:24 +02:00
nix_mount::<Path, Path, str, str>(
Some(rootfs),
rootfs,
2021-03-27 12:08:13 +01:00
None::<&str>,
MsFlags::MS_BIND | MsFlags::MS_REC,
None::<&str>,
)?;
for m in spec.mounts.as_ref().context("no mounts in spec")?.iter() {
let (flags, data) = parse_mount(m);
let ml = linux
.mount_label
.as_ref()
.context("no mount_label in spec")?;
if m.typ.as_ref().context("no type in mount spec")? == "cgroup" {
2021-03-27 12:08:13 +01:00
// skip
log::warn!("A feature of cgroup is unimplemented.");
2021-03-27 12:08:13 +01:00
} else if m.destination == PathBuf::from("/dev") {
mount_to_container(m, rootfs, flags & !MsFlags::MS_RDONLY, &data, ml)?;
2021-03-27 12:08:13 +01:00
} else {
mount_to_container(m, rootfs, flags, &data, ml)?;
2021-03-27 12:08:13 +01:00
}
}
2021-03-31 15:53:23 +02:00
2021-03-27 12:08:13 +01:00
let olddir = getcwd()?;
2021-05-28 01:37:24 +02:00
chdir(rootfs)?;
2021-03-27 12:08:13 +01:00
2021-05-28 01:37:24 +02:00
setup_default_symlinks(rootfs)?;
create_devices(
linux.devices.as_ref().context("no devices in spec")?,
bind_devices,
)?;
2021-05-28 01:37:24 +02:00
setup_ptmx(rootfs)?;
2021-03-27 12:08:13 +01:00
chdir(&olddir)?;
Ok(())
}
2021-03-31 15:53:23 +02:00
fn setup_ptmx(rootfs: &Path) -> Result<()> {
2021-03-27 12:08:13 +01:00
if let Err(e) = remove_file(rootfs.join("dev/ptmx")) {
if e.kind() != ::std::io::ErrorKind::NotFound {
2021-03-30 15:04:03 +02:00
bail!("could not delete /dev/ptmx")
2021-03-27 12:08:13 +01:00
}
}
symlink("pts/ptmx", rootfs.join("dev/ptmx"))?;
Ok(())
}
fn setup_default_symlinks(rootfs: &Path) -> Result<()> {
2021-03-27 12:08:13 +01:00
if Path::new("/proc/kcore").exists() {
symlink("/proc/kcore", "dev/kcore")?;
}
let defaults = [
("/proc/self/fd", "dev/fd"),
("/proc/self/fd/0", "dev/stdin"),
("/proc/self/fd/1", "dev/stdout"),
("/proc/self/fd/2", "dev/stderr"),
];
for &(src, dst) in defaults.iter() {
symlink(src, rootfs.join(dst))?;
}
Ok(())
}
pub fn default_devices() -> Vec<LinuxDevice> {
vec![
2021-04-07 14:28:19 +02:00
LinuxDevice {
path: PathBuf::from("/dev/null"),
typ: LinuxDeviceType::C,
major: 1,
minor: 3,
file_mode: Some(0o066),
uid: None,
gid: None,
},
LinuxDevice {
path: PathBuf::from("/dev/zero"),
typ: LinuxDeviceType::C,
major: 1,
minor: 5,
file_mode: Some(0o066),
uid: None,
gid: None,
},
LinuxDevice {
path: PathBuf::from("/dev/full"),
typ: LinuxDeviceType::C,
major: 1,
minor: 7,
file_mode: Some(0o066),
uid: None,
gid: None,
},
LinuxDevice {
path: PathBuf::from("/dev/tty"),
typ: LinuxDeviceType::C,
major: 5,
minor: 0,
file_mode: Some(0o066),
uid: None,
gid: None,
},
LinuxDevice {
path: PathBuf::from("/dev/urandom"),
typ: LinuxDeviceType::C,
major: 1,
minor: 9,
file_mode: Some(0o066),
uid: None,
gid: None,
},
LinuxDevice {
path: PathBuf::from("/dev/random"),
typ: LinuxDeviceType::C,
major: 1,
minor: 8,
file_mode: Some(0o066),
uid: None,
gid: None,
},
]
}
fn create_devices(devices: &[LinuxDevice], bind: bool) -> Result<()> {
2021-03-27 12:08:13 +01:00
let old_mode = umask(Mode::from_bits_truncate(0o000));
if bind {
let _ = default_devices()
.iter()
.chain(devices)
.map(|dev| {
if !dev.path.starts_with("/dev") {
panic!("{} is not a valid device path", dev.path.display());
}
bind_dev(dev)
})
.collect::<Result<Vec<_>>>()?;
2021-03-27 12:08:13 +01:00
} else {
default_devices()
.iter()
.chain(devices)
.map(|dev| {
if !dev.path.starts_with("/dev") {
panic!("{} is not a valid device path", dev.path.display());
}
mknod_dev(dev)
})
.collect::<Result<Vec<_>>>()?;
2021-03-27 12:08:13 +01:00
}
umask(old_mode);
Ok(())
}
fn bind_dev(dev: &LinuxDevice) -> Result<()> {
2021-03-27 12:08:13 +01:00
let fd = open(
&dev.path.as_in_container()?,
2021-03-27 12:08:13 +01:00
OFlag::O_RDWR | OFlag::O_CREAT,
Mode::from_bits_truncate(0o644),
)?;
close(fd)?;
2021-03-31 15:53:23 +02:00
nix_mount(
Some(&*dev.path.as_in_container()?),
&dev.path,
2021-03-27 12:08:13 +01:00
None::<&str>,
MsFlags::MS_BIND,
None::<&str>,
)?;
Ok(())
}
fn mknod_dev(dev: &LinuxDevice) -> Result<()> {
fn makedev(major: i64, minor: i64) -> u64 {
((minor & 0xff)
| ((major & 0xfff) << 8)
| ((minor & !0xff) << 12)
| ((major & !0xfff) << 32)) as u64
2021-03-27 12:08:13 +01:00
}
mknod(
&dev.path.as_in_container()?,
2021-04-07 14:28:19 +02:00
dev.typ.to_sflag()?,
2021-03-27 12:08:13 +01:00
Mode::from_bits_truncate(dev.file_mode.unwrap_or(0)),
makedev(dev.major, dev.minor),
)?;
chown(
&dev.path.as_in_container()?,
2021-03-27 12:08:13 +01:00
dev.uid.map(Uid::from_raw),
dev.gid.map(Gid::from_raw),
)?;
Ok(())
}
2021-03-31 15:53:23 +02:00
fn mount_to_container(
m: &Mount,
rootfs: &Path,
2021-03-31 15:53:23 +02:00
flags: MsFlags,
data: &str,
label: &str,
) -> Result<()> {
let typ = m.typ.as_ref().context("no type in mount spec")?;
let d = if !label.is_empty() && typ != "proc" && typ != "sysfs" {
2021-03-27 12:08:13 +01:00
if data.is_empty() {
2021-03-31 15:53:23 +02:00
format!("context=\"{}\"", label)
2021-03-27 12:08:13 +01:00
} else {
2021-03-31 15:53:23 +02:00
format!("{},context=\"{}\"", data, label)
2021-03-27 12:08:13 +01:00
}
} else {
2021-03-31 15:53:23 +02:00
data.to_string()
};
2021-03-27 12:08:13 +01:00
let dest_for_host = format!(
"{}{}",
rootfs.to_string_lossy().into_owned(),
m.destination.display()
);
let dest = Path::new(&dest_for_host);
let source = &m.source.as_ref().context("no source in mount spec")?;
let src = if typ == "bind" {
let src = canonicalize(source)?;
2021-03-27 12:08:13 +01:00
let dir = if src.is_file() {
Path::new(&dest).parent().unwrap()
} else {
Path::new(&dest)
};
create_dir_all(&dir).unwrap();
if src.is_file() {
OpenOptions::new()
.create(true)
.write(true)
.open(&dest)
.unwrap();
}
src
} else {
create_dir_all(&dest).unwrap();
PathBuf::from(source)
2021-03-27 12:08:13 +01:00
};
if let Err(errno) = nix_mount(Some(&*src), dest, Some(&*typ.as_str()), flags, Some(&*d)) {
2021-07-27 10:19:55 +02:00
if !matches!(errno, Errno::EINVAL) {
2021-03-30 15:04:03 +02:00
bail!("mount of {} failed", m.destination.display());
2021-03-27 12:08:13 +01:00
}
nix_mount(Some(&*src), dest, Some(&*typ.as_str()), flags, Some(data))?;
2021-03-27 12:08:13 +01:00
}
if flags.contains(MsFlags::MS_BIND)
&& flags.intersects(
!(MsFlags::MS_REC
| MsFlags::MS_REMOUNT
| MsFlags::MS_BIND
| MsFlags::MS_PRIVATE
| MsFlags::MS_SHARED
| MsFlags::MS_SLAVE),
)
{
2021-03-31 15:53:23 +02:00
nix_mount(
2021-03-27 12:08:13 +01:00
Some(&*dest),
&*dest,
None::<&str>,
flags | MsFlags::MS_REMOUNT,
None::<&str>,
)?;
}
Ok(())
}
fn parse_mount(m: &Mount) -> (MsFlags, String) {
let mut flags = MsFlags::empty();
let mut data = Vec::new();
if let Some(options) = &m.options {
for s in options {
if let Some((is_clear, flag)) = match s.as_str() {
"defaults" => Some((false, MsFlags::empty())),
"ro" => Some((false, MsFlags::MS_RDONLY)),
"rw" => Some((true, MsFlags::MS_RDONLY)),
"suid" => Some((true, MsFlags::MS_NOSUID)),
"nosuid" => Some((false, MsFlags::MS_NOSUID)),
"dev" => Some((true, MsFlags::MS_NODEV)),
"nodev" => Some((false, MsFlags::MS_NODEV)),
"exec" => Some((true, MsFlags::MS_NOEXEC)),
"noexec" => Some((false, MsFlags::MS_NOEXEC)),
"sync" => Some((false, MsFlags::MS_SYNCHRONOUS)),
"async" => Some((true, MsFlags::MS_SYNCHRONOUS)),
"dirsync" => Some((false, MsFlags::MS_DIRSYNC)),
"remount" => Some((false, MsFlags::MS_REMOUNT)),
"mand" => Some((false, MsFlags::MS_MANDLOCK)),
"nomand" => Some((true, MsFlags::MS_MANDLOCK)),
"atime" => Some((true, MsFlags::MS_NOATIME)),
"noatime" => Some((false, MsFlags::MS_NOATIME)),
"diratime" => Some((true, MsFlags::MS_NODIRATIME)),
"nodiratime" => Some((false, MsFlags::MS_NODIRATIME)),
"bind" => Some((false, MsFlags::MS_BIND)),
"rbind" => Some((false, MsFlags::MS_BIND | MsFlags::MS_REC)),
"unbindable" => Some((false, MsFlags::MS_UNBINDABLE)),
"runbindable" => Some((false, MsFlags::MS_UNBINDABLE | MsFlags::MS_REC)),
"private" => Some((false, MsFlags::MS_PRIVATE)),
"rprivate" => Some((false, MsFlags::MS_PRIVATE | MsFlags::MS_REC)),
"shared" => Some((false, MsFlags::MS_SHARED)),
"rshared" => Some((false, MsFlags::MS_SHARED | MsFlags::MS_REC)),
"slave" => Some((false, MsFlags::MS_SLAVE)),
"rslave" => Some((false, MsFlags::MS_SLAVE | MsFlags::MS_REC)),
"relatime" => Some((false, MsFlags::MS_RELATIME)),
"norelatime" => Some((true, MsFlags::MS_RELATIME)),
"strictatime" => Some((false, MsFlags::MS_STRICTATIME)),
"nostrictatime" => Some((true, MsFlags::MS_STRICTATIME)),
_ => None,
} {
if is_clear {
flags &= !flag;
} else {
flags |= flag;
}
2021-03-27 12:08:13 +01:00
} else {
data.push(s.as_str());
};
}
2021-03-27 12:08:13 +01:00
}
(flags, data.join(","))
}