1
0
mirror of https://github.com/containers/youki synced 2025-04-30 13:20:17 +02:00

Implement device access limits using cgroups.

This commit is contained in:
utam0k 2021-04-11 22:01:14 +09:00
parent a20a9dc62f
commit 3f0007d19c
6 changed files with 169 additions and 67 deletions

@ -1,83 +1,156 @@
use std::collections::HashSet;
use std::fs::{create_dir_all, remove_dir, OpenOptions};
use std::io::Write;
use std::path::PathBuf;
use std::{
fs::{create_dir_all, remove_dir, OpenOptions},
path::Path,
};
use anyhow::Result;
use nix::unistd::Pid;
use procfs::process::Process;
use procfs::process::{MountInfo, Process};
use crate::utils::PathBufExt;
use crate::{
rootfs::default_devices,
spec::{LinuxDeviceCgroup, LinuxDeviceType, LinuxResources},
utils::PathBufExt,
};
/// Handle used to apply and remove the device cgroup settings of one container.
pub struct Manager {
/// Path of the container's cgroup; it is joined onto the mount point of
/// `mount_info` to obtain the directory that is created and removed.
cgroup_path: PathBuf,
/// Mount entry of type `cgroup` whose mount point ends with `devices`,
/// taken from the current process' mountinfo when the manager is built.
mount_info: MountInfo,
}
impl Manager {
pub fn new(cgroup_path: PathBuf) -> Self {
Manager { cgroup_path }
}
pub fn apply(&self, pid: Pid) -> Result<()> {
println!("cgroup_path: {:?}", self.cgroup_path.display());
for mount in Process::myself()?
pub fn new(cgroup_path: PathBuf) -> Result<Self> {
let mut mount: Vec<MountInfo> = Process::myself()?
.mountinfo()?
.into_iter()
.filter(|m| m.fs_type == "cgroup")
{
let (a, _b): (HashSet<_>, HashSet<_>) = mount
.mount_options
.into_iter()
.chain(mount.super_options)
.partition(|&(_, ref m)| m.is_none());
.filter(|m| m.fs_type == "cgroup" && m.mount_point.ends_with("devices"))
.collect();
Ok(Manager {
cgroup_path,
mount_info: mount.pop().unwrap(),
})
}
if mount.mount_point.ends_with("devices") {
let p = mount.mount_point.join_absolute_path(&self.cgroup_path)?;
create_dir_all(&p)?;
eprintln!("pid: {:?}", pid.to_string());
let cgroups_procs = p.join("cgroup.procs");
let mut f = OpenOptions::new()
.create(false)
.write(true)
.truncate(true)
.open(cgroups_procs)?;
f.write_all(pid.to_string().as_bytes())?;
let device_deny = p.join("devices.deny");
OpenOptions::new()
.create(false)
.write(true)
.truncate(true)
.open(device_deny)?
.write_all("b 8:0 rw".as_bytes())?;
println!(
"{} on {} type {} ({})",
mount.mount_source.unwrap(),
mount.mount_point.display(),
mount.fs_type,
a.into_iter().map(|(k, _)| k).collect::<Vec<_>>().join(",")
);
}
pub fn apply(&self, linux_resources: &LinuxResources, pid: Pid) -> Result<()> {
for cgroup in Process::myself()?.cgroups()?.iter() {
eprintln!("c: {:?}", cgroup)
}
let p = self
.mount_info
.mount_point
// .join_absolute_path(&PathBuf::from("/user.slice"))?
.join_absolute_path(&self.cgroup_path)?;
create_dir_all(&p)?;
for d in &linux_resources.devices {
Self::apply_device(d, &p)?;
}
for d in default_devices().iter() {
Self::apply_device(&d.into(), &p)?;
}
for d in Self::default_allow_devices().iter() {
Self::apply_device(&d, &p)?;
}
let cgroups_procs = p.join("cgroup.procs");
OpenOptions::new()
.create(false)
.write(true)
.truncate(true)
.open(cgroups_procs)?
.write_all(pid.to_string().as_bytes())?;
Ok(())
}
/// Writes one device rule into the cgroup directory `cgroup_root`.
///
/// The rule is written to `devices.allow` when `device.allow` is true and to
/// `devices.deny` otherwise, formatted as `<type> <major>:<minor> <access>`.
/// A missing major or minor number is written as `*`.
///
/// # Errors
///
/// Returns an error if the control file cannot be opened (it is never
/// created here) or if the write fails.
fn apply_device(device: &LinuxDeviceCgroup, cgroup_root: &Path) -> Result<()> {
    let file_name = if device.allow {
        "devices.allow"
    } else {
        "devices.deny"
    };
    let control_file = cgroup_root.join(file_name);

    // `None` stands for "any number" and is rendered as a wildcard.
    let number_or_wildcard =
        |number: Option<i64>| number.map_or_else(|| "*".to_string(), |n| n.to_string());
    let major = number_or_wildcard(device.major);
    let minor = number_or_wildcard(device.minor);

    let rule = format!(
        "{} {}:{} {}",
        device.typ.as_str(),
        major,
        minor,
        device.access
    );
    eprintln!("device_deny: {:?} val: {:?}", control_file.display(), rule);

    let mut file = OpenOptions::new()
        .create(false)
        .write(true)
        .truncate(true)
        .open(control_file)?;
    file.write_all(rule.as_bytes())?;
    Ok(())
}
fn default_allow_devices() -> Vec<LinuxDeviceCgroup> {
vec![
LinuxDeviceCgroup {
allow: true,
typ: LinuxDeviceType::C,
major: None,
minor: None,
access: "m".to_string(),
},
LinuxDeviceCgroup {
allow: true,
typ: LinuxDeviceType::B,
major: None,
minor: None,
access: "m".to_string(),
},
// /dev/console
LinuxDeviceCgroup {
allow: true,
typ: LinuxDeviceType::C,
major: Some(5),
minor: Some(1),
access: "rwm".to_string(),
},
// /dev/pts
LinuxDeviceCgroup {
allow: true,
typ: LinuxDeviceType::C,
major: Some(136),
minor: None,
access: "rwm".to_string(),
},
LinuxDeviceCgroup {
allow: true,
typ: LinuxDeviceType::C,
major: Some(5),
minor: Some(2),
access: "rwm".to_string(),
},
// tun/tap
LinuxDeviceCgroup {
allow: true,
typ: LinuxDeviceType::C,
major: Some(10),
minor: Some(200),
access: "rwm".to_string(),
},
]
}
pub fn remove(&self) -> Result<()> {
for mount in Process::myself()?
.mountinfo()?
.into_iter()
.filter(|m| m.fs_type == "cgroup")
{
if mount.mount_point.ends_with("devices") {
let p = mount.mount_point.join_absolute_path(&self.cgroup_path)?;
println!("remove p: {:?}", p.display());
remove_dir(&p)?;
}
}
let p = self
.mount_info
.mount_point
.join_absolute_path(&self.cgroup_path)?;
println!("remove_dir: {:?}", p.display());
remove_dir(&p)?;
Ok(())
}

@ -43,6 +43,7 @@ impl Create {
unistd::chdir(&self.bundle)?;
let spec = spec::Spec::load("config.json")?;
fs::copy("config.json", container_dir.join("config.json"))?;
let container_dir = fs::canonicalize(container_dir)?;
unistd::chdir(&*container_dir)?;
@ -109,14 +110,14 @@ fn run_container<P: AsRef<Path>>(
}
}
let cmanager = cgroups::Manager::new(linux.cgroups_path.clone());
let cmanager = cgroups::Manager::new(linux.cgroups_path.clone())?;
match fork::fork_first(
pid_file,
cf.contains(sched::CloneFlags::CLONE_NEWUSER),
linux,
&container,
cmanager,
&cmanager,
)? {
Process::Parent(parent) => Ok(Process::Parent(parent)),
Process::Child(child) => {
@ -134,7 +135,7 @@ fn run_container<P: AsRef<Path>>(
}
}
match fork::fork_init(child)? {
match fork::fork_init(child, &cmanager)? {
Process::Child(child) => Ok(Process::Child(child)),
Process::Init(mut init) => {
let spec_args: &Vec<String> = &spec.process.args.clone();

@ -5,9 +5,11 @@ use anyhow::{bail, Result};
use clap::Clap;
use nix::sys::signal as nix_signal;
use youki::cgroups;
use youki::container::{Container, ContainerStatus};
use youki::create;
use youki::signal;
use youki::spec;
use youki::start;
#[derive(Clap, Debug)]
@ -110,6 +112,9 @@ fn main() -> Result<()> {
if container.can_delete() {
if container.root.exists() {
fs::remove_dir_all(&container.root)?;
let spec = spec::Spec::load("config.json")?;
let cmanager = cgroups::Manager::new(spec.linux.unwrap().cgroups_path)?;
cmanager.remove()?;
}
std::process::exit(0)
} else {

@ -24,7 +24,7 @@ pub fn fork_first<P: AsRef<Path>>(
userns: bool,
linux: &spec::Linux,
container: &Container,
cmanager: cgroups::Manager,
cmanager: &cgroups::Manager,
) -> Result<Process> {
let ccond = Cond::new()?;
@ -54,7 +54,7 @@ pub fn fork_first<P: AsRef<Path>>(
unistd::ForkResult::Parent { child } => {
ccond.wait()?;
cmanager.apply(child)?;
cmanager.apply(&linux.resources.as_ref().unwrap(), child)?;
let init_pid = parent.wait_for_child_ready()?;
container
@ -71,7 +71,7 @@ pub fn fork_first<P: AsRef<Path>>(
}
}
pub fn fork_init(mut child_process: ChildProcess) -> Result<Process> {
pub fn fork_init(mut child_process: ChildProcess, cmanager: &cgroups::Manager) -> Result<Process> {
let sender_for_child = child_process.setup_uds()?;
unsafe {
match unistd::fork()? {
@ -82,6 +82,7 @@ pub fn fork_init(mut child_process: ChildProcess) -> Result<Process> {
match waitpid(child, None)? {
WaitStatus::Exited(pid, status) => {
// cmanager.remove()?;
log::debug!("exited pid: {:?}, status: {:?}", pid, status);
exit(status);
}

@ -134,8 +134,8 @@ fn setup_default_symlinks(rootfs: &Path) -> Result<()> {
Ok(())
}
fn default_devices() -> [LinuxDevice; 6] {
[
pub fn default_devices() -> Vec<LinuxDevice> {
vec![
LinuxDevice {
path: PathBuf::from("/dev/null"),
typ: LinuxDeviceType::C,

@ -113,6 +113,16 @@ impl LinuxDeviceType {
Self::A => bail!("type a is not allowed for linux device"),
})
}
/// Returns the lowercase one-letter name of this device type:
/// `"a"`, `"b"`, `"c"`, `"p"` or `"u"`.
pub fn as_str(&self) -> &str {
    match *self {
        Self::A => "a",
        Self::B => "b",
        Self::C => "c",
        Self::P => "p",
        Self::U => "u",
    }
}
}
#[derive(Serialize, Deserialize, Debug, Clone)]
@ -277,6 +287,18 @@ pub struct LinuxDevice {
pub gid: Option<u32>,
}
impl From<&LinuxDevice> for LinuxDeviceCgroup {
fn from(linux_device: &LinuxDevice) -> LinuxDeviceCgroup {
LinuxDeviceCgroup {
allow: true,
typ: linux_device.typ,
major: Some(linux_device.major as i64),
minor: Some(linux_device.minor as i64),
access: "rwm".to_string(),
}
}
}
#[derive(Serialize, Deserialize, Debug, Clone, Copy)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
#[repr(u32)]