mirror of
https://github.com/containers/youki
synced 2024-11-23 09:21:57 +01:00
implement seccomp notify
This commit is contained in:
parent
f15b3fda03
commit
7cbd33ed92
@ -5,7 +5,10 @@ use nix::{
|
||||
unistd::{self, Pid},
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::{marker::PhantomData, os::unix::prelude::RawFd};
|
||||
use std::{
|
||||
marker::PhantomData,
|
||||
os::unix::prelude::{AsRawFd, RawFd},
|
||||
};
|
||||
|
||||
/// Channel Design
|
||||
///
|
||||
@ -37,8 +40,9 @@ impl MainSender {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Sends `Message::SeccompNotify` through this sender.
// NOTE(review): this span is rendered from a diff, so the earlier form (no
// argument, plain `send`) and the later form (takes `fd`, uses `send_fds`)
// appear back to back; only one of them exists in any single revision.
pub fn seccomp_notify_request(&mut self) -> Result<()> {
|
||||
self.sender.send(Message::SeccompNotify)?;
|
||||
// Later form: the message is sent together with `fd` as an attached file
// descriptor. The visible caller passes the seccomp notify fd here.
pub fn seccomp_notify_request(&mut self, fd: RawFd) -> Result<()> {
|
||||
self.sender
|
||||
.send_fds(Message::SeccompNotify, &[fd.as_raw_fd()])?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@ -51,6 +55,12 @@ impl MainSender {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn init_ready(&mut self) -> Result<()> {
|
||||
self.sender.send(Message::InitReady)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Closes this end of the channel by delegating to the wrapped sender's
/// `close`, returning its result unchanged.
pub fn close(&self) -> Result<()> {
    self.sender.close()
}
|
||||
@ -92,14 +102,20 @@ impl MainReceiver {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn wait_for_seccomp_request(&mut self) -> Result<()> {
|
||||
let msg = self
|
||||
pub fn wait_for_seccomp_request(&mut self) -> Result<i32> {
|
||||
let (msg, fds) = self
|
||||
.receiver
|
||||
.recv()
|
||||
.recv_with_fds::<[RawFd; 1]>()
|
||||
.context("failed to wait for seccomp request")?;
|
||||
|
||||
match msg {
|
||||
Message::SeccompNotify => Ok(()),
|
||||
Message::SeccompNotify => {
|
||||
let fd = match fds {
|
||||
Some(fds) => fds[0],
|
||||
None => bail!("expecting fds from seccomp request"),
|
||||
};
|
||||
Ok(fd)
|
||||
}
|
||||
msg => bail!(
|
||||
"receive unexpected message {:?} waiting for seccomp request",
|
||||
msg
|
||||
@ -107,6 +123,22 @@ impl MainReceiver {
|
||||
}
|
||||
}
|
||||
|
||||
/// Waits for associated init process to send ready message
|
||||
/// and return the pid of init process which is forked by init process
|
||||
pub fn wait_for_init_ready(&mut self) -> Result<()> {
|
||||
let msg = self
|
||||
.receiver
|
||||
.recv()
|
||||
.context("failed to wait for init ready")?;
|
||||
match msg {
|
||||
Message::InitReady => Ok(()),
|
||||
msg => bail!(
|
||||
"receive unexpected message {:?} waiting for init ready",
|
||||
msg
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Closes this end of the channel by delegating to the wrapped receiver's
/// `close`, returning its result unchanged.
pub fn close(&self) -> Result<()> {
    self.receiver.close()
}
|
||||
@ -132,12 +164,6 @@ impl IntermediateSender {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn init_ready(&mut self) -> Result<()> {
|
||||
self.sender.send(Message::InitReady)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Closes this end of the channel by delegating to the wrapped sender's
/// `close`, returning its result unchanged.
pub fn close(&self) -> Result<()> {
    self.sender.close()
}
|
||||
@ -164,22 +190,6 @@ impl IntermediateReceiver {
|
||||
}
|
||||
}
|
||||
|
||||
/// Waits for associated init process to send ready message
|
||||
/// and return the pid of init process which is forked by init process
|
||||
pub fn wait_for_init_ready(&mut self) -> Result<()> {
|
||||
let msg = self
|
||||
.receiver
|
||||
.recv()
|
||||
.context("failed to wait for init ready")?;
|
||||
match msg {
|
||||
Message::InitReady => Ok(()),
|
||||
msg => bail!(
|
||||
"receive unexpected message {:?} waiting for init ready",
|
||||
msg
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
/// Closes this end of the channel by delegating to the wrapped receiver's
/// `close`, returning its result unchanged.
pub fn close(&self) -> Result<()> {
    self.receiver.close()
}
|
||||
@ -502,7 +512,7 @@ mod tests {
|
||||
#[test]
|
||||
#[serial]
|
||||
fn test_channel_init_ready() -> Result<()> {
|
||||
let (sender, receiver) = &mut intermediate_channel()?;
|
||||
let (sender, receiver) = &mut main_channel()?;
|
||||
match unsafe { unistd::fork()? } {
|
||||
unistd::ForkResult::Parent { child } => {
|
||||
wait::waitpid(child, None)?;
|
||||
@ -547,7 +557,7 @@ mod tests {
|
||||
#[test]
|
||||
#[serial]
|
||||
fn test_channel_intermediate_graceful_exit() -> Result<()> {
|
||||
let (sender, receiver) = &mut intermediate_channel()?;
|
||||
let (sender, receiver) = &mut main_channel()?;
|
||||
match unsafe { unistd::fork()? } {
|
||||
unistd::ForkResult::Parent { child } => {
|
||||
sender.close().context("failed to close sender")?;
|
||||
|
@ -13,7 +13,7 @@ use nix::{
|
||||
fcntl,
|
||||
unistd::{self, Gid, Uid},
|
||||
};
|
||||
use oci_spec::runtime::{LinuxNamespaceType, LinuxSeccomp, Spec, User};
|
||||
use oci_spec::runtime::{LinuxNamespaceType, Spec, User};
|
||||
use std::collections::HashMap;
|
||||
use std::{
|
||||
env, fs,
|
||||
@ -187,7 +187,6 @@ fn apply_rest_namespaces(
|
||||
|
||||
pub fn container_init_process(
|
||||
args: &ContainerArgs,
|
||||
intermediate_sender: &mut channel::IntermediateSender,
|
||||
main_sender: &mut channel::MainSender,
|
||||
init_receiver: &mut channel::InitReceiver,
|
||||
) -> Result<()> {
|
||||
@ -314,8 +313,10 @@ pub fn container_init_process(
|
||||
// as close to exec as possible.
|
||||
if linux.seccomp().is_some() && proc.no_new_privileges().is_none() {
|
||||
if let Some(seccomp) = linux.seccomp() {
|
||||
seccomp::initialize_seccomp(seccomp).context("failed to execute seccomp")?;
|
||||
sync_seccomp(seccomp, main_sender, init_receiver).context("failed to sync seccomp")?;
|
||||
let notify_fd =
|
||||
seccomp::initialize_seccomp(seccomp).context("failed to execute seccomp")?;
|
||||
sync_seccomp(notify_fd, main_sender, init_receiver)
|
||||
.context("failed to sync seccomp")?;
|
||||
}
|
||||
}
|
||||
|
||||
@ -384,16 +385,18 @@ pub fn container_init_process(
|
||||
// notify socket will still need network related syscalls.
|
||||
if let Some(seccomp) = linux.seccomp() {
|
||||
if proc.no_new_privileges().is_some() {
|
||||
seccomp::initialize_seccomp(seccomp).context("failed to execute seccomp")?;
|
||||
sync_seccomp(seccomp, main_sender, init_receiver).context("failed to sync seccomp")?;
|
||||
let notify_fd =
|
||||
seccomp::initialize_seccomp(seccomp).context("failed to execute seccomp")?;
|
||||
sync_seccomp(notify_fd, main_sender, init_receiver)
|
||||
.context("failed to sync seccomp")?;
|
||||
}
|
||||
}
|
||||
|
||||
// notify parents that the init process is ready to execute the payload.
|
||||
// Notify main process that the init process is ready to execute the payload.
|
||||
// Note, no pid is sent with this message because we are already inside the pid namespace.
|
||||
// The pid outside the pid namespace should be recorded by the intermediate
|
||||
// process.
|
||||
intermediate_sender.init_ready()?;
|
||||
main_sender.init_ready()?;
|
||||
|
||||
// listening on the notify socket for the container start command
|
||||
args.notify_socket.wait_for_container_start()?;
|
||||
@ -476,12 +479,13 @@ fn set_supplementary_gids(user: &User, rootless: &Option<Rootless>) -> Result<()
|
||||
}
|
||||
|
||||
fn sync_seccomp(
|
||||
seccomp: &LinuxSeccomp,
|
||||
fd: Option<i32>,
|
||||
main_sender: &mut channel::MainSender,
|
||||
init_receiver: &mut channel::InitReceiver,
|
||||
) -> Result<()> {
|
||||
if seccomp::is_notify(seccomp) {
|
||||
main_sender.seccomp_notify_request()?;
|
||||
if let Some(fd) = fd {
|
||||
log::debug!("init process sync seccomp, notify fd: {}", fd);
|
||||
main_sender.seccomp_notify_request(fd)?;
|
||||
init_receiver.wait_for_seccomp_request_done()?;
|
||||
}
|
||||
|
||||
|
@ -85,7 +85,10 @@ pub fn container_intermediate_process(
|
||||
init_sender
|
||||
.close()
|
||||
.context("failed to close receiver in init process")?;
|
||||
container_init_process(args, intermediate_sender, main_sender, init_receiver)
|
||||
intermediate_sender
|
||||
.close()
|
||||
.context("failed to close sender in the intermediate process")?;
|
||||
container_init_process(args, main_sender, init_receiver)
|
||||
})?;
|
||||
// Close unused fds in the parent process.
|
||||
intermediate_sender
|
||||
@ -96,9 +99,7 @@ pub fn container_intermediate_process(
|
||||
.context("failed to close unused init sender")?;
|
||||
// There is no point using the pid returned here, since the child will be
|
||||
// inside the pid namespace already.
|
||||
intermediate_receiver
|
||||
.wait_for_init_ready()
|
||||
.context("failed to wait for the child")?;
|
||||
|
||||
// After the child (the container init process) becomes ready, we can signal
|
||||
// the parent (the main process) that we are ready.
|
||||
main_sender
|
||||
|
@ -1,11 +1,16 @@
|
||||
use crate::{
|
||||
container::ContainerProcessState,
|
||||
process::{args::ContainerArgs, channel, container_intermediate_process, fork},
|
||||
rootless::Rootless,
|
||||
seccomp, utils,
|
||||
};
|
||||
use anyhow::{Context, Result};
|
||||
use nix::unistd::Pid;
|
||||
use oci_spec::runtime::LinuxSeccomp;
|
||||
use nix::{
|
||||
sys::{socket, uio},
|
||||
unistd::{self, Pid},
|
||||
};
|
||||
use oci_spec::runtime;
|
||||
use std::path::Path;
|
||||
|
||||
pub fn container_main_process(container_args: &ContainerArgs) -> Result<Pid> {
|
||||
// We use a set of channels to communicate between parent and child process. Each channel is uni-directional.
|
||||
@ -44,13 +49,35 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<Pid> {
|
||||
intermediate_sender.mapping_written()?;
|
||||
}
|
||||
|
||||
// The intermediate process will send the init pid once it forks the init
|
||||
// process. The intermediate process should exit after this point.
|
||||
let init_pid = main_receiver.wait_for_intermediate_ready()?;
|
||||
|
||||
intermediate_sender
|
||||
.close()
|
||||
.context("failed to close unused sender")?;
|
||||
|
||||
if let Some(linux) = container_args.spec.linux() {
|
||||
if let Some(seccomp) = linux.seccomp() {
|
||||
sync_seccomp(seccomp, init_sender, main_receiver)
|
||||
let seccomp_metadata = if let Some(metadata) = seccomp.listener_metadata() {
|
||||
metadata.to_owned()
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
let state = ContainerProcessState {
|
||||
oci_version: container_args.spec.version().to_string(),
|
||||
// runc hardcodes the `seccompFd` name for fds.
|
||||
fds: vec![String::from("seccompFd")],
|
||||
pid: init_pid.as_raw(),
|
||||
metadata: seccomp_metadata,
|
||||
state: container_args
|
||||
.container
|
||||
.as_ref()
|
||||
.context("container state is required")?
|
||||
.state
|
||||
.clone(),
|
||||
};
|
||||
sync_seccomp(seccomp, &state, init_sender, main_receiver)
|
||||
.context("failed to sync seccomp with init")?;
|
||||
}
|
||||
}
|
||||
@ -59,27 +86,71 @@ pub fn container_main_process(container_args: &ContainerArgs) -> Result<Pid> {
|
||||
.close()
|
||||
.context("failed to close unused init sender")?;
|
||||
|
||||
let init_pid = main_receiver.wait_for_intermediate_ready()?;
|
||||
main_receiver
|
||||
.wait_for_init_ready()
|
||||
.context("failed to wait for init ready")?;
|
||||
|
||||
log::debug!("init pid is {:?}", init_pid);
|
||||
|
||||
Ok(init_pid)
|
||||
}
|
||||
|
||||
/// Main-process half of the seccomp notify synchronization with the init
/// process.
///
/// When `seccomp::is_notify(seccomp)` is false nothing is done and `Ok(())`
/// is returned. Otherwise the function waits for the seccomp request on
/// `main_receiver`, forwards the JSON-encoded `state` together with the
/// received fd to the configured listener path, and then calls
/// `init_sender.seccomp_notify_done()`.
///
/// # Errors
///
/// Fails if waiting for the request fails, if no listener path is set, if
/// `state` cannot be encoded, if sending to the listener fails, or if the
/// done notification cannot be sent.
// NOTE(review): this span is rendered from a diff, so lines from the previous
// and the new revision are interleaved (two `seccomp` parameter lines, two
// `wait_for_seccomp_request` calls); only one of each exists in a real revision.
fn sync_seccomp(
|
||||
seccomp: &LinuxSeccomp,
|
||||
seccomp: &runtime::LinuxSeccomp,
|
||||
state: &ContainerProcessState,
|
||||
init_sender: &mut channel::InitSender,
|
||||
main_receiver: &mut channel::MainReceiver,
|
||||
) -> Result<()> {
|
||||
if seccomp::is_notify(seccomp) {
|
||||
log::debug!("main process waiting for sync seccomp");
|
||||
main_receiver.wait_for_seccomp_request()?;
|
||||
// process seccomp notify
|
||||
// The value returned by the wait is the fd that gets forwarded to the
// listener below.
let seccomp_fd = main_receiver.wait_for_seccomp_request()?;
|
||||
// A listener path is mandatory in notify mode; its absence is an error.
let listener_path = seccomp
|
||||
.listener_path()
|
||||
.as_ref()
|
||||
.context("notify will require seccomp listener path to be set")?;
|
||||
// The container process state is serialized to JSON bytes and used as the
// message payload.
let encoded_state =
|
||||
serde_json::to_vec(state).context("failed to encode container process state")?;
|
||||
sync_seccomp_send_msg(listener_path, &encoded_state, seccomp_fd)
|
||||
.context("failed to send msg to seccomp listener")?;
|
||||
// Presumably pairs with the init side's `wait_for_seccomp_request_done` —
// confirm against the channel implementation.
init_sender.seccomp_notify_done()?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn sync_seccomp_send_msg(listener_path: &Path, msg: &[u8], fd: i32) -> Result<()> {
|
||||
// The seccomp listener has specific instructions on how to transmit the
|
||||
// information through seccomp listener. Therefore, we have to use
|
||||
// libc/nix APIs instead of Rust std lib APIs to maintain flexibility.
|
||||
let socket = socket::socket(
|
||||
socket::AddressFamily::Unix,
|
||||
socket::SockType::Stream,
|
||||
socket::SockFlag::empty(),
|
||||
None,
|
||||
)
|
||||
.context("failed to create unix domain socket for seccomp listener")?;
|
||||
let unix_addr =
|
||||
socket::SockAddr::new_unix(listener_path).context("failed to create unix addr")?;
|
||||
socket::connect(socket, &unix_addr).with_context(|| {
|
||||
format!(
|
||||
"failed to connect to seccomp notify listerner path: {:?}",
|
||||
listener_path
|
||||
)
|
||||
})?;
|
||||
// We have to use sendmsg here because the spec requires us to send seccomp notify fds through
|
||||
// SCM_RIGHTS message.
|
||||
// Ref: https://man7.org/linux/man-pages/man3/sendmsg.3p.html
|
||||
// Ref: https://man7.org/linux/man-pages/man3/cmsg.3.html
|
||||
let iov = [uio::IoVec::from_slice(msg)];
|
||||
let fds = [fd];
|
||||
let cmsgs = socket::ControlMessage::ScmRights(&fds);
|
||||
socket::sendmsg(socket, &iov, &[cmsgs], socket::MsgFlags::empty(), None)
|
||||
.context("failed to write container state to seccomp listener")?;
|
||||
let _ = unistd::close(socket);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn setup_mapping(rootless: &Rootless, pid: Pid) -> Result<()> {
|
||||
log::debug!("write mapping for pid {:?}", pid);
|
||||
if !rootless.privileged {
|
||||
|
Loading…
Reference in New Issue
Block a user