2022-09-14 12:53:05 +02:00
|
|
|
|
use anyhow::{bail, ensure, Result};
|
|
|
|
|
use clap::Parser;
|
2019-12-12 08:13:16 +01:00
|
|
|
|
use std::cmp;
|
2019-12-13 21:56:21 +01:00
|
|
|
|
use std::convert::TryInto;
|
2019-12-12 08:13:16 +01:00
|
|
|
|
use std::fs::File;
|
2020-02-03 17:35:50 +01:00
|
|
|
|
use std::io;
|
2019-12-12 08:13:16 +01:00
|
|
|
|
use std::io::prelude::*;
|
2020-05-13 21:33:17 +02:00
|
|
|
|
use std::path::{Path, PathBuf};
|
2020-05-12 19:08:22 +02:00
|
|
|
|
|
|
|
|
|
#[cfg(test)]
|
|
|
|
|
mod unit_tests;
|
2019-12-12 08:13:16 +01:00
|
|
|
|
|
2020-05-13 21:33:17 +02:00
|
|
|
|
// Program name used as the prefix of diagnostics printed to stderr.
const NAME: &str = "b3sum";
|
|
|
|
|
|
2022-10-03 05:07:33 +02:00
|
|
|
|
// Id of the `derive_key` argument, referenced by `conflicts_with(...)` on
// `Inner::check`.
const DERIVE_KEY_ARG: &str = "derive_key";
|
2020-03-16 17:06:16 +01:00
|
|
|
|
// Id of the `keyed` argument, referenced by `conflicts_with(...)` on
// `Inner::derive_key` and `Inner::check`.
const KEYED_ARG: &str = "keyed";
|
|
|
|
|
// Id of the `length` argument, referenced by `conflicts_with(...)` on
// `Inner::check`.
const LENGTH_ARG: &str = "length";
|
2022-10-03 05:07:33 +02:00
|
|
|
|
// Id of the `no_names` argument, referenced by `conflicts_with(...)` on
// `Inner::check`.
const NO_NAMES_ARG: &str = "no_names";
|
2020-01-13 22:12:47 +01:00
|
|
|
|
// Id of the `raw` argument, referenced by `conflicts_with(...)` on
// `Inner::check`.
const RAW_ARG: &str = "raw";
|
2020-05-13 21:33:17 +02:00
|
|
|
|
// Id of the `check` argument, referenced by `requires(...)` on
// `Inner::quiet`.
const CHECK_ARG: &str = "check";
|
2022-09-14 12:53:05 +02:00
|
|
|
|
|
|
|
|
|
#[derive(Parser)]
|
2022-11-20 21:39:12 +01:00
|
|
|
|
#[command(version, max_term_width(100))]
|
2022-09-14 12:53:05 +02:00
|
|
|
|
struct Inner {
|
2022-11-20 21:39:12 +01:00
|
|
|
|
/// Files to hash, or checkfiles to check
|
2022-10-03 06:28:52 +02:00
|
|
|
|
///
|
|
|
|
|
/// When no file is given, or when - is given, read standard input.
|
2022-09-14 12:53:05 +02:00
|
|
|
|
file: Vec<PathBuf>,
|
|
|
|
|
|
2022-11-20 21:39:12 +01:00
|
|
|
|
/// The number of output bytes, before hex encoding
|
2022-10-03 05:07:33 +02:00
|
|
|
|
#[arg(
|
2022-09-14 12:53:05 +02:00
|
|
|
|
short,
|
2022-10-03 05:07:33 +02:00
|
|
|
|
long,
|
2022-09-14 12:53:05 +02:00
|
|
|
|
default_value_t = blake3::OUT_LEN as u64,
|
2022-10-03 05:07:33 +02:00
|
|
|
|
value_name("LEN")
|
2022-09-14 12:53:05 +02:00
|
|
|
|
)]
|
|
|
|
|
length: u64,
|
|
|
|
|
|
2022-11-20 21:39:12 +01:00
|
|
|
|
/// The maximum number of threads to use
|
2022-10-03 06:28:52 +02:00
|
|
|
|
///
|
|
|
|
|
/// By default, this is the number of logical cores. If this flag is
|
|
|
|
|
/// omitted, or if its value is 0, RAYON_NUM_THREADS is also respected.
|
|
|
|
|
#[arg(long, value_name("NUM"))]
|
2022-09-14 12:53:05 +02:00
|
|
|
|
num_threads: Option<usize>,
|
|
|
|
|
|
2022-11-20 21:39:12 +01:00
|
|
|
|
/// Use the keyed mode
|
2022-10-03 06:28:52 +02:00
|
|
|
|
///
|
|
|
|
|
/// The secret key is read from standard input, and it must be exactly 32
|
|
|
|
|
/// raw bytes.
|
|
|
|
|
#[arg(long, requires("file"))]
|
2022-09-14 12:53:05 +02:00
|
|
|
|
keyed: bool,
|
|
|
|
|
|
2022-11-20 21:39:12 +01:00
|
|
|
|
/// Use the key derivation mode, with the given context string
|
2022-10-03 06:28:52 +02:00
|
|
|
|
///
|
|
|
|
|
/// Cannot be used with --keyed.
|
|
|
|
|
#[arg(long, value_name("CONTEXT"), conflicts_with(KEYED_ARG))]
|
2022-09-14 12:53:05 +02:00
|
|
|
|
derive_key: Option<String>,
|
|
|
|
|
|
2022-11-20 21:39:12 +01:00
|
|
|
|
/// Disable memory mapping
|
2022-10-03 06:28:52 +02:00
|
|
|
|
///
|
|
|
|
|
/// Currently this also disables multithreading.
|
|
|
|
|
#[arg(long)]
|
2022-09-14 12:53:05 +02:00
|
|
|
|
no_mmap: bool,
|
|
|
|
|
|
2022-11-20 21:39:12 +01:00
|
|
|
|
/// Omit filenames in the output
|
2022-10-03 05:07:33 +02:00
|
|
|
|
#[arg(long)]
|
2022-09-14 12:53:05 +02:00
|
|
|
|
no_names: bool,
|
|
|
|
|
|
2022-11-20 21:39:12 +01:00
|
|
|
|
/// Write raw output bytes to stdout, rather than hex
|
2022-10-03 06:28:52 +02:00
|
|
|
|
///
|
|
|
|
|
/// --no-names is implied. In this case, only a single input is allowed.
|
|
|
|
|
#[arg(long)]
|
2022-09-14 12:53:05 +02:00
|
|
|
|
raw: bool,
|
|
|
|
|
|
2022-11-21 02:23:53 +01:00
|
|
|
|
/// Read BLAKE3 sums from the [FILE]s and check them
|
2022-10-03 05:07:33 +02:00
|
|
|
|
#[arg(
|
2022-09-14 12:53:05 +02:00
|
|
|
|
short,
|
2022-10-03 05:07:33 +02:00
|
|
|
|
long,
|
2022-09-14 12:53:05 +02:00
|
|
|
|
conflicts_with(DERIVE_KEY_ARG),
|
|
|
|
|
conflicts_with(KEYED_ARG),
|
|
|
|
|
conflicts_with(LENGTH_ARG),
|
|
|
|
|
conflicts_with(RAW_ARG),
|
|
|
|
|
conflicts_with(NO_NAMES_ARG)
|
|
|
|
|
)]
|
|
|
|
|
check: bool,
|
|
|
|
|
|
2022-11-20 21:39:12 +01:00
|
|
|
|
/// Skip printing OK for each successfully verified file
|
2022-10-03 06:28:52 +02:00
|
|
|
|
///
|
2022-09-14 12:53:05 +02:00
|
|
|
|
/// Must be used with --check.
|
2022-10-03 06:28:52 +02:00
|
|
|
|
#[arg(long, requires(CHECK_ARG))]
|
2022-09-14 12:53:05 +02:00
|
|
|
|
quiet: bool,
|
|
|
|
|
}
|
2019-12-13 21:56:21 +01:00
|
|
|
|
|
2020-05-13 21:33:17 +02:00
|
|
|
|
struct Args {
|
2022-09-14 12:53:05 +02:00
|
|
|
|
inner: Inner,
|
2020-05-13 21:33:17 +02:00
|
|
|
|
file_args: Vec<PathBuf>,
|
|
|
|
|
base_hasher: blake3::Hasher,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl Args {
|
|
|
|
|
fn parse() -> Result<Self> {
|
2022-09-14 12:53:05 +02:00
|
|
|
|
// wild::args_os() is equivalent to std::env::args_os() on Unix,
|
|
|
|
|
// but on Windows it adds support for globbing.
|
|
|
|
|
let inner = Inner::parse_from(wild::args_os());
|
|
|
|
|
let file_args = if !inner.file.is_empty() {
|
|
|
|
|
inner.file.clone()
|
2020-05-13 21:33:17 +02:00
|
|
|
|
} else {
|
|
|
|
|
vec!["-".into()]
|
|
|
|
|
};
|
2022-09-14 12:53:05 +02:00
|
|
|
|
if inner.raw && file_args.len() > 1 {
|
2020-05-13 21:33:17 +02:00
|
|
|
|
bail!("Only one filename can be provided when using --raw");
|
|
|
|
|
}
|
2022-09-14 12:53:05 +02:00
|
|
|
|
let base_hasher = if inner.keyed {
|
2020-05-13 21:33:17 +02:00
|
|
|
|
// In keyed mode, since stdin is used for the key, we can't handle
|
|
|
|
|
// `-` arguments. Input::open handles that case below.
|
|
|
|
|
blake3::Hasher::new_keyed(&read_key_from_stdin()?)
|
2022-09-14 12:53:05 +02:00
|
|
|
|
} else if let Some(ref context) = inner.derive_key {
|
2020-05-13 21:33:17 +02:00
|
|
|
|
blake3::Hasher::new_derive_key(context)
|
|
|
|
|
} else {
|
|
|
|
|
blake3::Hasher::new()
|
|
|
|
|
};
|
|
|
|
|
Ok(Self {
|
|
|
|
|
inner,
|
|
|
|
|
file_args,
|
|
|
|
|
base_hasher,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
2022-09-14 12:53:05 +02:00
|
|
|
|
fn num_threads(&self) -> Option<usize> {
|
|
|
|
|
self.inner.num_threads
|
2020-05-13 21:33:17 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn check(&self) -> bool {
|
2022-09-14 12:53:05 +02:00
|
|
|
|
self.inner.check
|
2020-05-13 21:33:17 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn raw(&self) -> bool {
|
2022-09-14 12:53:05 +02:00
|
|
|
|
self.inner.raw
|
2020-05-13 21:33:17 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn no_mmap(&self) -> bool {
|
2022-09-14 12:53:05 +02:00
|
|
|
|
self.inner.no_mmap
|
2020-05-13 21:33:17 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn no_names(&self) -> bool {
|
2022-09-14 12:53:05 +02:00
|
|
|
|
self.inner.no_names
|
2020-05-13 21:33:17 +02:00
|
|
|
|
}
|
|
|
|
|
|
2022-09-14 12:53:05 +02:00
|
|
|
|
fn len(&self) -> u64 {
|
|
|
|
|
self.inner.length
|
2020-05-13 21:33:17 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn keyed(&self) -> bool {
|
2022-09-14 12:53:05 +02:00
|
|
|
|
self.inner.keyed
|
2020-05-13 21:33:17 +02:00
|
|
|
|
}
|
2020-05-23 18:18:54 +02:00
|
|
|
|
|
|
|
|
|
fn quiet(&self) -> bool {
|
2022-09-14 12:53:05 +02:00
|
|
|
|
self.inner.quiet
|
2020-05-23 18:18:54 +02:00
|
|
|
|
}
|
2020-05-13 21:33:17 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
enum Input {
|
2022-03-26 02:48:20 +01:00
|
|
|
|
Mmap(io::Cursor<memmap2::Mmap>),
|
2020-05-13 21:33:17 +02:00
|
|
|
|
File(File),
|
|
|
|
|
Stdin,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl Input {
|
|
|
|
|
// Open an input file, using mmap if appropriate. "-" means stdin. Note
|
|
|
|
|
// that this convention applies both to command line arguments, and to
|
|
|
|
|
// filepaths that appear in a checkfile.
|
|
|
|
|
fn open(path: &Path, args: &Args) -> Result<Self> {
|
|
|
|
|
if path == Path::new("-") {
|
|
|
|
|
if args.keyed() {
|
|
|
|
|
bail!("Cannot open `-` in keyed mode");
|
|
|
|
|
}
|
|
|
|
|
return Ok(Self::Stdin);
|
|
|
|
|
}
|
|
|
|
|
let file = File::open(path)?;
|
|
|
|
|
if !args.no_mmap() {
|
|
|
|
|
if let Some(mmap) = maybe_memmap_file(&file)? {
|
|
|
|
|
return Ok(Self::Mmap(io::Cursor::new(mmap)));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
Ok(Self::File(file))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn hash(&mut self, args: &Args) -> Result<blake3::OutputReader> {
|
|
|
|
|
let mut hasher = args.base_hasher.clone();
|
|
|
|
|
match self {
|
|
|
|
|
// The fast path: If we mmapped the file successfully, hash using
|
|
|
|
|
// multiple threads. This doesn't work on stdin, or on some files,
|
|
|
|
|
// and it can also be disabled with --no-mmap.
|
|
|
|
|
Self::Mmap(cursor) => {
|
2021-03-01 03:11:21 +01:00
|
|
|
|
hasher.update_rayon(cursor.get_ref());
|
2020-05-13 21:33:17 +02:00
|
|
|
|
}
|
|
|
|
|
// The slower paths, for stdin or files we didn't/couldn't mmap.
|
|
|
|
|
// This is currently all single-threaded. Doing multi-threaded
|
|
|
|
|
// hashing without memory mapping is tricky, since all your worker
|
|
|
|
|
// threads have to stop every time you refill the buffer, and that
|
|
|
|
|
// ends up being a lot of overhead. To solve that, we need a more
|
|
|
|
|
// complicated double-buffering strategy where a background thread
|
|
|
|
|
// fills one buffer while the worker threads are hashing the other
|
|
|
|
|
// one. We might implement that in the future, but since this is
|
|
|
|
|
// the slow path anyway, it's not high priority.
|
|
|
|
|
Self::File(file) => {
|
|
|
|
|
copy_wide(file, &mut hasher)?;
|
|
|
|
|
}
|
|
|
|
|
Self::Stdin => {
|
|
|
|
|
let stdin = io::stdin();
|
|
|
|
|
let lock = stdin.lock();
|
|
|
|
|
copy_wide(lock, &mut hasher)?;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
Ok(hasher.finalize_xof())
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl Read for Input {
|
|
|
|
|
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
|
|
|
|
match self {
|
|
|
|
|
Self::Mmap(cursor) => cursor.read(buf),
|
|
|
|
|
Self::File(file) => file.read(buf),
|
|
|
|
|
Self::Stdin => io::stdin().read(buf),
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-12-12 08:13:16 +01:00
|
|
|
|
}
|
|
|
|
|
|
2020-02-03 17:35:50 +01:00
|
|
|
|
// A 16 KiB buffer is enough to take advantage of all the SIMD instruction sets
|
|
|
|
|
// that we support, but `std::io::copy` currently uses 8 KiB. Most platforms
|
|
|
|
|
// can support at least 64 KiB, and there's some performance benefit to using
|
|
|
|
|
// bigger reads, so that's what we use here.
|
|
|
|
|
fn copy_wide(mut reader: impl Read, hasher: &mut blake3::Hasher) -> io::Result<u64> {
|
|
|
|
|
let mut buffer = [0; 65536];
|
|
|
|
|
let mut total = 0;
|
|
|
|
|
loop {
|
|
|
|
|
match reader.read(&mut buffer) {
|
|
|
|
|
Ok(0) => return Ok(total),
|
|
|
|
|
Ok(n) => {
|
|
|
|
|
hasher.update(&buffer[..n]);
|
|
|
|
|
total += n as u64;
|
|
|
|
|
}
|
|
|
|
|
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
|
|
|
|
|
Err(e) => return Err(e),
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-13 21:33:17 +02:00
|
|
|
|
// Mmap a file, if it looks like a good idea. Return None in cases where we
|
|
|
|
|
// know mmap will fail, or if the file is short enough that mmapping isn't
|
|
|
|
|
// worth it. However, if we do try to mmap and it fails, return the error.
|
2022-03-26 02:48:20 +01:00
|
|
|
|
fn maybe_memmap_file(file: &File) -> Result<Option<memmap2::Mmap>> {
|
2019-12-12 08:13:16 +01:00
|
|
|
|
let metadata = file.metadata()?;
|
|
|
|
|
let file_size = metadata.len();
|
|
|
|
|
Ok(if !metadata.is_file() {
|
|
|
|
|
// Not a real file.
|
|
|
|
|
None
|
|
|
|
|
} else if file_size > isize::max_value() as u64 {
|
|
|
|
|
// Too long to safely map.
|
|
|
|
|
// https://github.com/danburkert/memmap-rs/issues/69
|
|
|
|
|
None
|
|
|
|
|
} else if file_size == 0 {
|
|
|
|
|
// Mapping an empty file currently fails.
|
|
|
|
|
// https://github.com/danburkert/memmap-rs/issues/72
|
|
|
|
|
None
|
2020-01-15 18:22:20 +01:00
|
|
|
|
} else if file_size < 16 * 1024 {
|
|
|
|
|
// Mapping small files is not worth it.
|
|
|
|
|
None
|
2019-12-12 08:13:16 +01:00
|
|
|
|
} else {
|
|
|
|
|
// Explicitly set the length of the memory map, so that filesystem
|
|
|
|
|
// changes can't race to violate the invariants we just checked.
|
|
|
|
|
let map = unsafe {
|
2022-03-26 02:48:20 +01:00
|
|
|
|
memmap2::MmapOptions::new()
|
2019-12-12 08:13:16 +01:00
|
|
|
|
.len(file_size as usize)
|
2022-03-26 02:48:20 +01:00
|
|
|
|
.map(file)?
|
2019-12-12 08:13:16 +01:00
|
|
|
|
};
|
|
|
|
|
Some(map)
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-13 21:33:17 +02:00
|
|
|
|
fn write_hex_output(mut output: blake3::OutputReader, args: &Args) -> Result<()> {
|
2019-12-12 08:13:16 +01:00
|
|
|
|
// Encoding multiples of the block size is most efficient.
|
2022-09-14 12:53:05 +02:00
|
|
|
|
let mut len = args.len();
|
2021-03-29 02:01:18 +02:00
|
|
|
|
let mut block = [0; blake3::guts::BLOCK_LEN];
|
2019-12-12 08:13:16 +01:00
|
|
|
|
while len > 0 {
|
|
|
|
|
output.fill(&mut block);
|
|
|
|
|
let hex_str = hex::encode(&block[..]);
|
|
|
|
|
let take_bytes = cmp::min(len, block.len() as u64);
|
|
|
|
|
print!("{}", &hex_str[..2 * take_bytes as usize]);
|
|
|
|
|
len -= take_bytes;
|
|
|
|
|
}
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-13 21:33:17 +02:00
|
|
|
|
fn write_raw_output(output: blake3::OutputReader, args: &Args) -> Result<()> {
|
2022-09-14 12:53:05 +02:00
|
|
|
|
let mut output = output.take(args.len());
|
2020-01-13 23:40:30 +01:00
|
|
|
|
let stdout = std::io::stdout();
|
|
|
|
|
let mut handler = stdout.lock();
|
2020-01-14 23:35:18 +01:00
|
|
|
|
std::io::copy(&mut output, &mut handler)?;
|
2020-01-13 22:12:47 +01:00
|
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2019-12-13 21:56:21 +01:00
|
|
|
|
fn read_key_from_stdin() -> Result<[u8; blake3::KEY_LEN]> {
|
|
|
|
|
let mut bytes = Vec::with_capacity(blake3::KEY_LEN + 1);
|
|
|
|
|
let n = std::io::stdin()
|
|
|
|
|
.lock()
|
|
|
|
|
.take(blake3::KEY_LEN as u64 + 1)
|
|
|
|
|
.read_to_end(&mut bytes)?;
|
|
|
|
|
if n < 32 {
|
|
|
|
|
bail!(
|
|
|
|
|
"expected {} key bytes from stdin, found {}",
|
|
|
|
|
blake3::KEY_LEN,
|
|
|
|
|
n,
|
|
|
|
|
)
|
|
|
|
|
} else if n > 32 {
|
|
|
|
|
bail!("read more than {} key bytes from stdin", blake3::KEY_LEN)
|
|
|
|
|
} else {
|
|
|
|
|
Ok(bytes[..blake3::KEY_LEN].try_into().unwrap())
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-04 03:13:58 +02:00
|
|
|
|
// A filepath rendered for printing next to a hash.
struct FilepathString {
    // The (possibly escaped) path text.
    filepath_string: String,
    // True if backslash/newline escaping was applied to `filepath_string`.
    is_escaped: bool,
}

// Convert a path into the string form used in output lines.
//
// Invalid Unicode is replaced lossily. On Windows, backslashes are
// normalized to forward slashes. If the result still contains a backslash or
// a newline, those are escaped as `\\` and `\n` respectively and
// `is_escaped` is set.
fn filepath_to_string(filepath: &Path) -> FilepathString {
    // `into_owned` reuses the allocation when the lossy conversion already
    // had to produce an owned string.
    let mut filepath_string = filepath.to_string_lossy().into_owned();
    // If we're on Windows, normalize backslashes to forward slashes. This
    // avoids a lot of ugly escaping in the common case, and it makes
    // checkfiles created on Windows more likely to be portable to Unix. It
    // also allows us to set a blanket "no backslashes allowed in checkfiles on
    // Windows" rule, rather than allowing a Unix backslash to potentially get
    // interpreted as a directory separator on Windows.
    if cfg!(windows) {
        filepath_string = filepath_string.replace('\\', "/");
    }
    let is_escaped = filepath_string.contains(|c: char| c == '\\' || c == '\n');
    if is_escaped {
        // Backslashes must be doubled first, so that the backslash introduced
        // by the newline escape is not itself doubled.
        filepath_string = filepath_string.replace('\\', "\\\\").replace('\n', "\\n");
    }
    FilepathString {
        filepath_string,
        is_escaped,
    }
}
|
|
|
|
|
|
2020-05-12 19:08:22 +02:00
|
|
|
|
fn hex_half_byte(c: char) -> Result<u8> {
|
|
|
|
|
// The hex characters in the hash must be lowercase for now, though we
|
|
|
|
|
// could support uppercase too if we wanted to.
|
|
|
|
|
if '0' <= c && c <= '9' {
|
|
|
|
|
return Ok(c as u8 - '0' as u8);
|
|
|
|
|
}
|
|
|
|
|
if 'a' <= c && c <= 'f' {
|
|
|
|
|
return Ok(c as u8 - 'a' as u8 + 10);
|
|
|
|
|
}
|
2020-05-13 21:33:17 +02:00
|
|
|
|
bail!("Invalid hex");
|
2020-05-12 19:08:22 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// The `check` command is a security tool. That means it's much better for a
|
|
|
|
|
// check to fail more often than it should (a false negative), than for a check
|
|
|
|
|
// to ever succeed when it shouldn't (a false positive). By forbidding certain
|
|
|
|
|
// characters in checked filepaths, we avoid a class of false positives where
|
|
|
|
|
// two different filepaths can get confused with each other.
|
2020-05-13 21:33:17 +02:00
|
|
|
|
fn check_for_invalid_characters(utf8_path: &str) -> Result<()> {
|
2020-05-12 19:08:22 +02:00
|
|
|
|
// Null characters in paths should never happen, but they can result in a
|
|
|
|
|
// path getting silently truncated on Unix.
|
2020-05-13 21:33:17 +02:00
|
|
|
|
if utf8_path.contains('\0') {
|
|
|
|
|
bail!("Null character in path");
|
2020-05-12 19:08:22 +02:00
|
|
|
|
}
|
|
|
|
|
// Because we convert invalid UTF-8 sequences in paths to the Unicode
|
|
|
|
|
// replacement character, multiple different invalid paths can map to the
|
|
|
|
|
// same UTF-8 string.
|
2020-05-13 21:33:17 +02:00
|
|
|
|
if utf8_path.contains('<27>') {
|
|
|
|
|
bail!("Unicode replacement character in path");
|
2020-05-12 19:08:22 +02:00
|
|
|
|
}
|
|
|
|
|
// We normalize all Windows backslashes to forward slashes in our output,
|
|
|
|
|
// so the only natural way to get a backslash in a checkfile on Windows is
|
|
|
|
|
// to construct it on Unix and copy it over. (Or of course you could just
|
|
|
|
|
// doctor it by hand.) To avoid confusing this with a directory separator,
|
|
|
|
|
// we forbid backslashes entirely on Windows. Note that this check comes
|
|
|
|
|
// after unescaping has been done.
|
2020-05-13 21:33:17 +02:00
|
|
|
|
if cfg!(windows) && utf8_path.contains('\\') {
|
|
|
|
|
bail!("Backslash in path");
|
2020-05-12 19:08:22 +02:00
|
|
|
|
}
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn unescape(mut path: &str) -> Result<String> {
|
|
|
|
|
let mut unescaped = String::with_capacity(2 * path.len());
|
|
|
|
|
while let Some(i) = path.find('\\') {
|
2020-05-13 21:33:17 +02:00
|
|
|
|
ensure!(i < path.len() - 1, "Invalid backslash escape");
|
2020-05-12 19:08:22 +02:00
|
|
|
|
unescaped.push_str(&path[..i]);
|
|
|
|
|
match path[i + 1..].chars().next().unwrap() {
|
|
|
|
|
// Anything other than a recognized escape sequence is an error.
|
|
|
|
|
'n' => unescaped.push_str("\n"),
|
|
|
|
|
'\\' => unescaped.push_str("\\"),
|
2020-05-13 21:33:17 +02:00
|
|
|
|
_ => bail!("Invalid backslash escape"),
|
2020-05-12 19:08:22 +02:00
|
|
|
|
}
|
|
|
|
|
path = &path[i + 2..];
|
|
|
|
|
}
|
|
|
|
|
unescaped.push_str(path);
|
|
|
|
|
Ok(unescaped)
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-13 21:33:17 +02:00
|
|
|
|
#[derive(Debug)]
|
|
|
|
|
struct ParsedCheckLine {
|
|
|
|
|
file_string: String,
|
|
|
|
|
is_escaped: bool,
|
|
|
|
|
file_path: PathBuf,
|
|
|
|
|
expected_hash: blake3::Hash,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn parse_check_line(mut line: &str) -> Result<ParsedCheckLine> {
|
2020-05-12 19:08:22 +02:00
|
|
|
|
// Trim off the trailing newline, if any.
|
|
|
|
|
line = line.trim_end_matches('\n');
|
|
|
|
|
// If there's a backslash at the front of the line, that means we need to
|
|
|
|
|
// unescape the path below. This matches the behavior of e.g. md5sum.
|
|
|
|
|
let first = if let Some(c) = line.chars().next() {
|
|
|
|
|
c
|
|
|
|
|
} else {
|
2020-05-13 21:33:17 +02:00
|
|
|
|
bail!("Empty line");
|
2020-05-12 19:08:22 +02:00
|
|
|
|
};
|
2020-05-13 21:33:17 +02:00
|
|
|
|
let mut is_escaped = false;
|
2020-05-12 19:08:22 +02:00
|
|
|
|
if first == '\\' {
|
2020-05-13 21:33:17 +02:00
|
|
|
|
is_escaped = true;
|
2020-05-12 19:08:22 +02:00
|
|
|
|
line = &line[1..];
|
|
|
|
|
}
|
|
|
|
|
// The front of the line must be a hash of the usual length, followed by
|
|
|
|
|
// two spaces. The hex characters in the hash must be lowercase for now,
|
|
|
|
|
// though we could support uppercase too if we wanted to.
|
|
|
|
|
let hash_hex_len = 2 * blake3::OUT_LEN;
|
|
|
|
|
let num_spaces = 2;
|
|
|
|
|
let prefix_len = hash_hex_len + num_spaces;
|
2020-05-13 21:33:17 +02:00
|
|
|
|
ensure!(line.len() > prefix_len, "Short line");
|
2020-05-12 19:08:22 +02:00
|
|
|
|
ensure!(
|
|
|
|
|
line.chars().take(prefix_len).all(|c| c.is_ascii()),
|
2020-05-13 21:33:17 +02:00
|
|
|
|
"Non-ASCII prefix"
|
2020-05-12 19:08:22 +02:00
|
|
|
|
);
|
2020-05-13 21:33:17 +02:00
|
|
|
|
ensure!(&line[hash_hex_len..][..2] == " ", "Invalid space");
|
2020-05-12 19:08:22 +02:00
|
|
|
|
// Decode the hash hex.
|
|
|
|
|
let mut hash_bytes = [0; blake3::OUT_LEN];
|
|
|
|
|
let mut hex_chars = line[..hash_hex_len].chars();
|
|
|
|
|
for byte in &mut hash_bytes {
|
|
|
|
|
let high_char = hex_chars.next().unwrap();
|
|
|
|
|
let low_char = hex_chars.next().unwrap();
|
|
|
|
|
*byte = 16 * hex_half_byte(high_char)? + hex_half_byte(low_char)?;
|
|
|
|
|
}
|
2020-05-13 21:33:17 +02:00
|
|
|
|
let expected_hash: blake3::Hash = hash_bytes.into();
|
|
|
|
|
let file_string = line[prefix_len..].to_string();
|
|
|
|
|
let file_path_string = if is_escaped {
|
2020-05-12 19:08:22 +02:00
|
|
|
|
// If we detected a backslash at the start of the line earlier, now we
|
|
|
|
|
// need to unescape backslashes and newlines.
|
2020-05-13 21:33:17 +02:00
|
|
|
|
unescape(&file_string)?
|
2020-05-12 19:08:22 +02:00
|
|
|
|
} else {
|
2020-05-13 21:33:17 +02:00
|
|
|
|
file_string.clone().into()
|
2020-05-12 19:08:22 +02:00
|
|
|
|
};
|
2020-05-13 21:33:17 +02:00
|
|
|
|
check_for_invalid_characters(&file_path_string)?;
|
|
|
|
|
Ok(ParsedCheckLine {
|
|
|
|
|
file_string,
|
|
|
|
|
is_escaped,
|
|
|
|
|
file_path: file_path_string.into(),
|
|
|
|
|
expected_hash,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn hash_one_input(path: &Path, args: &Args) -> Result<()> {
|
|
|
|
|
let mut input = Input::open(path, args)?;
|
|
|
|
|
let output = input.hash(args)?;
|
|
|
|
|
if args.raw() {
|
|
|
|
|
write_raw_output(output, args)?;
|
|
|
|
|
return Ok(());
|
|
|
|
|
}
|
|
|
|
|
if args.no_names() {
|
|
|
|
|
write_hex_output(output, args)?;
|
|
|
|
|
println!();
|
|
|
|
|
return Ok(());
|
|
|
|
|
}
|
|
|
|
|
let FilepathString {
|
|
|
|
|
filepath_string,
|
|
|
|
|
is_escaped,
|
|
|
|
|
} = filepath_to_string(path);
|
|
|
|
|
if is_escaped {
|
|
|
|
|
print!("\\");
|
|
|
|
|
}
|
|
|
|
|
write_hex_output(output, args)?;
|
|
|
|
|
println!(" {}", filepath_string);
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-14 17:13:25 +02:00
|
|
|
|
// Returns true for success. Having a boolean return value here, instead of
|
|
|
|
|
// passing down the some_file_failed reference, makes it less likely that we
|
|
|
|
|
// might forget to set it in some error condition.
|
|
|
|
|
fn check_one_line(line: &str, args: &Args) -> bool {
|
|
|
|
|
let parse_result = parse_check_line(&line);
|
|
|
|
|
let ParsedCheckLine {
|
|
|
|
|
file_string,
|
|
|
|
|
is_escaped,
|
|
|
|
|
file_path,
|
|
|
|
|
expected_hash,
|
|
|
|
|
} = match parse_result {
|
|
|
|
|
Ok(parsed) => parsed,
|
|
|
|
|
Err(e) => {
|
|
|
|
|
eprintln!("{}: {}", NAME, e);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
};
|
2020-05-23 18:18:54 +02:00
|
|
|
|
let file_string = if is_escaped {
|
|
|
|
|
"\\".to_string() + &file_string
|
|
|
|
|
} else {
|
|
|
|
|
file_string
|
|
|
|
|
};
|
2020-05-14 17:13:25 +02:00
|
|
|
|
let hash_result: Result<blake3::Hash> = Input::open(&file_path, args)
|
|
|
|
|
.and_then(|mut input| input.hash(args))
|
|
|
|
|
.map(|mut hash_output| {
|
|
|
|
|
let mut found_hash_bytes = [0; blake3::OUT_LEN];
|
|
|
|
|
hash_output.fill(&mut found_hash_bytes);
|
|
|
|
|
found_hash_bytes.into()
|
|
|
|
|
});
|
2020-05-23 18:18:54 +02:00
|
|
|
|
let found_hash: blake3::Hash = match hash_result {
|
2020-05-14 17:13:25 +02:00
|
|
|
|
Ok(hash) => hash,
|
|
|
|
|
Err(e) => {
|
2020-05-23 18:18:54 +02:00
|
|
|
|
println!("{}: FAILED ({})", file_string, e);
|
2020-05-14 17:13:25 +02:00
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
// This is a constant-time comparison.
|
|
|
|
|
if expected_hash == found_hash {
|
2020-05-23 18:18:54 +02:00
|
|
|
|
if !args.quiet() {
|
|
|
|
|
println!("{}: OK", file_string);
|
|
|
|
|
}
|
2020-05-14 17:13:25 +02:00
|
|
|
|
true
|
|
|
|
|
} else {
|
2020-05-23 18:18:54 +02:00
|
|
|
|
println!("{}: FAILED", file_string);
|
2020-05-14 17:13:25 +02:00
|
|
|
|
false
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-05-13 21:33:17 +02:00
|
|
|
|
fn check_one_checkfile(path: &Path, args: &Args, some_file_failed: &mut bool) -> Result<()> {
|
|
|
|
|
let checkfile_input = Input::open(path, args)?;
|
|
|
|
|
let mut bufreader = io::BufReader::new(checkfile_input);
|
|
|
|
|
let mut line = String::new();
|
|
|
|
|
loop {
|
|
|
|
|
line.clear();
|
|
|
|
|
let n = bufreader.read_line(&mut line)?;
|
|
|
|
|
if n == 0 {
|
|
|
|
|
return Ok(());
|
|
|
|
|
}
|
2020-05-14 17:13:25 +02:00
|
|
|
|
// check_one_line() prints errors and turns them into a success=false
|
|
|
|
|
// return, so it doesn't return a Result.
|
|
|
|
|
let success = check_one_line(&line, args);
|
|
|
|
|
if !success {
|
2020-05-13 21:33:17 +02:00
|
|
|
|
*some_file_failed = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
2020-05-12 19:08:22 +02:00
|
|
|
|
}
|
|
|
|
|
|
2019-12-12 08:13:16 +01:00
|
|
|
|
fn main() -> Result<()> {
|
2020-05-13 21:33:17 +02:00
|
|
|
|
let args = Args::parse()?;
|
2020-03-16 17:06:16 +01:00
|
|
|
|
let mut thread_pool_builder = rayon::ThreadPoolBuilder::new();
|
2022-09-14 12:53:05 +02:00
|
|
|
|
if let Some(num_threads) = args.num_threads() {
|
2020-03-16 17:06:16 +01:00
|
|
|
|
thread_pool_builder = thread_pool_builder.num_threads(num_threads);
|
|
|
|
|
}
|
|
|
|
|
let thread_pool = thread_pool_builder.build()?;
|
|
|
|
|
thread_pool.install(|| {
|
2020-05-13 21:33:17 +02:00
|
|
|
|
let mut some_file_failed = false;
|
|
|
|
|
// Note that file_args automatically includes `-` if nothing is given.
|
|
|
|
|
for path in &args.file_args {
|
|
|
|
|
if args.check() {
|
2020-05-14 17:13:25 +02:00
|
|
|
|
// A hash mismatch or a failure to read a hashed file will be
|
|
|
|
|
// printed in the checkfile loop, and will not propagate here.
|
|
|
|
|
// This is similar to the explicit error handling we do in the
|
|
|
|
|
// hashing case immediately below. In these cases,
|
|
|
|
|
// some_file_failed will be set to false.
|
2020-05-13 21:33:17 +02:00
|
|
|
|
check_one_checkfile(path, &args, &mut some_file_failed)?;
|
2020-03-16 17:06:16 +01:00
|
|
|
|
} else {
|
2020-05-13 21:33:17 +02:00
|
|
|
|
// Errors encountered in hashing are tolerated and printed to
|
|
|
|
|
// stderr. This allows e.g. `b3sum *` to print errors for
|
|
|
|
|
// non-files and keep going. However, if we encounter any
|
|
|
|
|
// errors we'll still return non-zero at the end.
|
|
|
|
|
let result = hash_one_input(path, &args);
|
|
|
|
|
if let Err(e) = result {
|
|
|
|
|
some_file_failed = true;
|
2020-06-24 23:51:41 +02:00
|
|
|
|
eprintln!("{}: {}: {}", NAME, path.to_string_lossy(), e);
|
2020-05-13 21:33:17 +02:00
|
|
|
|
}
|
2020-03-16 17:06:16 +01:00
|
|
|
|
}
|
2020-01-13 22:12:47 +01:00
|
|
|
|
}
|
2020-05-13 21:33:17 +02:00
|
|
|
|
std::process::exit(if some_file_failed { 1 } else { 0 });
|
2020-03-16 17:06:16 +01:00
|
|
|
|
})
|
2019-12-12 08:13:16 +01:00
|
|
|
|
}
|
2022-10-03 05:07:33 +02:00
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod test {
    use clap::CommandFactory;

    // Build the clap command derived from `Inner` and run clap's own
    // `debug_assert` over it.
    #[test]
    fn test_args() {
        let command = crate::Inner::command();
        command.debug_assert();
    }
}
|