Commit 2104aaf2f4 to https://github.com/BLAKE3-team/BLAKE3
Author: Alex H, 2024-03-11 20:18:47 -07:00, committed by GitHub
4 changed files with 117 additions and 1 deletion

b3sum/Cargo.lock (generated)

@@ -78,6 +78,7 @@ dependencies = [
"duct",
"hex",
"rayon",
"read_chunks",
"tempfile",
"wild",
]
@@ -327,6 +328,12 @@ dependencies = [
"crossbeam-utils",
]

[[package]]
name = "read_chunks"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dda95aba172630718d3dffb3c2a3c316848fd9f2725898f56af81fdfcd3c8abf"

[[package]]
name = "redox_syscall"
version = "0.4.1"

b3sum/Cargo.toml

@@ -19,6 +19,7 @@ blake3 = { version = "1", path = "..", features = ["mmap", "rayon"] }
clap = { version = "4.0.8", features = ["derive", "wrap_help"] }
hex = "0.4.0"
rayon = "1.2.1"
read_chunks = "0.2.0"
wild = "2.0.3"

[dev-dependencies]

b3sum/src/main.rs

@@ -163,6 +163,62 @@ impl Args {
    }
}
/// Hashes a reader into a hasher in parallel using rayon. Optionally takes a
/// length, used as a heuristic to decide whether we are better off hashing
/// sequentially.
fn hash_reader_parallel(
    hasher: &mut blake3::Hasher,
    reader: &mut (impl Read + Send),
    len: Option<u64>,
) -> Result<()> {
    // We use read_chunks here because I (ultrabear) coded it, and know it is
    // as safe as the code here.
    // TODO: make this just a function in main.rs instead of an extra dep, but
    // that is only worth doing if we want to merge the PR; this is a proof of
    // concept.
    use read_chunks::ReadExt;

    // 2MiB of total buffer is not an extreme amount of memory, and performance
    // is a good bit better with that amount. Increasing it would probably buy
    // more performance up to a certain point, and there might be a "magic"
    // size to target (where blake3 multithreading can split evenly and reach
    // maximum performance without using too much memory).
    const BUF_SIZE: usize = 1024 * 1024;

    // If anything is under 1MiB, we don't want to put it through
    // multithreading. This probably overshoots the point where multithreading
    // becomes effective (512KiB also showed a performance increase), but it is
    // essential not to undershoot that point and risk making small files
    // slower than they were before this change.
    const MIN_SIZE: u64 = BUF_SIZE as u64;

    // fall back to update_reader if the length is known and too small
    if len.is_some_and(|s| s < MIN_SIZE) {
        hasher.update_reader(reader)?;
        return Ok(());
    }

    // allocate the double buffers and the locations their read results land in
    let mut hashing = vec![0; BUF_SIZE];
    let mut hashing_res = reader.keep_reading(&mut *hashing)?;

    let mut reading_to = vec![0; BUF_SIZE];
    let mut reading_res = None::<io::Result<usize>>;

    while hashing_res != 0 {
        // By scoping we guarantee that all tasks complete, and we get our
        // mutable references back to do error handling and buffer swapping.
        rayon::scope(|s| {
            s.spawn(|_| {
                reading_res = Some(reader.keep_reading(&mut *reading_to));
            });
            s.spawn(|_| {
                hasher.update_rayon(&hashing[..hashing_res]);
            });
        });

        hashing_res = reading_res.take().unwrap()?;
        (hashing, reading_to) = (reading_to, hashing);
    }

    Ok(())
}
fn hash_path(args: &Args, path: &Path) -> Result<blake3::OutputReader> {
    let mut hasher = args.base_hasher.clone();
    if path == Path::new("-") {
@@ -171,7 +227,9 @@ fn hash_path(args: &Args, path: &Path) -> Result<blake3::OutputReader> {
        }
        hasher.update_reader(io::stdin().lock())?;
    } else if args.no_mmap() {
-       hasher.update_reader(File::open(path)?)?;
+       let length = std::fs::metadata(path)?.len();
+       hash_reader_parallel(&mut hasher, &mut File::open(path)?, Some(length))?;
    } else {
        // The fast path: Try to mmap the file and hash it with multiple threads.
        hasher.update_mmap_rayon(path)?;
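
The new hash_reader_parallel leans on read_chunks::ReadExt::keep_reading to retry short reads, so each update_rayon call gets a full 1MiB chunk rather than whatever a single read() happens to return. If the TODO in that function is taken up and the extra dependency dropped, a minimal local stand-in could look like the sketch below (an assumption about the crate's behavior, not its actual implementation; call sites would become keep_reading(reader, &mut *hashing) instead of the trait-method form):

fn keep_reading(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    // Keep calling read() until the buffer is full or EOF is reached, so any
    // return value short of buf.len() unambiguously means end of input.
    let mut filled = 0;
    while filled < buf.len() {
        match reader.read(&mut buf[filled..]) {
            Ok(0) => break, // EOF
            Ok(n) => filled += n,
            // a read() interrupted by a signal is retried, not treated as an error
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(filled)
}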

b3sum/tests/cli_tests.rs

@@ -611,3 +611,53 @@ fn test_globbing() {
        .unwrap();
    assert_eq!(expected, output);
}
#[test]
// Tests that hash_reader_parallel falls back correctly and hashes
// multithreaded input correctly.
fn test_hash_reader_parallel() {
    let dir = tempfile::tempdir().unwrap();
    let file1 = dir.path().join("file1");
    fs::write(&file1, b"foobar").unwrap();

    let expected = blake3::hash(b"foobar");

    let output = cmd!(b3sum_exe(), "--no-mmap", &file1)
        .stdout_capture()
        .run()
        .unwrap()
        .stdout;

    let expected = format!("{}  {}\n", expected.to_hex(), file1.display());

    // fallback test
    assert_eq!(output, expected.as_bytes());

    // tests that multithreading gives correct results
    let file2 = dir.path().join("file2");
    let mut f = fs::File::create(&file2).unwrap();
    let mut expected = blake3::Hasher::new();

    // 20_000 * 62 bytes is about 1.2MiB, which passes the 1MiB threshold for
    // using multithreading
    for _ in 0..20_000 {
        // we use a big string here to avoid looping many times, which is bad
        // for opt-level=0
        const WRITE: &[u8] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
        assert_eq!(WRITE.len(), 62);

        f.write_all(WRITE).unwrap();
        expected.update(WRITE);
    }

    let output = cmd!(b3sum_exe(), "--no-mmap", &file2)
        .stdout_capture()
        .run()
        .unwrap()
        .stdout;

    let expected = format!("{}  {}\n", expected.finalize().to_hex(), file2.display());

    assert_eq!(output, expected.as_bytes());
}
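
hash_reader_parallel is private to main.rs in this commit, so the test above exercises it through the b3sum binary with --no-mmap. If the helper were exposed for unit testing, a direct check against the one-shot hash could look like this sketch (the in-memory reader and sizes are illustrative, not part of the commit):

// 2MiB of data, comfortably above the 1MiB multithreading threshold
let data = vec![0xAB_u8; 2 * 1024 * 1024];
let mut reader = &data[..]; // &[u8] implements Read + Send
let mut hasher = blake3::Hasher::new();
hash_reader_parallel(&mut hasher, &mut reader, Some(data.len() as u64)).unwrap();
// the double-buffered parallel result must match the one-shot hash
assert_eq!(hasher.finalize(), blake3::hash(&data));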