1
0
Fork 0
mirror of https://github.com/BLAKE3-team/BLAKE3 synced 2024-05-23 17:06:05 +02:00

move file operations from b3sum to blake3

This commit is contained in:
Banyc 2023-07-18 00:48:52 +08:00
parent e302cdf36f
commit b9b2361dca
6 changed files with 101 additions and 115 deletions

View File

@ -79,6 +79,8 @@ no_neon = []
zeroize = ["zeroize_crate", "arrayvec/zeroize"]
file = ["memmap2", "rayon", "std"]
[package.metadata.docs.rs]
# Document Hasher::update_rayon on docs.rs.
features = ["rayon"]
@ -91,6 +93,7 @@ rayon = { version = "1.2.1", optional = true }
cfg-if = "1.0.0"
digest = { version = "0.10.1", features = [ "mac" ], optional = true }
zeroize_crate = { package = "zeroize", version = "1", default-features = false, features = ["zeroize_derive"], optional = true }
memmap2 = { version = "0.7.1", optional = true }
[dev-dependencies]
hex = "0.4.2"

60
b3sum/Cargo.lock generated
View File

@ -111,19 +111,10 @@ dependencies = [
"cc",
"cfg-if",
"constant_time_eq",
"digest",
"memmap2",
"rayon",
]
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
]
[[package]]
name = "cc"
version = "1.0.79"
@ -233,27 +224,6 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
"subtle",
]
[[package]]
name = "duct"
version = "0.13.6"
@ -302,16 +272,6 @@ dependencies = [
"instant",
]
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "glob"
version = "0.3.1"
@ -527,12 +487,6 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "subtle"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
[[package]]
name = "syn"
version = "2.0.23"
@ -568,12 +522,6 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "typenum"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba"
[[package]]
name = "unicode-ident"
version = "1.0.10"
@ -586,12 +534,6 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wild"
version = "2.1.0"

View File

@ -15,7 +15,7 @@ pure = ["blake3/pure"]
[dependencies]
anyhow = "1.0.25"
blake3 = { version = "1", path = "..", features = ["rayon"] }
blake3 = { version = "1", path = "..", features = ["file", "rayon"] }
clap = { version = "4.0.8", features = ["derive", "wrap_help"] }
hex = "0.4.0"
memmap2 = "0.7.0"

View File

@ -182,7 +182,7 @@ impl Input {
}
let file = File::open(path)?;
if !args.no_mmap() {
if let Some(mmap) = maybe_memmap_file(&file)? {
if let Some(mmap) = blake3::file::maybe_memmap_file(&file)? {
return Ok(Self::Mmap(io::Cursor::new(mmap)));
}
}
@ -208,12 +208,12 @@ impl Input {
// one. We might implement that in the future, but since this is
// the slow path anyway, it's not high priority.
Self::File(file) => {
copy_wide(file, &mut hasher)?;
blake3::copy_wide(file, &mut hasher)?;
}
Self::Stdin => {
let stdin = io::stdin();
let lock = stdin.lock();
copy_wide(lock, &mut hasher)?;
blake3::copy_wide(lock, &mut hasher)?;
}
}
let mut output_reader = hasher.finalize_xof();
@ -232,58 +232,6 @@ impl Read for Input {
}
}
// Pump every byte from `reader` into `hasher` and return the number of bytes
// that were hashed.
//
// Reads go through a 64 KiB stack buffer. 16 KiB would already be enough to
// take advantage of all the SIMD instruction sets that we support, and
// `std::io::copy` currently uses 8 KiB, but most platforms can support at
// least 64 KiB and bigger reads bring some performance benefit.
fn copy_wide(mut reader: impl Read, hasher: &mut blake3::Hasher) -> io::Result<u64> {
    let mut chunk = [0u8; 65536];
    let mut bytes_hashed: u64 = 0;
    loop {
        let filled = match reader.read(&mut chunk) {
            Ok(count) => count,
            // An interrupted read is not a failure; just try again.
            Err(ref err) if err.kind() == io::ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        };
        // A zero-length read marks the end of the input.
        if filled == 0 {
            return Ok(bytes_hashed);
        }
        hasher.update(&chunk[..filled]);
        bytes_hashed += filled as u64;
    }
}
// Memory-map `file` when that looks worthwhile.
//
// Yields `None` for inputs we know mmap will fail on, and for files short
// enough that mapping isn't worth it. Once we do decide to map, any failure
// from the mapping call itself is returned as an error.
fn maybe_memmap_file(file: &File) -> Result<Option<memmap2::Mmap>> {
    let info = file.metadata()?;
    let len = info.len();

    // Not a real file.
    if !info.is_file() {
        return Ok(None);
    }
    // Too long to safely map.
    // https://github.com/danburkert/memmap-rs/issues/69
    if len > isize::max_value() as u64 {
        return Ok(None);
    }
    // Mapping an empty file currently fails.
    // https://github.com/danburkert/memmap-rs/issues/72
    if len == 0 {
        return Ok(None);
    }
    // Mapping small files is not worth it.
    if len < 16 * 1024 {
        return Ok(None);
    }

    // Pin the map to the length we just validated, so that filesystem changes
    // can't race to violate the invariants checked above. The cast cannot
    // truncate because `len` was bounded by `isize::max_value()`.
    // NOTE(review): `map` is unsafe; nothing here stops the file from being
    // modified while it is mapped. Confirm callers accept that.
    let mapping = unsafe {
        memmap2::MmapOptions::new()
            .len(len as usize)
            .map(file)?
    };
    Ok(Some(mapping))
}
fn write_hex_output(mut output: blake3::OutputReader, args: &Args) -> Result<()> {
// Encoding multiples of the 64 bytes is most efficient.
// TODO: This computes each output block twice when the --seek argument isn't a multiple of 64.

67
src/file.rs Normal file
View File

@ -0,0 +1,67 @@
//! Utilities for hashing files, including optional memory mapping.
//!
//! # Examples
//!
//! ```no_run
//! use std::io;
//!
//! use blake3::file::hash_path_maybe_mmap;
//!
//! fn main() -> io::Result<()> {
//! let args: Vec<_> = std::env::args_os().collect();
//! assert_eq!(args.len(), 2);
//! let path = &args[1];
//! let mut hasher = blake3::Hasher::new();
//! hash_path_maybe_mmap(&mut hasher, path)?;
//! println!("{}", hasher.finalize());
//! Ok(())
//! }
//! ```
use std::{fs::File, io, path::Path};
/// Memory-map `file` when that looks like a good idea.
///
/// Returns `Ok(None)` when mapping is known to fail or is not worth it:
/// the handle is not a regular file, the length exceeds `isize::max_value()`,
/// the file is empty, or it is shorter than 16 KiB.
///
/// # Errors
///
/// Returns the error from reading the file's metadata. If a mapping is
/// attempted and fails, that error is returned too.
pub fn maybe_memmap_file(file: &File) -> io::Result<Option<memmap2::Mmap>> {
    let metadata = file.metadata()?;
    let size = metadata.len();

    // Not a real file.
    let not_regular = !metadata.is_file();
    // Too long to safely map.
    // https://github.com/danburkert/memmap-rs/issues/69
    let too_long = size > isize::max_value() as u64;
    // Mapping an empty file currently fails.
    // https://github.com/danburkert/memmap-rs/issues/72
    let empty = size == 0;
    // Mapping small files is not worth it.
    let too_small = size < 16 * 1024;

    if not_regular || too_long || empty || too_small {
        return Ok(None);
    }

    // Explicitly set the length of the memory map, so that filesystem changes
    // can't race to violate the invariants we just checked. The cast cannot
    // truncate because `size` was bounded by `isize::max_value()` above.
    // NOTE(review): `map` is unsafe; nothing here stops the file from being
    // modified while it is mapped. Confirm callers accept that.
    let mapping = unsafe {
        memmap2::MmapOptions::new()
            .len(size as usize)
            .map(file)?
    };
    Ok(Some(mapping))
}
/// Open the file at `path` and feed its contents into `hasher`.
///
/// If [`maybe_memmap_file`] decides the file is worth mapping, the mapped
/// bytes are hashed with `Hasher::update_rayon`. Otherwise the file is
/// streamed into the hasher through `copy_wide`.
///
/// The hasher is only updated, never finalized; call `finalize` on it
/// afterwards to obtain the hash.
///
/// # Errors
///
/// Returns any I/O error raised while opening, inspecting, mapping, or
/// reading the file.
pub fn hash_path_maybe_mmap(hasher: &mut crate::Hasher, path: impl AsRef<Path>) -> io::Result<()> {
    let file = File::open(path.as_ref())?;
    match maybe_memmap_file(&file)? {
        Some(mapped) => {
            hasher.update_rayon(&mapped);
        }
        None => {
            // The byte count reported by `copy_wide` is not needed here.
            crate::copy_wide(&file, hasher)?;
        }
    }
    Ok(())
}

View File

@ -116,6 +116,9 @@ mod sse41;
#[cfg(feature = "traits-preview")]
pub mod traits;
#[cfg(feature = "file")]
pub mod file;
mod join;
use arrayref::{array_mut_ref, array_ref};
@ -1353,6 +1356,29 @@ impl std::io::Write for Hasher {
}
}
/// Feed everything `reader` yields into `hasher`, returning the number of
/// bytes read.
///
/// Reads go through a 64 KiB buffer. A 16 KiB buffer is enough to take
/// advantage of all the SIMD instruction sets that we support, but
/// `std::io::copy` currently uses 8 KiB. Most platforms can support at least
/// 64 KiB, and there's some performance benefit to using bigger reads.
///
/// # Errors
///
/// Reads that fail with `ErrorKind::Interrupted` are retried. Any other read
/// error is returned as-is; bytes hashed before the error stay in `hasher`.
#[cfg(feature = "std")]
pub fn copy_wide(mut reader: impl std::io::Read, hasher: &mut Hasher) -> std::io::Result<u64> {
    let mut chunk = [0u8; 65536];
    let mut copied: u64 = 0;
    loop {
        let len = match reader.read(&mut chunk) {
            Ok(len) => len,
            // An interrupted read is not a failure; just try again.
            Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        };
        // A zero-length read marks the end of the input.
        if len == 0 {
            return Ok(copied);
        }
        hasher.update(&chunk[..len]);
        copied += len as u64;
    }
}
/// An incremental reader for extended output, returned by
/// [`Hasher::finalize_xof`](struct.Hasher.html#method.finalize_xof).
///