mirror of
https://github.com/BLAKE3-team/BLAKE3
synced 2024-05-23 17:06:05 +02:00
move file operations from b3sum to blake3
This commit is contained in:
parent
e302cdf36f
commit
b9b2361dca
|
@ -79,6 +79,8 @@ no_neon = []
|
|||
|
||||
zeroize = ["zeroize_crate", "arrayvec/zeroize"]
|
||||
|
||||
file = ["memmap2", "rayon", "std"]
|
||||
|
||||
[package.metadata.docs.rs]
|
||||
# Document Hasher::update_rayon on docs.rs.
|
||||
features = ["rayon"]
|
||||
|
@ -91,6 +93,7 @@ rayon = { version = "1.2.1", optional = true }
|
|||
cfg-if = "1.0.0"
|
||||
digest = { version = "0.10.1", features = [ "mac" ], optional = true }
|
||||
zeroize_crate = { package = "zeroize", version = "1", default-features = false, features = ["zeroize_derive"], optional = true }
|
||||
memmap2 = { version = "0.7.1", optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
hex = "0.4.2"
|
||||
|
|
|
@ -111,19 +111,10 @@ dependencies = [
|
|||
"cc",
|
||||
"cfg-if",
|
||||
"constant_time_eq",
|
||||
"digest",
|
||||
"memmap2",
|
||||
"rayon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "block-buffer"
|
||||
version = "0.10.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.79"
|
||||
|
@ -233,27 +224,6 @@ dependencies = [
|
|||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crypto-common"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
"typenum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.10.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
|
||||
dependencies = [
|
||||
"block-buffer",
|
||||
"crypto-common",
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "duct"
|
||||
version = "0.13.6"
|
||||
|
@ -302,16 +272,6 @@ dependencies = [
|
|||
"instant",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "generic-array"
|
||||
version = "0.14.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
|
||||
dependencies = [
|
||||
"typenum",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.1"
|
||||
|
@ -527,12 +487,6 @@ version = "0.10.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
||||
|
||||
[[package]]
|
||||
name = "subtle"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.23"
|
||||
|
@ -568,12 +522,6 @@ dependencies = [
|
|||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typenum"
|
||||
version = "1.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.10"
|
||||
|
@ -586,12 +534,6 @@ version = "0.2.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "wild"
|
||||
version = "2.1.0"
|
||||
|
|
|
@ -15,7 +15,7 @@ pure = ["blake3/pure"]
|
|||
|
||||
[dependencies]
|
||||
anyhow = "1.0.25"
|
||||
blake3 = { version = "1", path = "..", features = ["rayon"] }
|
||||
blake3 = { version = "1", path = "..", features = ["file", "rayon"] }
|
||||
clap = { version = "4.0.8", features = ["derive", "wrap_help"] }
|
||||
hex = "0.4.0"
|
||||
memmap2 = "0.7.0"
|
||||
|
|
|
@ -182,7 +182,7 @@ impl Input {
|
|||
}
|
||||
let file = File::open(path)?;
|
||||
if !args.no_mmap() {
|
||||
if let Some(mmap) = maybe_memmap_file(&file)? {
|
||||
if let Some(mmap) = blake3::file::maybe_memmap_file(&file)? {
|
||||
return Ok(Self::Mmap(io::Cursor::new(mmap)));
|
||||
}
|
||||
}
|
||||
|
@ -208,12 +208,12 @@ impl Input {
|
|||
// one. We might implement that in the future, but since this is
|
||||
// the slow path anyway, it's not high priority.
|
||||
Self::File(file) => {
|
||||
copy_wide(file, &mut hasher)?;
|
||||
blake3::copy_wide(file, &mut hasher)?;
|
||||
}
|
||||
Self::Stdin => {
|
||||
let stdin = io::stdin();
|
||||
let lock = stdin.lock();
|
||||
copy_wide(lock, &mut hasher)?;
|
||||
blake3::copy_wide(lock, &mut hasher)?;
|
||||
}
|
||||
}
|
||||
let mut output_reader = hasher.finalize_xof();
|
||||
|
@ -232,58 +232,6 @@ impl Read for Input {
|
|||
}
|
||||
}
|
||||
|
||||
// A 16 KiB buffer is enough to take advantage of all the SIMD instruction sets
|
||||
// that we support, but `std::io::copy` currently uses 8 KiB. Most platforms
|
||||
// can support at least 64 KiB, and there's some performance benefit to using
|
||||
// bigger reads, so that's what we use here.
|
||||
fn copy_wide(mut reader: impl Read, hasher: &mut blake3::Hasher) -> io::Result<u64> {
|
||||
let mut buffer = [0; 65536];
|
||||
let mut total = 0;
|
||||
loop {
|
||||
match reader.read(&mut buffer) {
|
||||
Ok(0) => return Ok(total),
|
||||
Ok(n) => {
|
||||
hasher.update(&buffer[..n]);
|
||||
total += n as u64;
|
||||
}
|
||||
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
|
||||
Err(e) => return Err(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Mmap a file, if it looks like a good idea. Return None in cases where we
|
||||
// know mmap will fail, or if the file is short enough that mmapping isn't
|
||||
// worth it. However, if we do try to mmap and it fails, return the error.
|
||||
fn maybe_memmap_file(file: &File) -> Result<Option<memmap2::Mmap>> {
|
||||
let metadata = file.metadata()?;
|
||||
let file_size = metadata.len();
|
||||
Ok(if !metadata.is_file() {
|
||||
// Not a real file.
|
||||
None
|
||||
} else if file_size > isize::max_value() as u64 {
|
||||
// Too long to safely map.
|
||||
// https://github.com/danburkert/memmap-rs/issues/69
|
||||
None
|
||||
} else if file_size == 0 {
|
||||
// Mapping an empty file currently fails.
|
||||
// https://github.com/danburkert/memmap-rs/issues/72
|
||||
None
|
||||
} else if file_size < 16 * 1024 {
|
||||
// Mapping small files is not worth it.
|
||||
None
|
||||
} else {
|
||||
// Explicitly set the length of the memory map, so that filesystem
|
||||
// changes can't race to violate the invariants we just checked.
|
||||
let map = unsafe {
|
||||
memmap2::MmapOptions::new()
|
||||
.len(file_size as usize)
|
||||
.map(file)?
|
||||
};
|
||||
Some(map)
|
||||
})
|
||||
}
|
||||
|
||||
fn write_hex_output(mut output: blake3::OutputReader, args: &Args) -> Result<()> {
|
||||
// Encoding multiples of the 64 bytes is most efficient.
|
||||
// TODO: This computes each output block twice when the --seek argument isn't a multiple of 64.
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
//! Utilities for hashing files, using a memory map when that is worthwhile.
|
||||
//!
|
||||
//! # Examples
|
||||
//!
|
||||
//! ```no_run
|
||||
//! use std::io;
|
||||
//!
|
||||
//! use blake3::file::hash_path_maybe_mmap;
|
||||
//!
|
||||
//! fn main() -> io::Result<()> {
|
||||
//! let args: Vec<_> = std::env::args_os().collect();
|
||||
//! assert_eq!(args.len(), 2);
|
||||
//! let path = &args[1];
|
||||
//! let mut hasher = blake3::Hasher::new();
|
||||
//! hash_path_maybe_mmap(&mut hasher, path)?;
|
||||
//! println!("{}", hasher.finalize());
|
||||
//! Ok(())
|
||||
//! }
|
||||
//! ```
|
||||
|
||||
use std::{fs::File, io, path::Path};
|
||||
|
||||
/// Mmap a file, if it looks like a good idea. Return None in cases where we
|
||||
/// know mmap will fail, or if the file is short enough that mmapping isn't
|
||||
/// worth it. However, if we do try to mmap and it fails, return the error.
|
||||
pub fn maybe_memmap_file(file: &File) -> io::Result<Option<memmap2::Mmap>> {
|
||||
let metadata = file.metadata()?;
|
||||
let file_size = metadata.len();
|
||||
#[allow(clippy::if_same_then_else)]
|
||||
if !metadata.is_file() {
|
||||
// Not a real file.
|
||||
Ok(None)
|
||||
} else if file_size > isize::max_value() as u64 {
|
||||
// Too long to safely map.
|
||||
// https://github.com/danburkert/memmap-rs/issues/69
|
||||
Ok(None)
|
||||
} else if file_size == 0 {
|
||||
// Mapping an empty file currently fails.
|
||||
// https://github.com/danburkert/memmap-rs/issues/72
|
||||
Ok(None)
|
||||
} else if file_size < 16 * 1024 {
|
||||
// Mapping small files is not worth it.
|
||||
Ok(None)
|
||||
} else {
|
||||
// Explicitly set the length of the memory map, so that filesystem
|
||||
// changes can't race to violate the invariants we just checked.
|
||||
let map = unsafe {
|
||||
memmap2::MmapOptions::new()
|
||||
.len(file_size as usize)
|
||||
.map(file)?
|
||||
};
|
||||
Ok(Some(map))
|
||||
}
|
||||
}
|
||||
|
||||
/// Hash a file fast.
|
||||
///
|
||||
/// It may use mmap if the file is big enough. If not, it will read the whole file into a buffer.
|
||||
pub fn hash_path_maybe_mmap(hasher: &mut crate::Hasher, path: impl AsRef<Path>) -> io::Result<()> {
|
||||
let file = File::open(path.as_ref())?;
|
||||
if let Some(mmap) = maybe_memmap_file(&file)? {
|
||||
hasher.update_rayon(&mmap);
|
||||
} else {
|
||||
crate::copy_wide(&file, hasher)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
26
src/lib.rs
26
src/lib.rs
|
@ -116,6 +116,9 @@ mod sse41;
|
|||
#[cfg(feature = "traits-preview")]
|
||||
pub mod traits;
|
||||
|
||||
#[cfg(feature = "file")]
|
||||
pub mod file;
|
||||
|
||||
mod join;
|
||||
|
||||
use arrayref::{array_mut_ref, array_ref};
|
||||
|
@ -1353,6 +1356,29 @@ impl std::io::Write for Hasher {
|
|||
}
|
||||
}
|
||||
|
||||
/// Feed everything `reader` yields into `hasher` and return the number of
/// bytes that were read.
///
/// Reads go through a 64 KiB buffer. `std::io::copy` currently uses 8 KiB,
/// whereas 16 KiB is what it takes to benefit from all the SIMD instruction
/// sets that we support. Most platforms can support at least 64 KiB, and
/// bigger reads bring some additional performance benefit, so that is the
/// size used here.
///
/// Reads that fail with `ErrorKind::Interrupted` are retried; any other read
/// error is returned to the caller as is.
#[cfg(feature = "std")]
pub fn copy_wide(mut reader: impl std::io::Read, hasher: &mut Hasher) -> std::io::Result<u64> {
    let mut chunk = [0u8; 65536];
    let mut bytes_copied = 0u64;
    loop {
        let len = match reader.read(&mut chunk) {
            Ok(len) => len,
            Err(err) if err.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        };
        // A zero-length read means the reader is exhausted.
        if len == 0 {
            return Ok(bytes_copied);
        }
        hasher.update(&chunk[..len]);
        bytes_copied += len as u64;
    }
}
|
||||
|
||||
/// An incremental reader for extended output, returned by
|
||||
/// [`Hasher::finalize_xof`](struct.Hasher.html#method.finalize_xof).
|
||||
///
|
||||
|
|
Loading…
Reference in New Issue