mirror of
https://github.com/BLAKE3-team/BLAKE3
synced 2024-05-21 02:46:06 +02:00
Compare commits
48 Commits
Author | SHA1 | Date | |
---|---|---|---|
Javier Blazquez | 0816badf3a | ||
Jack O'Connor | 4ec3be8bfa | ||
Benjamin A. Beasley | d99ad871a6 | ||
Jack O'Connor | 54930c9522 | ||
divinity76 | 58bea0bcbb | ||
Jack O'Connor | 5b9af1c347 | ||
Jack O'Connor | d57818afdc | ||
Jack O'Connor | 2435e29dbe | ||
Ryo Onodera | e6e7f27336 | ||
Jack O'Connor | 8fc36186b8 | ||
divinity76 | 2918c51bc6 | ||
JP | a65fcf63ee | ||
Jack O'Connor | 5558fa4623 | ||
Jack O'Connor | 1a6c1e2037 | ||
Jack O'Connor | 1ca383ba9b | ||
Jack O'Connor | 6e519ea6b7 | ||
Jack O'Connor | fc75227170 | ||
Jack O'Connor | 6f3e6fc86c | ||
Dirk Stolle | 4d32708f51 | ||
Dirk Stolle | 5306464d03 | ||
Jack O'Connor | c0ea395cf9 | ||
Henrik S. Gaßmann | 7ce2aa41e9 | ||
Jack O'Connor | 92e4cd71be | ||
Viacheslav H | 1930721c50 | ||
Rui Ueyama | e1f851d461 | ||
Henrik Gaßmann | 3465fe455e | ||
Henrik Gaßmann | 3e14f865d3 | ||
Henrik Gaßmann | bfd568897a | ||
Henrik Gaßmann | dd30dcb002 | ||
Jack O'Connor | 3d921ae703 | ||
Jack O'Connor | 5aa53f07f7 | ||
Jack O'Connor | d7e9365be1 | ||
Ralph Minderhoud | 5e3eb949a7 | ||
Jack O'Connor | 4e25f2e094 | ||
Havard Eidnes | 8bfe93fbf9 | ||
Jack O'Connor | 8cdfaa41ea | ||
Jack O'Connor | b754033a21 | ||
Jack O'Connor | cb32f0bd14 | ||
Banyc | e0bb915641 | ||
Jack O'Connor | 12b368541f | ||
Jack O'Connor | f22d66b307 | ||
Jack O'Connor | cd4b3140cf | ||
Jack O'Connor | 02dec6e9a6 | ||
Jack O'Connor | d6265dafc9 | ||
Javier Blazquez | 12823b8760 | ||
Elichai Turkel | e302cdf36f | ||
Elichai Turkel | f18e19092b | ||
Elichai Turkel | 8e92fc6929 |
|
@ -0,0 +1,2 @@
|
|||
# CMakeLists.txt whitespace fixups
|
||||
3e14f865d30271c74fc68d417af488ea91b66d48
|
|
@ -38,12 +38,10 @@ jobs:
|
|||
]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
- uses: actions/checkout@v4
|
||||
- uses: dtolnay/rust-toolchain@master
|
||||
with:
|
||||
toolchain: ${{ format('{0}-{1}', matrix.channel, matrix.target.toolchain) }}
|
||||
profile: minimal
|
||||
override: true
|
||||
# Print the compiler version, for debugging.
|
||||
- name: print compiler version
|
||||
run: cargo run --quiet
|
||||
|
@ -52,13 +50,17 @@ jobs:
|
|||
- name: print instruction set support
|
||||
run: cargo run --quiet
|
||||
working-directory: ./tools/instruction_set_support
|
||||
# Default tests plus Rayon and RustCrypto trait implementations.
|
||||
- run: cargo test --features=rayon,traits-preview
|
||||
# Default tests plus Rayon and trait implementations.
|
||||
- run: cargo test --features=rayon,traits-preview,serde,zeroize
|
||||
# Same but with only one thread in the Rayon pool. This can find deadlocks.
|
||||
- name: "again with RAYON_NUM_THREADS=1"
|
||||
run: cargo test --features=rayon,traits-preview
|
||||
run: cargo test --features=rayon,traits-preview,serde,zeroize
|
||||
env:
|
||||
RAYON_NUM_THREADS: 1
|
||||
# The mmap feature by itself (update_mmap_rayon is omitted).
|
||||
- run: cargo test --features=mmap
|
||||
# All public features put together.
|
||||
- run: cargo test --features=mmap,rayon,traits-preview,serde,zeroize
|
||||
# no_std tests.
|
||||
- run: cargo test --no-default-features
|
||||
|
||||
|
@ -129,6 +131,17 @@ jobs:
|
|||
run: cargo test
|
||||
working-directory: ./reference_impl
|
||||
|
||||
# the new guts crate
|
||||
- name: guts test
|
||||
run: cargo test --all-features
|
||||
working-directory: ./rust/guts
|
||||
- name: guts no_std build
|
||||
run: cargo build --no-default-features
|
||||
working-directory: ./rust/guts
|
||||
- name: guts no_std test # note that rust/guts/src/test.rs still uses libstd
|
||||
run: cargo test --no-default-features
|
||||
working-directory: ./rust/guts
|
||||
|
||||
b3sum_tests:
|
||||
name: b3sum ${{ matrix.target.name }} ${{ matrix.channel }}
|
||||
runs-on: ${{ matrix.target.os }}
|
||||
|
@ -148,16 +161,14 @@ jobs:
|
|||
# The b3sum MSRV is sometimes higher than the blake3 crate's, because
|
||||
# b3sum depends on Clap. We check in the b3sum Cargo.lock, so Clap
|
||||
# update shouldn't randomly break us here.
|
||||
"1.66.1",
|
||||
"1.74.1",
|
||||
]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
- uses: actions/checkout@v4
|
||||
- uses: dtolnay/rust-toolchain@master
|
||||
with:
|
||||
toolchain: ${{ format('{0}-{1}', matrix.channel, matrix.target.toolchain) }}
|
||||
profile: minimal
|
||||
override: true
|
||||
# Test b3sum.
|
||||
- name: test b3sum
|
||||
run: cargo test
|
||||
|
@ -177,14 +188,13 @@ jobs:
|
|||
- i686-unknown-linux-musl
|
||||
- armv7-unknown-linux-gnueabihf
|
||||
- aarch64-unknown-linux-gnu
|
||||
- mips-unknown-linux-gnu
|
||||
# Big-endian targets. See https://twitter.com/burntsushi5/status/1695483429997945092.
|
||||
- powerpc64-unknown-linux-gnu
|
||||
- s390x-unknown-linux-gnu
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- uses: actions/checkout@v4
|
||||
- uses: dtolnay/rust-toolchain@stable
|
||||
- run: cargo install cross
|
||||
# Test the portable implementation on everything.
|
||||
- run: cross test --target ${{ matrix.arch }}
|
||||
|
@ -210,7 +220,7 @@ jobs:
|
|||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
# Test the intrinsics-based implementations.
|
||||
- run: make -f Makefile.testing test
|
||||
working-directory: ./c
|
||||
|
@ -262,12 +272,10 @@ jobs:
|
|||
strategy:
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions-rs/toolchain@v1
|
||||
- uses: actions/checkout@v4
|
||||
- uses: dtolnay/rust-toolchain@stable
|
||||
with:
|
||||
toolchain: stable
|
||||
target: aarch64-apple-darwin
|
||||
override: true
|
||||
targets: aarch64-apple-darwin
|
||||
- name: build blake3
|
||||
run: cargo build --target aarch64-apple-darwin
|
||||
- name: build b3sum
|
||||
|
@ -278,7 +286,7 @@ jobs:
|
|||
name: build with the Tiny C Compiler
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: install TCC
|
||||
run: sudo apt-get install -y tcc
|
||||
- name: compile
|
||||
|
@ -295,7 +303,7 @@ jobs:
|
|||
name: "compile and test with GCC 5.4"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- uses: addnab/docker-run-action@v3
|
||||
with:
|
||||
image: gcc:5.4
|
||||
|
@ -308,7 +316,7 @@ jobs:
|
|||
|
||||
# CMake build test (Library only), current macOS/Linux only.
|
||||
cmake_build:
|
||||
name: CMake ${{ matrix.os }}
|
||||
name: CMake ${{ matrix.os }} ${{ matrix.compiler }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
|
@ -323,8 +331,21 @@ jobs:
|
|||
- os: macOS-latest
|
||||
compiler: msvc
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: CMake generation
|
||||
run: cmake -S c -B c/build -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/target
|
||||
- name: CMake build / install
|
||||
run: cmake --build c/build --target install
|
||||
|
||||
miri_smoketest:
|
||||
name: Miri smoketest
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: dtolnay/rust-toolchain@nightly
|
||||
with:
|
||||
components: miri
|
||||
# Currently the test search "miri" only matches "test_miri_smoketest", but
|
||||
# we might add more. If this accidentally picks up anything incompatible or
|
||||
# slow, we can narrow it.
|
||||
- run: cargo miri test miri
|
||||
|
|
|
@ -23,18 +23,16 @@ jobs:
|
|||
]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.x"
|
||||
- run: pip install PyGithub
|
||||
- run: sudo apt-get install musl-tools
|
||||
if: matrix.target.os == 'ubuntu-latest'
|
||||
- uses: actions-rs/toolchain@v1
|
||||
- uses: dtolnay/rust-toolchain@stable
|
||||
with:
|
||||
toolchain: stable
|
||||
profile: minimal
|
||||
- run: rustup target add ${{ matrix.target.rust-target }}
|
||||
targets: ${{ matrix.target.rust-target }}
|
||||
- name: build b3sum
|
||||
id: build_b3sum
|
||||
run: python -u .github/workflows/build_b3sum.py ${{ matrix.target.rust-target }}
|
||||
|
|
37
Cargo.toml
37
Cargo.toml
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "blake3"
|
||||
version = "1.4.1"
|
||||
version = "1.5.1"
|
||||
authors = ["Jack O'Connor <oconnor663@gmail.com>", "Samuel Neves"]
|
||||
description = "the BLAKE3 hash function"
|
||||
repository = "https://github.com/BLAKE3-team/BLAKE3"
|
||||
|
@ -23,11 +23,21 @@ neon = []
|
|||
# --no-default-features, the only way to use the SIMD implementations in this
|
||||
# crate is to enable the corresponding instruction sets statically for the
|
||||
# entire build, with e.g. RUSTFLAGS="-C target-cpu=native".
|
||||
std = ["digest/std"]
|
||||
std = []
|
||||
|
||||
# The "rayon" feature (defined below as an optional dependency) enables the
|
||||
# `Hasher::update_rayon` method, for multithreaded hashing. However, even if
|
||||
# this feature is enabled, all other APIs remain single-threaded.
|
||||
# The `rayon` feature (disabled by default, but enabled for docs.rs) adds the
|
||||
# `update_rayon` and (in combination with `mmap` below) `update_mmap_rayon`
|
||||
# methods, for multithreaded hashing. However, even if this feature is enabled,
|
||||
# all other APIs remain single-threaded.
|
||||
rayon = ["dep:rayon", "std"]
|
||||
|
||||
# The `mmap` feature (disabled by default, but enabled for docs.rs) adds the
|
||||
# `update_mmap` and (in combination with `rayon` above) `update_mmap_rayon`
|
||||
# helper methods for memory-mapped IO.
|
||||
mmap = ["std", "dep:memmap2"]
|
||||
|
||||
# Implement the zeroize::Zeroize trait for types in this crate.
|
||||
zeroize = ["dep:zeroize", "arrayvec/zeroize"]
|
||||
|
||||
# This crate implements traits from the RustCrypto project, exposed here as the
|
||||
# "traits-preview" feature. However, these traits aren't stable, and they're
|
||||
|
@ -78,24 +88,29 @@ no_avx512 = []
|
|||
no_neon = []
|
||||
|
||||
[package.metadata.docs.rs]
|
||||
# Document Hasher::update_rayon on docs.rs.
|
||||
features = ["rayon"]
|
||||
# Document the rayon/mmap methods and the Serialize/Deserialize/Zeroize impls on docs.rs.
|
||||
features = ["mmap", "rayon", "serde", "zeroize"]
|
||||
|
||||
[dependencies]
|
||||
arrayref = "0.3.5"
|
||||
arrayvec = { version = "0.7.0", default-features = false }
|
||||
arrayvec = { version = "0.7.4", default-features = false }
|
||||
constant_time_eq = "0.3.0"
|
||||
rayon = { version = "1.2.1", optional = true }
|
||||
cfg-if = "1.0.0"
|
||||
digest = { version = "0.10.1", features = [ "mac" ], optional = true }
|
||||
memmap2 = { version = "0.9", optional = true }
|
||||
rayon = { version = "1.2.1", optional = true }
|
||||
serde = { version = "1.0", default-features = false, features = ["derive"], optional = true }
|
||||
zeroize = { version = "1", default-features = false, features = ["zeroize_derive"], optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
hmac = "0.12.0"
|
||||
hex = "0.4.2"
|
||||
page_size = "0.5.0"
|
||||
page_size = "0.6.0"
|
||||
rand = "0.8.0"
|
||||
rand_chacha = "0.3.0"
|
||||
reference_impl = { path = "./reference_impl" }
|
||||
hmac = "0.12.0"
|
||||
tempfile = "3.8.0"
|
||||
serde_json = "1.0.107"
|
||||
|
||||
[build-dependencies]
|
||||
cc = "1.0.4"
|
||||
|
|
|
@ -201,6 +201,7 @@ Alternatively, it is licensed under the Apache License 2.0.
|
|||
Here's a (non-exhaustive) list of protocols and software that use BLAKE3:
|
||||
|
||||
* [Alephium](https://github.com/alephium/alephium/blob/master/crypto/src/main/scala/org/alephium/crypto/Blake3.scala)
|
||||
* [Bazel](https://github.com/bazelbuild/bazel/releases/tag/6.4.0)
|
||||
* [Chia](https://github.com/Chia-Network/chia-blockchain/blob/main/CHANGELOG.md#10beta8-aka-beta-18---2020-07-16)
|
||||
* [IPFS](https://github.com/ipfs/go-verifcid/issues/13)
|
||||
* [Farcaster](https://www.farcaster.xyz/)
|
||||
|
@ -211,6 +212,7 @@ Here's a (non-exhaustive) list of protocols and software that use BLAKE3:
|
|||
* [Saito](https://saito.tech/)
|
||||
* [Skale](https://github.com/skalenetwork/skale-consensus/pull/284)
|
||||
* [Solana](https://docs.rs/solana-program/1.9.5/solana_program/blake3/index.html)
|
||||
* [Tekken 8](https://en.bandainamcoent.eu/tekken/tekken-8)
|
||||
* [Wasmer](https://github.com/wasmerio/wasmer/blob/4f935a8c162bf604df223003e434e4f7ca253688/lib/cache/src/hash.rs#L21)
|
||||
|
||||
|
||||
|
|
|
@ -4,58 +4,57 @@ version = 3
|
|||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "0.3.2"
|
||||
version = "0.6.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163"
|
||||
checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse",
|
||||
"anstyle-query",
|
||||
"anstyle-wincon",
|
||||
"colorchoice",
|
||||
"is-terminal",
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle"
|
||||
version = "1.0.1"
|
||||
version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd"
|
||||
checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc"
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-parse"
|
||||
version = "0.2.1"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333"
|
||||
checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c"
|
||||
dependencies = [
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-query"
|
||||
version = "1.0.0"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b"
|
||||
checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648"
|
||||
dependencies = [
|
||||
"windows-sys",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-wincon"
|
||||
version = "1.0.1"
|
||||
version = "3.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188"
|
||||
checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"windows-sys",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anyhow"
|
||||
version = "1.0.71"
|
||||
version = "1.0.81"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8"
|
||||
checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247"
|
||||
|
||||
[[package]]
|
||||
name = "arrayref"
|
||||
|
@ -69,22 +68,15 @@ version = "0.7.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "b3sum"
|
||||
version = "1.4.1"
|
||||
version = "1.5.1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
"clap",
|
||||
"duct",
|
||||
"hex",
|
||||
"memmap2",
|
||||
"rayon",
|
||||
"tempfile",
|
||||
"wild",
|
||||
|
@ -92,43 +84,28 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
version = "2.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42"
|
||||
checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf"
|
||||
|
||||
[[package]]
|
||||
name = "blake3"
|
||||
version = "1.4.1"
|
||||
version = "1.5.1"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"arrayvec",
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"constant_time_eq",
|
||||
"digest",
|
||||
"memmap2",
|
||||
"rayon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "block-buffer"
|
||||
version = "0.10.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.79"
|
||||
version = "1.0.90"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
|
||||
checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
|
@ -138,20 +115,19 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
|||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.3.11"
|
||||
version = "4.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1640e5cc7fb47dbb8338fd471b105e7ed6c3cb2aeb00c2e067127ffd3764a05d"
|
||||
checksum = "b230ab84b0ffdf890d5a10abdbc8b83ae1c4918275daea1ab8801f71536b2651"
|
||||
dependencies = [
|
||||
"clap_builder",
|
||||
"clap_derive",
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_builder"
|
||||
version = "4.3.11"
|
||||
version = "4.5.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "98c59138d527eeaf9b53f35a77fcc1fad9d883116070c63d5de1c7dc7b00c72b"
|
||||
checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstyle",
|
||||
|
@ -162,9 +138,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "clap_derive"
|
||||
version = "4.3.2"
|
||||
version = "4.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b8cd2b2a819ad6eec39e8f1d6b53001af1e5469f8c177579cdaeb313115b825f"
|
||||
checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
|
@ -174,9 +150,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "clap_lex"
|
||||
version = "0.5.0"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b"
|
||||
checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce"
|
||||
|
||||
[[package]]
|
||||
name = "colorchoice"
|
||||
|
@ -190,75 +166,36 @@ version = "0.3.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2"
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-deque"
|
||||
version = "0.8.3"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef"
|
||||
checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-epoch"
|
||||
version = "0.9.15"
|
||||
version = "0.9.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7"
|
||||
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
"memoffset",
|
||||
"scopeguard",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.16"
|
||||
version = "0.8.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crypto-common"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
|
||||
dependencies = [
|
||||
"generic-array",
|
||||
"typenum",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "digest"
|
||||
version = "0.10.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
|
||||
dependencies = [
|
||||
"block-buffer",
|
||||
"crypto-common",
|
||||
"subtle",
|
||||
]
|
||||
checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"
|
||||
|
||||
[[package]]
|
||||
name = "duct"
|
||||
version = "0.13.6"
|
||||
version = "0.13.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37ae3fc31835f74c2a7ceda3aeede378b0ae2e74c8f1c36559fcc9ae2a4e7d3e"
|
||||
checksum = "e4ab5718d1224b63252cd0c6f74f6480f9ffeb117438a2e0f5cf6d9a4798929c"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"once_cell",
|
||||
|
@ -268,49 +205,25 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.8.1"
|
||||
version = "1.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
|
||||
checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a"
|
||||
|
||||
[[package]]
|
||||
name = "errno"
|
||||
version = "0.3.1"
|
||||
version = "0.3.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a"
|
||||
checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245"
|
||||
dependencies = [
|
||||
"errno-dragonfly",
|
||||
"libc",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "errno-dragonfly"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "1.9.0"
|
||||
version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be"
|
||||
dependencies = [
|
||||
"instant",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "generic-array"
|
||||
version = "0.14.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
|
||||
dependencies = [
|
||||
"typenum",
|
||||
"version_check",
|
||||
]
|
||||
checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5"
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
|
@ -324,134 +237,72 @@ version = "0.4.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b"
|
||||
|
||||
[[package]]
|
||||
name = "hex"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
|
||||
|
||||
[[package]]
|
||||
name = "instant"
|
||||
version = "0.1.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "io-lifetimes"
|
||||
version = "1.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "is-terminal"
|
||||
version = "0.4.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"rustix 0.38.3",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.147"
|
||||
version = "0.2.153"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
|
||||
checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.3.8"
|
||||
version = "0.4.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0"
|
||||
checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c"
|
||||
|
||||
[[package]]
|
||||
name = "memmap2"
|
||||
version = "0.7.1"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f49388d20533534cd19360ad3d6a7dadc885944aa802ba3995040c5ec11288c6"
|
||||
checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memoffset"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num_cpus"
|
||||
version = "1.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.18.0"
|
||||
version = "1.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
|
||||
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
|
||||
|
||||
[[package]]
|
||||
name = "os_pipe"
|
||||
version = "1.1.4"
|
||||
version = "1.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ae859aa07428ca9a929b936690f8b12dc5f11dd8c6992a18ca93919f28bc177"
|
||||
checksum = "57119c3b893986491ec9aa85056780d3a0f3cf4da7cc09dd3650dbd6c6738fb9"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"windows-sys",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.63"
|
||||
version = "1.0.79"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb"
|
||||
checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.29"
|
||||
version = "1.0.35"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
|
||||
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.7.0"
|
||||
version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b"
|
||||
checksum = "e4963ed1bc86e4f3ee217022bd855b297cef07fb9eac5dfa1f788b220b49b3bd"
|
||||
dependencies = [
|
||||
"either",
|
||||
"rayon-core",
|
||||
|
@ -459,58 +310,27 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "rayon-core"
|
||||
version = "1.11.0"
|
||||
version = "1.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d"
|
||||
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
|
||||
dependencies = [
|
||||
"crossbeam-channel",
|
||||
"crossbeam-deque",
|
||||
"crossbeam-utils",
|
||||
"num_cpus",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.37.23"
|
||||
version = "0.38.31"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4d69718bf81c6127a49dc64e44a742e8bb9213c0ff8869a22c308f84c1d4ab06"
|
||||
checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"errno",
|
||||
"io-lifetimes",
|
||||
"libc",
|
||||
"linux-raw-sys 0.3.8",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac5ffa1efe7548069688cd7028f32591853cd7b5b756d41bcffd2353e4fc75b4"
|
||||
dependencies = [
|
||||
"bitflags 2.3.3",
|
||||
"bitflags",
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys 0.4.3",
|
||||
"windows-sys",
|
||||
"linux-raw-sys",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
|
||||
|
||||
[[package]]
|
||||
name = "shared_child"
|
||||
version = "1.0.0"
|
||||
|
@ -523,21 +343,15 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.10.0"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
||||
|
||||
[[package]]
|
||||
name = "subtle"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
|
||||
checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.23"
|
||||
version = "2.0.52"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737"
|
||||
checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
@ -546,39 +360,31 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tempfile"
|
||||
version = "3.6.0"
|
||||
version = "3.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "31c0432476357e58790aaa47a8efb0c5138f137343f3b5f23bd36a27e3b0a6d6"
|
||||
checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"cfg-if",
|
||||
"fastrand",
|
||||
"redox_syscall",
|
||||
"rustix 0.37.23",
|
||||
"windows-sys",
|
||||
"rustix",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "terminal_size"
|
||||
version = "0.2.6"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e6bf6f19e9f8ed8d4048dc22981458ebcf406d67e94cd422e5ecd73d63b3237"
|
||||
checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7"
|
||||
dependencies = [
|
||||
"rustix 0.37.23",
|
||||
"windows-sys",
|
||||
"rustix",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typenum"
|
||||
version = "1.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.10"
|
||||
version = "1.0.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73"
|
||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
||||
|
||||
[[package]]
|
||||
name = "utf8parse"
|
||||
|
@ -586,17 +392,11 @@ version = "0.2.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
|
||||
|
||||
[[package]]
|
||||
name = "version_check"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
|
||||
|
||||
[[package]]
|
||||
name = "wild"
|
||||
version = "2.1.0"
|
||||
version = "2.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05b116685a6be0c52f5a103334cbff26db643826c7b3735fc0a3ba9871310a74"
|
||||
checksum = "a3131afc8c575281e1e80f36ed6a092aa502c08b18ed7524e86fbbb12bb410e1"
|
||||
dependencies = [
|
||||
"glob",
|
||||
]
|
||||
|
@ -629,62 +429,128 @@ version = "0.48.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
|
||||
dependencies = [
|
||||
"windows-targets",
|
||||
"windows-targets 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.52.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
|
||||
dependencies = [
|
||||
"windows-targets 0.52.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.48.1"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f"
|
||||
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm",
|
||||
"windows_aarch64_msvc",
|
||||
"windows_i686_gnu",
|
||||
"windows_i686_msvc",
|
||||
"windows_x86_64_gnu",
|
||||
"windows_x86_64_gnullvm",
|
||||
"windows_x86_64_msvc",
|
||||
"windows_aarch64_gnullvm 0.48.5",
|
||||
"windows_aarch64_msvc 0.48.5",
|
||||
"windows_i686_gnu 0.48.5",
|
||||
"windows_i686_msvc 0.48.5",
|
||||
"windows_x86_64_gnu 0.48.5",
|
||||
"windows_x86_64_gnullvm 0.48.5",
|
||||
"windows_x86_64_msvc 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.52.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm 0.52.4",
|
||||
"windows_aarch64_msvc 0.52.4",
|
||||
"windows_i686_gnu 0.52.4",
|
||||
"windows_i686_msvc 0.52.4",
|
||||
"windows_x86_64_gnu 0.52.4",
|
||||
"windows_x86_64_gnullvm 0.52.4",
|
||||
"windows_x86_64_msvc 0.52.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.48.0"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"
|
||||
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.52.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.48.0"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"
|
||||
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.52.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.48.0"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"
|
||||
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.52.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.48.0"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"
|
||||
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.52.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.48.0"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"
|
||||
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.52.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.48.0"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"
|
||||
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.52.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.48.0"
|
||||
version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
|
||||
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.52.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "b3sum"
|
||||
version = "1.4.1"
|
||||
version = "1.5.1"
|
||||
authors = ["Jack O'Connor <oconnor663@gmail.com>"]
|
||||
description = "a command line implementation of the BLAKE3 hash function"
|
||||
repository = "https://github.com/BLAKE3-team/BLAKE3"
|
||||
|
@ -15,10 +15,9 @@ pure = ["blake3/pure"]
|
|||
|
||||
[dependencies]
|
||||
anyhow = "1.0.25"
|
||||
blake3 = { version = "1", path = "..", features = ["rayon"] }
|
||||
blake3 = { version = "1", path = "..", features = ["mmap", "rayon"] }
|
||||
clap = { version = "4.0.8", features = ["derive", "wrap_help"] }
|
||||
hex = "0.4.0"
|
||||
memmap2 = "0.7.0"
|
||||
rayon = "1.2.1"
|
||||
wild = "2.0.3"
|
||||
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
../LICENSE
|
|
@ -163,125 +163,22 @@ impl Args {
|
|||
}
|
||||
}
|
||||
|
||||
enum Input {
|
||||
Mmap(io::Cursor<memmap2::Mmap>),
|
||||
File(File),
|
||||
Stdin,
|
||||
}
|
||||
|
||||
impl Input {
|
||||
// Open an input file, using mmap if appropriate. "-" means stdin. Note
|
||||
// that this convention applies both to command line arguments, and to
|
||||
// filepaths that appear in a checkfile.
|
||||
fn open(path: &Path, args: &Args) -> Result<Self> {
|
||||
if path == Path::new("-") {
|
||||
if args.keyed() {
|
||||
bail!("Cannot open `-` in keyed mode");
|
||||
}
|
||||
return Ok(Self::Stdin);
|
||||
fn hash_path(args: &Args, path: &Path) -> Result<blake3::OutputReader> {
|
||||
let mut hasher = args.base_hasher.clone();
|
||||
if path == Path::new("-") {
|
||||
if args.keyed() {
|
||||
bail!("Cannot open `-` in keyed mode");
|
||||
}
|
||||
let file = File::open(path)?;
|
||||
if !args.no_mmap() {
|
||||
if let Some(mmap) = maybe_memmap_file(&file)? {
|
||||
return Ok(Self::Mmap(io::Cursor::new(mmap)));
|
||||
}
|
||||
}
|
||||
Ok(Self::File(file))
|
||||
}
|
||||
|
||||
fn hash(&mut self, args: &Args) -> Result<blake3::OutputReader> {
|
||||
let mut hasher = args.base_hasher.clone();
|
||||
match self {
|
||||
// The fast path: If we mmapped the file successfully, hash using
|
||||
// multiple threads. This doesn't work on stdin, or on some files,
|
||||
// and it can also be disabled with --no-mmap.
|
||||
Self::Mmap(cursor) => {
|
||||
hasher.update_rayon(cursor.get_ref());
|
||||
}
|
||||
// The slower paths, for stdin or files we didn't/couldn't mmap.
|
||||
// This is currently all single-threaded. Doing multi-threaded
|
||||
// hashing without memory mapping is tricky, since all your worker
|
||||
// threads have to stop every time you refill the buffer, and that
|
||||
// ends up being a lot of overhead. To solve that, we need a more
|
||||
// complicated double-buffering strategy where a background thread
|
||||
// fills one buffer while the worker threads are hashing the other
|
||||
// one. We might implement that in the future, but since this is
|
||||
// the slow path anyway, it's not high priority.
|
||||
Self::File(file) => {
|
||||
copy_wide(file, &mut hasher)?;
|
||||
}
|
||||
Self::Stdin => {
|
||||
let stdin = io::stdin();
|
||||
let lock = stdin.lock();
|
||||
copy_wide(lock, &mut hasher)?;
|
||||
}
|
||||
}
|
||||
let mut output_reader = hasher.finalize_xof();
|
||||
output_reader.set_position(args.seek());
|
||||
Ok(output_reader)
|
||||
}
|
||||
}
|
||||
|
||||
impl Read for Input {
|
||||
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
||||
match self {
|
||||
Self::Mmap(cursor) => cursor.read(buf),
|
||||
Self::File(file) => file.read(buf),
|
||||
Self::Stdin => io::stdin().read(buf),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A 16 KiB buffer is enough to take advantage of all the SIMD instruction sets
|
||||
// that we support, but `std::io::copy` currently uses 8 KiB. Most platforms
|
||||
// can support at least 64 KiB, and there's some performance benefit to using
|
||||
// bigger reads, so that's what we use here.
|
||||
fn copy_wide(mut reader: impl Read, hasher: &mut blake3::Hasher) -> io::Result<u64> {
|
||||
let mut buffer = [0; 65536];
|
||||
let mut total = 0;
|
||||
loop {
|
||||
match reader.read(&mut buffer) {
|
||||
Ok(0) => return Ok(total),
|
||||
Ok(n) => {
|
||||
hasher.update(&buffer[..n]);
|
||||
total += n as u64;
|
||||
}
|
||||
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
|
||||
Err(e) => return Err(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Mmap a file, if it looks like a good idea. Return None in cases where we
|
||||
// know mmap will fail, or if the file is short enough that mmapping isn't
|
||||
// worth it. However, if we do try to mmap and it fails, return the error.
|
||||
fn maybe_memmap_file(file: &File) -> Result<Option<memmap2::Mmap>> {
|
||||
let metadata = file.metadata()?;
|
||||
let file_size = metadata.len();
|
||||
Ok(if !metadata.is_file() {
|
||||
// Not a real file.
|
||||
None
|
||||
} else if file_size > isize::max_value() as u64 {
|
||||
// Too long to safely map.
|
||||
// https://github.com/danburkert/memmap-rs/issues/69
|
||||
None
|
||||
} else if file_size == 0 {
|
||||
// Mapping an empty file currently fails.
|
||||
// https://github.com/danburkert/memmap-rs/issues/72
|
||||
None
|
||||
} else if file_size < 16 * 1024 {
|
||||
// Mapping small files is not worth it.
|
||||
None
|
||||
hasher.update_reader(io::stdin().lock())?;
|
||||
} else if args.no_mmap() {
|
||||
hasher.update_reader(File::open(path)?)?;
|
||||
} else {
|
||||
// Explicitly set the length of the memory map, so that filesystem
|
||||
// changes can't race to violate the invariants we just checked.
|
||||
let map = unsafe {
|
||||
memmap2::MmapOptions::new()
|
||||
.len(file_size as usize)
|
||||
.map(file)?
|
||||
};
|
||||
Some(map)
|
||||
})
|
||||
// The fast path: Try to mmap the file and hash it with multiple threads.
|
||||
hasher.update_mmap_rayon(path)?;
|
||||
}
|
||||
let mut output_reader = hasher.finalize_xof();
|
||||
output_reader.set_position(args.seek());
|
||||
Ok(output_reader)
|
||||
}
|
||||
|
||||
fn write_hex_output(mut output: blake3::OutputReader, args: &Args) -> Result<()> {
|
||||
|
@ -477,8 +374,7 @@ fn parse_check_line(mut line: &str) -> Result<ParsedCheckLine> {
|
|||
}
|
||||
|
||||
fn hash_one_input(path: &Path, args: &Args) -> Result<()> {
|
||||
let mut input = Input::open(path, args)?;
|
||||
let output = input.hash(args)?;
|
||||
let output = hash_path(args, path)?;
|
||||
if args.raw() {
|
||||
write_raw_output(output, args)?;
|
||||
return Ok(());
|
||||
|
@ -522,15 +418,13 @@ fn check_one_line(line: &str, args: &Args) -> bool {
|
|||
} else {
|
||||
file_string
|
||||
};
|
||||
let hash_result: Result<blake3::Hash> = Input::open(&file_path, args)
|
||||
.and_then(|mut input| input.hash(args))
|
||||
.map(|mut hash_output| {
|
||||
let found_hash: blake3::Hash;
|
||||
match hash_path(args, &file_path) {
|
||||
Ok(mut output) => {
|
||||
let mut found_hash_bytes = [0; blake3::OUT_LEN];
|
||||
hash_output.fill(&mut found_hash_bytes);
|
||||
found_hash_bytes.into()
|
||||
});
|
||||
let found_hash: blake3::Hash = match hash_result {
|
||||
Ok(hash) => hash,
|
||||
output.fill(&mut found_hash_bytes);
|
||||
found_hash = found_hash_bytes.into();
|
||||
}
|
||||
Err(e) => {
|
||||
println!("{}: FAILED ({})", file_string, e);
|
||||
return false;
|
||||
|
@ -549,8 +443,18 @@ fn check_one_line(line: &str, args: &Args) -> bool {
|
|||
}
|
||||
|
||||
fn check_one_checkfile(path: &Path, args: &Args, files_failed: &mut u64) -> Result<()> {
|
||||
let checkfile_input = Input::open(path, args)?;
|
||||
let mut bufreader = io::BufReader::new(checkfile_input);
|
||||
let mut file;
|
||||
let stdin;
|
||||
let mut stdin_lock;
|
||||
let mut bufreader: io::BufReader<&mut dyn Read>;
|
||||
if path == Path::new("-") {
|
||||
stdin = io::stdin();
|
||||
stdin_lock = stdin.lock();
|
||||
bufreader = io::BufReader::new(&mut stdin_lock);
|
||||
} else {
|
||||
file = File::open(path)?;
|
||||
bufreader = io::BufReader::new(&mut file);
|
||||
}
|
||||
let mut line = String::new();
|
||||
loop {
|
||||
line.clear();
|
||||
|
|
22
build.rs
22
build.rs
|
@ -60,6 +60,20 @@ fn is_armv7() -> bool {
|
|||
target_components()[0] == "armv7"
|
||||
}
|
||||
|
||||
fn endianness() -> String {
|
||||
let endianness = env::var("CARGO_CFG_TARGET_ENDIAN").unwrap();
|
||||
assert!(endianness == "little" || endianness == "big");
|
||||
endianness
|
||||
}
|
||||
|
||||
fn is_little_endian() -> bool {
|
||||
endianness() == "little"
|
||||
}
|
||||
|
||||
fn is_big_endian() -> bool {
|
||||
endianness() == "big"
|
||||
}
|
||||
|
||||
// Windows targets may be using the MSVC toolchain or the GNU toolchain. The
|
||||
// right compiler flags to use depend on the toolchain. (And we don't want to
|
||||
// use flag_if_supported, because we don't want features to be silently
|
||||
|
@ -253,7 +267,13 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
}
|
||||
}
|
||||
|
||||
if (is_arm() && is_neon()) || (!is_no_neon() && !is_pure() && is_aarch64()) {
|
||||
if is_neon() && is_big_endian() {
|
||||
panic!("The NEON implementation doesn't support big-endian ARM.")
|
||||
}
|
||||
|
||||
if (is_arm() && is_neon())
|
||||
|| (!is_no_neon() && !is_pure() && is_aarch64() && is_little_endian())
|
||||
{
|
||||
println!("cargo:rustc-cfg=blake3_neon");
|
||||
build_neon_c_intrinsics();
|
||||
}
|
||||
|
|
|
@ -1,7 +1,12 @@
|
|||
cmake_minimum_required(VERSION 3.9)
|
||||
cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
|
||||
|
||||
# respect C_EXTENSIONS OFF without explicitly setting C_STANDARD
|
||||
if (POLICY CMP0128)
|
||||
cmake_policy(SET CMP0128 NEW)
|
||||
endif()
|
||||
|
||||
project(libblake3
|
||||
VERSION 1.4.1
|
||||
VERSION 1.5.1
|
||||
DESCRIPTION "BLAKE3 C implementation"
|
||||
LANGUAGES C ASM
|
||||
)
|
||||
|
@ -9,8 +14,12 @@ project(libblake3
|
|||
include(FeatureSummary)
|
||||
include(GNUInstallDirs)
|
||||
|
||||
# architecture lists for which to enable assembly / SIMD sources
|
||||
set(BLAKE3_AMD64_NAMES amd64 AMD64 x86_64)
|
||||
set(BLAKE3_X86_NAMES i686 x86 X86)
|
||||
set(BLAKE3_ARMv8_NAMES aarch64 AArch64 arm64 ARM64 armv8 armv8a)
|
||||
# default SIMD compiler flag configuration (can be overriden by toolchains or CLI)
|
||||
if(CMAKE_C_COMPILER_ID STREQUAL "MSVC")
|
||||
if(MSVC)
|
||||
set(BLAKE3_CFLAGS_SSE2 "/arch:SSE2" CACHE STRING "the compiler flags to enable SSE2")
|
||||
# MSVC has no dedicated sse4.1 flag (see https://learn.microsoft.com/en-us/cpp/build/reference/arch-x86?view=msvc-170)
|
||||
set(BLAKE3_CFLAGS_SSE4.1 "/arch:AVX" CACHE STRING "the compiler flags to enable SSE4.1")
|
||||
|
@ -24,11 +33,13 @@ elseif(CMAKE_C_COMPILER_ID STREQUAL "GNU"
|
|||
set(BLAKE3_CFLAGS_SSE4.1 "-msse4.1" CACHE STRING "the compiler flags to enable SSE4.1")
|
||||
set(BLAKE3_CFLAGS_AVX2 "-mavx2" CACHE STRING "the compiler flags to enable AVX2")
|
||||
set(BLAKE3_CFLAGS_AVX512 "-mavx512f -mavx512vl" CACHE STRING "the compiler flags to enable AVX512")
|
||||
|
||||
if (CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_ARMv8_NAMES
|
||||
AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
# 32-bit ARMv8 needs NEON to be enabled explicitly
|
||||
set(BLAKE3_CFLAGS_NEON "-mfpu=neon" CACHE STRING "the compiler flags to enable NEON")
|
||||
endif()
|
||||
endif()
|
||||
# architecture lists for which to enable assembly / SIMD sources
|
||||
set(BLAKE3_AMD64_NAMES amd64 AMD64 x86_64)
|
||||
set(BLAKE3_X86_NAMES i686 x86 X86)
|
||||
set(BLAKE3_ARMv8_NAMES aarch64 AArch64 arm64 ARM64 armv8 armv8a)
|
||||
|
||||
# library target
|
||||
add_library(blake3
|
||||
|
@ -41,33 +52,47 @@ add_library(BLAKE3::blake3 ALIAS blake3)
|
|||
# library configuration
|
||||
set(BLAKE3_PKGCONFIG_CFLAGS)
|
||||
if (BUILD_SHARED_LIBS)
|
||||
target_compile_definitions(blake3
|
||||
target_compile_definitions(blake3
|
||||
PUBLIC BLAKE3_DLL
|
||||
PRIVATE BLAKE3_DLL_EXPORTS
|
||||
)
|
||||
list(APPEND BLAKE3_PKGCONFIG_CFLAGS -DBLAKE3_DLL)
|
||||
endif()
|
||||
target_include_directories(blake3 PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
|
||||
target_include_directories(blake3 PUBLIC
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
|
||||
)
|
||||
set_target_properties(blake3 PROPERTIES
|
||||
VERSION ${PROJECT_VERSION}
|
||||
SOVERSION 0
|
||||
C_VISIBILITY_PRESET hidden
|
||||
C_EXTENSIONS OFF
|
||||
)
|
||||
target_compile_features(blake3 PUBLIC c_std_99)
|
||||
# ensure C_EXTENSIONS OFF is respected without overriding CMAKE_C_STANDARD
|
||||
# which may be set by the user or toolchain file
|
||||
if (NOT POLICY CMP0128 AND NOT DEFINED CMAKE_C_STANDARD)
|
||||
set_target_properties(blake3 PROPERTIES C_STANDARD 99)
|
||||
endif()
|
||||
|
||||
# optional SIMD sources
|
||||
macro(BLAKE3_DISABLE_SIMD)
|
||||
set(BLAKE3_SIMD_AMD64_ASM OFF)
|
||||
set(BLAKE3_SIMD_X86_INTRINSICS OFF)
|
||||
set(BLAKE3_SIMD_NEON_INTRINSICS OFF)
|
||||
set_source_files_properties(blake3_dispatch.c PROPERTIES
|
||||
COMPILE_DEFINITIONS BLAKE3_USE_NEON=0;BLAKE3_NO_SSE2;BLAKE3_NO_SSE41;BLAKE3_NO_AVX2;BLAKE3_NO_AVX512
|
||||
target_compile_definitions(blake3 PRIVATE
|
||||
BLAKE3_USE_NEON=0
|
||||
BLAKE3_NO_SSE2
|
||||
BLAKE3_NO_SSE41
|
||||
BLAKE3_NO_AVX2
|
||||
BLAKE3_NO_AVX512
|
||||
)
|
||||
endmacro()
|
||||
|
||||
if(CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_AMD64_NAMES OR BLAKE3_USE_AMD64_ASM)
|
||||
set(BLAKE3_SIMD_AMD64_ASM ON)
|
||||
|
||||
if(CMAKE_C_COMPILER_ID STREQUAL "MSVC")
|
||||
if(MSVC)
|
||||
enable_language(ASM_MASM)
|
||||
target_sources(blake3 PRIVATE
|
||||
blake3_avx2_x86-64_windows_msvc.asm
|
||||
|
@ -99,7 +124,7 @@ if(CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_AMD64_NAMES OR BLAKE3_USE_AMD64_ASM)
|
|||
BLAKE3_DISABLE_SIMD()
|
||||
endif()
|
||||
|
||||
else()
|
||||
else()
|
||||
BLAKE3_DISABLE_SIMD()
|
||||
endif()
|
||||
|
||||
|
@ -121,17 +146,19 @@ elseif((CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_X86_NAMES OR BLAKE3_USE_X86_INTRIN
|
|||
set_source_files_properties(blake3_sse2.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_SSE2}")
|
||||
set_source_files_properties(blake3_sse41.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_SSE4.1}")
|
||||
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_ARMv8_NAMES
|
||||
OR ((ANDROID_ABI STREQUAL "armeabi-v7a"
|
||||
OR BLAKE3_USE_NEON_INTRINSICS)
|
||||
AND (DEFINED BLAKE3_CFLAGS_NEON
|
||||
OR CMAKE_SIZEOF_VOID_P EQUAL 8)))
|
||||
elseif((CMAKE_SYSTEM_PROCESSOR IN_LIST BLAKE3_ARMv8_NAMES
|
||||
OR ANDROID_ABI STREQUAL "armeabi-v7a"
|
||||
OR BLAKE3_USE_NEON_INTRINSICS)
|
||||
AND (DEFINED BLAKE3_CFLAGS_NEON
|
||||
OR CMAKE_SIZEOF_VOID_P EQUAL 8))
|
||||
set(BLAKE3_SIMD_NEON_INTRINSICS ON)
|
||||
|
||||
target_sources(blake3 PRIVATE
|
||||
blake3_neon.c
|
||||
)
|
||||
set_source_files_properties(blake3_dispatch.c PROPERTIES COMPILE_DEFINITIONS BLAKE3_USE_NEON=1)
|
||||
target_compile_definitions(blake3 PRIVATE
|
||||
BLAKE3_USE_NEON=1
|
||||
)
|
||||
|
||||
if (DEFINED BLAKE3_CFLAGS_NEON)
|
||||
set_source_files_properties(blake3_neon.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_NEON}")
|
||||
|
|
19
c/blake3.c
19
c/blake3.c
|
@ -341,21 +341,24 @@ INLINE void compress_subtree_to_parent_node(
|
|||
size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key,
|
||||
chunk_counter, flags, cv_array);
|
||||
assert(num_cvs <= MAX_SIMD_DEGREE_OR_2);
|
||||
|
||||
// If MAX_SIMD_DEGREE is greater than 2 and there's enough input,
|
||||
// The following loop never executes when MAX_SIMD_DEGREE_OR_2 is 2, because
|
||||
// as we just asserted, num_cvs will always be <=2 in that case. But GCC
|
||||
// (particularly GCC 8.5) can't tell that it never executes, and if NDEBUG is
|
||||
// set then it emits incorrect warnings here. We tried a few different
|
||||
// hacks to silence these, but in the end our hacks just produced different
|
||||
// warnings (see https://github.com/BLAKE3-team/BLAKE3/pull/380). Out of
|
||||
// desperation, we ifdef out this entire loop when we know it's not needed.
|
||||
#if MAX_SIMD_DEGREE_OR_2 > 2
|
||||
// If MAX_SIMD_DEGREE_OR_2 is greater than 2 and there's enough input,
|
||||
// compress_subtree_wide() returns more than 2 chaining values. Condense
|
||||
// them into 2 by forming parent nodes repeatedly.
|
||||
uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2];
|
||||
// The second half of this loop condition is always true, and we just
|
||||
// asserted it above. But GCC can't tell that it's always true, and if NDEBUG
|
||||
// is set on platforms where MAX_SIMD_DEGREE_OR_2 == 2, GCC emits spurious
|
||||
// warnings here. GCC 8.5 is particularly sensitive, so if you're changing
|
||||
// this code, test it against that version.
|
||||
while (num_cvs > 2 && num_cvs <= MAX_SIMD_DEGREE_OR_2) {
|
||||
while (num_cvs > 2) {
|
||||
num_cvs =
|
||||
compress_parents_parallel(cv_array, num_cvs, key, flags, out_array);
|
||||
memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN);
|
||||
}
|
||||
#endif
|
||||
memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
|
||||
}
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define BLAKE3_VERSION_STRING "1.4.1"
|
||||
#define BLAKE3_VERSION_STRING "1.5.1"
|
||||
#define BLAKE3_KEY_LEN 32
|
||||
#define BLAKE3_OUT_LEN 32
|
||||
#define BLAKE3_BLOCK_LEN 64
|
||||
|
|
|
@ -20,9 +20,9 @@ neon = []
|
|||
[dev-dependencies]
|
||||
arrayref = "0.3.5"
|
||||
arrayvec = { version = "0.7.0", default-features = false }
|
||||
page_size = "0.4.1"
|
||||
rand = "0.7.2"
|
||||
rand_chacha = "0.2.1"
|
||||
page_size = "0.6.0"
|
||||
rand = "0.8.5"
|
||||
rand_chacha = "0.3.1"
|
||||
reference_impl = { path = "../../reference_impl" }
|
||||
|
||||
[build-dependencies]
|
||||
|
|
|
@ -485,7 +485,7 @@ fn test_fuzz_hasher() {
|
|||
let mut total_input = 0;
|
||||
// For each test, write 3 inputs of random length.
|
||||
for _ in 0..3 {
|
||||
let input_len = rng.gen_range(0, INPUT_MAX + 1);
|
||||
let input_len = rng.gen_range(0..INPUT_MAX + 1);
|
||||
dbg!(input_len);
|
||||
let input = &input_buf[total_input..][..input_len];
|
||||
hasher.update(input);
|
||||
|
|
|
@ -4,6 +4,10 @@
|
|||
|
||||
#include "blake3_impl.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <Windows.h>
|
||||
#endif
|
||||
|
||||
#if defined(IS_X86)
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
|
@ -14,6 +18,32 @@
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(BLAKE3_ATOMICS)
|
||||
#if defined(__has_include)
|
||||
#if __has_include(<stdatomic.h>) && !defined(_MSC_VER)
|
||||
#define BLAKE3_ATOMICS 1
|
||||
#else
|
||||
#define BLAKE3_ATOMICS 0
|
||||
#endif /* __has_include(<stdatomic.h>) && !defined(_MSC_VER) */
|
||||
#else
|
||||
#define BLAKE3_ATOMICS 0
|
||||
#endif /* defined(__has_include) */
|
||||
#endif /* BLAKE3_ATOMICS */
|
||||
|
||||
#if BLAKE3_ATOMICS
|
||||
#define ATOMIC_INT _Atomic int
|
||||
#define ATOMIC_LOAD(x) x
|
||||
#define ATOMIC_STORE(x, y) x = y
|
||||
#elif defined(_MSC_VER)
|
||||
#define ATOMIC_INT LONG
|
||||
#define ATOMIC_LOAD(x) InterlockedOr(&x, 0)
|
||||
#define ATOMIC_STORE(x, y) InterlockedExchange(&x, y)
|
||||
#else
|
||||
#define ATOMIC_INT int
|
||||
#define ATOMIC_LOAD(x) x
|
||||
#define ATOMIC_STORE(x, y) x = y
|
||||
#endif
|
||||
|
||||
#define MAYBE_UNUSED(x) (void)((x))
|
||||
|
||||
#if defined(IS_X86)
|
||||
|
@ -76,7 +106,7 @@ enum cpu_feature {
|
|||
#if !defined(BLAKE3_TESTING)
|
||||
static /* Allow the variable to be controlled manually for testing */
|
||||
#endif
|
||||
enum cpu_feature g_cpu_features = UNDEFINED;
|
||||
ATOMIC_INT g_cpu_features = UNDEFINED;
|
||||
|
||||
#if !defined(BLAKE3_TESTING)
|
||||
static
|
||||
|
@ -84,14 +114,16 @@ static
|
|||
enum cpu_feature
|
||||
get_cpu_features(void) {
|
||||
|
||||
if (g_cpu_features != UNDEFINED) {
|
||||
return g_cpu_features;
|
||||
/* If TSAN detects a data race here, try compiling with -DBLAKE3_ATOMICS=1 */
|
||||
enum cpu_feature features = ATOMIC_LOAD(g_cpu_features);
|
||||
if (features != UNDEFINED) {
|
||||
return features;
|
||||
} else {
|
||||
#if defined(IS_X86)
|
||||
uint32_t regs[4] = {0};
|
||||
uint32_t *eax = ®s[0], *ebx = ®s[1], *ecx = ®s[2], *edx = ®s[3];
|
||||
(void)edx;
|
||||
enum cpu_feature features = 0;
|
||||
features = 0;
|
||||
cpuid(regs, 0);
|
||||
const int max_id = *eax;
|
||||
cpuid(regs, 1);
|
||||
|
@ -124,7 +156,7 @@ static
|
|||
}
|
||||
}
|
||||
}
|
||||
g_cpu_features = features;
|
||||
ATOMIC_STORE(g_cpu_features, features);
|
||||
return features;
|
||||
#else
|
||||
/* How to detect NEON? */
|
||||
|
|
|
@ -28,7 +28,7 @@ enum blake3_flags {
|
|||
#define INLINE static inline __attribute__((always_inline))
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) || defined(_M_X64)
|
||||
#if (defined(__x86_64__) || defined(_M_X64)) && !defined(_M_ARM64EC)
|
||||
#define IS_X86
|
||||
#define IS_X86_64
|
||||
#endif
|
||||
|
@ -38,7 +38,7 @@ enum blake3_flags {
|
|||
#define IS_X86_32
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
|
||||
#define IS_AARCH64
|
||||
#endif
|
||||
|
||||
|
@ -51,7 +51,11 @@ enum blake3_flags {
|
|||
#if !defined(BLAKE3_USE_NEON)
|
||||
// If BLAKE3_USE_NEON not manually set, autodetect based on AArch64ness
|
||||
#if defined(IS_AARCH64)
|
||||
#define BLAKE3_USE_NEON 1
|
||||
#if defined(__ARM_BIG_ENDIAN)
|
||||
#define BLAKE3_USE_NEON 0
|
||||
#else
|
||||
#define BLAKE3_USE_NEON 1
|
||||
#endif
|
||||
#else
|
||||
#define BLAKE3_USE_NEON 0
|
||||
#endif
|
||||
|
|
|
@ -10,14 +10,12 @@
|
|||
|
||||
INLINE uint32x4_t loadu_128(const uint8_t src[16]) {
|
||||
// vld1q_u32 has alignment requirements. Don't use it.
|
||||
uint32x4_t x;
|
||||
memcpy(&x, src, 16);
|
||||
return x;
|
||||
return vreinterpretq_u32_u8(vld1q_u8(src));
|
||||
}
|
||||
|
||||
INLINE void storeu_128(uint32x4_t src, uint8_t dest[16]) {
|
||||
// vst1q_u32 has alignment requirements. Don't use it.
|
||||
memcpy(dest, &src, 16);
|
||||
vst1q_u8(dest, vreinterpretq_u8_u32(src));
|
||||
}
|
||||
|
||||
INLINE uint32x4_t add_128(uint32x4_t a, uint32x4_t b) {
|
||||
|
|
|
@ -78,23 +78,14 @@ fn compress(
|
|||
block_len: u32,
|
||||
flags: u32,
|
||||
) -> [u32; 16] {
|
||||
let counter_low = counter as u32;
|
||||
let counter_high = (counter >> 32) as u32;
|
||||
#[rustfmt::skip]
|
||||
let mut state = [
|
||||
chaining_value[0],
|
||||
chaining_value[1],
|
||||
chaining_value[2],
|
||||
chaining_value[3],
|
||||
chaining_value[4],
|
||||
chaining_value[5],
|
||||
chaining_value[6],
|
||||
chaining_value[7],
|
||||
IV[0],
|
||||
IV[1],
|
||||
IV[2],
|
||||
IV[3],
|
||||
counter as u32,
|
||||
(counter >> 32) as u32,
|
||||
block_len,
|
||||
flags,
|
||||
chaining_value[0], chaining_value[1], chaining_value[2], chaining_value[3],
|
||||
chaining_value[4], chaining_value[5], chaining_value[6], chaining_value[7],
|
||||
IV[0], IV[1], IV[2], IV[3],
|
||||
counter_low, counter_high, block_len, flags,
|
||||
];
|
||||
let mut block = *block_words;
|
||||
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
[package]
|
||||
name = "blake3_guts"
|
||||
version = "0.0.0"
|
||||
authors = ["Jack O'Connor <oconnor663@gmail.com>", "Samuel Neves"]
|
||||
description = "low-level building blocks for the BLAKE3 hash function"
|
||||
repository = "https://github.com/BLAKE3-team/BLAKE3"
|
||||
license = "CC0-1.0 OR Apache-2.0"
|
||||
documentation = "https://docs.rs/blake3_guts"
|
||||
readme = "readme.md"
|
||||
edition = "2021"
|
||||
|
||||
[dev-dependencies]
|
||||
hex = "0.4.3"
|
||||
reference_impl = { path = "../../reference_impl" }
|
||||
|
||||
[features]
|
||||
default = ["std"]
|
||||
std = []
|
|
@ -0,0 +1,80 @@
|
|||
# The BLAKE3 Guts API
|
||||
|
||||
## Introduction
|
||||
|
||||
This [`blake3_guts`](https://crates.io/crates/blake3_guts) sub-crate contains
|
||||
low-level, high-performance, platform-specific implementations of the BLAKE3
|
||||
compression function. This API is complicated and unsafe, and this crate will
|
||||
never have a stable release. Most callers should instead use the
|
||||
[`blake3`](https://crates.io/crates/blake3) crate, which will eventually depend
|
||||
on this one internally.
|
||||
|
||||
The code you see here (as of January 2024) is an early stage of a large planned
|
||||
refactor. The motivation for this refactor is a couple of missing features in
|
||||
both the Rust and C implementations:
|
||||
|
||||
- The output side
|
||||
([`OutputReader`](https://docs.rs/blake3/latest/blake3/struct.OutputReader.html)
|
||||
in Rust) doesn't take advantage of the most important SIMD optimizations that
|
||||
compute multiple blocks in parallel. This blocks any project that wants to
|
||||
use the BLAKE3 XOF as a stream cipher
|
||||
([[1]](https://github.com/oconnor663/bessie),
|
||||
[[2]](https://github.com/oconnor663/blake3_aead)).
|
||||
- Low-level callers like [Bao](https://github.com/oconnor663/bao) that need
|
||||
interior nodes of the tree also don't get those SIMD optimizations. They have
|
||||
to use a slow, minimalistic, unstable, doc-hidden module [(also called
|
||||
`guts`)](https://github.com/BLAKE3-team/BLAKE3/blob/master/src/guts.rs).
|
||||
|
||||
The difficulty with adding those features is that they require changes to all
|
||||
of our optimized assembly and C intrinsics code. That's a couple dozen
|
||||
different files that are large, platform-specific, difficult to understand, and
|
||||
full of duplicated code. The higher-level Rust and C implementations of BLAKE3
|
||||
both depend on these files and will need to coordinate changes.
|
||||
|
||||
At the same time, it won't be long before we add support for more platforms:
|
||||
|
||||
- RISCV vector extensions
|
||||
- ARM SVE
|
||||
- WebAssembly SIMD
|
||||
|
||||
It's important to get this refactor done before new platforms make it even
|
||||
harder to do.
|
||||
|
||||
## The private guts API
|
||||
|
||||
This is the API that each platform reimplements, so we want it to be as simple
|
||||
as possible apart from the high-performance work it needs to do. It's
|
||||
completely `unsafe`, and inputs and outputs are raw pointers that are allowed
|
||||
to alias (this matters for `hash_parents`, see below).
|
||||
|
||||
- `degree`
|
||||
- `compress`
|
||||
- The single compression function, for short inputs and odd-length tails.
|
||||
- `hash_chunks`
|
||||
- `hash_parents`
|
||||
- `xof`
|
||||
- `xof_xor`
|
||||
- As `xof` but XOR'ing the result into the output buffer.
|
||||
- `universal_hash`
|
||||
- This is a new construction specifically to support
|
||||
[BLAKE3-AEAD](https://github.com/oconnor663/blake3_aead). Some
|
||||
implementations might just stub it out with portable code.
|
||||
|
||||
## The public guts API
|
||||
|
||||
This is the API that this crate exposes to callers, i.e. to the main `blake3`
|
||||
crate. It's a thin, portable layer on top of the private API above. The Rust
|
||||
version of this API is memory-safe.
|
||||
|
||||
- `degree`
|
||||
- `compress`
|
||||
- `hash_chunks`
|
||||
- `hash_parents`
|
||||
- This handles most levels of the tree, where we keep hashing SIMD_DEGREE
|
||||
parents at a time.
|
||||
- `reduce_parents`
|
||||
- This uses the same `hash_parents` private API, but it handles the top
|
||||
levels of the tree where we reduce in-place to the root parent node.
|
||||
- `xof`
|
||||
- `xof_xor`
|
||||
- `universal_hash`
|
|
@ -0,0 +1,1000 @@
|
|||
//! # The BLAKE3 Guts API
|
||||
//!
|
||||
//! See `readme.md`.
|
||||
//!
|
||||
//! The main entrypoint into this crate is [`DETECTED_IMPL`], which is a global [`Implementation`]
|
||||
//! that atomically initializes itself the first time you use it.
|
||||
//!
|
||||
//! # Example
|
||||
//!
|
||||
//! ```rust
|
||||
//! use blake3_guts::{TransposedVectors, DETECTED_IMPL, IV_BYTES, PARENT, ROOT};
|
||||
//!
|
||||
//! // Hash an input of exactly two chunks.
|
||||
//! let input = [0u8; 2048];
|
||||
//! let mut outputs = TransposedVectors::new();
|
||||
//! let (left_outputs, _) = DETECTED_IMPL.split_transposed_vectors(&mut outputs);
|
||||
//! DETECTED_IMPL.hash_chunks(
|
||||
//! &input,
|
||||
//! &IV_BYTES,
|
||||
//! 0, // counter
|
||||
//! 0, // flags
|
||||
//! left_outputs,
|
||||
//! );
|
||||
//! let root_node = outputs.extract_parent_node(0);
|
||||
//! let hash = DETECTED_IMPL.compress(
|
||||
//! &root_node,
|
||||
//! 64, // block_len
|
||||
//! &IV_BYTES,
|
||||
//! 0, // counter
|
||||
//! PARENT | ROOT,
|
||||
//! );
|
||||
//!
|
||||
//! // Compute the same hash using the reference implementation.
|
||||
//! let mut reference_hasher = reference_impl::Hasher::new();
|
||||
//! reference_hasher.update(&input);
|
||||
//! let mut expected_hash = [0u8; 32];
|
||||
//! reference_hasher.finalize(&mut expected_hash);
|
||||
//!
|
||||
//! assert_eq!(hash, expected_hash);
|
||||
//! ```
|
||||
|
||||
// Tests always require libstd.
|
||||
#![cfg_attr(all(not(feature = "std"), not(test)), no_std)]
|
||||
|
||||
use core::cmp;
|
||||
use core::marker::PhantomData;
|
||||
use core::mem;
|
||||
use core::ptr;
|
||||
use core::sync::atomic::{AtomicPtr, Ordering::Relaxed};
|
||||
|
||||
pub mod portable;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
pub const OUT_LEN: usize = 32;
|
||||
pub const BLOCK_LEN: usize = 64;
|
||||
pub const CHUNK_LEN: usize = 1024;
|
||||
pub const WORD_LEN: usize = 4;
|
||||
pub const UNIVERSAL_HASH_LEN: usize = 16;
|
||||
|
||||
pub const CHUNK_START: u32 = 1 << 0;
|
||||
pub const CHUNK_END: u32 = 1 << 1;
|
||||
pub const PARENT: u32 = 1 << 2;
|
||||
pub const ROOT: u32 = 1 << 3;
|
||||
pub const KEYED_HASH: u32 = 1 << 4;
|
||||
pub const DERIVE_KEY_CONTEXT: u32 = 1 << 5;
|
||||
pub const DERIVE_KEY_MATERIAL: u32 = 1 << 6;
|
||||
|
||||
pub const IV: CVWords = [
|
||||
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
|
||||
];
|
||||
pub const IV_BYTES: CVBytes = le_bytes_from_words_32(&IV);
|
||||
|
||||
pub const MSG_SCHEDULE: [[usize; 16]; 7] = [
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
|
||||
[2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8],
|
||||
[3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1],
|
||||
[10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6],
|
||||
[12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4],
|
||||
[9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7],
|
||||
[11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13],
|
||||
];
|
||||
|
||||
// never less than 2
|
||||
pub const MAX_SIMD_DEGREE: usize = 2;
|
||||
|
||||
pub type CVBytes = [u8; 32];
|
||||
pub type CVWords = [u32; 8];
|
||||
pub type BlockBytes = [u8; 64];
|
||||
pub type BlockWords = [u32; 16];
|
||||
|
||||
pub static DETECTED_IMPL: Implementation = Implementation::new(
|
||||
degree_init,
|
||||
compress_init,
|
||||
hash_chunks_init,
|
||||
hash_parents_init,
|
||||
xof_init,
|
||||
xof_xor_init,
|
||||
universal_hash_init,
|
||||
);
|
||||
|
||||
fn detect() -> Implementation {
|
||||
portable::implementation()
|
||||
}
|
||||
|
||||
fn init_detected_impl() {
|
||||
let detected = detect();
|
||||
|
||||
DETECTED_IMPL
|
||||
.degree_ptr
|
||||
.store(detected.degree_ptr.load(Relaxed), Relaxed);
|
||||
DETECTED_IMPL
|
||||
.compress_ptr
|
||||
.store(detected.compress_ptr.load(Relaxed), Relaxed);
|
||||
DETECTED_IMPL
|
||||
.hash_chunks_ptr
|
||||
.store(detected.hash_chunks_ptr.load(Relaxed), Relaxed);
|
||||
DETECTED_IMPL
|
||||
.hash_parents_ptr
|
||||
.store(detected.hash_parents_ptr.load(Relaxed), Relaxed);
|
||||
DETECTED_IMPL
|
||||
.xof_ptr
|
||||
.store(detected.xof_ptr.load(Relaxed), Relaxed);
|
||||
DETECTED_IMPL
|
||||
.xof_xor_ptr
|
||||
.store(detected.xof_xor_ptr.load(Relaxed), Relaxed);
|
||||
DETECTED_IMPL
|
||||
.universal_hash_ptr
|
||||
.store(detected.universal_hash_ptr.load(Relaxed), Relaxed);
|
||||
}
|
||||
|
||||
pub struct Implementation {
|
||||
degree_ptr: AtomicPtr<()>,
|
||||
compress_ptr: AtomicPtr<()>,
|
||||
hash_chunks_ptr: AtomicPtr<()>,
|
||||
hash_parents_ptr: AtomicPtr<()>,
|
||||
xof_ptr: AtomicPtr<()>,
|
||||
xof_xor_ptr: AtomicPtr<()>,
|
||||
universal_hash_ptr: AtomicPtr<()>,
|
||||
}
|
||||
|
||||
impl Implementation {
|
||||
const fn new(
|
||||
degree_fn: DegreeFn,
|
||||
compress_fn: CompressFn,
|
||||
hash_chunks_fn: HashChunksFn,
|
||||
hash_parents_fn: HashParentsFn,
|
||||
xof_fn: XofFn,
|
||||
xof_xor_fn: XofFn,
|
||||
universal_hash_fn: UniversalHashFn,
|
||||
) -> Self {
|
||||
Self {
|
||||
degree_ptr: AtomicPtr::new(degree_fn as *mut ()),
|
||||
compress_ptr: AtomicPtr::new(compress_fn as *mut ()),
|
||||
hash_chunks_ptr: AtomicPtr::new(hash_chunks_fn as *mut ()),
|
||||
hash_parents_ptr: AtomicPtr::new(hash_parents_fn as *mut ()),
|
||||
xof_ptr: AtomicPtr::new(xof_fn as *mut ()),
|
||||
xof_xor_ptr: AtomicPtr::new(xof_xor_fn as *mut ()),
|
||||
universal_hash_ptr: AtomicPtr::new(universal_hash_fn as *mut ()),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn degree_fn(&self) -> DegreeFn {
|
||||
unsafe { mem::transmute(self.degree_ptr.load(Relaxed)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn degree(&self) -> usize {
|
||||
let degree = unsafe { self.degree_fn()() };
|
||||
debug_assert!(degree >= 2);
|
||||
debug_assert!(degree <= MAX_SIMD_DEGREE);
|
||||
debug_assert_eq!(1, degree.count_ones(), "power of 2");
|
||||
degree
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn split_transposed_vectors<'v>(
|
||||
&self,
|
||||
vectors: &'v mut TransposedVectors,
|
||||
) -> (TransposedSplit<'v>, TransposedSplit<'v>) {
|
||||
unsafe { vectors.split(self.degree()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn compress_fn(&self) -> CompressFn {
|
||||
unsafe { mem::transmute(self.compress_ptr.load(Relaxed)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn compress(
|
||||
&self,
|
||||
block: &BlockBytes,
|
||||
block_len: u32,
|
||||
cv: &CVBytes,
|
||||
counter: u64,
|
||||
flags: u32,
|
||||
) -> CVBytes {
|
||||
let mut out = [0u8; 32];
|
||||
unsafe {
|
||||
self.compress_fn()(block, block_len, cv, counter, flags, &mut out);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
// The contract for HashChunksFn doesn't require the implementation to support single-chunk
|
||||
// inputs. Instead we handle that case here by calling compress in a loop.
|
||||
#[inline]
|
||||
fn hash_one_chunk(
|
||||
&self,
|
||||
mut input: &[u8],
|
||||
key: &CVBytes,
|
||||
counter: u64,
|
||||
mut flags: u32,
|
||||
output: TransposedSplit,
|
||||
) {
|
||||
debug_assert!(input.len() <= CHUNK_LEN);
|
||||
let mut cv = *key;
|
||||
flags |= CHUNK_START;
|
||||
while input.len() > BLOCK_LEN {
|
||||
cv = self.compress(
|
||||
input[..BLOCK_LEN].try_into().unwrap(),
|
||||
BLOCK_LEN as u32,
|
||||
&cv,
|
||||
counter,
|
||||
flags,
|
||||
);
|
||||
input = &input[BLOCK_LEN..];
|
||||
flags &= !CHUNK_START;
|
||||
}
|
||||
let mut final_block = [0u8; BLOCK_LEN];
|
||||
final_block[..input.len()].copy_from_slice(input);
|
||||
cv = self.compress(
|
||||
&final_block,
|
||||
input.len() as u32,
|
||||
&cv,
|
||||
counter,
|
||||
flags | CHUNK_END,
|
||||
);
|
||||
unsafe {
|
||||
write_transposed_cv(&words_from_le_bytes_32(&cv), output.ptr);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn hash_chunks_fn(&self) -> HashChunksFn {
|
||||
unsafe { mem::transmute(self.hash_chunks_ptr.load(Relaxed)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn hash_chunks(
|
||||
&self,
|
||||
input: &[u8],
|
||||
key: &CVBytes,
|
||||
counter: u64,
|
||||
flags: u32,
|
||||
transposed_output: TransposedSplit,
|
||||
) -> usize {
|
||||
debug_assert!(input.len() <= self.degree() * CHUNK_LEN);
|
||||
if input.len() <= CHUNK_LEN {
|
||||
// The underlying hash_chunks_fn isn't required to support this case. Instead we handle
|
||||
// it by calling compress_fn in a loop. But note that we still don't support root
|
||||
// finalization or the empty input here.
|
||||
self.hash_one_chunk(input, key, counter, flags, transposed_output);
|
||||
return 1;
|
||||
}
|
||||
// SAFETY: If the caller passes in more than MAX_SIMD_DEGREE * CHUNK_LEN bytes, silently
|
||||
// ignore the remainder. This makes it impossible to write out of bounds in a properly
|
||||
// constructed TransposedSplit.
|
||||
let len = cmp::min(input.len(), MAX_SIMD_DEGREE * CHUNK_LEN);
|
||||
unsafe {
|
||||
self.hash_chunks_fn()(
|
||||
input.as_ptr(),
|
||||
len,
|
||||
key,
|
||||
counter,
|
||||
flags,
|
||||
transposed_output.ptr,
|
||||
);
|
||||
}
|
||||
if input.len() % CHUNK_LEN == 0 {
|
||||
input.len() / CHUNK_LEN
|
||||
} else {
|
||||
(input.len() / CHUNK_LEN) + 1
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn hash_parents_fn(&self) -> HashParentsFn {
|
||||
unsafe { mem::transmute(self.hash_parents_ptr.load(Relaxed)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn hash_parents(
|
||||
&self,
|
||||
transposed_input: &TransposedVectors,
|
||||
mut num_cvs: usize,
|
||||
key: &CVBytes,
|
||||
flags: u32,
|
||||
transposed_output: TransposedSplit,
|
||||
) -> usize {
|
||||
debug_assert!(num_cvs <= 2 * MAX_SIMD_DEGREE);
|
||||
// SAFETY: Cap num_cvs at 2 * MAX_SIMD_DEGREE, to guarantee no out-of-bounds accesses.
|
||||
num_cvs = cmp::min(num_cvs, 2 * MAX_SIMD_DEGREE);
|
||||
let mut odd_cv = [0u32; 8];
|
||||
if num_cvs % 2 == 1 {
|
||||
unsafe {
|
||||
odd_cv = read_transposed_cv(transposed_input.as_ptr().add(num_cvs - 1));
|
||||
}
|
||||
}
|
||||
let num_parents = num_cvs / 2;
|
||||
unsafe {
|
||||
self.hash_parents_fn()(
|
||||
transposed_input.as_ptr(),
|
||||
num_parents,
|
||||
key,
|
||||
flags | PARENT,
|
||||
transposed_output.ptr,
|
||||
);
|
||||
}
|
||||
if num_cvs % 2 == 1 {
|
||||
unsafe {
|
||||
write_transposed_cv(&odd_cv, transposed_output.ptr.add(num_parents));
|
||||
}
|
||||
num_parents + 1
|
||||
} else {
|
||||
num_parents
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn reduce_parents(
|
||||
&self,
|
||||
transposed_in_out: &mut TransposedVectors,
|
||||
mut num_cvs: usize,
|
||||
key: &CVBytes,
|
||||
flags: u32,
|
||||
) -> usize {
|
||||
debug_assert!(num_cvs <= 2 * MAX_SIMD_DEGREE);
|
||||
// SAFETY: Cap num_cvs at 2 * MAX_SIMD_DEGREE, to guarantee no out-of-bounds accesses.
|
||||
num_cvs = cmp::min(num_cvs, 2 * MAX_SIMD_DEGREE);
|
||||
let in_out_ptr = transposed_in_out.as_mut_ptr();
|
||||
let mut odd_cv = [0u32; 8];
|
||||
if num_cvs % 2 == 1 {
|
||||
unsafe {
|
||||
odd_cv = read_transposed_cv(in_out_ptr.add(num_cvs - 1));
|
||||
}
|
||||
}
|
||||
let num_parents = num_cvs / 2;
|
||||
unsafe {
|
||||
self.hash_parents_fn()(in_out_ptr, num_parents, key, flags | PARENT, in_out_ptr);
|
||||
}
|
||||
if num_cvs % 2 == 1 {
|
||||
unsafe {
|
||||
write_transposed_cv(&odd_cv, in_out_ptr.add(num_parents));
|
||||
}
|
||||
num_parents + 1
|
||||
} else {
|
||||
num_parents
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn xof_fn(&self) -> XofFn {
|
||||
unsafe { mem::transmute(self.xof_ptr.load(Relaxed)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn xof(
|
||||
&self,
|
||||
block: &BlockBytes,
|
||||
block_len: u32,
|
||||
cv: &CVBytes,
|
||||
mut counter: u64,
|
||||
flags: u32,
|
||||
mut out: &mut [u8],
|
||||
) {
|
||||
let degree = self.degree();
|
||||
let simd_len = degree * BLOCK_LEN;
|
||||
while !out.is_empty() {
|
||||
let take = cmp::min(simd_len, out.len());
|
||||
unsafe {
|
||||
self.xof_fn()(
|
||||
block,
|
||||
block_len,
|
||||
cv,
|
||||
counter,
|
||||
flags | ROOT,
|
||||
out.as_mut_ptr(),
|
||||
take,
|
||||
);
|
||||
}
|
||||
out = &mut out[take..];
|
||||
counter += degree as u64;
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn xof_xor_fn(&self) -> XofFn {
|
||||
unsafe { mem::transmute(self.xof_xor_ptr.load(Relaxed)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn xof_xor(
|
||||
&self,
|
||||
block: &BlockBytes,
|
||||
block_len: u32,
|
||||
cv: &CVBytes,
|
||||
mut counter: u64,
|
||||
flags: u32,
|
||||
mut out: &mut [u8],
|
||||
) {
|
||||
let degree = self.degree();
|
||||
let simd_len = degree * BLOCK_LEN;
|
||||
while !out.is_empty() {
|
||||
let take = cmp::min(simd_len, out.len());
|
||||
unsafe {
|
||||
self.xof_xor_fn()(
|
||||
block,
|
||||
block_len,
|
||||
cv,
|
||||
counter,
|
||||
flags | ROOT,
|
||||
out.as_mut_ptr(),
|
||||
take,
|
||||
);
|
||||
}
|
||||
out = &mut out[take..];
|
||||
counter += degree as u64;
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn universal_hash_fn(&self) -> UniversalHashFn {
|
||||
unsafe { mem::transmute(self.universal_hash_ptr.load(Relaxed)) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn universal_hash(&self, mut input: &[u8], key: &CVBytes, mut counter: u64) -> [u8; 16] {
|
||||
let degree = self.degree();
|
||||
let simd_len = degree * BLOCK_LEN;
|
||||
let mut ret = [0u8; 16];
|
||||
while !input.is_empty() {
|
||||
let take = cmp::min(simd_len, input.len());
|
||||
let mut output = [0u8; 16];
|
||||
unsafe {
|
||||
self.universal_hash_fn()(input.as_ptr(), take, key, counter, &mut output);
|
||||
}
|
||||
input = &input[take..];
|
||||
counter += degree as u64;
|
||||
for byte_index in 0..16 {
|
||||
ret[byte_index] ^= output[byte_index];
|
||||
}
|
||||
}
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for Implementation {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
degree_ptr: AtomicPtr::new(self.degree_ptr.load(Relaxed)),
|
||||
compress_ptr: AtomicPtr::new(self.compress_ptr.load(Relaxed)),
|
||||
hash_chunks_ptr: AtomicPtr::new(self.hash_chunks_ptr.load(Relaxed)),
|
||||
hash_parents_ptr: AtomicPtr::new(self.hash_parents_ptr.load(Relaxed)),
|
||||
xof_ptr: AtomicPtr::new(self.xof_ptr.load(Relaxed)),
|
||||
xof_xor_ptr: AtomicPtr::new(self.xof_xor_ptr.load(Relaxed)),
|
||||
universal_hash_ptr: AtomicPtr::new(self.universal_hash_ptr.load(Relaxed)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// never less than 2
|
||||
type DegreeFn = unsafe extern "C" fn() -> usize;
|
||||
|
||||
unsafe extern "C" fn degree_init() -> usize {
|
||||
init_detected_impl();
|
||||
DETECTED_IMPL.degree_fn()()
|
||||
}
|
||||
|
||||
type CompressFn = unsafe extern "C" fn(
|
||||
block: *const BlockBytes, // zero padded to 64 bytes
|
||||
block_len: u32,
|
||||
cv: *const CVBytes,
|
||||
counter: u64,
|
||||
flags: u32,
|
||||
out: *mut CVBytes, // may overlap the input
|
||||
);
|
||||
|
||||
unsafe extern "C" fn compress_init(
|
||||
block: *const BlockBytes,
|
||||
block_len: u32,
|
||||
cv: *const CVBytes,
|
||||
counter: u64,
|
||||
flags: u32,
|
||||
out: *mut CVBytes,
|
||||
) {
|
||||
init_detected_impl();
|
||||
DETECTED_IMPL.compress_fn()(block, block_len, cv, counter, flags, out);
|
||||
}
|
||||
|
||||
type CompressXofFn = unsafe extern "C" fn(
|
||||
block: *const BlockBytes, // zero padded to 64 bytes
|
||||
block_len: u32,
|
||||
cv: *const CVBytes,
|
||||
counter: u64,
|
||||
flags: u32,
|
||||
out: *mut BlockBytes, // may overlap the input
|
||||
);
|
||||
|
||||
type HashChunksFn = unsafe extern "C" fn(
|
||||
input: *const u8,
|
||||
input_len: usize,
|
||||
key: *const CVBytes,
|
||||
counter: u64,
|
||||
flags: u32,
|
||||
transposed_output: *mut u32,
|
||||
);
|
||||
|
||||
unsafe extern "C" fn hash_chunks_init(
|
||||
input: *const u8,
|
||||
input_len: usize,
|
||||
key: *const CVBytes,
|
||||
counter: u64,
|
||||
flags: u32,
|
||||
transposed_output: *mut u32,
|
||||
) {
|
||||
init_detected_impl();
|
||||
DETECTED_IMPL.hash_chunks_fn()(input, input_len, key, counter, flags, transposed_output);
|
||||
}
|
||||
|
||||
type HashParentsFn = unsafe extern "C" fn(
|
||||
transposed_input: *const u32,
|
||||
num_parents: usize,
|
||||
key: *const CVBytes,
|
||||
flags: u32,
|
||||
transposed_output: *mut u32, // may overlap the input
|
||||
);
|
||||
|
||||
unsafe extern "C" fn hash_parents_init(
|
||||
transposed_input: *const u32,
|
||||
num_parents: usize,
|
||||
key: *const CVBytes,
|
||||
flags: u32,
|
||||
transposed_output: *mut u32,
|
||||
) {
|
||||
init_detected_impl();
|
||||
DETECTED_IMPL.hash_parents_fn()(transposed_input, num_parents, key, flags, transposed_output);
|
||||
}
|
||||
|
||||
// This signature covers both xof() and xof_xor().
|
||||
type XofFn = unsafe extern "C" fn(
|
||||
block: *const BlockBytes, // zero padded to 64 bytes
|
||||
block_len: u32,
|
||||
cv: *const CVBytes,
|
||||
counter: u64,
|
||||
flags: u32,
|
||||
out: *mut u8,
|
||||
out_len: usize,
|
||||
);
|
||||
|
||||
unsafe extern "C" fn xof_init(
|
||||
block: *const BlockBytes,
|
||||
block_len: u32,
|
||||
cv: *const CVBytes,
|
||||
counter: u64,
|
||||
flags: u32,
|
||||
out: *mut u8,
|
||||
out_len: usize,
|
||||
) {
|
||||
init_detected_impl();
|
||||
DETECTED_IMPL.xof_fn()(block, block_len, cv, counter, flags, out, out_len);
|
||||
}
|
||||
|
||||
unsafe extern "C" fn xof_xor_init(
|
||||
block: *const BlockBytes,
|
||||
block_len: u32,
|
||||
cv: *const CVBytes,
|
||||
counter: u64,
|
||||
flags: u32,
|
||||
out: *mut u8,
|
||||
out_len: usize,
|
||||
) {
|
||||
init_detected_impl();
|
||||
DETECTED_IMPL.xof_xor_fn()(block, block_len, cv, counter, flags, out, out_len);
|
||||
}
|
||||
|
||||
type UniversalHashFn = unsafe extern "C" fn(
|
||||
input: *const u8,
|
||||
input_len: usize,
|
||||
key: *const CVBytes,
|
||||
counter: u64,
|
||||
out: *mut [u8; 16],
|
||||
);
|
||||
|
||||
unsafe extern "C" fn universal_hash_init(
|
||||
input: *const u8,
|
||||
input_len: usize,
|
||||
key: *const CVBytes,
|
||||
counter: u64,
|
||||
out: *mut [u8; 16],
|
||||
) {
|
||||
init_detected_impl();
|
||||
DETECTED_IMPL.universal_hash_fn()(input, input_len, key, counter, out);
|
||||
}
|
||||
|
||||
// The implicit degree of this implementation is MAX_SIMD_DEGREE.
|
||||
#[inline(always)]
|
||||
unsafe fn hash_chunks_using_compress(
|
||||
compress: CompressFn,
|
||||
mut input: *const u8,
|
||||
mut input_len: usize,
|
||||
key: *const CVBytes,
|
||||
mut counter: u64,
|
||||
flags: u32,
|
||||
mut transposed_output: *mut u32,
|
||||
) {
|
||||
debug_assert!(input_len > 0);
|
||||
debug_assert!(input_len <= MAX_SIMD_DEGREE * CHUNK_LEN);
|
||||
input_len = cmp::min(input_len, MAX_SIMD_DEGREE * CHUNK_LEN);
|
||||
while input_len > 0 {
|
||||
let mut chunk_len = cmp::min(input_len, CHUNK_LEN);
|
||||
input_len -= chunk_len;
|
||||
// We only use 8 words of the CV, but compress returns 16.
|
||||
let mut cv = *key;
|
||||
let cv_ptr: *mut CVBytes = &mut cv;
|
||||
let mut chunk_flags = flags | CHUNK_START;
|
||||
while chunk_len > BLOCK_LEN {
|
||||
compress(
|
||||
input as *const BlockBytes,
|
||||
BLOCK_LEN as u32,
|
||||
cv_ptr,
|
||||
counter,
|
||||
chunk_flags,
|
||||
cv_ptr,
|
||||
);
|
||||
input = input.add(BLOCK_LEN);
|
||||
chunk_len -= BLOCK_LEN;
|
||||
chunk_flags &= !CHUNK_START;
|
||||
}
|
||||
let mut last_block = [0u8; BLOCK_LEN];
|
||||
ptr::copy_nonoverlapping(input, last_block.as_mut_ptr(), chunk_len);
|
||||
input = input.add(chunk_len);
|
||||
compress(
|
||||
&last_block,
|
||||
chunk_len as u32,
|
||||
cv_ptr,
|
||||
counter,
|
||||
chunk_flags | CHUNK_END,
|
||||
cv_ptr,
|
||||
);
|
||||
let cv_words = words_from_le_bytes_32(&cv);
|
||||
for word_index in 0..8 {
|
||||
transposed_output
|
||||
.add(word_index * TRANSPOSED_STRIDE)
|
||||
.write(cv_words[word_index]);
|
||||
}
|
||||
transposed_output = transposed_output.add(1);
|
||||
counter += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// The implicit degree of this implementation is MAX_SIMD_DEGREE.
|
||||
#[inline(always)]
|
||||
unsafe fn hash_parents_using_compress(
|
||||
compress: CompressFn,
|
||||
mut transposed_input: *const u32,
|
||||
mut num_parents: usize,
|
||||
key: *const CVBytes,
|
||||
flags: u32,
|
||||
mut transposed_output: *mut u32, // may overlap the input
|
||||
) {
|
||||
debug_assert!(num_parents > 0);
|
||||
debug_assert!(num_parents <= MAX_SIMD_DEGREE);
|
||||
while num_parents > 0 {
|
||||
let mut block_bytes = [0u8; 64];
|
||||
for word_index in 0..8 {
|
||||
let left_child_word = transposed_input.add(word_index * TRANSPOSED_STRIDE).read();
|
||||
block_bytes[WORD_LEN * word_index..][..WORD_LEN]
|
||||
.copy_from_slice(&left_child_word.to_le_bytes());
|
||||
let right_child_word = transposed_input
|
||||
.add(word_index * TRANSPOSED_STRIDE + 1)
|
||||
.read();
|
||||
block_bytes[WORD_LEN * (word_index + 8)..][..WORD_LEN]
|
||||
.copy_from_slice(&right_child_word.to_le_bytes());
|
||||
}
|
||||
let mut cv = [0u8; 32];
|
||||
compress(&block_bytes, BLOCK_LEN as u32, key, 0, flags, &mut cv);
|
||||
let cv_words = words_from_le_bytes_32(&cv);
|
||||
for word_index in 0..8 {
|
||||
transposed_output
|
||||
.add(word_index * TRANSPOSED_STRIDE)
|
||||
.write(cv_words[word_index]);
|
||||
}
|
||||
transposed_input = transposed_input.add(2);
|
||||
transposed_output = transposed_output.add(1);
|
||||
num_parents -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
unsafe fn xof_using_compress_xof(
|
||||
compress_xof: CompressXofFn,
|
||||
block: *const BlockBytes,
|
||||
block_len: u32,
|
||||
cv: *const CVBytes,
|
||||
mut counter: u64,
|
||||
flags: u32,
|
||||
mut out: *mut u8,
|
||||
mut out_len: usize,
|
||||
) {
|
||||
debug_assert!(out_len <= MAX_SIMD_DEGREE * BLOCK_LEN);
|
||||
while out_len > 0 {
|
||||
let mut block_output = [0u8; 64];
|
||||
compress_xof(block, block_len, cv, counter, flags, &mut block_output);
|
||||
let take = cmp::min(out_len, BLOCK_LEN);
|
||||
ptr::copy_nonoverlapping(block_output.as_ptr(), out, take);
|
||||
out = out.add(take);
|
||||
out_len -= take;
|
||||
counter += 1;
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
unsafe fn xof_xor_using_compress_xof(
|
||||
compress_xof: CompressXofFn,
|
||||
block: *const BlockBytes,
|
||||
block_len: u32,
|
||||
cv: *const CVBytes,
|
||||
mut counter: u64,
|
||||
flags: u32,
|
||||
mut out: *mut u8,
|
||||
mut out_len: usize,
|
||||
) {
|
||||
debug_assert!(out_len <= MAX_SIMD_DEGREE * BLOCK_LEN);
|
||||
while out_len > 0 {
|
||||
let mut block_output = [0u8; 64];
|
||||
compress_xof(block, block_len, cv, counter, flags, &mut block_output);
|
||||
let take = cmp::min(out_len, BLOCK_LEN);
|
||||
for i in 0..take {
|
||||
*out.add(i) ^= block_output[i];
|
||||
}
|
||||
out = out.add(take);
|
||||
out_len -= take;
|
||||
counter += 1;
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
unsafe fn universal_hash_using_compress(
|
||||
compress: CompressFn,
|
||||
mut input: *const u8,
|
||||
mut input_len: usize,
|
||||
key: *const CVBytes,
|
||||
mut counter: u64,
|
||||
out: *mut [u8; 16],
|
||||
) {
|
||||
let flags = KEYED_HASH | CHUNK_START | CHUNK_END | ROOT;
|
||||
let mut result = [0u8; 16];
|
||||
while input_len > 0 {
|
||||
let block_len = cmp::min(input_len, BLOCK_LEN);
|
||||
let mut block = [0u8; BLOCK_LEN];
|
||||
ptr::copy_nonoverlapping(input, block.as_mut_ptr(), block_len);
|
||||
let mut block_output = [0u8; 32];
|
||||
compress(
|
||||
&block,
|
||||
block_len as u32,
|
||||
key,
|
||||
counter,
|
||||
flags,
|
||||
&mut block_output,
|
||||
);
|
||||
for i in 0..16 {
|
||||
result[i] ^= block_output[i];
|
||||
}
|
||||
input = input.add(block_len);
|
||||
input_len -= block_len;
|
||||
counter += 1;
|
||||
}
|
||||
*out = result;
|
||||
}
|
||||
|
||||
// this is in units of *words*, for pointer operations on *const/*mut u32
|
||||
const TRANSPOSED_STRIDE: usize = 2 * MAX_SIMD_DEGREE;
|
||||
|
||||
#[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), repr(C, align(64)))]
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct TransposedVectors([[u32; 2 * MAX_SIMD_DEGREE]; 8]);
|
||||
|
||||
impl TransposedVectors {
|
||||
pub fn new() -> Self {
|
||||
Self([[0; 2 * MAX_SIMD_DEGREE]; 8])
|
||||
}
|
||||
|
||||
pub fn extract_cv(&self, cv_index: usize) -> CVBytes {
|
||||
let mut words = [0u32; 8];
|
||||
for word_index in 0..8 {
|
||||
words[word_index] = self.0[word_index][cv_index];
|
||||
}
|
||||
le_bytes_from_words_32(&words)
|
||||
}
|
||||
|
||||
pub fn extract_parent_node(&self, parent_index: usize) -> BlockBytes {
|
||||
let mut bytes = [0u8; 64];
|
||||
bytes[..32].copy_from_slice(&self.extract_cv(parent_index / 2));
|
||||
bytes[32..].copy_from_slice(&self.extract_cv(parent_index / 2 + 1));
|
||||
bytes
|
||||
}
|
||||
|
||||
fn as_ptr(&self) -> *const u32 {
|
||||
self.0[0].as_ptr()
|
||||
}
|
||||
|
||||
fn as_mut_ptr(&mut self) -> *mut u32 {
|
||||
self.0[0].as_mut_ptr()
|
||||
}
|
||||
|
||||
// SAFETY: This function is just pointer arithmetic, but callers assume that it's safe (not
|
||||
// necessarily correct) to write up to `degree` words to either side of the split, possibly
|
||||
// from different threads.
|
||||
unsafe fn split(&mut self, degree: usize) -> (TransposedSplit, TransposedSplit) {
|
||||
debug_assert!(degree > 0);
|
||||
debug_assert!(degree <= MAX_SIMD_DEGREE);
|
||||
debug_assert_eq!(degree.count_ones(), 1, "power of 2");
|
||||
let ptr = self.as_mut_ptr();
|
||||
let left = TransposedSplit {
|
||||
ptr,
|
||||
phantom_data: PhantomData,
|
||||
};
|
||||
let right = TransposedSplit {
|
||||
ptr: ptr.wrapping_add(degree),
|
||||
phantom_data: PhantomData,
|
||||
};
|
||||
(left, right)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TransposedSplit<'vectors> {
|
||||
ptr: *mut u32,
|
||||
phantom_data: PhantomData<&'vectors mut u32>,
|
||||
}
|
||||
|
||||
unsafe impl<'vectors> Send for TransposedSplit<'vectors> {}
|
||||
unsafe impl<'vectors> Sync for TransposedSplit<'vectors> {}
|
||||
|
||||
unsafe fn read_transposed_cv(src: *const u32) -> CVWords {
|
||||
let mut cv = [0u32; 8];
|
||||
for word_index in 0..8 {
|
||||
let offset_words = word_index * TRANSPOSED_STRIDE;
|
||||
cv[word_index] = src.add(offset_words).read();
|
||||
}
|
||||
cv
|
||||
}
|
||||
|
||||
unsafe fn write_transposed_cv(cv: &CVWords, dest: *mut u32) {
|
||||
for word_index in 0..8 {
|
||||
let offset_words = word_index * TRANSPOSED_STRIDE;
|
||||
dest.add(offset_words).write(cv[word_index]);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub const fn le_bytes_from_words_32(words: &CVWords) -> CVBytes {
|
||||
let mut bytes = [0u8; 32];
|
||||
// This loop is super verbose because currently that's what it takes to be const.
|
||||
let mut word_index = 0;
|
||||
while word_index < bytes.len() / WORD_LEN {
|
||||
let word_bytes = words[word_index].to_le_bytes();
|
||||
let mut byte_index = 0;
|
||||
while byte_index < WORD_LEN {
|
||||
bytes[word_index * WORD_LEN + byte_index] = word_bytes[byte_index];
|
||||
byte_index += 1;
|
||||
}
|
||||
word_index += 1;
|
||||
}
|
||||
bytes
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub const fn le_bytes_from_words_64(words: &BlockWords) -> BlockBytes {
|
||||
let mut bytes = [0u8; 64];
|
||||
// This loop is super verbose because currently that's what it takes to be const.
|
||||
let mut word_index = 0;
|
||||
while word_index < bytes.len() / WORD_LEN {
|
||||
let word_bytes = words[word_index].to_le_bytes();
|
||||
let mut byte_index = 0;
|
||||
while byte_index < WORD_LEN {
|
||||
bytes[word_index * WORD_LEN + byte_index] = word_bytes[byte_index];
|
||||
byte_index += 1;
|
||||
}
|
||||
word_index += 1;
|
||||
}
|
||||
bytes
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub const fn words_from_le_bytes_32(bytes: &CVBytes) -> CVWords {
|
||||
let mut words = [0u32; 8];
|
||||
// This loop is super verbose because currently that's what it takes to be const.
|
||||
let mut word_index = 0;
|
||||
while word_index < words.len() {
|
||||
let mut word_bytes = [0u8; WORD_LEN];
|
||||
let mut byte_index = 0;
|
||||
while byte_index < WORD_LEN {
|
||||
word_bytes[byte_index] = bytes[word_index * WORD_LEN + byte_index];
|
||||
byte_index += 1;
|
||||
}
|
||||
words[word_index] = u32::from_le_bytes(word_bytes);
|
||||
word_index += 1;
|
||||
}
|
||||
words
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub const fn words_from_le_bytes_64(bytes: &BlockBytes) -> BlockWords {
|
||||
let mut words = [0u32; 16];
|
||||
// This loop is super verbose because currently that's what it takes to be const.
|
||||
let mut word_index = 0;
|
||||
while word_index < words.len() {
|
||||
let mut word_bytes = [0u8; WORD_LEN];
|
||||
let mut byte_index = 0;
|
||||
while byte_index < WORD_LEN {
|
||||
word_bytes[byte_index] = bytes[word_index * WORD_LEN + byte_index];
|
||||
byte_index += 1;
|
||||
}
|
||||
words[word_index] = u32::from_le_bytes(word_bytes);
|
||||
word_index += 1;
|
||||
}
|
||||
words
|
||||
}
|
||||
|
||||
#[test]
fn test_byte_word_round_trips() {
    // 32-byte CV: bytes -> words -> bytes is the identity.
    let cv = *b"This is 32 LE bytes/eight words.";
    let cv_words = words_from_le_bytes_32(&cv);
    assert_eq!(le_bytes_from_words_32(&cv_words), cv);
    // 64-byte block: same round trip through the 16-word form.
    let block = *b"This is sixty-four little-endian bytes, or sixteen 32-bit words.";
    let block_words = words_from_le_bytes_64(&block);
    assert_eq!(le_bytes_from_words_64(&block_words), block);
}
|
||||
|
||||
// The largest power of two less than or equal to `n`, used for left_len()
// immediately below, and also directly in Hasher::update().
pub fn largest_power_of_two_leq(n: usize) -> usize {
    // Equivalent to ((n / 2) + 1).next_power_of_two(): for n > 0 this is
    // 1 << floor(log2(n)). The n == 0 case is nonsensical but defined to return 1,
    // matching the historical behavior.
    if n == 0 {
        1
    } else {
        1 << (usize::BITS - 1 - n.leading_zeros())
    }
}
|
||||
|
||||
#[test]
fn test_largest_power_of_two_leq() {
    let cases: &[(usize, usize)] = &[
        // The zero case is nonsensical, but it does work.
        (0, 1),
        (1, 1),
        (2, 2),
        (3, 2),
        (4, 4),
        (5, 4),
        (6, 4),
        (7, 4),
        (8, 8),
        // the largest possible usize
        (usize::MAX, (usize::MAX >> 1) + 1),
    ];
    for &(input, expected) in cases {
        assert_eq!(
            crate::largest_power_of_two_leq(input),
            expected,
            "wrong output for n={}",
            input
        );
    }
}
|
||||
|
||||
// Given some input larger than one chunk, return the number of bytes that
|
||||
// should go in the left subtree. This is the largest power-of-2 number of
|
||||
// chunks that leaves at least 1 byte for the right subtree.
|
||||
pub fn left_len(content_len: usize) -> usize {
|
||||
debug_assert!(content_len > CHUNK_LEN);
|
||||
// Subtract 1 to reserve at least one byte for the right side.
|
||||
let full_chunks = (content_len - 1) / CHUNK_LEN;
|
||||
largest_power_of_two_leq(full_chunks) * CHUNK_LEN
|
||||
}
|
||||
|
||||
#[test]
fn test_left_len() {
    // Inputs straddling each power-of-two chunk boundary, with the expected left split.
    let cases: &[(usize, usize)] = &[
        (CHUNK_LEN + 1, CHUNK_LEN),
        (2 * CHUNK_LEN - 1, CHUNK_LEN),
        (2 * CHUNK_LEN, CHUNK_LEN),
        (2 * CHUNK_LEN + 1, 2 * CHUNK_LEN),
        (4 * CHUNK_LEN - 1, 2 * CHUNK_LEN),
        (4 * CHUNK_LEN, 2 * CHUNK_LEN),
        (4 * CHUNK_LEN + 1, 4 * CHUNK_LEN),
    ];
    for &(input, expected) in cases {
        assert_eq!(left_len(input), expected);
    }
}
|
|
@ -0,0 +1,262 @@
|
|||
use crate::{
|
||||
le_bytes_from_words_32, le_bytes_from_words_64, words_from_le_bytes_32, words_from_le_bytes_64,
|
||||
BlockBytes, BlockWords, CVBytes, CVWords, Implementation, IV, MAX_SIMD_DEGREE, MSG_SCHEDULE,
|
||||
};
|
||||
|
||||
// The degree this implementation reports. NOTE(review): the portable code reports the full
// MAX_SIMD_DEGREE (presumably processing inputs serially inside the *_using_compress helpers) —
// confirm against those helpers' contracts.
const DEGREE: usize = MAX_SIMD_DEGREE;
|
||||
|
||||
// Function-table entry reporting how many chunks/parents this implementation accepts per call.
unsafe extern "C" fn degree() -> usize {
    DEGREE
}
|
||||
|
||||
// The BLAKE3 quarter-round ("G") function: mixes the two message words `x` and `y` into the four
// state words at indices a/b/c/d, using adds, XORs, and the rotation constants 16/12/8/7.
#[inline(always)]
fn g(state: &mut BlockWords, a: usize, b: usize, c: usize, d: usize, x: u32, y: u32) {
    state[a] = state[a].wrapping_add(state[b]).wrapping_add(x);
    state[d] = (state[d] ^ state[a]).rotate_right(16);
    state[c] = state[c].wrapping_add(state[d]);
    state[b] = (state[b] ^ state[c]).rotate_right(12);
    state[a] = state[a].wrapping_add(state[b]).wrapping_add(y);
    state[d] = (state[d] ^ state[a]).rotate_right(8);
    state[c] = state[c].wrapping_add(state[d]);
    state[b] = (state[b] ^ state[c]).rotate_right(7);
}
|
||||
|
||||
// One full round of the compression function: four column G-calls followed by four diagonal
// G-calls, with message words selected by MSG_SCHEDULE for this round.
#[inline(always)]
fn round(state: &mut [u32; 16], msg: &BlockWords, round: usize) {
    // Select the message schedule based on the round.
    let schedule = MSG_SCHEDULE[round];

    // Mix the columns.
    g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]);
    g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]);
    g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]);
    g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]);

    // Mix the diagonals.
    g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]);
    g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]);
    g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]);
    g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]);
}
|
||||
|
||||
#[inline(always)]
|
||||
fn compress_inner(
|
||||
block_words: &BlockWords,
|
||||
block_len: u32,
|
||||
cv_words: &CVWords,
|
||||
counter: u64,
|
||||
flags: u32,
|
||||
) -> [u32; 16] {
|
||||
let mut state = [
|
||||
cv_words[0],
|
||||
cv_words[1],
|
||||
cv_words[2],
|
||||
cv_words[3],
|
||||
cv_words[4],
|
||||
cv_words[5],
|
||||
cv_words[6],
|
||||
cv_words[7],
|
||||
IV[0],
|
||||
IV[1],
|
||||
IV[2],
|
||||
IV[3],
|
||||
counter as u32,
|
||||
(counter >> 32) as u32,
|
||||
block_len as u32,
|
||||
flags as u32,
|
||||
];
|
||||
for round_number in 0..7 {
|
||||
round(&mut state, &block_words, round_number);
|
||||
}
|
||||
state
|
||||
}
|
||||
|
||||
// C-ABI entry point: compress one block and write the resulting 32-byte chaining value to `out`.
//
// SAFETY: `block`, `cv`, and `out` must be valid for reads/writes of their pointee types.
pub(crate) unsafe extern "C" fn compress(
    block: *const BlockBytes,
    block_len: u32,
    cv: *const CVBytes,
    counter: u64,
    flags: u32,
    out: *mut CVBytes,
) {
    let block_words = words_from_le_bytes_64(&*block);
    let cv_words = words_from_le_bytes_32(&*cv);
    let mut state = compress_inner(&block_words, block_len, &cv_words, counter, flags);
    // Non-XOF finalization: fold the upper half of the state into the lower half.
    for word_index in 0..8 {
        state[word_index] ^= state[word_index + 8];
    }
    // `state[..8].try_into()` yields a `&[u32; 8]`; the unwrap can't fail for a length-8 slice.
    *out = le_bytes_from_words_32(state[..8].try_into().unwrap());
}
|
||||
|
||||
// C-ABI entry point: like `compress`, but produce the full 64-byte extended output block.
// The lower half is finalized as in `compress`; the upper half additionally XORs in the input CV.
//
// SAFETY: `block`, `cv`, and `out` must be valid for reads/writes of their pointee types.
pub(crate) unsafe extern "C" fn compress_xof(
    block: *const BlockBytes,
    block_len: u32,
    cv: *const CVBytes,
    counter: u64,
    flags: u32,
    out: *mut BlockBytes,
) {
    let block_words = words_from_le_bytes_64(&*block);
    let cv_words = words_from_le_bytes_32(&*cv);
    let mut state = compress_inner(&block_words, block_len, &cv_words, counter, flags);
    for word_index in 0..8 {
        state[word_index] ^= state[word_index + 8];
        state[word_index + 8] ^= cv_words[word_index];
    }
    *out = le_bytes_from_words_64(&state);
}
|
||||
|
||||
// Function-table entry: hash whole chunks of `input` into transposed CVs, delegating to the
// shared compress-based helper with this module's `compress`.
pub(crate) unsafe extern "C" fn hash_chunks(
    input: *const u8,
    input_len: usize,
    key: *const CVBytes,
    counter: u64,
    flags: u32,
    transposed_output: *mut u32,
) {
    crate::hash_chunks_using_compress(
        compress,
        input,
        input_len,
        key,
        counter,
        flags,
        transposed_output,
    )
}
|
||||
|
||||
// Function-table entry: combine pairs of transposed child CVs into parent CVs, delegating to the
// shared compress-based helper.
pub(crate) unsafe extern "C" fn hash_parents(
    transposed_input: *const u32,
    num_parents: usize,
    key: *const CVBytes,
    flags: u32,
    transposed_output: *mut u32, // may overlap the input
) {
    crate::hash_parents_using_compress(
        compress,
        transposed_input,
        num_parents,
        key,
        flags,
        transposed_output,
    )
}
|
||||
|
||||
// Function-table entry: write `out_len` bytes of extended output, delegating to the shared
// helper built on this module's `compress_xof`.
pub(crate) unsafe extern "C" fn xof(
    block: *const BlockBytes,
    block_len: u32,
    cv: *const CVBytes,
    counter: u64,
    flags: u32,
    out: *mut u8,
    out_len: usize,
) {
    crate::xof_using_compress_xof(
        compress_xof,
        block,
        block_len,
        cv,
        counter,
        flags,
        out,
        out_len,
    )
}
|
||||
|
||||
// Function-table entry: XOR `out_len` bytes of extended output into `out` (instead of
// overwriting it), delegating to the shared helper.
pub(crate) unsafe extern "C" fn xof_xor(
    block: *const BlockBytes,
    block_len: u32,
    cv: *const CVBytes,
    counter: u64,
    flags: u32,
    out: *mut u8,
    out_len: usize,
) {
    crate::xof_xor_using_compress_xof(
        compress_xof,
        block,
        block_len,
        cv,
        counter,
        flags,
        out,
        out_len,
    )
}
|
||||
|
||||
// Function-table entry: compute the 16-byte universal hash of `input`, delegating to the shared
// compress-based helper.
pub(crate) unsafe extern "C" fn universal_hash(
    input: *const u8,
    input_len: usize,
    key: *const CVBytes,
    counter: u64,
    out: *mut [u8; 16],
) {
    crate::universal_hash_using_compress(compress, input, input_len, key, counter, out)
}
|
||||
|
||||
// Constructs an `Implementation` backed entirely by the portable (non-SIMD) functions in this
// module.
pub fn implementation() -> Implementation {
    Implementation::new(
        degree,
        compress,
        hash_chunks,
        hash_parents,
        xof,
        xof_xor,
        universal_hash,
    )
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    // These all delegate to the shared test helpers in crate::test, running them against this
    // module's implementation. Comparing portable against portable is circular, but it still
    // exercises the function-table plumbing.

    // This is circular but do it anyway.
    #[test]
    fn test_compress_vs_portable() {
        crate::test::test_compress_vs_portable(&implementation());
    }

    #[test]
    fn test_compress_vs_reference() {
        crate::test::test_compress_vs_reference(&implementation());
    }

    // This is circular but do it anyway.
    #[test]
    fn test_hash_chunks_vs_portable() {
        crate::test::test_hash_chunks_vs_portable(&implementation());
    }

    // This is circular but do it anyway.
    #[test]
    fn test_hash_parents_vs_portable() {
        crate::test::test_hash_parents_vs_portable(&implementation());
    }

    #[test]
    fn test_chunks_and_parents_vs_reference() {
        crate::test::test_chunks_and_parents_vs_reference(&implementation());
    }

    // This is circular but do it anyway.
    #[test]
    fn test_xof_vs_portable() {
        crate::test::test_xof_vs_portable(&implementation());
    }

    #[test]
    fn test_xof_vs_reference() {
        crate::test::test_xof_vs_reference(&implementation());
    }

    // This is circular but do it anyway.
    #[test]
    fn test_universal_hash_vs_portable() {
        crate::test::test_universal_hash_vs_portable(&implementation());
    }

    #[test]
    fn test_universal_hash_vs_reference() {
        crate::test::test_universal_hash_vs_reference(&implementation());
    }
}
|
|
@ -0,0 +1,523 @@
|
|||
use crate::*;
|
||||
|
||||
// A 32-byte key used by all keyed-mode tests in this module.
pub const TEST_KEY: CVBytes = *b"whats the Elvish word for friend";

// Test a few different initial counter values.
// - 0: The base case.
// - i32::MAX: *No* overflow. But carry bugs in tricky SIMD code can screw this up, if you XOR when
//   you're supposed to ANDNOT.
// - u32::MAX: The low word of the counter overflows for all inputs except the first.
// - (42 << 32) + u32::MAX: Same but with a non-zero value in the high word.
const INITIAL_COUNTERS: [u64; 4] = [
    0,
    i32::MAX as u64,
    u32::MAX as u64,
    (42u64 << 32) + u32::MAX as u64,
];

// Block lengths to exercise: empty, a single byte, one short of a full block, and a full block.
const BLOCK_LENGTHS: [usize; 4] = [0, 1, 63, 64];
|
||||
|
||||
// Fill `buf` with the deterministic byte pattern 0, 1, ..., 250, 0, 1, ... (the value at index i
// is i mod 251; presumably 251 is used because it's prime, keeping the pattern out of phase with
// power-of-two block and chunk sizes).
pub fn paint_test_input(buf: &mut [u8]) {
    for (byte, value) in buf.iter_mut().zip((0u8..251).cycle()) {
        *byte = value;
    }
}
|
||||
|
||||
// Shared test helper: single-block compression of `test_impl` must match the portable
// implementation for several block lengths and initial counter values.
pub fn test_compress_vs_portable(test_impl: &Implementation) {
    for block_len in BLOCK_LENGTHS {
        dbg!(block_len);
        let mut block = [0; BLOCK_LEN];
        paint_test_input(&mut block[..block_len]);
        for counter in INITIAL_COUNTERS {
            dbg!(counter);
            let portable_cv = portable::implementation().compress(
                &block,
                block_len as u32,
                &TEST_KEY,
                counter,
                KEYED_HASH,
            );

            let test_cv =
                test_impl.compress(&block, block_len as u32, &TEST_KEY, counter, KEYED_HASH);

            assert_eq!(portable_cv, test_cv);
        }
    }
}
|
||||
|
||||
// Shared test helper: a single root-finalized compression (CHUNK_START | CHUNK_END | ROOT) must
// equal the reference implementation's keyed hash of the same bytes.
pub fn test_compress_vs_reference(test_impl: &Implementation) {
    for block_len in BLOCK_LENGTHS {
        dbg!(block_len);
        let mut block = [0; BLOCK_LEN];
        paint_test_input(&mut block[..block_len]);

        let mut ref_hasher = reference_impl::Hasher::new_keyed(&TEST_KEY);
        ref_hasher.update(&block[..block_len]);
        let mut ref_hash = [0u8; 32];
        ref_hasher.finalize(&mut ref_hash);

        let test_cv = test_impl.compress(
            &block,
            block_len as u32,
            &TEST_KEY,
            0,
            CHUNK_START | CHUNK_END | ROOT | KEYED_HASH,
        );

        assert_eq!(ref_hash, test_cv);
    }
}
|
||||
|
||||
fn check_transposed_eq(output_a: &TransposedVectors, output_b: &TransposedVectors) {
|
||||
if output_a == output_b {
|
||||
return;
|
||||
}
|
||||
for cv_index in 0..2 * MAX_SIMD_DEGREE {
|
||||
let cv_a = output_a.extract_cv(cv_index);
|
||||
let cv_b = output_b.extract_cv(cv_index);
|
||||
if cv_a == [0; 32] && cv_b == [0; 32] {
|
||||
println!("CV {cv_index:2} empty");
|
||||
} else if cv_a == cv_b {
|
||||
println!("CV {cv_index:2} matches");
|
||||
} else {
|
||||
println!("CV {cv_index:2} mismatch:");
|
||||
println!(" {}", hex::encode(cv_a));
|
||||
println!(" {}", hex::encode(cv_b));
|
||||
}
|
||||
}
|
||||
panic!("transposed outputs are not equal");
|
||||
}
|
||||
|
||||
// Shared test helper: hash_chunks of `test_impl` must match the portable implementation, for
// word-aligned and deliberately misaligned inputs, for a range of second-call lengths, and for
// all the interesting initial counters.
pub fn test_hash_chunks_vs_portable(test_impl: &Implementation) {
    assert!(test_impl.degree() <= MAX_SIMD_DEGREE);
    dbg!(test_impl.degree() * CHUNK_LEN);
    // Allocate 4 extra bytes of padding so we can make aligned slices.
    let mut input_buf = [0u8; 2 * 2 * MAX_SIMD_DEGREE * CHUNK_LEN + 4];
    let mut input_slice = &mut input_buf[..];
    // Make sure the start of the input is word-aligned.
    while input_slice.as_ptr() as usize % 4 != 0 {
        input_slice = &mut input_slice[1..];
    }
    // Carve out an aligned region, and a second region offset by 1 byte so it's misaligned.
    let (aligned_input, mut unaligned_input) =
        input_slice.split_at_mut(2 * MAX_SIMD_DEGREE * CHUNK_LEN);
    unaligned_input = &mut unaligned_input[1..][..2 * MAX_SIMD_DEGREE * CHUNK_LEN];
    assert_eq!(aligned_input.as_ptr() as usize % 4, 0);
    assert_eq!(unaligned_input.as_ptr() as usize % 4, 1);
    paint_test_input(aligned_input);
    paint_test_input(unaligned_input);
    // Try just below, equal to, and just above every whole number of chunks.
    let mut input_2_lengths = Vec::new();
    let mut next_len = 2 * CHUNK_LEN;
    loop {
        // 95 is one whole block plus one interesting part of another
        input_2_lengths.push(next_len - 95);
        input_2_lengths.push(next_len);
        if next_len == test_impl.degree() * CHUNK_LEN {
            break;
        }
        input_2_lengths.push(next_len + 95);
        next_len += CHUNK_LEN;
    }
    for input_2_len in input_2_lengths {
        dbg!(input_2_len);
        let aligned_input1 = &aligned_input[..test_impl.degree() * CHUNK_LEN];
        let aligned_input2 = &aligned_input[test_impl.degree() * CHUNK_LEN..][..input_2_len];
        let unaligned_input1 = &unaligned_input[..test_impl.degree() * CHUNK_LEN];
        let unaligned_input2 = &unaligned_input[test_impl.degree() * CHUNK_LEN..][..input_2_len];
        for initial_counter in INITIAL_COUNTERS {
            dbg!(initial_counter);
            // Make two calls, to test the output_column parameter.
            let mut portable_output = TransposedVectors::new();
            let (portable_left, portable_right) =
                test_impl.split_transposed_vectors(&mut portable_output);
            portable::implementation().hash_chunks(
                aligned_input1,
                &IV_BYTES,
                initial_counter,
                0,
                portable_left,
            );
            portable::implementation().hash_chunks(
                aligned_input2,
                &TEST_KEY,
                initial_counter + test_impl.degree() as u64,
                KEYED_HASH,
                portable_right,
            );

            let mut test_output = TransposedVectors::new();
            let (test_left, test_right) = test_impl.split_transposed_vectors(&mut test_output);
            test_impl.hash_chunks(aligned_input1, &IV_BYTES, initial_counter, 0, test_left);
            test_impl.hash_chunks(
                aligned_input2,
                &TEST_KEY,
                initial_counter + test_impl.degree() as u64,
                KEYED_HASH,
                test_right,
            );
            check_transposed_eq(&portable_output, &test_output);

            // Do the same thing with unaligned input.
            let mut unaligned_test_output = TransposedVectors::new();
            let (unaligned_left, unaligned_right) =
                test_impl.split_transposed_vectors(&mut unaligned_test_output);
            test_impl.hash_chunks(
                unaligned_input1,
                &IV_BYTES,
                initial_counter,
                0,
                unaligned_left,
            );
            test_impl.hash_chunks(
                unaligned_input2,
                &TEST_KEY,
                initial_counter + test_impl.degree() as u64,
                KEYED_HASH,
                unaligned_right,
            );
            check_transposed_eq(&portable_output, &unaligned_test_output);
        }
    }
}
|
||||
|
||||
fn painted_transposed_input() -> TransposedVectors {
|
||||
let mut vectors = TransposedVectors::new();
|
||||
let mut val = 0;
|
||||
for col in 0..2 * MAX_SIMD_DEGREE {
|
||||
for row in 0..8 {
|
||||
vectors.0[row][col] = val;
|
||||
val += 1;
|
||||
}
|
||||
}
|
||||
vectors
|
||||
}
|
||||
|
||||
// Shared test helper: hash_parents of `test_impl` must match the portable implementation, for a
// range of parent counts and for both halves of a split output (testing the output column).
pub fn test_hash_parents_vs_portable(test_impl: &Implementation) {
    assert!(test_impl.degree() <= MAX_SIMD_DEGREE);
    let input = painted_transposed_input();
    for num_parents in 2..=(test_impl.degree() / 2) {
        dbg!(num_parents);
        let mut portable_output = TransposedVectors::new();
        let (portable_left, portable_right) =
            test_impl.split_transposed_vectors(&mut portable_output);
        portable::implementation().hash_parents(
            &input,
            2 * num_parents, // num_cvs
            &IV_BYTES,
            0,
            portable_left,
        );
        portable::implementation().hash_parents(
            &input,
            2 * num_parents, // num_cvs
            &TEST_KEY,
            KEYED_HASH,
            portable_right,
        );

        let mut test_output = TransposedVectors::new();
        let (test_left, test_right) = test_impl.split_transposed_vectors(&mut test_output);
        test_impl.hash_parents(
            &input,
            2 * num_parents, // num_cvs
            &IV_BYTES,
            0,
            test_left,
        );
        test_impl.hash_parents(
            &input,
            2 * num_parents, // num_cvs
            &TEST_KEY,
            KEYED_HASH,
            test_right,
        );

        check_transposed_eq(&portable_output, &test_output);
    }
}
|
||||
|
||||
// Recursively hash `input` (more than one chunk) into transposed CVs using only hash_chunks and
// hash_parents, writing into `output`. Returns the number of CVs written.
fn hash_with_chunks_and_parents_recurse(
    test_impl: &Implementation,
    input: &[u8],
    counter: u64,
    output: TransposedSplit,
) -> usize {
    assert!(input.len() > 0);
    // Base case: few enough chunks for a single hash_chunks call.
    if input.len() <= test_impl.degree() * CHUNK_LEN {
        return test_impl.hash_chunks(input, &IV_BYTES, counter, 0, output);
    }
    // Recursive case: split per the BLAKE3 tree rule, hash both sides, then combine with one
    // hash_parents call.
    let (left_input, right_input) = input.split_at(left_len(input.len()));
    let mut child_output = TransposedVectors::new();
    let (left_output, right_output) = test_impl.split_transposed_vectors(&mut child_output);
    let mut children =
        hash_with_chunks_and_parents_recurse(test_impl, left_input, counter, left_output);
    // The left subtree is always full, so it must produce exactly `degree` CVs.
    assert_eq!(children, test_impl.degree());
    children += hash_with_chunks_and_parents_recurse(
        test_impl,
        right_input,
        counter + (left_input.len() / CHUNK_LEN) as u64,
        right_output,
    );
    test_impl.hash_parents(&child_output, children, &IV_BYTES, PARENT, output)
}
|
||||
|
||||
// Note: This test implementation doesn't support the 1-chunk-or-less case.
// Compute the root hash of `input` using only the Implementation's chunk/parent primitives:
// recurse to a flat layer of CVs, reduce until two remain, then do the root compression.
fn root_hash_with_chunks_and_parents(test_impl: &Implementation, input: &[u8]) -> CVBytes {
    // TODO: handle the 1-chunk case?
    assert!(input.len() > CHUNK_LEN);
    let mut cvs = TransposedVectors::new();
    // The right half of these vectors are never used.
    let (cvs_left, _) = test_impl.split_transposed_vectors(&mut cvs);
    let mut num_cvs = hash_with_chunks_and_parents_recurse(test_impl, input, 0, cvs_left);
    // Reduce pairs of CVs until only the root's two children remain.
    while num_cvs > 2 {
        num_cvs = test_impl.reduce_parents(&mut cvs, num_cvs, &IV_BYTES, 0);
    }
    test_impl.compress(
        &cvs.extract_parent_node(0),
        BLOCK_LEN as u32,
        &IV_BYTES,
        0,
        PARENT | ROOT,
    )
}
|
||||
|
||||
pub fn test_chunks_and_parents_vs_reference(test_impl: &Implementation) {
|
||||
assert_eq!(test_impl.degree().count_ones(), 1, "power of 2");
|
||||
const MAX_INPUT_LEN: usize = 2 * MAX_SIMD_DEGREE * CHUNK_LEN;
|
||||
let mut input_buf = [0u8; MAX_INPUT_LEN];
|
||||
paint_test_input(&mut input_buf);
|
||||
// Try just below, equal to, and just above every whole number of chunks, except that
|
||||
// root_hash_with_chunks_and_parents doesn't support the 1-chunk-or-less case.
|
||||
let mut test_lengths = vec![CHUNK_LEN + 1];
|
||||
let mut next_len = 2 * CHUNK_LEN;
|
||||
loop {
|
||||
// 95 is one whole block plus one interesting part of another
|
||||
test_lengths.push(next_len - 95);
|
||||
test_lengths.push(next_len);
|
||||
if next_len == MAX_INPUT_LEN {
|
||||
break;
|
||||
}
|
||||
test_lengths.push(next_len + 95);
|
||||
next_len += CHUNK_LEN;
|
||||
}
|
||||
for test_len in test_lengths {
|
||||
dbg!(test_len);
|
||||
let input = &input_buf[..test_len];
|
||||
|
||||
let mut ref_hasher = reference_impl::Hasher::new();
|
||||
ref_hasher.update(&input);
|
||||
let mut ref_hash = [0u8; 32];
|
||||
ref_hasher.finalize(&mut ref_hash);
|
||||
|
||||
let test_hash = root_hash_with_chunks_and_parents(test_impl, input);
|
||||
|
||||
assert_eq!(ref_hash, test_hash);
|
||||
}
|
||||
}
|
||||
|
||||
// Shared test helper: xof of `test_impl` must match the portable implementation for various
// counters, input lengths, and output lengths. Also checks that xof_xor applied twice is a
// round trip and that neither call writes past the requested output length.
pub fn test_xof_vs_portable(test_impl: &Implementation) {
    let flags = CHUNK_START | CHUNK_END | KEYED_HASH;
    for counter in INITIAL_COUNTERS {
        dbg!(counter);
        for input_len in [0, 1, BLOCK_LEN] {
            dbg!(input_len);
            let mut input_block = [0u8; BLOCK_LEN];
            for byte_index in 0..input_len {
                input_block[byte_index] = byte_index as u8 + 42;
            }
            // Try equal to and partway through every whole number of output blocks.
            const MAX_OUTPUT_LEN: usize = 2 * MAX_SIMD_DEGREE * BLOCK_LEN;
            let mut output_lengths = Vec::new();
            let mut next_len = 0;
            loop {
                output_lengths.push(next_len);
                if next_len == MAX_OUTPUT_LEN {
                    break;
                }
                output_lengths.push(next_len + 31);
                next_len += BLOCK_LEN;
            }
            for output_len in output_lengths {
                dbg!(output_len);
                // 0xff sentinel bytes let us detect out-of-bounds writes below.
                let mut portable_output = [0xff; MAX_OUTPUT_LEN];
                portable::implementation().xof(
                    &input_block,
                    input_len as u32,
                    &TEST_KEY,
                    counter,
                    flags,
                    &mut portable_output[..output_len],
                );
                let mut test_output = [0xff; MAX_OUTPUT_LEN];
                test_impl.xof(
                    &input_block,
                    input_len as u32,
                    &TEST_KEY,
                    counter,
                    flags,
                    &mut test_output[..output_len],
                );
                assert_eq!(portable_output, test_output);

                // Double check that the implementation didn't overwrite.
                assert!(test_output[output_len..].iter().all(|&b| b == 0xff));

                // The first XOR cancels out the output.
                test_impl.xof_xor(
                    &input_block,
                    input_len as u32,
                    &TEST_KEY,
                    counter,
                    flags,
                    &mut test_output[..output_len],
                );
                assert!(test_output[..output_len].iter().all(|&b| b == 0));
                assert!(test_output[output_len..].iter().all(|&b| b == 0xff));

                // The second XOR restores the output.
                test_impl.xof_xor(
                    &input_block,
                    input_len as u32,
                    &TEST_KEY,
                    counter,
                    flags,
                    &mut test_output[..output_len],
                );
                assert_eq!(portable_output, test_output);
                assert!(test_output[output_len..].iter().all(|&b| b == 0xff));
            }
        }
    }
}
|
||||
|
||||
// Shared test helper: extended output of `test_impl` must match the reference implementation,
// including when the XOF stream is started at block 1 instead of block 0 (seeking).
pub fn test_xof_vs_reference(test_impl: &Implementation) {
    let input = b"hello world";
    let mut input_block = [0; BLOCK_LEN];
    input_block[..input.len()].copy_from_slice(input);

    // Materialize the full reference output once up front.
    const MAX_OUTPUT_LEN: usize = 2 * MAX_SIMD_DEGREE * BLOCK_LEN;
    let mut ref_output = [0; MAX_OUTPUT_LEN];
    let mut ref_hasher = reference_impl::Hasher::new_keyed(&TEST_KEY);
    ref_hasher.update(input);
    ref_hasher.finalize(&mut ref_output);

    // Try equal to and partway through every whole number of output blocks.
    let mut output_lengths = vec![0, 1, 31];
    let mut next_len = BLOCK_LEN;
    loop {
        output_lengths.push(next_len);
        if next_len == MAX_OUTPUT_LEN {
            break;
        }
        output_lengths.push(next_len + 31);
        next_len += BLOCK_LEN;
    }

    for output_len in output_lengths {
        dbg!(output_len);
        let mut test_output = [0; MAX_OUTPUT_LEN];
        test_impl.xof(
            &input_block,
            input.len() as u32,
            &TEST_KEY,
            0,
            KEYED_HASH | CHUNK_START | CHUNK_END,
            &mut test_output[..output_len],
        );
        assert_eq!(ref_output[..output_len], test_output[..output_len]);

        // Double check that the implementation didn't overwrite.
        assert!(test_output[output_len..].iter().all(|&b| b == 0));

        // Do it again starting from block 1.
        if output_len >= BLOCK_LEN {
            test_impl.xof(
                &input_block,
                input.len() as u32,
                &TEST_KEY,
                1,
                KEYED_HASH | CHUNK_START | CHUNK_END,
                &mut test_output[..output_len - BLOCK_LEN],
            );
            assert_eq!(
                ref_output[BLOCK_LEN..output_len],
                test_output[..output_len - BLOCK_LEN],
            );
        }
    }
}
|
||||
|
||||
// Shared test helper: universal_hash of `test_impl` must match the portable implementation for
// input lengths around every whole number of blocks and all the interesting counters.
pub fn test_universal_hash_vs_portable(test_impl: &Implementation) {
    const MAX_INPUT_LEN: usize = 2 * MAX_SIMD_DEGREE * BLOCK_LEN;
    let mut input_buf = [0; MAX_INPUT_LEN];
    paint_test_input(&mut input_buf);
    // Try equal to and partway through every whole number of input blocks.
    let mut input_lengths = vec![0, 1, 31];
    let mut next_len = BLOCK_LEN;
    loop {
        input_lengths.push(next_len);
        if next_len == MAX_INPUT_LEN {
            break;
        }
        input_lengths.push(next_len + 31);
        next_len += BLOCK_LEN;
    }
    for input_len in input_lengths {
        dbg!(input_len);
        for counter in INITIAL_COUNTERS {
            dbg!(counter);
            let portable_output = portable::implementation().universal_hash(
                &input_buf[..input_len],
                &TEST_KEY,
                counter,
            );
            let test_output = test_impl.universal_hash(&input_buf[..input_len], &TEST_KEY, counter);
            assert_eq!(portable_output, test_output);
        }
    }
}
|
||||
|
||||
// Recompute universal_hash using only the reference implementation: hash each input block
// separately (keyed), take the 16 bytes of extended output at that block's offset, and XOR all
// of those together.
fn reference_impl_universal_hash(input: &[u8], key: &CVBytes) -> [u8; UNIVERSAL_HASH_LEN] {
    // The reference_impl doesn't support XOF seeking, so we have to materialize an entire extended
    // output to seek to a block.
    const MAX_BLOCKS: usize = 2 * MAX_SIMD_DEGREE;
    assert!(input.len() / BLOCK_LEN <= MAX_BLOCKS);
    let mut output_buffer: [u8; BLOCK_LEN * MAX_BLOCKS] = [0u8; BLOCK_LEN * MAX_BLOCKS];
    let mut result = [0u8; UNIVERSAL_HASH_LEN];
    let mut block_start = 0;
    while block_start < input.len() {
        // The final block may be partial.
        let block_len = cmp::min(input.len() - block_start, BLOCK_LEN);
        let mut ref_hasher = reference_impl::Hasher::new_keyed(key);
        ref_hasher.update(&input[block_start..block_start + block_len]);
        // Finalize enough extended output to reach this block's 16-byte window.
        ref_hasher.finalize(&mut output_buffer[..block_start + UNIVERSAL_HASH_LEN]);
        for byte_index in 0..UNIVERSAL_HASH_LEN {
            result[byte_index] ^= output_buffer[block_start + byte_index];
        }
        block_start += BLOCK_LEN;
    }
    result
}
|
||||
|
||||
// Shared test helper: universal_hash of `test_impl` must match the reference-based recomputation
// above, for input lengths around every whole number of blocks.
pub fn test_universal_hash_vs_reference(test_impl: &Implementation) {
    const MAX_INPUT_LEN: usize = 2 * MAX_SIMD_DEGREE * BLOCK_LEN;
    let mut input_buf = [0; MAX_INPUT_LEN];
    paint_test_input(&mut input_buf);
    // Try equal to and partway through every whole number of input blocks.
    let mut input_lengths = vec![0, 1, 31];
    let mut next_len = BLOCK_LEN;
    loop {
        input_lengths.push(next_len);
        if next_len == MAX_INPUT_LEN {
            break;
        }
        input_lengths.push(next_len + 31);
        next_len += BLOCK_LEN;
    }
    for input_len in input_lengths {
        dbg!(input_len);
        let ref_output = reference_impl_universal_hash(&input_buf[..input_len], &TEST_KEY);
        let test_output = test_impl.universal_hash(&input_buf[..input_len], &TEST_KEY, 0);
        assert_eq!(ref_output, test_output);
    }
}
|
|
@ -0,0 +1,79 @@
|
|||
//! Helper functions for efficient IO.
|
||||
|
||||
// Feed everything `reader` produces into `hasher` using a wide (64 KiB) buffer, returning the
// total number of bytes hashed. Interrupted reads are retried (see test_update_reader_interrupted);
// any other I/O error is returned to the caller.
#[cfg(feature = "std")]
pub(crate) fn copy_wide(
    mut reader: impl std::io::Read,
    hasher: &mut crate::Hasher,
) -> std::io::Result<u64> {
    let mut buffer = [0; 65536];
    let mut total: u64 = 0;
    loop {
        let n = match reader.read(&mut buffer) {
            Ok(n) => n,
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        };
        if n == 0 {
            // EOF.
            return Ok(total);
        }
        hasher.update(&buffer[..n]);
        total += n as u64;
    }
}
|
||||
|
||||
// Mmap a file, if it looks like a good idea. Return None in cases where we know mmap will fail, or
|
||||
// if the file is short enough that mmapping isn't worth it. However, if we do try to mmap and it
|
||||
// fails, return the error.
|
||||
//
|
||||
// SAFETY: Mmaps are fundamentally unsafe, because you can call invariant-checking functions like
|
||||
// str::from_utf8 on them and then have them change out from under you. Letting a safe caller get
|
||||
// their hands on an mmap, or even a &[u8] that's backed by an mmap, is unsound. However, because
|
||||
// this function is crate-private, we can guarantee that all that can ever happen in the event of a race
|
||||
// condition is that we either hash nonsense bytes or crash with SIGBUS or similar, neither of
|
||||
// which should risk memory corruption in a safe caller.
|
||||
//
|
||||
// PARANOIA: But a data race...is a data race...is a data race...right? Even if we know that no
|
||||
// platform in the "real world" is ever going to do anything other than compute the "wrong answer"
|
||||
// if we race on this mmap while we hash it, aren't we still supposed to feel bad about doing this?
|
||||
// Well, maybe. This is IO, and IO gets special carve-outs in the memory model. Consider a
|
||||
// memory-mapped register that returns random 32-bit words. (This is actually realistic if you have
|
||||
// a hardware RNG.) It's probably sound to construct a *const i32 pointing to that register and do
|
||||
// some raw pointer reads from it. Those reads should be volatile if you don't want the compiler to
|
||||
// coalesce them, but either way the compiler isn't allowed to just _go nuts_ and insert
|
||||
// should-never-happen branches to wipe your hard drive if two adjacent reads happen to give
|
||||
// different values. As far as I'm aware, there's no such thing as a read that's allowed if it's
|
||||
// volatile but prohibited if it's not (unlike atomics). As mentioned above, it's not ok to
|
||||
// construct a safe &i32 to the register if you're going to leak that reference to unknown callers.
|
||||
// But if you "know what you're doing," I don't think *const i32 and &i32 are fundamentally
|
||||
// different here. Feedback needed.
|
||||
#[cfg(feature = "mmap")]
|
||||
pub(crate) fn maybe_mmap_file(file: &std::fs::File) -> std::io::Result<Option<memmap2::Mmap>> {
|
||||
let metadata = file.metadata()?;
|
||||
let file_size = metadata.len();
|
||||
#[allow(clippy::if_same_then_else)]
|
||||
if !metadata.is_file() {
|
||||
// Not a real file.
|
||||
Ok(None)
|
||||
} else if file_size > isize::max_value() as u64 {
|
||||
// Too long to safely map.
|
||||
// https://github.com/danburkert/memmap-rs/issues/69
|
||||
Ok(None)
|
||||
} else if file_size == 0 {
|
||||
// Mapping an empty file currently fails.
|
||||
// https://github.com/danburkert/memmap-rs/issues/72
|
||||
// See test_mmap_virtual_file.
|
||||
Ok(None)
|
||||
} else if file_size < 16 * 1024 {
|
||||
// Mapping small files is not worth it.
|
||||
Ok(None)
|
||||
} else {
|
||||
// Explicitly set the length of the memory map, so that filesystem
|
||||
// changes can't race to violate the invariants we just checked.
|
||||
let map = unsafe {
|
||||
memmap2::MmapOptions::new()
|
||||
.len(file_size as usize)
|
||||
.map(file)?
|
||||
};
|
||||
Ok(Some(map))
|
||||
}
|
||||
}
|
304
src/lib.rs
304
src/lib.rs
|
@ -33,15 +33,33 @@
|
|||
//! # Cargo Features
|
||||
//!
|
||||
//! The `std` feature (the only feature enabled by default) is required for
|
||||
//! implementations of the [`Write`] and [`Seek`] traits, and also for runtime
|
||||
//! CPU feature detection on x86. If this feature is disabled, the only way to
|
||||
//! use the x86 SIMD implementations is to enable the corresponding instruction
|
||||
//! sets globally, with e.g. `RUSTFLAGS="-C target-cpu=native"`. The resulting
|
||||
//! binary will not be portable to other machines.
|
||||
//! implementations of the [`Write`] and [`Seek`] traits, the
|
||||
//! [`update_reader`](Hasher::update_reader) helper method, and runtime CPU
|
||||
//! feature detection on x86. If this feature is disabled, the only way to use
|
||||
//! the x86 SIMD implementations is to enable the corresponding instruction sets
|
||||
//! globally, with e.g. `RUSTFLAGS="-C target-cpu=native"`. The resulting binary
|
||||
//! will not be portable to other machines.
|
||||
//!
|
||||
//! The `rayon` feature (disabled by default, but enabled for [docs.rs]) adds
|
||||
//! the [`Hasher::update_rayon`] method, for multithreaded hashing. However,
|
||||
//! even if this feature is enabled, all other APIs remain single-threaded.
|
||||
//! the [`update_rayon`](Hasher::update_rayon) and (in combination with `mmap`
|
||||
//! below) [`update_mmap_rayon`](Hasher::update_mmap_rayon) methods, for
|
||||
//! multithreaded hashing. However, even if this feature is enabled, all other
|
||||
//! APIs remain single-threaded.
|
||||
//!
|
||||
//! The `mmap` feature (disabled by default, but enabled for [docs.rs]) adds the
|
||||
//! [`update_mmap`](Hasher::update_mmap) and (in combination with `rayon` above)
|
||||
//! [`update_mmap_rayon`](Hasher::update_mmap_rayon) helper methods for
|
||||
//! memory-mapped IO.
|
||||
//!
|
||||
//! The `zeroize` feature (disabled by default, but enabled for [docs.rs])
|
||||
//! implements
|
||||
//! [`Zeroize`](https://docs.rs/zeroize/latest/zeroize/trait.Zeroize.html) for
|
||||
//! this crate's types.
|
||||
//!
|
||||
//! The `serde` feature (disabled by default, but enabled for [docs.rs]) implements
|
||||
//! [`serde::Serialize`](https://docs.rs/serde/latest/serde/trait.Serialize.html) and
|
||||
//! [`serde::Deserialize`](https://docs.rs/serde/latest/serde/trait.Deserialize.html)
|
||||
//! for [`Hash`](struct@Hash).
|
||||
//!
|
||||
//! The NEON implementation is enabled by default for AArch64 but requires the
|
||||
//! `neon` feature for other ARM targets. Not all ARMv7 CPUs support NEON, and
|
||||
|
@ -49,12 +67,12 @@
|
|||
//! without NEON support.
|
||||
//!
|
||||
//! The `traits-preview` feature enables implementations of traits from the
|
||||
//! RustCrypto [`digest`] crate, and re-exports that crate as
|
||||
//! `traits::digest`. However, the traits aren't stable, and they're expected to
|
||||
//! change in incompatible ways before that crate reaches 1.0. For that reason,
|
||||
//! this crate makes no SemVer guarantees for this feature, and callers who use
|
||||
//! it should expect breaking changes between patch versions. (The "-preview"
|
||||
//! feature name follows the conventions of the RustCrypto [`signature`] crate.)
|
||||
//! RustCrypto [`digest`] crate, and re-exports that crate as `traits::digest`.
|
||||
//! However, the traits aren't stable, and they're expected to change in
|
||||
//! incompatible ways before that crate reaches 1.0. For that reason, this crate
|
||||
//! makes no SemVer guarantees for this feature, and callers who use it should
|
||||
//! expect breaking changes between patch versions. (The "-preview" feature name
|
||||
//! follows the conventions of the RustCrypto [`signature`] crate.)
|
||||
//!
|
||||
//! [`Hasher::update_rayon`]: struct.Hasher.html#method.update_rayon
|
||||
//! [BLAKE3]: https://blake3.io
|
||||
|
@ -112,6 +130,7 @@ mod sse41;
|
|||
#[cfg(feature = "traits-preview")]
|
||||
pub mod traits;
|
||||
|
||||
mod io;
|
||||
mod join;
|
||||
|
||||
use arrayref::{array_mut_ref, array_ref};
|
||||
|
@ -197,6 +216,8 @@ fn counter_high(counter: u64) -> u32 {
|
|||
/// [`from_hex`]: #method.from_hex
|
||||
/// [`Display`]: https://doc.rust-lang.org/std/fmt/trait.Display.html
|
||||
/// [`FromStr`]: https://doc.rust-lang.org/std/str/trait.FromStr.html
|
||||
#[cfg_attr(feature = "zeroize", derive(zeroize::Zeroize))]
|
||||
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
|
||||
#[derive(Clone, Copy, Hash)]
|
||||
pub struct Hash([u8; OUT_LEN]);
|
||||
|
||||
|
@ -284,10 +305,28 @@ impl core::str::FromStr for Hash {
|
|||
}
|
||||
}
|
||||
|
||||
// A proper implementation of constant time equality is tricky, and we get it from the
|
||||
// constant_time_eq crate instead of rolling our own. However, that crate isn't compatible with
|
||||
// Miri, so we roll our own just for that.
|
||||
#[cfg(miri)]
|
||||
fn constant_time_eq_miri(a: &[u8], b: &[u8]) -> bool {
|
||||
if a.len() != b.len() {
|
||||
return false;
|
||||
}
|
||||
let mut x = 0;
|
||||
for i in 0..a.len() {
|
||||
x |= a[i] ^ b[i];
|
||||
}
|
||||
x == 0
|
||||
}
|
||||
|
||||
/// This implementation is constant-time.
|
||||
impl PartialEq for Hash {
|
||||
#[inline]
|
||||
fn eq(&self, other: &Hash) -> bool {
|
||||
#[cfg(miri)]
|
||||
return constant_time_eq_miri(&self.0, &other.0);
|
||||
#[cfg(not(miri))]
|
||||
constant_time_eq::constant_time_eq_32(&self.0, &other.0)
|
||||
}
|
||||
}
|
||||
|
@ -296,6 +335,9 @@ impl PartialEq for Hash {
|
|||
impl PartialEq<[u8; OUT_LEN]> for Hash {
|
||||
#[inline]
|
||||
fn eq(&self, other: &[u8; OUT_LEN]) -> bool {
|
||||
#[cfg(miri)]
|
||||
return constant_time_eq_miri(&self.0, other);
|
||||
#[cfg(not(miri))]
|
||||
constant_time_eq::constant_time_eq_32(&self.0, other)
|
||||
}
|
||||
}
|
||||
|
@ -304,6 +346,9 @@ impl PartialEq<[u8; OUT_LEN]> for Hash {
|
|||
impl PartialEq<[u8]> for Hash {
|
||||
#[inline]
|
||||
fn eq(&self, other: &[u8]) -> bool {
|
||||
#[cfg(miri)]
|
||||
return constant_time_eq_miri(&self.0, other);
|
||||
#[cfg(not(miri))]
|
||||
constant_time_eq::constant_time_eq(&self.0, other)
|
||||
}
|
||||
}
|
||||
|
@ -371,6 +416,7 @@ impl std::error::Error for HexError {}
|
|||
// Each chunk or parent node can produce either a 32-byte chaining value or, by
|
||||
// setting the ROOT flag, any number of final output bytes. The Output struct
|
||||
// captures the state just prior to choosing between those two possibilities.
|
||||
#[cfg_attr(feature = "zeroize", derive(zeroize::Zeroize))]
|
||||
#[derive(Clone)]
|
||||
struct Output {
|
||||
input_chaining_value: CVWords,
|
||||
|
@ -378,6 +424,7 @@ struct Output {
|
|||
block_len: u8,
|
||||
counter: u64,
|
||||
flags: u8,
|
||||
#[cfg_attr(feature = "zeroize", zeroize(skip))]
|
||||
platform: Platform,
|
||||
}
|
||||
|
||||
|
@ -414,6 +461,7 @@ impl Output {
|
|||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
#[cfg_attr(feature = "zeroize", derive(zeroize::Zeroize))]
|
||||
struct ChunkState {
|
||||
cv: CVWords,
|
||||
chunk_counter: u64,
|
||||
|
@ -421,6 +469,7 @@ struct ChunkState {
|
|||
buf_len: u8,
|
||||
blocks_compressed: u8,
|
||||
flags: u8,
|
||||
#[cfg_attr(feature = "zeroize", zeroize(skip))]
|
||||
platform: Platform,
|
||||
}
|
||||
|
||||
|
@ -903,6 +952,9 @@ fn parent_node_output(
|
|||
|
||||
/// An incremental hash state that can accept any number of writes.
|
||||
///
|
||||
/// The `rayon` and `mmap` Cargo features enable additional methods on this
|
||||
/// type related to multithreading and memory-mapped IO.
|
||||
///
|
||||
/// When the `traits-preview` Cargo feature is enabled, this type implements
|
||||
/// several commonly used traits from the
|
||||
/// [`digest`](https://crates.io/crates/digest) crate. However, those
|
||||
|
@ -911,15 +963,6 @@ fn parent_node_output(
|
|||
/// guarantees for this feature, and callers who use it should expect breaking
|
||||
/// changes between patch versions.
|
||||
///
|
||||
/// When the `rayon` Cargo feature is enabled, the
|
||||
/// [`update_rayon`](#method.update_rayon) method is available for multithreaded
|
||||
/// hashing.
|
||||
///
|
||||
/// **Performance note:** The [`update`](#method.update) method can't take full
|
||||
/// advantage of SIMD optimizations if its input buffer is too small or oddly
|
||||
/// sized. Using a 16 KiB buffer, or any multiple of that, enables all currently
|
||||
/// supported SIMD instruction sets.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
|
@ -942,6 +985,7 @@ fn parent_node_output(
|
|||
/// # }
|
||||
/// ```
|
||||
#[derive(Clone)]
|
||||
#[cfg_attr(feature = "zeroize", derive(zeroize::Zeroize))]
|
||||
pub struct Hasher {
|
||||
key: CVWords,
|
||||
chunk_state: ChunkState,
|
||||
|
@ -1069,48 +1113,17 @@ impl Hasher {
|
|||
self.cv_stack.push(*new_cv);
|
||||
}
|
||||
|
||||
/// Add input bytes to the hash state. You can call this any number of
|
||||
/// times.
|
||||
/// Add input bytes to the hash state. You can call this any number of times.
|
||||
///
|
||||
/// This method is always single-threaded. For multithreading support, see
|
||||
/// [`update_rayon`](#method.update_rayon) below (enabled with the `rayon`
|
||||
/// Cargo feature).
|
||||
/// [`update_rayon`](#method.update_rayon) (enabled with the `rayon` Cargo feature).
|
||||
///
|
||||
/// Note that the degree of SIMD parallelism that `update` can use is
|
||||
/// limited by the size of this input buffer. The 8 KiB buffer currently
|
||||
/// used by [`std::io::copy`] is enough to leverage AVX2, for example, but
|
||||
/// not enough to leverage AVX-512. A 16 KiB buffer is large enough to
|
||||
/// leverage all currently supported SIMD instruction sets.
|
||||
///
|
||||
/// [`std::io::copy`]: https://doc.rust-lang.org/std/io/fn.copy.html
|
||||
/// Note that the degree of SIMD parallelism that `update` can use is limited by the size of
|
||||
/// this input buffer. See [`update_reader`](#method.update_reader).
|
||||
pub fn update(&mut self, input: &[u8]) -> &mut Self {
|
||||
self.update_with_join::<join::SerialJoin>(input)
|
||||
}
|
||||
|
||||
/// Identical to [`update`](Hasher::update), but using Rayon-based
|
||||
/// multithreading internally.
|
||||
///
|
||||
/// This method is gated by the `rayon` Cargo feature, which is disabled by
|
||||
/// default but enabled on [docs.rs](https://docs.rs).
|
||||
///
|
||||
/// To get any performance benefit from multithreading, the input buffer
|
||||
/// needs to be large. As a rule of thumb on x86_64, `update_rayon` is
|
||||
/// _slower_ than `update` for inputs under 128 KiB. That threshold varies
|
||||
/// quite a lot across different processors, and it's important to benchmark
|
||||
/// your specific use case.
|
||||
///
|
||||
/// Memory mapping an entire input file is a simple way to take advantage of
|
||||
/// multithreading without needing to carefully tune your buffer size or
|
||||
/// offload IO. However, on spinning disks where random access is expensive,
|
||||
/// that approach can lead to disk thrashing and terrible IO performance.
|
||||
/// Note that OS page caching can mask this problem, in which case it might
|
||||
/// only appear for files larger than available RAM. Again, benchmarking
|
||||
/// your specific use case is important.
|
||||
#[cfg(feature = "rayon")]
|
||||
pub fn update_rayon(&mut self, input: &[u8]) -> &mut Self {
|
||||
self.update_with_join::<join::RayonJoin>(input)
|
||||
}
|
||||
|
||||
fn update_with_join<J: join::Join>(&mut self, mut input: &[u8]) -> &mut Self {
|
||||
// If we have some partial chunk bytes in the internal chunk_state, we
|
||||
// need to finish that chunk first.
|
||||
|
@ -1309,6 +1322,182 @@ impl Hasher {
|
|||
pub fn count(&self) -> u64 {
|
||||
self.chunk_state.chunk_counter * CHUNK_LEN as u64 + self.chunk_state.len() as u64
|
||||
}
|
||||
|
||||
/// As [`update`](Hasher::update), but reading from a
|
||||
/// [`std::io::Read`](https://doc.rust-lang.org/std/io/trait.Read.html) implementation.
|
||||
///
|
||||
/// [`Hasher`] implements
|
||||
/// [`std::io::Write`](https://doc.rust-lang.org/std/io/trait.Write.html), so it's possible to
|
||||
/// use [`std::io::copy`](https://doc.rust-lang.org/std/io/fn.copy.html) to update a [`Hasher`]
|
||||
/// from any reader. Unfortunately, this standard approach can limit performance, because
|
||||
/// `copy` currently uses an internal 8 KiB buffer that isn't big enough to take advantage of
|
||||
/// all SIMD instruction sets. (In particular, [AVX-512](https://en.wikipedia.org/wiki/AVX-512)
|
||||
/// needs a 16 KiB buffer.) `update_reader` avoids this performance problem and is slightly
|
||||
/// more convenient.
|
||||
///
|
||||
/// The internal buffer size this method uses may change at any time, and it may be different
|
||||
/// for different targets. The only guarantee is that it will be large enough for all of this
|
||||
/// crate's SIMD implementations on the current platform.
|
||||
///
|
||||
/// The most common implementer of
|
||||
/// [`std::io::Read`](https://doc.rust-lang.org/std/io/trait.Read.html) might be
|
||||
/// [`std::fs::File`](https://doc.rust-lang.org/std/fs/struct.File.html), but note that memory
|
||||
/// mapping can be faster than this method for hashing large files. See
|
||||
/// [`update_mmap`](Hasher::update_mmap) and [`update_mmap_rayon`](Hasher::update_mmap_rayon),
|
||||
/// which require the `mmap` and (for the latter) `rayon` Cargo features.
|
||||
///
|
||||
/// This method requires the `std` Cargo feature, which is enabled by default.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```no_run
|
||||
/// # use std::fs::File;
|
||||
/// # use std::io;
|
||||
/// # fn main() -> io::Result<()> {
|
||||
/// // Hash standard input.
|
||||
/// let mut hasher = blake3::Hasher::new();
|
||||
/// hasher.update_reader(std::io::stdin().lock())?;
|
||||
/// println!("{}", hasher.finalize());
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "std")]
|
||||
pub fn update_reader(&mut self, reader: impl std::io::Read) -> std::io::Result<&mut Self> {
|
||||
io::copy_wide(reader, self)?;
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// As [`update`](Hasher::update), but using Rayon-based multithreading
|
||||
/// internally.
|
||||
///
|
||||
/// This method is gated by the `rayon` Cargo feature, which is disabled by
|
||||
/// default but enabled on [docs.rs](https://docs.rs).
|
||||
///
|
||||
/// To get any performance benefit from multithreading, the input buffer
|
||||
/// needs to be large. As a rule of thumb on x86_64, `update_rayon` is
|
||||
/// _slower_ than `update` for inputs under 128 KiB. That threshold varies
|
||||
/// quite a lot across different processors, and it's important to benchmark
|
||||
/// your specific use case. See also the performance warning associated with
|
||||
/// [`update_mmap_rayon`](Hasher::update_mmap_rayon).
|
||||
///
|
||||
/// If you already have a large buffer in memory, and you want to hash it
|
||||
/// with multiple threads, this method is a good option. However, reading a
|
||||
/// file into memory just to call this method can be a performance mistake,
|
||||
/// both because it requires lots of memory and because single-threaded
|
||||
/// reads can be slow. For hashing whole files, see
|
||||
/// [`update_mmap_rayon`](Hasher::update_mmap_rayon), which is gated by both
|
||||
/// the `rayon` and `mmap` Cargo features.
|
||||
#[cfg(feature = "rayon")]
|
||||
pub fn update_rayon(&mut self, input: &[u8]) -> &mut Self {
|
||||
self.update_with_join::<join::RayonJoin>(input)
|
||||
}
|
||||
|
||||
/// As [`update`](Hasher::update), but reading the contents of a file using memory mapping.
|
||||
///
|
||||
/// Not all files can be memory mapped, and memory mapping small files can be slower than
|
||||
/// reading them the usual way. In those cases, this method will fall back to standard file IO.
|
||||
/// The heuristic for whether to use memory mapping is currently very simple (file size >=
|
||||
/// 16 KiB), and it might change at any time.
|
||||
///
|
||||
/// Like [`update`](Hasher::update), this method is single-threaded. In this author's
|
||||
/// experience, memory mapping improves single-threaded performance by ~10% for large files
|
||||
/// that are already in cache. This probably varies between platforms, and as always it's a
|
||||
/// good idea to benchmark your own use case. In comparison, the multithreaded
|
||||
/// [`update_mmap_rayon`](Hasher::update_mmap_rayon) method can have a much larger impact on
|
||||
/// performance.
|
||||
///
|
||||
/// There's a correctness reason that this method takes
|
||||
/// [`Path`](https://doc.rust-lang.org/stable/std/path/struct.Path.html) instead of
|
||||
/// [`File`](https://doc.rust-lang.org/std/fs/struct.File.html): reading from a memory-mapped
|
||||
/// file ignores the seek position of the original file handle (it neither respects the current
|
||||
/// position nor updates the position). This difference in behavior would've caused
|
||||
/// `update_mmap` and [`update_reader`](Hasher::update_reader) to give different answers and
|
||||
/// have different side effects in some cases. Taking a
|
||||
/// [`Path`](https://doc.rust-lang.org/stable/std/path/struct.Path.html) avoids this problem by
|
||||
/// making it clear that a new [`File`](https://doc.rust-lang.org/std/fs/struct.File.html) is
|
||||
/// opened internally.
|
||||
///
|
||||
/// This method requires the `mmap` Cargo feature, which is disabled by default but enabled on
|
||||
/// [docs.rs](https://docs.rs).
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```no_run
|
||||
/// # use std::io;
|
||||
/// # use std::path::Path;
|
||||
/// # fn main() -> io::Result<()> {
|
||||
/// let path = Path::new("file.dat");
|
||||
/// let mut hasher = blake3::Hasher::new();
|
||||
/// hasher.update_mmap(path)?;
|
||||
/// println!("{}", hasher.finalize());
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "mmap")]
|
||||
pub fn update_mmap(&mut self, path: impl AsRef<std::path::Path>) -> std::io::Result<&mut Self> {
|
||||
let file = std::fs::File::open(path.as_ref())?;
|
||||
if let Some(mmap) = io::maybe_mmap_file(&file)? {
|
||||
self.update(&mmap);
|
||||
} else {
|
||||
io::copy_wide(&file, self)?;
|
||||
}
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// As [`update_rayon`](Hasher::update_rayon), but reading the contents of a file using
|
||||
/// memory mapping. This is the default behavior of `b3sum`.
|
||||
///
|
||||
/// For large files that are likely to be in cache, this can be much faster than
|
||||
/// single-threaded hashing. When benchmarks report that BLAKE3 is 10x or 20x faster than other
|
||||
/// cryptographic hashes, this is usually what they're measuring. However...
|
||||
///
|
||||
/// **Performance Warning:** There are cases where multithreading hurts performance. The worst
|
||||
/// case is [a large file on a spinning disk](https://github.com/BLAKE3-team/BLAKE3/issues/31),
|
||||
/// where simultaneous reads from multiple threads can cause "thrashing" (i.e. the disk spends
|
||||
/// more time seeking around than reading data). Windows tends to be somewhat worse about this,
|
||||
/// in part because it's less likely than Linux to keep very large files in cache. More
|
||||
/// generally, if your CPU cores are already busy, then multithreading will add overhead
|
||||
/// without improving performance. If your code runs in different environments that you don't
|
||||
/// control and can't measure, then unfortunately there's no one-size-fits-all answer for
|
||||
/// whether multithreading is a good idea.
|
||||
///
|
||||
/// The memory mapping behavior of this function is the same as
|
||||
/// [`update_mmap`](Hasher::update_mmap), and the heuristic for when to fall back to standard
|
||||
/// file IO might change at any time.
|
||||
///
|
||||
/// This method requires both the `mmap` and `rayon` Cargo features, which are disabled by
|
||||
/// default but enabled on [docs.rs](https://docs.rs).
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```no_run
|
||||
/// # use std::io;
|
||||
/// # use std::path::Path;
|
||||
/// # fn main() -> io::Result<()> {
|
||||
/// # #[cfg(feature = "rayon")]
|
||||
/// # {
|
||||
/// let path = Path::new("big_file.dat");
|
||||
/// let mut hasher = blake3::Hasher::new();
|
||||
/// hasher.update_mmap_rayon(path)?;
|
||||
/// println!("{}", hasher.finalize());
|
||||
/// # }
|
||||
/// # Ok(())
|
||||
/// # }
|
||||
/// ```
|
||||
#[cfg(feature = "mmap")]
|
||||
#[cfg(feature = "rayon")]
|
||||
pub fn update_mmap_rayon(
|
||||
&mut self,
|
||||
path: impl AsRef<std::path::Path>,
|
||||
) -> std::io::Result<&mut Self> {
|
||||
let file = std::fs::File::open(path.as_ref())?;
|
||||
if let Some(mmap) = io::maybe_mmap_file(&file)? {
|
||||
self.update_rayon(&mmap);
|
||||
} else {
|
||||
io::copy_wide(&file, self)?;
|
||||
}
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
// Don't derive(Debug), because the state may be secret.
|
||||
|
@ -1366,6 +1555,7 @@ impl std::io::Write for Hasher {
|
|||
/// from an unknown position in the output stream to recover its block index. Callers with strong
|
||||
/// secret keys aren't affected in practice, but secret offsets are a [design
|
||||
/// smell](https://en.wikipedia.org/wiki/Design_smell) in any case.
|
||||
#[cfg_attr(feature = "zeroize", derive(zeroize::Zeroize))]
|
||||
#[derive(Clone)]
|
||||
pub struct OutputReader {
|
||||
inner: Output,
|
||||
|
|
|
@ -56,6 +56,11 @@ pub enum Platform {
|
|||
impl Platform {
|
||||
#[allow(unreachable_code)]
|
||||
pub fn detect() -> Self {
|
||||
#[cfg(miri)]
|
||||
{
|
||||
return Platform::Portable;
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
{
|
||||
#[cfg(blake3_avx512_ffi)]
|
||||
|
@ -327,7 +332,12 @@ impl Platform {
|
|||
#[cfg(blake3_avx512_ffi)]
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[inline(always)]
|
||||
#[allow(unreachable_code)]
|
||||
pub fn avx512_detected() -> bool {
|
||||
if cfg!(miri) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// A testing-only short-circuit.
|
||||
if cfg!(feature = "no_avx512") {
|
||||
return false;
|
||||
|
@ -349,7 +359,12 @@ pub fn avx512_detected() -> bool {
|
|||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[inline(always)]
|
||||
#[allow(unreachable_code)]
|
||||
pub fn avx2_detected() -> bool {
|
||||
if cfg!(miri) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// A testing-only short-circuit.
|
||||
if cfg!(feature = "no_avx2") {
|
||||
return false;
|
||||
|
@ -371,7 +386,12 @@ pub fn avx2_detected() -> bool {
|
|||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
#[inline(always)]
|
||||
#[allow(unreachable_code)]
|
||||
pub fn sse41_detected() -> bool {
|
||||
if cfg!(miri) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// A testing-only short-circuit.
|
||||
if cfg!(feature = "no_sse41") {
|
||||
return false;
|
||||
|
@ -395,6 +415,10 @@ pub fn sse41_detected() -> bool {
|
|||
#[inline(always)]
|
||||
#[allow(unreachable_code)]
|
||||
pub fn sse2_detected() -> bool {
|
||||
if cfg!(miri) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// A testing-only short-circuit.
|
||||
if cfg!(feature = "no_sse2") {
|
||||
return false;
|
||||
|
|
208
src/test.rs
208
src/test.rs
|
@ -628,3 +628,211 @@ const fn test_hash_const_conversions() {
|
|||
let hash = crate::Hash::from_bytes(bytes);
|
||||
_ = hash.as_bytes();
|
||||
}
|
||||
|
||||
#[cfg(feature = "zeroize")]
|
||||
#[test]
|
||||
fn test_zeroize() {
|
||||
use zeroize::Zeroize;
|
||||
|
||||
let mut hash = crate::Hash([42; 32]);
|
||||
hash.zeroize();
|
||||
assert_eq!(hash.0, [0u8; 32]);
|
||||
|
||||
let mut hasher = crate::Hasher {
|
||||
chunk_state: crate::ChunkState {
|
||||
cv: [42; 8],
|
||||
chunk_counter: 42,
|
||||
buf: [42; 64],
|
||||
buf_len: 42,
|
||||
blocks_compressed: 42,
|
||||
flags: 42,
|
||||
platform: crate::Platform::Portable,
|
||||
},
|
||||
key: [42; 8],
|
||||
cv_stack: [[42; 32]; { crate::MAX_DEPTH + 1 }].into(),
|
||||
};
|
||||
hasher.zeroize();
|
||||
assert_eq!(hasher.chunk_state.cv, [0; 8]);
|
||||
assert_eq!(hasher.chunk_state.chunk_counter, 0);
|
||||
assert_eq!(hasher.chunk_state.buf, [0; 64]);
|
||||
assert_eq!(hasher.chunk_state.buf_len, 0);
|
||||
assert_eq!(hasher.chunk_state.blocks_compressed, 0);
|
||||
assert_eq!(hasher.chunk_state.flags, 0);
|
||||
assert!(matches!(
|
||||
hasher.chunk_state.platform,
|
||||
crate::Platform::Portable
|
||||
));
|
||||
assert_eq!(hasher.key, [0; 8]);
|
||||
assert_eq!(&*hasher.cv_stack, &[[0u8; 32]; 0]);
|
||||
|
||||
let mut output_reader = crate::OutputReader {
|
||||
inner: crate::Output {
|
||||
input_chaining_value: [42; 8],
|
||||
block: [42; 64],
|
||||
counter: 42,
|
||||
block_len: 42,
|
||||
flags: 42,
|
||||
platform: crate::Platform::Portable,
|
||||
},
|
||||
position_within_block: 42,
|
||||
};
|
||||
|
||||
output_reader.zeroize();
|
||||
assert_eq!(output_reader.inner.input_chaining_value, [0; 8]);
|
||||
assert_eq!(output_reader.inner.block, [0; 64]);
|
||||
assert_eq!(output_reader.inner.counter, 0);
|
||||
assert_eq!(output_reader.inner.block_len, 0);
|
||||
assert_eq!(output_reader.inner.flags, 0);
|
||||
assert!(matches!(
|
||||
output_reader.inner.platform,
|
||||
crate::Platform::Portable
|
||||
));
|
||||
assert_eq!(output_reader.position_within_block, 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "std")]
|
||||
fn test_update_reader() -> Result<(), std::io::Error> {
|
||||
// This is a brief test, since update_reader() is mostly a wrapper around update(), which already
|
||||
// has substantial testing.
|
||||
let mut input = vec![0; 1_000_000];
|
||||
paint_test_input(&mut input);
|
||||
assert_eq!(
|
||||
crate::Hasher::new().update_reader(&input[..])?.finalize(),
|
||||
crate::hash(&input),
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "std")]
|
||||
fn test_update_reader_interrupted() -> std::io::Result<()> {
|
||||
use std::io;
|
||||
struct InterruptingReader<'a> {
|
||||
already_interrupted: bool,
|
||||
slice: &'a [u8],
|
||||
}
|
||||
impl<'a> InterruptingReader<'a> {
|
||||
fn new(slice: &'a [u8]) -> Self {
|
||||
Self {
|
||||
already_interrupted: false,
|
||||
slice,
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<'a> io::Read for InterruptingReader<'a> {
|
||||
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
||||
if !self.already_interrupted {
|
||||
self.already_interrupted = true;
|
||||
return Err(io::Error::from(io::ErrorKind::Interrupted));
|
||||
}
|
||||
let take = std::cmp::min(self.slice.len(), buf.len());
|
||||
buf[..take].copy_from_slice(&self.slice[..take]);
|
||||
self.slice = &self.slice[take..];
|
||||
Ok(take)
|
||||
}
|
||||
}
|
||||
|
||||
let input = b"hello world";
|
||||
let mut reader = InterruptingReader::new(input);
|
||||
let mut hasher = crate::Hasher::new();
|
||||
hasher.update_reader(&mut reader)?;
|
||||
assert_eq!(hasher.finalize(), crate::hash(input));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "mmap")]
|
||||
// NamedTempFile isn't Miri-compatible
|
||||
#[cfg(not(miri))]
|
||||
fn test_mmap() -> Result<(), std::io::Error> {
|
||||
// This is a brief test, since update_mmap() is mostly a wrapper around update(), which already
|
||||
// has substantial testing.
|
||||
use std::io::prelude::*;
|
||||
let mut input = vec![0; 1_000_000];
|
||||
paint_test_input(&mut input);
|
||||
let mut tempfile = tempfile::NamedTempFile::new()?;
|
||||
tempfile.write_all(&input)?;
|
||||
tempfile.flush()?;
|
||||
assert_eq!(
|
||||
crate::Hasher::new()
|
||||
.update_mmap(tempfile.path())?
|
||||
.finalize(),
|
||||
crate::hash(&input),
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "mmap")]
|
||||
#[cfg(target_os = "linux")]
|
||||
fn test_mmap_virtual_file() -> Result<(), std::io::Error> {
|
||||
// Virtual files like /proc/version can't be mmapped, because their contents don't actually
|
||||
// exist anywhere in memory. Make sure we fall back to regular file IO in these cases.
|
||||
// Currently this is handled with a length check, where the assumption is that virtual files
|
||||
// will always report length 0. If that assumption ever breaks, hopefully this test will catch
|
||||
// it.
|
||||
let virtual_filepath = "/proc/version";
|
||||
let mut mmap_hasher = crate::Hasher::new();
|
||||
// We'll fail right here if the fallback doesn't work.
|
||||
mmap_hasher.update_mmap(virtual_filepath)?;
|
||||
let mut read_hasher = crate::Hasher::new();
|
||||
read_hasher.update_reader(std::fs::File::open(virtual_filepath)?)?;
|
||||
assert_eq!(mmap_hasher.finalize(), read_hasher.finalize());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "mmap")]
|
||||
#[cfg(feature = "rayon")]
|
||||
// NamedTempFile isn't Miri-compatible
|
||||
#[cfg(not(miri))]
|
||||
fn test_mmap_rayon() -> Result<(), std::io::Error> {
|
||||
// This is a brief test, since update_mmap_rayon() is mostly a wrapper around update_rayon(),
|
||||
// which already has substantial testing.
|
||||
use std::io::prelude::*;
|
||||
let mut input = vec![0; 1_000_000];
|
||||
paint_test_input(&mut input);
|
||||
let mut tempfile = tempfile::NamedTempFile::new()?;
|
||||
tempfile.write_all(&input)?;
|
||||
tempfile.flush()?;
|
||||
assert_eq!(
|
||||
crate::Hasher::new()
|
||||
.update_mmap_rayon(tempfile.path())?
|
||||
.finalize(),
|
||||
crate::hash(&input),
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "std")]
|
||||
#[cfg(feature = "serde")]
|
||||
fn test_serde() {
|
||||
let hash: crate::Hash = [7; 32].into();
|
||||
let json = serde_json::to_string(&hash).unwrap();
|
||||
assert_eq!(
|
||||
json,
|
||||
"[7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]",
|
||||
);
|
||||
let hash2: crate::Hash = serde_json::from_str(&json).unwrap();
|
||||
assert_eq!(hash, hash2);
|
||||
}
|
||||
|
||||
// `cargo +nightly miri test` currently works, but it takes forever, because some of our test
|
||||
// inputs are quite large. Most of our unsafe code is platform specific and incompatible with Miri
|
||||
// anyway, but we'd like it to be possible for callers to run their own tests under Miri, assuming
|
||||
// they don't use incompatible features like Rayon or mmap. This test should get reasonable
|
||||
// coverage of our public API without using any large inputs, so we can run it in CI and catch
|
||||
// obvious breaks. (For example, constant_time_eq is not compatible with Miri.)
|
||||
#[test]
|
||||
fn test_miri_smoketest() {
|
||||
let mut hasher = crate::Hasher::new_derive_key("Miri smoketest");
|
||||
hasher.update(b"foo");
|
||||
#[cfg(feature = "std")]
|
||||
hasher.update_reader(&b"bar"[..]).unwrap();
|
||||
assert_eq!(hasher.finalize(), hasher.finalize());
|
||||
let mut reader = hasher.finalize_xof();
|
||||
reader.set_position(999999);
|
||||
reader.fill(&mut [0]);
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
- Bump the version in the root Cargo.toml.
|
||||
- Bump the version in b3sum/Cargo.toml.
|
||||
- Delete b3sum/Cargo.lock and recreate it with `cargo build` or similar.
|
||||
- Update the `--help` output in b3sum/README.md if it's changed.
|
||||
- Update the `-h` output in b3sum/README.md if it's changed.
|
||||
- Bump `BLAKE3_VERSION_STRING` in c/blake3.h.
|
||||
- Bump `VERSION` in c/CMakeLists.txt.
|
||||
- Make a version bump commit with change notes.
|
||||
|
|
Loading…
Reference in New Issue