mirror of
https://github.com/BLAKE3-team/BLAKE3
synced 2024-04-19 20:44:11 +02:00
fix incorrect output / undefined behavior in Windows SSE2 assembly
The SSE2 patch introduced xmm10 as a temporary register for one of the rotations, but xmm6-xmm15 are callee-save registers on Windows, and SSE4.1 was only saving the registers it used. The minimal fix is to use one of the saved registers instead of xmm10. See https://github.com/BLAKE3-team/BLAKE3/issues/206.
This commit is contained in:
parent
04571021fb
commit
371b5483c9
|
@ -2137,10 +2137,10 @@ _blake3_compress_in_place_sse2:
|
|||
por xmm9, xmm8
|
||||
movdqa xmm8, xmm7
|
||||
punpcklqdq xmm8, xmm5
|
||||
movdqa xmm10, xmm6
|
||||
movdqa xmm14, xmm6
|
||||
pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
|
||||
pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
|
||||
por xmm8, xmm10
|
||||
pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip]
|
||||
por xmm8, xmm14
|
||||
pshufd xmm8, xmm8, 0x78
|
||||
punpckhdq xmm5, xmm7
|
||||
punpckldq xmm6, xmm5
|
||||
|
@ -2268,10 +2268,10 @@ blake3_compress_xof_sse2:
|
|||
por xmm9, xmm8
|
||||
movdqa xmm8, xmm7
|
||||
punpcklqdq xmm8, xmm5
|
||||
movdqa xmm10, xmm6
|
||||
movdqa xmm14, xmm6
|
||||
pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
|
||||
pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
|
||||
por xmm8, xmm10
|
||||
pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK+rip]
|
||||
por xmm8, xmm14
|
||||
pshufd xmm8, xmm8, 0x78
|
||||
punpckhdq xmm5, xmm7
|
||||
punpckldq xmm6, xmm5
|
||||
|
|
|
@ -2139,10 +2139,10 @@ _blake3_compress_in_place_sse2 PROC
|
|||
por xmm9, xmm8
|
||||
movdqa xmm8, xmm7
|
||||
punpcklqdq xmm8, xmm5
|
||||
movdqa xmm10, xmm6
|
||||
movdqa xmm14, xmm6
|
||||
pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
|
||||
pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK]
|
||||
por xmm8, xmm10
|
||||
pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK]
|
||||
por xmm8, xmm14
|
||||
pshufd xmm8, xmm8, 78H
|
||||
punpckhdq xmm5, xmm7
|
||||
punpckldq xmm6, xmm5
|
||||
|
@ -2271,10 +2271,10 @@ _blake3_compress_xof_sse2 PROC
|
|||
por xmm9, xmm8
|
||||
movdqa xmm8, xmm7
|
||||
punpcklqdq xmm8, xmm5
|
||||
movdqa xmm10, xmm6
|
||||
movdqa xmm14, xmm6
|
||||
pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK]
|
||||
pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK]
|
||||
por xmm8, xmm10
|
||||
pand xmm14, xmmword ptr [PBLENDW_0xC0_MASK]
|
||||
por xmm8, xmm14
|
||||
pshufd xmm8, xmm8, 78H
|
||||
punpckhdq xmm5, xmm7
|
||||
punpckldq xmm6, xmm5
|
||||
|
|
30
src/test.rs
30
src/test.rs
|
@ -564,3 +564,33 @@ fn test_hex_encoding_decoding() {
|
|||
#[cfg(feature = "std")]
|
||||
assert_eq!(_result.to_string(), "invalid hex character: 0x80");
|
||||
}
|
||||
|
||||
// This test is a mimized failure case for the Windows SSE2 bug described in
|
||||
// https://github.com/BLAKE3-team/BLAKE3/issues/206.
|
||||
//
|
||||
// Before that issue was fixed, this test would fail on Windows in the following configuration:
|
||||
//
|
||||
// cargo test --features=no_avx512,no_avx2,no_sse41 --release
|
||||
//
|
||||
// Bugs like this one (stomping on a caller's register) are very sensitive to the details of
|
||||
// surrounding code, so it's not especially likely that this test will catch another bug (or even
|
||||
// the same bug) in the future. Still, there's no harm in keeping it.
|
||||
#[test]
|
||||
fn test_issue_206_windows_sse2() {
|
||||
// This stupid loop has to be here to trigger the bug. I don't know why.
|
||||
for _ in &[0] {
|
||||
// The length 65 (two blocks) is significant. It doesn't repro with 64 (one block). It also
|
||||
// doesn't repro with an all-zero input.
|
||||
let input = &[0xff; 65];
|
||||
let expected_hash = [
|
||||
183, 235, 50, 217, 156, 24, 190, 219, 2, 216, 176, 255, 224, 53, 28, 95, 57, 148, 179,
|
||||
245, 162, 90, 37, 121, 0, 142, 219, 62, 234, 204, 225, 161,
|
||||
];
|
||||
|
||||
// This throwaway call has to be here to trigger the bug.
|
||||
crate::Hasher::new().update(input);
|
||||
|
||||
// This assert fails when the bug is triggered.
|
||||
assert_eq!(crate::Hasher::new().update(input).finalize(), expected_hash);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue