1
1
mirror of https://github.com/BLAKE2/BLAKE2 synced 2024-11-22 02:31:57 +01:00

Merge pull request #65 from noloader/power8

Add BLAKE2b for POWER8
This commit is contained in:
Samuel Neves 2020-06-29 22:53:30 +01:00 committed by GitHub
commit 8c6526f992
Signed by: GitHub
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 3238 additions and 0 deletions

38
power8/blake2-config.h Normal file

@ -0,0 +1,38 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef BLAKE2_CONFIG_H
#define BLAKE2_CONFIG_H
#if !defined(__ALTIVEC__)
# error "This code requires at least Altivec."
#endif
#if !defined(_ARCH_PWR8)
# error "This code requires at least POWER8."
#endif
/* https://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html */
#if (defined(__GNUC__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \
((defined(__xlc__) || defined(__xlC__)) && defined(__LITTLE_ENDIAN__))
# define NATIVE_LITTLE_ENDIAN 1
#endif
#if (defined(__GNUC__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)) || \
((defined(__xlc__) || defined(__xlC__)) && defined(__BIG_ENDIAN__))
# define NATIVE_BIG_ENDIAN 1
#endif
#endif

244
power8/blake2-impl.h Normal file

@ -0,0 +1,244 @@
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef BLAKE2_IMPL_H
#define BLAKE2_IMPL_H
#include <stdint.h>
#include <string.h>
#include <assert.h>
#include "blake2-config.h"
#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L)
#if defined(__GNUC__)
#define BLAKE2_INLINE __inline__
#else
#define BLAKE2_INLINE
#endif
#else
#define BLAKE2_INLINE inline
#endif
#if defined(__ALTIVEC__)
# include <altivec.h>
# undef vector
# undef pixel
# undef bool
#endif
/* The PowerPC ABI says loads are non-const. Ugh... */
#ifndef CONST_V8_CAST
#define CONST_V8_CAST(x) ((unsigned char*)(x))
#endif
#ifndef CONST_V32_CAST
#define CONST_V32_CAST(x) ((unsigned int*)(x))
#endif
#ifndef NCONST_V8_CAST
#define NCONST_V8_CAST(x) ((unsigned char*)(x))
#endif
#ifndef NCONST_V32_CAST
#define NCONST_V32_CAST(x) ((unsigned int*)(x))
#endif
#ifndef BLAKE2_UNUSED
#define BLAKE2_UNUSED(x) ((void)(x))
#endif
#if defined(__ALTIVEC__)
typedef __vector unsigned char uint8x16_p;
typedef __vector unsigned int uint32x4_p;
#if defined(__VSX__) || defined(_ARCH_PWR8)
typedef __vector unsigned long long uint64x2_p;
#endif
#endif
static BLAKE2_INLINE uint32_t load32( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
uint32_t w;
memcpy(&w, src, sizeof w);
return w;
#else
const uint8_t *p = ( const uint8_t * )src;
return (( uint32_t )( p[0] ) << 0) |
(( uint32_t )( p[1] ) << 8) |
(( uint32_t )( p[2] ) << 16) |
(( uint32_t )( p[3] ) << 24) ;
#endif
}
static BLAKE2_INLINE uint64_t load64( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
uint64_t w;
memcpy(&w, src, sizeof w);
return w;
#else
const uint8_t *p = ( const uint8_t * )src;
return (( uint64_t )( p[0] ) << 0) |
(( uint64_t )( p[1] ) << 8) |
(( uint64_t )( p[2] ) << 16) |
(( uint64_t )( p[3] ) << 24) |
(( uint64_t )( p[4] ) << 32) |
(( uint64_t )( p[5] ) << 40) |
(( uint64_t )( p[6] ) << 48) |
(( uint64_t )( p[7] ) << 56) ;
#endif
}
static BLAKE2_INLINE uint16_t load16( const void *src )
{
#if defined(NATIVE_LITTLE_ENDIAN)
uint16_t w;
memcpy(&w, src, sizeof w);
return w;
#else
const uint8_t *p = ( const uint8_t * )src;
return ( uint16_t )((( uint32_t )( p[0] ) << 0) |
(( uint32_t )( p[1] ) << 8));
#endif
}
static BLAKE2_INLINE void store16( void *dst, uint16_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
#else
uint8_t *p = ( uint8_t * )dst;
*p++ = ( uint8_t )w; w >>= 8;
*p++ = ( uint8_t )w;
#endif
}
static BLAKE2_INLINE void store32( void *dst, uint32_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
#else
uint8_t *p = ( uint8_t * )dst;
p[0] = (uint8_t)(w >> 0);
p[1] = (uint8_t)(w >> 8);
p[2] = (uint8_t)(w >> 16);
p[3] = (uint8_t)(w >> 24);
#endif
}
static BLAKE2_INLINE void store64( void *dst, uint64_t w )
{
#if defined(NATIVE_LITTLE_ENDIAN)
memcpy(dst, &w, sizeof w);
#else
uint8_t *p = ( uint8_t * )dst;
p[0] = (uint8_t)(w >> 0);
p[1] = (uint8_t)(w >> 8);
p[2] = (uint8_t)(w >> 16);
p[3] = (uint8_t)(w >> 24);
p[4] = (uint8_t)(w >> 32);
p[5] = (uint8_t)(w >> 40);
p[6] = (uint8_t)(w >> 48);
p[7] = (uint8_t)(w >> 56);
#endif
}
static BLAKE2_INLINE uint64_t load48( const void *src )
{
const uint8_t *p = ( const uint8_t * )src;
return (( uint64_t )( p[0] ) << 0) |
(( uint64_t )( p[1] ) << 8) |
(( uint64_t )( p[2] ) << 16) |
(( uint64_t )( p[3] ) << 24) |
(( uint64_t )( p[4] ) << 32) |
(( uint64_t )( p[5] ) << 40) ;
}
static BLAKE2_INLINE void store48( void *dst, uint64_t w )
{
uint8_t *p = ( uint8_t * )dst;
p[0] = (uint8_t)(w >> 0);
p[1] = (uint8_t)(w >> 8);
p[2] = (uint8_t)(w >> 16);
p[3] = (uint8_t)(w >> 24);
p[4] = (uint8_t)(w >> 32);
p[5] = (uint8_t)(w >> 40);
}
static BLAKE2_INLINE uint32_t rotr32( const uint32_t w, const unsigned c )
{
return ( w >> c ) | ( w << ( 32 - c ) );
}
static BLAKE2_INLINE uint64_t rotr64( const uint64_t w, const unsigned c )
{
return ( w >> c ) | ( w << ( 64 - c ) );
}
/* prevents compiler optimizing out memset() */
static BLAKE2_INLINE void secure_zero_memory(void *v, size_t n)
{
static void *(*const volatile memset_v)(void *, int, size_t) = &memset;
memset_v(v, 0, n);
}
static BLAKE2_INLINE uint64x2_p vec_load64( const void *src )
{
#if defined(_ARCH_PWR9)
assert((uintptr_t)src % 2 == 0);
return (uint64x2_p) vec_xl(0, CONST_V8_CAST(src))
#elif defined(__VSX__) || defined(_ARCH_PWR8)
assert((uintptr_t)src % 4 == 0);
return (uint64x2_p) vec_xl(0, CONST_V32_CAST(src));
#else
assert((uintptr_t)src % 16 == 0);
return (uint64x2_p) vec_ld(0, CONST_V8_CAST(src));
#endif
}
static BLAKE2_INLINE uint64x2_p vec_load64_le( const void *src, const uint8x16_p le_mask )
{
#if defined(NATIVE_BIG_ENDIAN)
const uint64x2_p v = vec_load64(src);
return vec_perm(v, v, le_mask);
#else
BLAKE2_UNUSED(le_mask);
return vec_load64(src);
#endif
}
static BLAKE2_INLINE void vec_store64( void *dst, uint64x2_p w )
{
#if defined(_ARCH_PWR9)
assert((uintptr_t)dst % 2 == 0);
vec_xst((uint8x16_p)w, 0, NCONST_V8_CAST(dst));
#elif defined(__VSX__) || defined(_ARCH_PWR8)
assert((uintptr_t)dst % 4 == 0);
vec_xst((uint32x4_p)w, 0, NCONST_V32_CAST(dst));
#else
assert((uintptr_t)dst % 16 == 0);
vec_st(w, 0, NCONST_V32_CAST(dst));
#endif
}
static BLAKE2_INLINE void vec_store64_le( void *dst, uint64x2_p w, const uint8x16_p le_mask)
{
#if defined(NATIVE_BIG_ENDIAN)
uint64x2_p v = vec_perm(w, w, le_mask);
vec_store64(dst, v);
#else
BLAKE2_UNUSED(le_mask);
vec_store64(dst, w);
#endif
}
#endif

191
power8/blake2.h Normal file

@ -0,0 +1,191 @@
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef BLAKE2_H
#define BLAKE2_H
#include <stddef.h>
#include <stdint.h>
#if defined(__cplusplus)
extern "C" {
#endif
#define BLAKE2_PACKED(x) x __attribute__((packed))
enum blake2s_constant
{
BLAKE2S_BLOCKBYTES = 64,
BLAKE2S_OUTBYTES = 32,
BLAKE2S_KEYBYTES = 32,
BLAKE2S_SALTBYTES = 8,
BLAKE2S_PERSONALBYTES = 8
};
enum blake2b_constant
{
BLAKE2B_BLOCKBYTES = 128,
BLAKE2B_OUTBYTES = 64,
BLAKE2B_KEYBYTES = 64,
BLAKE2B_SALTBYTES = 16,
BLAKE2B_PERSONALBYTES = 16
};
typedef struct blake2s_state__
{
uint32_t h[8];
uint32_t t[2];
uint32_t f[2];
uint8_t buf[BLAKE2S_BLOCKBYTES];
size_t buflen;
size_t outlen;
uint8_t last_node;
} blake2s_state;
typedef struct blake2b_state__
{
uint64_t h[8];
uint64_t t[2];
uint64_t f[2];
uint8_t buf[BLAKE2B_BLOCKBYTES];
size_t buflen;
size_t outlen;
uint8_t last_node;
} blake2b_state;
typedef struct blake2sp_state__
{
blake2s_state S[8][1];
blake2s_state R[1];
uint8_t buf[8 * BLAKE2S_BLOCKBYTES];
size_t buflen;
size_t outlen;
} blake2sp_state;
typedef struct blake2bp_state__
{
blake2b_state S[4][1];
blake2b_state R[1];
uint8_t buf[4 * BLAKE2B_BLOCKBYTES];
size_t buflen;
size_t outlen;
} blake2bp_state;
BLAKE2_PACKED(struct blake2s_param__
{
uint8_t digest_length; /* 1 */
uint8_t key_length; /* 2 */
uint8_t fanout; /* 3 */
uint8_t depth; /* 4 */
uint32_t leaf_length; /* 8 */
uint32_t node_offset; /* 12 */
uint16_t xof_length; /* 14 */
uint8_t node_depth; /* 15 */
uint8_t inner_length; /* 16 */
/* uint8_t reserved[0]; */
uint8_t salt[BLAKE2S_SALTBYTES]; /* 24 */
uint8_t personal[BLAKE2S_PERSONALBYTES]; /* 32 */
});
typedef struct blake2s_param__ blake2s_param;
BLAKE2_PACKED(struct blake2b_param__
{
uint8_t digest_length; /* 1 */
uint8_t key_length; /* 2 */
uint8_t fanout; /* 3 */
uint8_t depth; /* 4 */
uint32_t leaf_length; /* 8 */
uint32_t node_offset; /* 12 */
uint32_t xof_length; /* 16 */
uint8_t node_depth; /* 17 */
uint8_t inner_length; /* 18 */
uint8_t reserved[14]; /* 32 */
uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
});
typedef struct blake2b_param__ blake2b_param;
typedef struct blake2xs_state__
{
blake2s_state S[1];
blake2s_param P[1];
} blake2xs_state;
typedef struct blake2xb_state__
{
blake2b_state S[1];
blake2b_param P[1];
} blake2xb_state;
/* Padded structs result in a compile-time error */
enum {
BLAKE2_DUMMY_1 = 1/(sizeof(blake2s_param) == BLAKE2S_OUTBYTES),
BLAKE2_DUMMY_2 = 1/(sizeof(blake2b_param) == BLAKE2B_OUTBYTES)
};
/* Streaming API */
int blake2s_init( blake2s_state *S, size_t outlen );
int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen );
int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
int blake2s_update( blake2s_state *S, const void *in, size_t inlen );
int blake2s_final( blake2s_state *S, void *out, size_t outlen );
int blake2b_init( blake2b_state *S, size_t outlen );
int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen );
int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
int blake2b_update( blake2b_state *S, const void *in, size_t inlen );
int blake2b_final( blake2b_state *S, void *out, size_t outlen );
int blake2sp_init( blake2sp_state *S, size_t outlen );
int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen );
int blake2sp_update( blake2sp_state *S, const void *in, size_t inlen );
int blake2sp_final( blake2sp_state *S, void *out, size_t outlen );
int blake2bp_init( blake2bp_state *S, size_t outlen );
int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen );
int blake2bp_update( blake2bp_state *S, const void *in, size_t inlen );
int blake2bp_final( blake2bp_state *S, void *out, size_t outlen );
/* Variable output length API */
int blake2xs_init( blake2xs_state *S, const size_t outlen );
int blake2xs_init_key( blake2xs_state *S, const size_t outlen, const void *key, size_t keylen );
int blake2xs_update( blake2xs_state *S, const void *in, size_t inlen );
int blake2xs_final(blake2xs_state *S, void *out, size_t outlen);
int blake2xb_init( blake2xb_state *S, const size_t outlen );
int blake2xb_init_key( blake2xb_state *S, const size_t outlen, const void *key, size_t keylen );
int blake2xb_update( blake2xb_state *S, const void *in, size_t inlen );
int blake2xb_final(blake2xb_state *S, void *out, size_t outlen);
/* Simple API */
int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
int blake2xs( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
int blake2xb( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
/* This is simply an alias for blake2b */
int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen );
#if defined(__cplusplus)
}
#endif
#endif

316
power8/blake2b-load-pwr8.h Normal file

@ -0,0 +1,316 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef BLAKE2B_LOAD_SSE2_H
#define BLAKE2B_LOAD_SSE2_H
#define vec_merge_hi(a, b) vec_mergeh(a,b)
#define vec_merge_hi_lo(a, b) vec_mergeh(a,(uint64x2_p)vec_sld((uint8x16_p)b,(uint8x16_p)b,8))
#define vec_merge_lo(a, b) vec_mergel(a,b)
#if defined(NATIVE_BIG_ENDIAN)
# define vec_shl_8(a,b) (uint64x2_p)vec_sld((uint8x16_p)a, (uint8x16_p)b, 8);
#else
# define vec_shl_8(a,b) (uint64x2_p)vec_sld((uint8x16_p)b, (uint8x16_p)a, 16-8);
#endif
#define LOAD_MSG_0_1(b0, b1) \
do { \
b0 = vec_merge_hi(m0, m1); \
b1 = vec_merge_hi(m2, m3); \
} while(0)
#define LOAD_MSG_0_2(b0, b1) \
do { \
b0 = vec_merge_lo(m0, m1); \
b1 = vec_merge_lo(m2, m3); \
} while(0)
#define LOAD_MSG_0_3(b0, b1) \
do { \
b0 = vec_merge_hi(m4, m5); \
b1 = vec_merge_hi(m6, m7); \
} while(0)
#define LOAD_MSG_0_4(b0, b1) \
do { \
b0 = vec_merge_lo(m4, m5); \
b1 = vec_merge_lo(m6, m7); \
} while(0)
#define LOAD_MSG_1_1(b0, b1) \
do { \
b0 = vec_merge_hi(m7, m2); \
b1 = vec_merge_lo(m4, m6); \
} while(0)
#define LOAD_MSG_1_2(b0, b1) \
do { \
b0 = vec_merge_hi(m5, m4); \
b1 = vec_shl_8(m7, m3); \
} while(0)
#define LOAD_MSG_1_3(b0, b1) \
do { \
b0 = vec_shl_8(m0, m0); \
b1 = vec_merge_lo(m5, m2); \
} while(0)
#define LOAD_MSG_1_4(b0, b1) \
do { \
b0 = vec_merge_hi(m6, m1); \
b1 = vec_merge_lo(m3, m1); \
} while(0)
#define LOAD_MSG_2_1(b0, b1) \
do { \
b0 = vec_shl_8(m5, m6); \
b1 = vec_merge_lo(m2, m7); \
} while(0)
#define LOAD_MSG_2_2(b0, b1) \
do { \
b0 = vec_merge_hi(m4, m0); \
b1 = vec_merge_hi_lo(m1, m6); \
} while(0)
#define LOAD_MSG_2_3(b0, b1) \
do { \
b0 = vec_merge_hi_lo(m5, m1); \
b1 = vec_merge_lo(m3, m4); \
} while(0)
#define LOAD_MSG_2_4(b0, b1) \
do { \
b0 = vec_merge_hi(m7, m3); \
b1 = vec_shl_8(m0, m2); \
} while(0)
#define LOAD_MSG_3_1(b0, b1) \
do { \
b0 = vec_merge_lo(m3, m1); \
b1 = vec_merge_lo(m6, m5); \
} while(0)
#define LOAD_MSG_3_2(b0, b1) \
do { \
b0 = vec_merge_lo(m4, m0); \
b1 = vec_merge_hi(m6, m7); \
} while(0)
#define LOAD_MSG_3_3(b0, b1) \
do { \
b0 = vec_merge_hi_lo(m1, m2); \
b1 = vec_merge_hi_lo(m2, m7); \
} while(0)
#define LOAD_MSG_3_4(b0, b1) \
do { \
b0 = vec_merge_hi(m3, m5); \
b1 = vec_merge_hi(m0, m4); \
} while(0)
#define LOAD_MSG_4_1(b0, b1) \
do { \
b0 = vec_merge_lo(m4, m2); \
b1 = vec_merge_hi(m1, m5); \
} while(0)
#define LOAD_MSG_4_2(b0, b1) \
do { \
b0 = vec_merge_hi_lo(m0, m3); \
b1 = vec_merge_hi_lo(m2, m7); \
} while(0)
#define LOAD_MSG_4_3(b0, b1) \
do { \
b0 = vec_merge_hi_lo(m7, m5); \
b1 = vec_merge_hi_lo(m3, m1); \
} while(0)
#define LOAD_MSG_4_4(b0, b1) \
do { \
b0 = vec_shl_8(m0, m6); \
b1 = vec_merge_hi_lo(m4, m6); \
} while(0)
#define LOAD_MSG_5_1(b0, b1) \
do { \
b0 = vec_merge_hi(m1, m3); \
b1 = vec_merge_hi(m0, m4); \
} while(0)
#define LOAD_MSG_5_2(b0, b1) \
do { \
b0 = vec_merge_hi(m6, m5); \
b1 = vec_merge_lo(m5, m1); \
} while(0)
#define LOAD_MSG_5_3(b0, b1) \
do { \
b0 = vec_merge_hi_lo(m2, m3); \
b1 = vec_merge_lo(m7, m0); \
} while(0)
#define LOAD_MSG_5_4(b0, b1) \
do { \
b0 = vec_merge_lo(m6, m2); \
b1 = vec_merge_hi_lo(m7, m4); \
} while(0)
#define LOAD_MSG_6_1(b0, b1) \
do { \
b0 = vec_merge_hi_lo(m6, m0); \
b1 = vec_merge_hi(m7, m2); \
} while(0)
#define LOAD_MSG_6_2(b0, b1) \
do { \
b0 = vec_merge_lo(m2, m7); \
b1 = vec_shl_8(m6, m5); \
} while(0)
#define LOAD_MSG_6_3(b0, b1) \
do { \
b0 = vec_merge_hi(m0, m3); \
b1 = vec_shl_8(m4, m4); \
} while(0)
#define LOAD_MSG_6_4(b0, b1) \
do { \
b0 = vec_merge_lo(m3, m1); \
b1 = vec_merge_hi_lo(m1, m5); \
} while(0)
#define LOAD_MSG_7_1(b0, b1) \
do { \
b0 = vec_merge_lo(m6, m3); \
b1 = vec_merge_hi_lo(m6, m1); \
} while(0)
#define LOAD_MSG_7_2(b0, b1) \
do { \
b0 = vec_shl_8(m5, m7); \
b1 = vec_merge_lo(m0, m4); \
} while(0)
#define LOAD_MSG_7_3(b0, b1) \
do { \
b0 = vec_merge_lo(m2, m7); \
b1 = vec_merge_hi(m4, m1); \
} while(0)
#define LOAD_MSG_7_4(b0, b1) \
do { \
b0 = vec_merge_hi(m0, m2); \
b1 = vec_merge_hi(m3, m5); \
} while(0)
#define LOAD_MSG_8_1(b0, b1) \
do { \
b0 = vec_merge_hi(m3, m7); \
b1 = vec_shl_8(m5, m0); \
} while(0)
#define LOAD_MSG_8_2(b0, b1) \
do { \
b0 = vec_merge_lo(m7, m4); \
b1 = vec_shl_8(m1, m4); \
} while(0)
#define LOAD_MSG_8_3(b0, b1) \
do { \
b0 = m6; \
b1 = vec_shl_8(m0, m5); \
} while(0)
#define LOAD_MSG_8_4(b0, b1) \
do { \
b0 = vec_merge_hi_lo(m1, m3); \
b1 = m2; \
} while(0)
#define LOAD_MSG_9_1(b0, b1) \
do { \
b0 = vec_merge_hi(m5, m4); \
b1 = vec_merge_lo(m3, m0); \
} while(0)
#define LOAD_MSG_9_2(b0, b1) \
do { \
b0 = vec_merge_hi(m1, m2); \
b1 = vec_merge_hi_lo(m3, m2); \
} while(0)
#define LOAD_MSG_9_3(b0, b1) \
do { \
b0 = vec_merge_lo(m7, m4); \
b1 = vec_merge_lo(m1, m6); \
} while(0)
#define LOAD_MSG_9_4(b0, b1) \
do { \
b0 = vec_shl_8(m5, m7); \
b1 = vec_merge_hi(m6, m0); \
} while(0)
#define LOAD_MSG_10_1(b0, b1) \
do { \
b0 = vec_merge_hi(m0, m1); \
b1 = vec_merge_hi(m2, m3); \
} while(0)
#define LOAD_MSG_10_2(b0, b1) \
do { \
b0 = vec_merge_lo(m0, m1); \
b1 = vec_merge_lo(m2, m3); \
} while(0)
#define LOAD_MSG_10_3(b0, b1) \
do { \
b0 = vec_merge_hi(m4, m5); \
b1 = vec_merge_hi(m6, m7); \
} while(0)
#define LOAD_MSG_10_4(b0, b1) \
do { \
b0 = vec_merge_lo(m4, m5); \
b1 = vec_merge_lo(m6, m7); \
} while(0)
#define LOAD_MSG_11_1(b0, b1) \
do { \
b0 = vec_merge_hi(m7, m2); \
b1 = vec_merge_lo(m4, m6); \
} while(0)
#define LOAD_MSG_11_2(b0, b1) \
do { \
b0 = vec_merge_hi(m5, m4); \
b1 = vec_shl_8(m7, m3); \
} while(0)
#define LOAD_MSG_11_3(b0, b1) \
do { \
b0 = vec_shl_8(m0, m0); \
b1 = vec_merge_lo(m5, m2); \
} while(0)
#define LOAD_MSG_11_4(b0, b1) \
do { \
b0 = vec_merge_hi(m6, m1); \
b1 = vec_merge_lo(m3, m1); \
} while(0)
#endif

108
power8/blake2b-round.h Normal file

@ -0,0 +1,108 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#ifndef ROUND_H
#define ROUND_H
#define LIKELY(x) __builtin_expect((x),1)
#define vec_ror_16(x) vec_rl(x, mask_ror_16)
#define vec_ror_24(x) vec_rl(x, mask_ror_24)
#define vec_ror_32(x) vec_rl(x, mask_ror_32)
#define vec_ror_63(x) vec_rl(x, mask_ror_63)
#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
do { \
row1l = vec_add(vec_add(row1l, b0), row2l); \
row1h = vec_add(vec_add(row1h, b1), row2h); \
row4l = vec_xor(row4l, row1l); \
row4h = vec_xor(row4h, row1h); \
row4l = vec_ror_32(row4l); \
row4h = vec_ror_32(row4h); \
row3l = vec_add(row3l, row4l); \
row3h = vec_add(row3h, row4h); \
row2l = vec_xor(row2l, row3l); \
row2h = vec_xor(row2h, row3h); \
row2l = vec_ror_24(row2l); \
row2h = vec_ror_24(row2h); \
} while(0)
#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
do { \
row1l = vec_add(vec_add(row1l, b0), row2l); \
row1h = vec_add(vec_add(row1h, b1), row2h); \
row4l = vec_xor(row4l, row1l); \
row4h = vec_xor(row4h, row1h); \
row4l = vec_ror_16(row4l); \
row4h = vec_ror_16(row4h); \
row3l = vec_add(row3l, row4l); \
row3h = vec_add(row3h, row4h); \
row2l = vec_xor(row2l, row3l); \
row2h = vec_xor(row2h, row3h); \
row2l = vec_ror_63(row2l); \
row2h = vec_ror_63(row2h); \
} while(0)
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
do { \
uint64x2_p t0, t1; \
t0 = vec_shl_8(row2l, row2h); \
t1 = vec_shl_8(row2h, row2l); \
row2l = t0; \
row2h = t1; \
t0 = row3l; \
row3l = row3h; \
row3h = t0; \
t0 = vec_shl_8(row4h, row4l); \
t1 = vec_shl_8(row4l, row4h); \
row4l = t0; \
row4h = t1; \
} while(0)
#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
do { \
uint64x2_p t0, t1; \
t0 = vec_shl_8(row2h, row2l); \
t1 = vec_shl_8(row2l, row2h); \
row2l = t0; \
row2h = t1; \
t0 = row3l; \
row3l = row3h; row3h = t0; \
t0 = vec_shl_8(row4l, row4h); \
t1 = vec_shl_8(row4h, row4l); \
row4l = t0; \
row4h = t1; \
} while(0)
#if defined(__VSX__) || defined(_ARCH_PWR8)
#include "blake2b-load-pwr8.h"
#endif
#define ROUND(r) \
do { \
uint64x2_p b0, b1; \
LOAD_MSG_ ##r ##_1(b0, b1); \
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
LOAD_MSG_ ##r ##_2(b0, b1); \
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
LOAD_MSG_ ##r ##_3(b0, b1); \
G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
LOAD_MSG_ ##r ##_4(b0, b1); \
G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
} while(0)
#endif

430
power8/blake2b.c Normal file

@ -0,0 +1,430 @@
/*
BLAKE2 reference source code package - optimized C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2.h"
#include "blake2-impl.h"
#include "blake2b-round.h"
#if defined(__GNUC__)
/* Ignore "warning: vec_lvsl is deprecated..." */
# pragma GCC diagnostic ignored "-Wdeprecated"
#endif
static const uint64_t blake2b_IV[8] =
{
0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};
/* Some helper functions */
static void blake2b_set_lastnode( blake2b_state *S )
{
S->f[1] = (uint64_t)-1;
}
static int blake2b_is_lastblock( const blake2b_state *S )
{
return S->f[0] != 0;
}
static void blake2b_set_lastblock( blake2b_state *S )
{
if( S->last_node ) blake2b_set_lastnode( S );
S->f[0] = (uint64_t)-1;
}
static void blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
{
S->t[0] += inc;
S->t[1] += ( S->t[0] < inc );
}
/* init xors IV with input parameter block */
int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
{
const uint8_t *p = ( const uint8_t * )( P );
size_t i;
memset( S, 0, sizeof( blake2b_state ) );
/* IV XOR ParamBlock */
for( i = 0; i < 8; ++i )
S->h[i] = load64( p + sizeof( S->h[i] ) * i ) ^ blake2b_IV[i];
#if defined(NATIVE_BIG_ENDIAN)
for( i = 0; i < 8; ++i )
S->h[i] = __builtin_bswap64(S->h[i]);
#endif
S->outlen = P->digest_length;
return 0;
}
/* Some sort of default parameter block initialization, for sequential blake2b */
int blake2b_init( blake2b_state *S, size_t outlen )
{
blake2b_param P[1];
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
P->digest_length = (uint8_t)outlen;
P->key_length = 0;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store32( &P->node_offset, 0 );
store32( &P->xof_length, 0 );
P->node_depth = 0;
P->inner_length = 0;
memset( P->reserved, 0, sizeof( P->reserved ) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return blake2b_init_param( S, P );
}
int blake2b_init_key( blake2b_state *S, size_t outlen, const void *key, size_t keylen )
{
blake2b_param P[1];
if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;
if ( ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1;
P->digest_length = (uint8_t)outlen;
P->key_length = (uint8_t)keylen;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store32( &P->node_offset, 0 );
store32( &P->xof_length, 0 );
P->node_depth = 0;
P->inner_length = 0;
memset( P->reserved, 0, sizeof( P->reserved ) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
if( blake2b_init_param( S, P ) < 0 )
return 0;
{
uint8_t block[BLAKE2B_BLOCKBYTES];
memset( block, 0, BLAKE2B_BLOCKBYTES );
memcpy( block, key, keylen );
blake2b_update( S, block, BLAKE2B_BLOCKBYTES );
secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
}
return 0;
}
static void blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
{
/* Possibly unaligned user messages */
uint64x2_p m0, m1, m2, m3, m4, m5, m6, m7;
/* State variables */
uint64x2_p row1l, row1h;
uint64x2_p row2l, row2h;
uint64x2_p row3l, row3h;
uint64x2_p row4l, row4h;
uint64x2_p h0, h2, h4, h6;
/* Alignment check for message buffer load */
const uintptr_t addr = (uintptr_t)block;
/* Masks used for right rotates */
const uint64x2_p mask_ror_16 = {64-16, 64-16};
const uint64x2_p mask_ror_24 = {64-24, 64-24};
const uint64x2_p mask_ror_32 = {64-32, 64-32};
const uint64x2_p mask_ror_63 = {64-63, 64-63};
#if defined(NATIVE_BIG_ENDIAN)
/* Mask to permute between big-endian and little-endian dword arrays */
const uint8x16_p mask_le = {7,6,5,4, 3,2,1,0, 15,14,13,12, 11,10,9,8};
#endif
#if defined(_ARCH_PWR9)
/* POWER9 provides loads for char's and short's */
m0 = (uint64x2_p) vec_xl( 0, CONST_V8_CAST( block ));
m1 = (uint64x2_p) vec_xl( 16, CONST_V8_CAST( block ));
m2 = (uint64x2_p) vec_xl( 32, CONST_V8_CAST( block ));
m3 = (uint64x2_p) vec_xl( 48, CONST_V8_CAST( block ));
m4 = (uint64x2_p) vec_xl( 64, CONST_V8_CAST( block ));
m5 = (uint64x2_p) vec_xl( 80, CONST_V8_CAST( block ));
m6 = (uint64x2_p) vec_xl( 96, CONST_V8_CAST( block ));
m7 = (uint64x2_p) vec_xl(112, CONST_V8_CAST( block ));
#else
/* Altivec only provides 16-byte aligned loads. Low-order address bits */
/* are masked. Linux provides 16-byte aligned buffers, AIX typically */
/* provides 4-byte aligned buffers. */
/* http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf */
const size_t off = addr%16;
m0 = (uint64x2_p) vec_ld( 0, CONST_V8_CAST( block ));
m1 = (uint64x2_p) vec_ld( 16, CONST_V8_CAST( block ));
m2 = (uint64x2_p) vec_ld( 32, CONST_V8_CAST( block ));
m3 = (uint64x2_p) vec_ld( 48, CONST_V8_CAST( block ));
m4 = (uint64x2_p) vec_ld( 64, CONST_V8_CAST( block ));
m5 = (uint64x2_p) vec_ld( 80, CONST_V8_CAST( block ));
m6 = (uint64x2_p) vec_ld( 96, CONST_V8_CAST( block ));
m7 = (uint64x2_p) vec_ld(112, CONST_V8_CAST( block ));
if (off != 0)
{
/* User buffer not 16-byte aligned. Fix the vectors spread across loads */
/* http://mirror.informatimago.com/next/developer.apple.com/ */
/* hardwaredrivers/ve/code_optimization.html */
uint64x2_p ex; uint8x16_p perm;
ex = (uint64x2_p) vec_ld(112+15, CONST_V8_CAST( block ));
perm = vec_lvsl(0, CONST_V8_CAST( addr ));
m0 = vec_perm(m0, m1, perm);
m1 = vec_perm(m1, m2, perm);
m2 = vec_perm(m2, m3, perm);
m3 = vec_perm(m3, m4, perm);
m4 = vec_perm(m4, m5, perm);
m5 = vec_perm(m5, m6, perm);
m6 = vec_perm(m6, m7, perm);
m7 = vec_perm(m7, ex, perm);
}
#endif
#if defined(NATIVE_BIG_ENDIAN)
m0 = vec_perm(m0, m0, mask_le);
m1 = vec_perm(m1, m1, mask_le);
m2 = vec_perm(m2, m2, mask_le);
m3 = vec_perm(m3, m3, mask_le);
m4 = vec_perm(m4, m4, mask_le);
m5 = vec_perm(m5, m5, mask_le);
m6 = vec_perm(m6, m6, mask_le);
m7 = vec_perm(m7, m7, mask_le);
#endif
#if defined(NATIVE_BIG_ENDIAN)
h0 = row1l = vec_load64_le( &S->h[0], mask_le);
h2 = row1h = vec_load64_le( &S->h[2], mask_le);
h4 = row2l = vec_load64_le( &S->h[4], mask_le);
h6 = row2h = vec_load64_le( &S->h[6], mask_le);
#else
h0 = row1l = vec_load64( &S->h[0] );
h2 = row1h = vec_load64( &S->h[2] );
h4 = row2l = vec_load64( &S->h[4] );
h6 = row2h = vec_load64( &S->h[6] );
#endif
row3l = vec_load64( &blake2b_IV[0] );
row3h = vec_load64( &blake2b_IV[2] );
row4l = vec_xor( vec_load64( &blake2b_IV[4] ), vec_load64( &S->t[0] ) );
row4h = vec_xor( vec_load64( &blake2b_IV[6] ), vec_load64( &S->f[0] ) );
ROUND( 0 );
ROUND( 1 );
ROUND( 2 );
ROUND( 3 );
ROUND( 4 );
ROUND( 5 );
ROUND( 6 );
ROUND( 7 );
ROUND( 8 );
ROUND( 9 );
ROUND( 10 );
ROUND( 11 );
row1l = vec_xor( row3l, row1l );
row1h = vec_xor( row3h, row1h );
#if defined(NATIVE_BIG_ENDIAN)
vec_store64_le( &S->h[0], vec_xor( h0, row1l ), mask_le );
vec_store64_le( &S->h[2], vec_xor( h2, row1h ), mask_le );
#else
vec_store64( &S->h[0], vec_xor( h0, row1l ) );
vec_store64( &S->h[2], vec_xor( h2, row1h ) );
#endif
row2l = vec_xor( row4l, row2l );
row2h = vec_xor( row4h, row2h );
#if defined(NATIVE_BIG_ENDIAN)
vec_store64_le( &S->h[4], vec_xor( h4, row2l ), mask_le );
vec_store64_le( &S->h[6], vec_xor( h6, row2h ), mask_le );
#else
vec_store64( &S->h[4], vec_xor( h4, row2l ) );
vec_store64( &S->h[6], vec_xor( h6, row2h ) );
#endif
}
int blake2b_update( blake2b_state *S, const void *pin, size_t inlen )
{
const unsigned char * in = (const unsigned char *)pin;
if( inlen > 0 )
{
size_t left = S->buflen;
size_t fill = BLAKE2B_BLOCKBYTES - left;
if( inlen > fill )
{
S->buflen = 0;
memcpy( S->buf + left, in, fill ); /* Fill buffer */
blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
blake2b_compress( S, S->buf ); /* Compress */
in += fill; inlen -= fill;
while(inlen > BLAKE2B_BLOCKBYTES) {
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
blake2b_compress( S, in );
in += BLAKE2B_BLOCKBYTES;
inlen -= BLAKE2B_BLOCKBYTES;
}
}
memcpy( S->buf + S->buflen, in, inlen );
S->buflen += inlen;
}
return 0;
}
int blake2b_final( blake2b_state *S, void *out, size_t outlen )
{
if( out == NULL || outlen < S->outlen )
return -1;
if( blake2b_is_lastblock( S ) )
return -1;
blake2b_increment_counter( S, S->buflen );
blake2b_set_lastblock( S );
memset( S->buf + S->buflen, 0, BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
blake2b_compress( S, S->buf );
memcpy( out, &S->h[0], S->outlen );
return 0;
}
int blake2b( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
{
blake2b_state S[1];
/* Verify parameters */
if ( NULL == in && inlen > 0 ) return -1;
if ( NULL == out ) return -1;
if( NULL == key && keylen > 0 ) return -1;
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
if( keylen > BLAKE2B_KEYBYTES ) return -1;
if( keylen )
{
if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1;
}
else
{
if( blake2b_init( S, outlen ) < 0 ) return -1;
}
blake2b_update( S, ( const uint8_t * )in, inlen );
blake2b_final( S, out, outlen );
return 0;
}
int blake2( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen ) {
return blake2b(out, outlen, in, inlen, key, keylen);
}
#if defined(SUPERCOP)
int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
{
return blake2b( out, BLAKE2B_OUTBYTES, in, inlen, NULL, 0 );
}
#endif
#if defined(BLAKE2B_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
int main( void )
{
uint8_t key[BLAKE2B_KEYBYTES];
uint8_t buf[BLAKE2_KAT_LENGTH];
size_t i, step;
for( i = 0; i < BLAKE2B_KEYBYTES; ++i )
key[i] = ( uint8_t )i;
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
buf[i] = ( uint8_t )i;
/* Test simple API */
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
{
uint8_t hash[BLAKE2B_OUTBYTES];
blake2b( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES );
if( 0 != memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) )
{
goto fail;
}
}
/* Test streaming API */
for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) {
for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
uint8_t hash[BLAKE2B_OUTBYTES];
blake2b_state S;
uint8_t * p = buf;
size_t mlen = i;
int err = 0;
if( (err = blake2b_init_key(&S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES)) < 0 ) {
goto fail;
}
while (mlen >= step) {
if ( (err = blake2b_update(&S, p, step)) < 0 ) {
goto fail;
}
mlen -= step;
p += step;
}
if ( (err = blake2b_update(&S, p, mlen)) < 0) {
goto fail;
}
if ( (err = blake2b_final(&S, hash, BLAKE2B_OUTBYTES)) < 0) {
goto fail;
}
if (0 != memcmp(hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES)) {
goto fail;
}
}
}
puts( "ok" );
return 0;
fail:
puts("error");
return -1;
}
#endif

359
power8/blake2bp.c Normal file

@ -0,0 +1,359 @@
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#if defined(_OPENMP)
#include <omp.h>
#endif
#include "blake2.h"
#include "blake2-impl.h"
#define PARALLELISM_DEGREE 4
/*
blake2b_init_param defaults to setting the expecting output length
from the digest_length parameter block field.
In some cases, however, we do not want this, as the output length
of these instances is given by inner_length instead.
*/
static int blake2bp_init_leaf_param( blake2b_state *S, const blake2b_param *P )
{
int err = blake2b_init_param(S, P);
S->outlen = P->inner_length;
return err;
}
static int blake2bp_init_leaf( blake2b_state *S, size_t outlen, size_t keylen, uint64_t offset )
{
blake2b_param P[1];
P->digest_length = (uint8_t)outlen;
P->key_length = (uint8_t)keylen;
P->fanout = PARALLELISM_DEGREE;
P->depth = 2;
store32( &P->leaf_length, 0 );
store32( &P->node_offset, offset );
store32( &P->xof_length, 0 );
P->node_depth = 0;
P->inner_length = BLAKE2B_OUTBYTES;
memset( P->reserved, 0, sizeof( P->reserved ) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return blake2bp_init_leaf_param( S, P );
}
static int blake2bp_init_root( blake2b_state *S, size_t outlen, size_t keylen )
{
blake2b_param P[1];
P->digest_length = (uint8_t)outlen;
P->key_length = (uint8_t)keylen;
P->fanout = PARALLELISM_DEGREE;
P->depth = 2;
store32( &P->leaf_length, 0 );
store32( &P->node_offset, 0 );
store32( &P->xof_length, 0 );
P->node_depth = 1;
P->inner_length = BLAKE2B_OUTBYTES;
memset( P->reserved, 0, sizeof( P->reserved ) );
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return blake2b_init_param( S, P );
}
int blake2bp_init( blake2bp_state *S, size_t outlen )
{
size_t i;
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
memset( S->buf, 0, sizeof( S->buf ) );
S->buflen = 0;
S->outlen = outlen;
if( blake2bp_init_root( S->R, outlen, 0 ) < 0 )
return -1;
for( i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2bp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1;
S->R->last_node = 1;
S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
return 0;
}
int blake2bp_init_key( blake2bp_state *S, size_t outlen, const void *key, size_t keylen )
{
size_t i;
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
if( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1;
memset( S->buf, 0, sizeof( S->buf ) );
S->buflen = 0;
S->outlen = outlen;
if( blake2bp_init_root( S->R, outlen, keylen ) < 0 )
return -1;
for( i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2bp_init_leaf( S->S[i], outlen, keylen, i ) < 0 ) return -1;
S->R->last_node = 1;
S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
{
uint8_t block[BLAKE2B_BLOCKBYTES];
memset( block, 0, BLAKE2B_BLOCKBYTES );
memcpy( block, key, keylen );
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2b_update( S->S[i], block, BLAKE2B_BLOCKBYTES );
secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
}
return 0;
}
int blake2bp_update( blake2bp_state *S, const void *pin, size_t inlen )
{
const unsigned char * in = (const unsigned char *)pin;
size_t left = S->buflen;
size_t fill = sizeof( S->buf ) - left;
size_t i;
if( left && inlen >= fill )
{
memcpy( S->buf + left, in, fill );
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2b_update( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES );
in += fill;
inlen -= fill;
left = 0;
}
#if defined(_OPENMP)
#pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE)
#else
for( i = 0; i < PARALLELISM_DEGREE; ++i )
#endif
{
#if defined(_OPENMP)
size_t i = omp_get_thread_num();
#endif
size_t inlen__ = inlen;
const unsigned char *in__ = ( const unsigned char * )in;
in__ += i * BLAKE2B_BLOCKBYTES;
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES )
{
blake2b_update( S->S[i], in__, BLAKE2B_BLOCKBYTES );
in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
}
}
in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES );
inlen %= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
if( inlen > 0 )
memcpy( S->buf + left, in, inlen );
S->buflen = left + inlen;
return 0;
}
int blake2bp_final( blake2bp_state *S, void *out, size_t outlen )
{
uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES];
size_t i;
if(out == NULL || outlen < S->outlen) {
return -1;
}
for( i = 0; i < PARALLELISM_DEGREE; ++i )
{
if( S->buflen > i * BLAKE2B_BLOCKBYTES )
{
size_t left = S->buflen - i * BLAKE2B_BLOCKBYTES;
if( left > BLAKE2B_BLOCKBYTES ) left = BLAKE2B_BLOCKBYTES;
blake2b_update( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, left );
}
blake2b_final( S->S[i], hash[i], BLAKE2B_OUTBYTES );
}
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2b_update( S->R, hash[i], BLAKE2B_OUTBYTES );
return blake2b_final( S->R, out, S->outlen );
}
int blake2bp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
{
uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES];
blake2b_state S[PARALLELISM_DEGREE][1];
blake2b_state FS[1];
size_t i;
/* Verify parameters */
if ( NULL == in && inlen > 0 ) return -1;
if ( NULL == out ) return -1;
if( NULL == key && keylen > 0 ) return -1;
if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;
if( keylen > BLAKE2B_KEYBYTES ) return -1;
for( i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2bp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1;
S[PARALLELISM_DEGREE - 1]->last_node = 1; /* mark last node */
if( keylen > 0 )
{
uint8_t block[BLAKE2B_BLOCKBYTES];
memset( block, 0, BLAKE2B_BLOCKBYTES );
memcpy( block, key, keylen );
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2b_update( S[i], block, BLAKE2B_BLOCKBYTES );
secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
}
#if defined(_OPENMP)
#pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE)
#else
for( i = 0; i < PARALLELISM_DEGREE; ++i )
#endif
{
#if defined(_OPENMP)
size_t i = omp_get_thread_num();
#endif
size_t inlen__ = inlen;
const unsigned char *in__ = ( const unsigned char * )in;
in__ += i * BLAKE2B_BLOCKBYTES;
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES )
{
blake2b_update( S[i], in__, BLAKE2B_BLOCKBYTES );
in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
}
if( inlen__ > i * BLAKE2B_BLOCKBYTES )
{
const size_t left = inlen__ - i * BLAKE2B_BLOCKBYTES;
const size_t len = left <= BLAKE2B_BLOCKBYTES ? left : BLAKE2B_BLOCKBYTES;
blake2b_update( S[i], in__, len );
}
blake2b_final( S[i], hash[i], BLAKE2B_OUTBYTES );
}
if( blake2bp_init_root( FS, outlen, keylen ) < 0 )
return -1;
FS->last_node = 1; /* Mark as last node */
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2b_update( FS, hash[i], BLAKE2B_OUTBYTES );
return blake2b_final( FS, out, outlen );;
}
#if defined(BLAKE2BP_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
int main( void )
{
uint8_t key[BLAKE2B_KEYBYTES];
uint8_t buf[BLAKE2_KAT_LENGTH];
size_t i, step;
for( i = 0; i < BLAKE2B_KEYBYTES; ++i )
key[i] = ( uint8_t )i;
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
buf[i] = ( uint8_t )i;
/* Test simple API */
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
{
uint8_t hash[BLAKE2B_OUTBYTES];
blake2bp( hash, BLAKE2B_OUTBYTES, buf, i, key, BLAKE2B_KEYBYTES );
if( 0 != memcmp( hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES ) )
{
goto fail;
}
}
/* Test streaming API */
for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) {
for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
uint8_t hash[BLAKE2B_OUTBYTES];
blake2bp_state S;
uint8_t * p = buf;
size_t mlen = i;
int err = 0;
if( (err = blake2bp_init_key(&S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES)) < 0 ) {
goto fail;
}
while (mlen >= step) {
if ( (err = blake2bp_update(&S, p, step)) < 0 ) {
goto fail;
}
mlen -= step;
p += step;
}
if ( (err = blake2bp_update(&S, p, mlen)) < 0) {
goto fail;
}
if ( (err = blake2bp_final(&S, hash, BLAKE2B_OUTBYTES)) < 0) {
goto fail;
}
if (0 != memcmp(hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES)) {
goto fail;
}
}
}
puts( "ok" );
return 0;
fail:
puts("error");
return -1;
}
#endif

367
power8/blake2s.c Normal file

@ -0,0 +1,367 @@
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2.h"
#include "blake2-impl.h"
static const uint32_t blake2s_IV[8] =
{
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
static const uint8_t blake2s_sigma[10][16] =
{
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
};
static void blake2s_set_lastnode( blake2s_state *S )
{
S->f[1] = (uint32_t)-1;
}
/* Some helper functions, not necessarily useful */
static int blake2s_is_lastblock( const blake2s_state *S )
{
return S->f[0] != 0;
}
static void blake2s_set_lastblock( blake2s_state *S )
{
if( S->last_node ) blake2s_set_lastnode( S );
S->f[0] = (uint32_t)-1;
}
static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
{
S->t[0] += inc;
S->t[1] += ( S->t[0] < inc );
}
static void blake2s_init0( blake2s_state *S )
{
size_t i;
memset( S, 0, sizeof( blake2s_state ) );
for( i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i];
}
/* init2 xors IV with input parameter block */
int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
{
const unsigned char *p = ( const unsigned char * )( P );
size_t i;
blake2s_init0( S );
/* IV XOR ParamBlock */
for( i = 0; i < 8; ++i )
S->h[i] ^= load32( &p[i * 4] );
S->outlen = P->digest_length;
return 0;
}
/* Sequential blake2s initialization */
int blake2s_init( blake2s_state *S, size_t outlen )
{
blake2s_param P[1];
/* Move interval verification here? */
if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
P->digest_length = (uint8_t)outlen;
P->key_length = 0;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store32( &P->node_offset, 0 );
store16( &P->xof_length, 0 );
P->node_depth = 0;
P->inner_length = 0;
/* memset(P->reserved, 0, sizeof(P->reserved) ); */
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return blake2s_init_param( S, P );
}
int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen )
{
blake2s_param P[1];
if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
if ( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1;
P->digest_length = (uint8_t)outlen;
P->key_length = (uint8_t)keylen;
P->fanout = 1;
P->depth = 1;
store32( &P->leaf_length, 0 );
store32( &P->node_offset, 0 );
store16( &P->xof_length, 0 );
P->node_depth = 0;
P->inner_length = 0;
/* memset(P->reserved, 0, sizeof(P->reserved) ); */
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
if( blake2s_init_param( S, P ) < 0 ) return -1;
{
uint8_t block[BLAKE2S_BLOCKBYTES];
memset( block, 0, BLAKE2S_BLOCKBYTES );
memcpy( block, key, keylen );
blake2s_update( S, block, BLAKE2S_BLOCKBYTES );
secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
}
return 0;
}
#define G(r,i,a,b,c,d) \
do { \
a = a + b + m[blake2s_sigma[r][2*i+0]]; \
d = rotr32(d ^ a, 16); \
c = c + d; \
b = rotr32(b ^ c, 12); \
a = a + b + m[blake2s_sigma[r][2*i+1]]; \
d = rotr32(d ^ a, 8); \
c = c + d; \
b = rotr32(b ^ c, 7); \
} while(0)
#define ROUND(r) \
do { \
G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
G(r,2,v[ 2],v[ 6],v[10],v[14]); \
G(r,3,v[ 3],v[ 7],v[11],v[15]); \
G(r,4,v[ 0],v[ 5],v[10],v[15]); \
G(r,5,v[ 1],v[ 6],v[11],v[12]); \
G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
} while(0)
static void blake2s_compress( blake2s_state *S, const uint8_t in[BLAKE2S_BLOCKBYTES] )
{
uint32_t m[16];
uint32_t v[16];
size_t i;
for( i = 0; i < 16; ++i ) {
m[i] = load32( in + i * sizeof( m[i] ) );
}
for( i = 0; i < 8; ++i ) {
v[i] = S->h[i];
}
v[ 8] = blake2s_IV[0];
v[ 9] = blake2s_IV[1];
v[10] = blake2s_IV[2];
v[11] = blake2s_IV[3];
v[12] = S->t[0] ^ blake2s_IV[4];
v[13] = S->t[1] ^ blake2s_IV[5];
v[14] = S->f[0] ^ blake2s_IV[6];
v[15] = S->f[1] ^ blake2s_IV[7];
ROUND( 0 );
ROUND( 1 );
ROUND( 2 );
ROUND( 3 );
ROUND( 4 );
ROUND( 5 );
ROUND( 6 );
ROUND( 7 );
ROUND( 8 );
ROUND( 9 );
for( i = 0; i < 8; ++i ) {
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
}
}
#undef G
#undef ROUND
int blake2s_update( blake2s_state *S, const void *pin, size_t inlen )
{
const unsigned char * in = (const unsigned char *)pin;
if( inlen > 0 )
{
size_t left = S->buflen;
size_t fill = BLAKE2S_BLOCKBYTES - left;
if( inlen > fill )
{
S->buflen = 0;
memcpy( S->buf + left, in, fill ); /* Fill buffer */
blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
blake2s_compress( S, S->buf ); /* Compress */
in += fill; inlen -= fill;
while(inlen > BLAKE2S_BLOCKBYTES) {
blake2s_increment_counter(S, BLAKE2S_BLOCKBYTES);
blake2s_compress( S, in );
in += BLAKE2S_BLOCKBYTES;
inlen -= BLAKE2S_BLOCKBYTES;
}
}
memcpy( S->buf + S->buflen, in, inlen );
S->buflen += inlen;
}
return 0;
}
int blake2s_final( blake2s_state *S, void *out, size_t outlen )
{
uint8_t buffer[BLAKE2S_OUTBYTES] = {0};
size_t i;
if( out == NULL || outlen < S->outlen )
return -1;
if( blake2s_is_lastblock( S ) )
return -1;
blake2s_increment_counter( S, ( uint32_t )S->buflen );
blake2s_set_lastblock( S );
memset( S->buf + S->buflen, 0, BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
blake2s_compress( S, S->buf );
for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
store32( buffer + sizeof( S->h[i] ) * i, S->h[i] );
memcpy( out, buffer, outlen );
secure_zero_memory(buffer, sizeof(buffer));
return 0;
}
int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
{
blake2s_state S[1];
/* Verify parameters */
if ( NULL == in && inlen > 0 ) return -1;
if ( NULL == out ) return -1;
if ( NULL == key && keylen > 0) return -1;
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
if( keylen > BLAKE2S_KEYBYTES ) return -1;
if( keylen > 0 )
{
if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
}
else
{
if( blake2s_init( S, outlen ) < 0 ) return -1;
}
blake2s_update( S, ( const uint8_t * )in, inlen );
blake2s_final( S, out, outlen );
return 0;
}
#if defined(SUPERCOP)
int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
{
return blake2s( out, BLAKE2S_OUTBYTES, in, inlen, NULL, 0 );
}
#endif
#if defined(BLAKE2S_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
int main( void )
{
uint8_t key[BLAKE2S_KEYBYTES];
uint8_t buf[BLAKE2_KAT_LENGTH];
size_t i, step;
for( i = 0; i < BLAKE2S_KEYBYTES; ++i )
key[i] = ( uint8_t )i;
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
buf[i] = ( uint8_t )i;
/* Test simple API */
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
{
uint8_t hash[BLAKE2S_OUTBYTES];
blake2s( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES );
if( 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) )
{
goto fail;
}
}
/* Test streaming API */
for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) {
for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
uint8_t hash[BLAKE2S_OUTBYTES];
blake2s_state S;
uint8_t * p = buf;
size_t mlen = i;
int err = 0;
if( (err = blake2s_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) {
goto fail;
}
while (mlen >= step) {
if ( (err = blake2s_update(&S, p, step)) < 0 ) {
goto fail;
}
mlen -= step;
p += step;
}
if ( (err = blake2s_update(&S, p, mlen)) < 0) {
goto fail;
}
if ( (err = blake2s_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) {
goto fail;
}
if (0 != memcmp(hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES)) {
goto fail;
}
}
}
puts( "ok" );
return 0;
fail:
puts("error");
return -1;
}
#endif

359
power8/blake2sp.c Normal file

@ -0,0 +1,359 @@
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#if defined(_OPENMP)
#include <omp.h>
#endif
#include "blake2.h"
#include "blake2-impl.h"
#define PARALLELISM_DEGREE 8
/*
blake2sp_init_param defaults to setting the expecting output length
from the digest_length parameter block field.
In some cases, however, we do not want this, as the output length
of these instances is given by inner_length instead.
*/
static int blake2sp_init_leaf_param( blake2s_state *S, const blake2s_param *P )
{
int err = blake2s_init_param(S, P);
S->outlen = P->inner_length;
return err;
}
static int blake2sp_init_leaf( blake2s_state *S, size_t outlen, size_t keylen, uint64_t offset )
{
blake2s_param P[1];
P->digest_length = (uint8_t)outlen;
P->key_length = (uint8_t)keylen;
P->fanout = PARALLELISM_DEGREE;
P->depth = 2;
store32( &P->leaf_length, 0 );
store32( &P->node_offset, offset );
store16( &P->xof_length, 0 );
P->node_depth = 0;
P->inner_length = BLAKE2S_OUTBYTES;
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return blake2sp_init_leaf_param( S, P );
}
static int blake2sp_init_root( blake2s_state *S, size_t outlen, size_t keylen )
{
blake2s_param P[1];
P->digest_length = (uint8_t)outlen;
P->key_length = (uint8_t)keylen;
P->fanout = PARALLELISM_DEGREE;
P->depth = 2;
store32( &P->leaf_length, 0 );
store32( &P->node_offset, 0 );
store16( &P->xof_length, 0 );
P->node_depth = 1;
P->inner_length = BLAKE2S_OUTBYTES;
memset( P->salt, 0, sizeof( P->salt ) );
memset( P->personal, 0, sizeof( P->personal ) );
return blake2s_init_param( S, P );
}
int blake2sp_init( blake2sp_state *S, size_t outlen )
{
size_t i;
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
memset( S->buf, 0, sizeof( S->buf ) );
S->buflen = 0;
S->outlen = outlen;
if( blake2sp_init_root( S->R, outlen, 0 ) < 0 )
return -1;
for( i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2sp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1;
S->R->last_node = 1;
S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
return 0;
}
int blake2sp_init_key( blake2sp_state *S, size_t outlen, const void *key, size_t keylen )
{
size_t i;
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
if( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1;
memset( S->buf, 0, sizeof( S->buf ) );
S->buflen = 0;
S->outlen = outlen;
if( blake2sp_init_root( S->R, outlen, keylen ) < 0 )
return -1;
for( i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2sp_init_leaf( S->S[i], outlen, keylen, i ) < 0 ) return -1;
S->R->last_node = 1;
S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
{
uint8_t block[BLAKE2S_BLOCKBYTES];
memset( block, 0, BLAKE2S_BLOCKBYTES );
memcpy( block, key, keylen );
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( S->S[i], block, BLAKE2S_BLOCKBYTES );
secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
}
return 0;
}
int blake2sp_update( blake2sp_state *S, const void *pin, size_t inlen )
{
const unsigned char * in = (const unsigned char *)pin;
size_t left = S->buflen;
size_t fill = sizeof( S->buf ) - left;
size_t i;
if( left && inlen >= fill )
{
memcpy( S->buf + left, in, fill );
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES );
in += fill;
inlen -= fill;
left = 0;
}
#if defined(_OPENMP)
#pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE)
#else
for( i = 0; i < PARALLELISM_DEGREE; ++i )
#endif
{
#if defined(_OPENMP)
size_t i = omp_get_thread_num();
#endif
size_t inlen__ = inlen;
const unsigned char *in__ = ( const unsigned char * )in;
in__ += i * BLAKE2S_BLOCKBYTES;
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES )
{
blake2s_update( S->S[i], in__, BLAKE2S_BLOCKBYTES );
in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
}
}
in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES );
inlen %= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
if( inlen > 0 )
memcpy( S->buf + left, in, inlen );
S->buflen = left + inlen;
return 0;
}
int blake2sp_final( blake2sp_state *S, void *out, size_t outlen )
{
uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];
size_t i;
if(out == NULL || outlen < S->outlen) {
return -1;
}
for( i = 0; i < PARALLELISM_DEGREE; ++i )
{
if( S->buflen > i * BLAKE2S_BLOCKBYTES )
{
size_t left = S->buflen - i * BLAKE2S_BLOCKBYTES;
if( left > BLAKE2S_BLOCKBYTES ) left = BLAKE2S_BLOCKBYTES;
blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, left );
}
blake2s_final( S->S[i], hash[i], BLAKE2S_OUTBYTES );
}
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( S->R, hash[i], BLAKE2S_OUTBYTES );
return blake2s_final( S->R, out, S->outlen );
}
int blake2sp( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
{
uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];
blake2s_state S[PARALLELISM_DEGREE][1];
blake2s_state FS[1];
size_t i;
/* Verify parameters */
if ( NULL == in && inlen > 0 ) return -1;
if ( NULL == out ) return -1;
if ( NULL == key && keylen > 0) return -1;
if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
if( keylen > BLAKE2S_KEYBYTES ) return -1;
for( i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2sp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1;
S[PARALLELISM_DEGREE - 1]->last_node = 1; /* mark last node */
if( keylen > 0 )
{
uint8_t block[BLAKE2S_BLOCKBYTES];
memset( block, 0, BLAKE2S_BLOCKBYTES );
memcpy( block, key, keylen );
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( S[i], block, BLAKE2S_BLOCKBYTES );
secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
}
#if defined(_OPENMP)
#pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE)
#else
for( i = 0; i < PARALLELISM_DEGREE; ++i )
#endif
{
#if defined(_OPENMP)
size_t i = omp_get_thread_num();
#endif
size_t inlen__ = inlen;
const unsigned char *in__ = ( const unsigned char * )in;
in__ += i * BLAKE2S_BLOCKBYTES;
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES )
{
blake2s_update( S[i], in__, BLAKE2S_BLOCKBYTES );
in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
}
if( inlen__ > i * BLAKE2S_BLOCKBYTES )
{
const size_t left = inlen__ - i * BLAKE2S_BLOCKBYTES;
const size_t len = left <= BLAKE2S_BLOCKBYTES ? left : BLAKE2S_BLOCKBYTES;
blake2s_update( S[i], in__, len );
}
blake2s_final( S[i], hash[i], BLAKE2S_OUTBYTES );
}
if( blake2sp_init_root( FS, outlen, keylen ) < 0 )
return -1;
FS->last_node = 1;
for( i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( FS, hash[i], BLAKE2S_OUTBYTES );
return blake2s_final( FS, out, outlen );
}
#if defined(BLAKE2SP_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
int main( void )
{
uint8_t key[BLAKE2S_KEYBYTES];
uint8_t buf[BLAKE2_KAT_LENGTH];
size_t i, step;
for( i = 0; i < BLAKE2S_KEYBYTES; ++i )
key[i] = ( uint8_t )i;
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
buf[i] = ( uint8_t )i;
/* Test simple API */
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
{
uint8_t hash[BLAKE2S_OUTBYTES];
blake2sp( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES );
if( 0 != memcmp( hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES ) )
{
goto fail;
}
}
/* Test streaming API */
for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) {
for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
uint8_t hash[BLAKE2S_OUTBYTES];
blake2sp_state S;
uint8_t * p = buf;
size_t mlen = i;
int err = 0;
if( (err = blake2sp_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) {
goto fail;
}
while (mlen >= step) {
if ( (err = blake2sp_update(&S, p, step)) < 0 ) {
goto fail;
}
mlen -= step;
p += step;
}
if ( (err = blake2sp_update(&S, p, mlen)) < 0) {
goto fail;
}
if ( (err = blake2sp_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) {
goto fail;
}
if (0 != memcmp(hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES)) {
goto fail;
}
}
}
puts( "ok" );
return 0;
fail:
puts("error");
return -1;
}
#endif

241
power8/blake2xb.c Normal file

@ -0,0 +1,241 @@
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2016, JP Aumasson <jeanphilippe.aumasson@gmail.com>.
Copyright 2016, Samuel Neves <sneves@dei.uc.pt>.
You may use this under the terms of the CC0, the OpenSSL Licence, or
the Apache Public License 2.0, at your option. The terms of these
licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2.h"
#include "blake2-impl.h"
int blake2xb_init( blake2xb_state *S, const size_t outlen ) {
return blake2xb_init_key(S, outlen, NULL, 0);
}
int blake2xb_init_key( blake2xb_state *S, const size_t outlen, const void *key, size_t keylen)
{
if ( outlen == 0 || outlen > 0xFFFFFFFFUL ) {
return -1;
}
if (NULL != key && keylen > BLAKE2B_KEYBYTES) {
return -1;
}
if (NULL == key && keylen > 0) {
return -1;
}
/* Initialize parameter block */
S->P->digest_length = BLAKE2B_OUTBYTES;
S->P->key_length = keylen;
S->P->fanout = 1;
S->P->depth = 1;
store32( &S->P->leaf_length, 0 );
store32( &S->P->node_offset, 0 );
store32( &S->P->xof_length, outlen );
S->P->node_depth = 0;
S->P->inner_length = 0;
memset( S->P->reserved, 0, sizeof( S->P->reserved ) );
memset( S->P->salt, 0, sizeof( S->P->salt ) );
memset( S->P->personal, 0, sizeof( S->P->personal ) );
if( blake2b_init_param( S->S, S->P ) < 0 ) {
return -1;
}
if (keylen > 0) {
uint8_t block[BLAKE2B_BLOCKBYTES];
memset(block, 0, BLAKE2B_BLOCKBYTES);
memcpy(block, key, keylen);
blake2b_update(S->S, block, BLAKE2B_BLOCKBYTES);
secure_zero_memory(block, BLAKE2B_BLOCKBYTES);
}
return 0;
}
int blake2xb_update( blake2xb_state *S, const void *in, size_t inlen ) {
return blake2b_update( S->S, in, inlen );
}
int blake2xb_final( blake2xb_state *S, void *out, size_t outlen) {
blake2b_state C[1];
blake2b_param P[1];
uint32_t xof_length = load32(&S->P->xof_length);
uint8_t root[BLAKE2B_BLOCKBYTES];
size_t i;
if (NULL == out) {
return -1;
}
/* outlen must match the output size defined in xof_length, */
/* unless it was -1, in which case anything goes except 0. */
if(xof_length == 0xFFFFFFFFUL) {
if(outlen == 0) {
return -1;
}
} else {
if(outlen != xof_length) {
return -1;
}
}
/* Finalize the root hash */
if (blake2b_final(S->S, root, BLAKE2B_OUTBYTES) < 0) {
return -1;
}
/* Set common block structure values */
/* Copy values from parent instance, and only change the ones below */
memcpy(P, S->P, sizeof(blake2b_param));
P->key_length = 0;
P->fanout = 0;
P->depth = 0;
store32(&P->leaf_length, BLAKE2B_OUTBYTES);
P->inner_length = BLAKE2B_OUTBYTES;
P->node_depth = 0;
for (i = 0; outlen > 0; ++i) {
const size_t block_size = (outlen < BLAKE2B_OUTBYTES) ? outlen : BLAKE2B_OUTBYTES;
/* Initialize state */
P->digest_length = block_size;
store32(&P->node_offset, i);
blake2b_init_param(C, P);
/* Process key if needed */
blake2b_update(C, root, BLAKE2B_OUTBYTES);
if (blake2b_final(C, (uint8_t *)out + i * BLAKE2B_OUTBYTES, block_size) < 0 ) {
return -1;
}
outlen -= block_size;
}
secure_zero_memory(root, sizeof(root));
secure_zero_memory(P, sizeof(P));
secure_zero_memory(C, sizeof(C));
/* Put blake2xb in an invalid state? cf. blake2s_is_lastblock */
return 0;
}
int blake2xb(void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen)
{
blake2xb_state S[1];
/* Verify parameters */
if (NULL == in && inlen > 0)
return -1;
if (NULL == out)
return -1;
if (NULL == key && keylen > 0)
return -1;
if (keylen > BLAKE2B_KEYBYTES)
return -1;
if (outlen == 0)
return -1;
/* Initialize the root block structure */
if (blake2xb_init_key(S, outlen, key, keylen) < 0) {
return -1;
}
/* Absorb the input message */
blake2xb_update(S, in, inlen);
/* Compute the root node of the tree and the final hash using the counter construction */
return blake2xb_final(S, out, outlen);
}
#if defined(BLAKE2XB_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
int main( void )
{
uint8_t key[BLAKE2B_KEYBYTES];
uint8_t buf[BLAKE2_KAT_LENGTH];
size_t i, step, outlen;
for( i = 0; i < BLAKE2B_KEYBYTES; ++i ) {
key[i] = ( uint8_t )i;
}
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) {
buf[i] = ( uint8_t )i;
}
/* Testing length of outputs rather than inputs */
/* (Test of input lengths mostly covered by blake2b tests) */
/* Test simple API */
for( outlen = 1; outlen <= BLAKE2_KAT_LENGTH; ++outlen )
{
uint8_t hash[BLAKE2_KAT_LENGTH] = {0};
if( blake2xb( hash, outlen, buf, BLAKE2_KAT_LENGTH, key, BLAKE2B_KEYBYTES ) < 0 ) {
goto fail;
}
if( 0 != memcmp( hash, blake2xb_keyed_kat[outlen-1], outlen ) )
{
goto fail;
}
}
/* Test streaming API */
for(step = 1; step < BLAKE2B_BLOCKBYTES; ++step) {
for (outlen = 1; outlen <= BLAKE2_KAT_LENGTH; ++outlen) {
uint8_t hash[BLAKE2_KAT_LENGTH];
blake2xb_state S;
uint8_t * p = buf;
size_t mlen = BLAKE2_KAT_LENGTH;
int err = 0;
if( (err = blake2xb_init_key(&S, outlen, key, BLAKE2B_KEYBYTES)) < 0 ) {
goto fail;
}
while (mlen >= step) {
if ( (err = blake2xb_update(&S, p, step)) < 0 ) {
goto fail;
}
mlen -= step;
p += step;
}
if ( (err = blake2xb_update(&S, p, mlen)) < 0) {
goto fail;
}
if ( (err = blake2xb_final(&S, hash, outlen)) < 0) {
goto fail;
}
if (0 != memcmp(hash, blake2xb_keyed_kat[outlen-1], outlen)) {
goto fail;
}
}
}
puts( "ok" );
return 0;
fail:
puts("error");
return -1;
}
#endif

239
power8/blake2xs.c Normal file

@ -0,0 +1,239 @@
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2016, JP Aumasson <jeanphilippe.aumasson@gmail.com>.
Copyright 2016, Samuel Neves <sneves@dei.uc.pt>.
You may use this under the terms of the CC0, the OpenSSL Licence, or
the Apache Public License 2.0, at your option. The terms of these
licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2.h"
#include "blake2-impl.h"
int blake2xs_init( blake2xs_state *S, const size_t outlen ) {
return blake2xs_init_key(S, outlen, NULL, 0);
}
int blake2xs_init_key( blake2xs_state *S, const size_t outlen, const void *key, size_t keylen )
{
if ( outlen == 0 || outlen > 0xFFFFUL ) {
return -1;
}
if (NULL != key && keylen > BLAKE2B_KEYBYTES) {
return -1;
}
if (NULL == key && keylen > 0) {
return -1;
}
/* Initialize parameter block */
S->P->digest_length = BLAKE2S_OUTBYTES;
S->P->key_length = keylen;
S->P->fanout = 1;
S->P->depth = 1;
store32( &S->P->leaf_length, 0 );
store32( &S->P->node_offset, 0 );
store16( &S->P->xof_length, outlen );
S->P->node_depth = 0;
S->P->inner_length = 0;
memset( S->P->salt, 0, sizeof( S->P->salt ) );
memset( S->P->personal, 0, sizeof( S->P->personal ) );
if( blake2s_init_param( S->S, S->P ) < 0 ) {
return -1;
}
if (keylen > 0) {
uint8_t block[BLAKE2S_BLOCKBYTES];
memset(block, 0, BLAKE2S_BLOCKBYTES);
memcpy(block, key, keylen);
blake2s_update(S->S, block, BLAKE2S_BLOCKBYTES);
secure_zero_memory(block, BLAKE2S_BLOCKBYTES);
}
return 0;
}
int blake2xs_update( blake2xs_state *S, const void *in, size_t inlen ) {
return blake2s_update( S->S, in, inlen );
}
int blake2xs_final(blake2xs_state *S, void *out, size_t outlen) {
blake2s_state C[1];
blake2s_param P[1];
uint16_t xof_length = load16(&S->P->xof_length);
uint8_t root[BLAKE2S_BLOCKBYTES];
size_t i;
if (NULL == out) {
return -1;
}
/* outlen must match the output size defined in xof_length, */
/* unless it was -1, in which case anything goes except 0. */
if(xof_length == 0xFFFFUL) {
if(outlen == 0) {
return -1;
}
} else {
if(outlen != xof_length) {
return -1;
}
}
/* Finalize the root hash */
if (blake2s_final(S->S, root, BLAKE2S_OUTBYTES) < 0) {
return -1;
}
/* Set common block structure values */
/* Copy values from parent instance, and only change the ones below */
memcpy(P, S->P, sizeof(blake2s_param));
P->key_length = 0;
P->fanout = 0;
P->depth = 0;
store32(&P->leaf_length, BLAKE2S_OUTBYTES);
P->inner_length = BLAKE2S_OUTBYTES;
P->node_depth = 0;
for (i = 0; outlen > 0; ++i) {
const size_t block_size = (outlen < BLAKE2S_OUTBYTES) ? outlen : BLAKE2S_OUTBYTES;
/* Initialize state */
P->digest_length = block_size;
store32(&P->node_offset, i);
blake2s_init_param(C, P);
/* Process key if needed */
blake2s_update(C, root, BLAKE2S_OUTBYTES);
if (blake2s_final(C, (uint8_t *)out + i * BLAKE2S_OUTBYTES, block_size) < 0) {
return -1;
}
outlen -= block_size;
}
secure_zero_memory(root, sizeof(root));
secure_zero_memory(P, sizeof(P));
secure_zero_memory(C, sizeof(C));
/* Put blake2xs in an invalid state? cf. blake2s_is_lastblock */
return 0;
}
int blake2xs(void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen)
{
blake2xs_state S[1];
/* Verify parameters */
if (NULL == in && inlen > 0)
return -1;
if (NULL == out)
return -1;
if (NULL == key && keylen > 0)
return -1;
if (keylen > BLAKE2S_KEYBYTES)
return -1;
if (outlen == 0)
return -1;
/* Initialize the root block structure */
if (blake2xs_init_key(S, outlen, key, keylen) < 0) {
return -1;
}
/* Absorb the input message */
blake2xs_update(S, in, inlen);
/* Compute the root node of the tree and the final hash using the counter construction */
return blake2xs_final(S, out, outlen);
}
#if defined(BLAKE2XS_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
int main( void )
{
uint8_t key[BLAKE2S_KEYBYTES];
uint8_t buf[BLAKE2_KAT_LENGTH];
size_t i, step, outlen;
for( i = 0; i < BLAKE2S_KEYBYTES; ++i ) {
key[i] = ( uint8_t )i;
}
for( i = 0; i < BLAKE2_KAT_LENGTH; ++i ) {
buf[i] = ( uint8_t )i;
}
/* Testing length of ouputs rather than inputs */
/* (Test of input lengths mostly covered by blake2s tests) */
/* Test simple API */
for( outlen = 1; outlen <= BLAKE2_KAT_LENGTH; ++outlen )
{
uint8_t hash[BLAKE2_KAT_LENGTH] = {0};
if( blake2xs( hash, outlen, buf, BLAKE2_KAT_LENGTH, key, BLAKE2S_KEYBYTES ) < 0 ) {
goto fail;
}
if( 0 != memcmp( hash, blake2xs_keyed_kat[outlen-1], outlen ) )
{
goto fail;
}
}
/* Test streaming API */
for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) {
for (outlen = 1; outlen <= BLAKE2_KAT_LENGTH; ++outlen) {
uint8_t hash[BLAKE2_KAT_LENGTH];
blake2xs_state S;
uint8_t * p = buf;
size_t mlen = BLAKE2_KAT_LENGTH;
int err = 0;
if( (err = blake2xs_init_key(&S, outlen, key, BLAKE2S_KEYBYTES)) < 0 ) {
goto fail;
}
while (mlen >= step) {
if ( (err = blake2xs_update(&S, p, step)) < 0 ) {
goto fail;
}
mlen -= step;
p += step;
}
if ( (err = blake2xs_update(&S, p, mlen)) < 0) {
goto fail;
}
if ( (err = blake2xs_final(&S, hash, outlen)) < 0) {
goto fail;
}
if (0 != memcmp(hash, blake2xs_keyed_kat[outlen-1], outlen)) {
goto fail;
}
}
}
puts( "ok" );
return 0;
fail:
puts("error");
return -1;
}
#endif

139
power8/genkat-c.c Normal file

@ -0,0 +1,139 @@
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "blake2.h"
#define STR_(x) #x
#define STR(x) STR_(x)
#define LENGTH 256
#define MAKE_KAT(name, size_prefix) \
do { \
printf("static const uint8_t " #name "_kat[BLAKE2_KAT_LENGTH][" #size_prefix \
"_OUTBYTES] = \n{\n"); \
\
for (i = 0; i < LENGTH; ++i) { \
name(hash, size_prefix##_OUTBYTES, in, i, NULL, 0); \
printf("\t{\n\t\t"); \
\
for (j = 0; j < size_prefix##_OUTBYTES; ++j) \
printf("0x%02X%s", hash[j], \
(j + 1) == size_prefix##_OUTBYTES ? "\n" : j && !((j + 1) % 8) ? ",\n\t\t" : ", "); \
\
printf("\t},\n"); \
} \
\
printf("};\n\n\n\n\n"); \
} while (0)
#define MAKE_KEYED_KAT(name, size_prefix) \
do { \
printf("static const uint8_t " #name "_keyed_kat[BLAKE2_KAT_LENGTH][" #size_prefix \
"_OUTBYTES] = \n{\n"); \
\
for (i = 0; i < LENGTH; ++i) { \
name(hash, size_prefix##_OUTBYTES, in, i, key, size_prefix##_KEYBYTES); \
printf("\t{\n\t\t"); \
\
for (j = 0; j < size_prefix##_OUTBYTES; ++j) \
printf("0x%02X%s", hash[j], \
(j + 1) == size_prefix##_OUTBYTES ? "\n" : j && !((j + 1) % 8) ? ",\n\t\t" : ", "); \
\
printf("\t},\n"); \
} \
\
printf("};\n\n\n\n\n"); \
} while (0)
#define MAKE_XOF_KAT(name) \
do { \
printf("static const uint8_t " #name "_kat[BLAKE2_KAT_LENGTH][BLAKE2_KAT_LENGTH] = \n{\n"); \
\
for (i = 1; i <= LENGTH; ++i) { \
name(hash, i, in, LENGTH, NULL, 0); \
printf("\t{\n\t\t"); \
\
for (j = 0; j < i; ++j) \
printf("0x%02X%s", hash[j], \
(j + 1) == LENGTH ? "\n" : j && !((j + 1) % 8) ? ",\n\t\t" : ", "); \
\
for (j = i; j < LENGTH; ++j) \
printf("0x00%s", (j + 1) == LENGTH ? "\n" : j && !((j + 1) % 8) ? ",\n\t\t" : ", "); \
\
printf("\t},\n"); \
} \
\
printf("};\n\n\n\n\n"); \
} while (0)
#define MAKE_XOF_KEYED_KAT(name, size_prefix) \
do { \
printf("static const uint8_t " #name \
"_keyed_kat[BLAKE2_KAT_LENGTH][BLAKE2_KAT_LENGTH] = \n{\n"); \
\
for (i = 1; i <= LENGTH; ++i) { \
name(hash, i, in, LENGTH, key, size_prefix##_KEYBYTES); \
printf("\t{\n\t\t"); \
\
for (j = 0; j < i; ++j) \
printf("0x%02X%s", hash[j], \
(j + 1) == LENGTH ? "\n" : j && !((j + 1) % 8) ? ",\n\t\t" : ", "); \
\
for (j = i; j < LENGTH; ++j) \
printf("0x00%s", (j + 1) == LENGTH ? "\n" : j && !((j + 1) % 8) ? ",\n\t\t" : ", "); \
\
printf("\t},\n"); \
} \
\
printf("};\n\n\n\n\n"); \
} while (0)
int main() {
uint8_t key[64] = {0};
uint8_t in[LENGTH] = {0};
uint8_t hash[LENGTH] = {0};
size_t i, j;
for (i = 0; i < sizeof(in); ++i)
in[i] = i;
for (i = 0; i < sizeof(key); ++i)
key[i] = i;
puts("#ifndef BLAKE2_KAT_H\n"
"#define BLAKE2_KAT_H\n\n\n"
"#include <stdint.h>\n\n"
"#define BLAKE2_KAT_LENGTH " STR(LENGTH) "\n\n\n");
MAKE_KAT(blake2s, BLAKE2S);
MAKE_KEYED_KAT(blake2s, BLAKE2S);
MAKE_KAT(blake2b, BLAKE2B);
MAKE_KEYED_KAT(blake2b, BLAKE2B);
MAKE_KAT(blake2sp, BLAKE2S);
MAKE_KEYED_KAT(blake2sp, BLAKE2S);
MAKE_KAT(blake2bp, BLAKE2B);
MAKE_KEYED_KAT(blake2bp, BLAKE2B);
MAKE_XOF_KAT(blake2xs);
MAKE_XOF_KEYED_KAT(blake2xs, BLAKE2S);
MAKE_XOF_KAT(blake2xb);
MAKE_XOF_KEYED_KAT(blake2xb, BLAKE2B);
puts("#endif");
return 0;
}

154
power8/genkat-json.c Normal file

@ -0,0 +1,154 @@
/*
BLAKE2 reference source code package - reference C implementations
Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the
terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
your option. The terms of these licenses can be found at:
- CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- OpenSSL license : https://www.openssl.org/source/license.html
- Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
More information about the BLAKE2 hash function can be found at
https://blake2.net.
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "blake2.h"
#define STR_(x) #x
#define STR(x) STR_(x)
#define LENGTH 256
#define MAKE_KAT(name, size_prefix, first) \
do { \
for (i = 0; i < LENGTH; ++i) { \
printf("%s\n{\n", i == 0 && first ? "" : ","); \
\
printf(" \"hash\": \"" #name "\",\n"); \
printf(" \"in\": \""); \
for (j = 0; j < i; ++j) \
printf("%02x", in[j]); \
\
printf("\",\n"); \
printf(" \"key\": \"\",\n"); \
printf(" \"out\": \""); \
\
name(hash, size_prefix##_OUTBYTES, in, i, NULL, 0); \
\
for (j = 0; j < size_prefix##_OUTBYTES; ++j) \
printf("%02x", hash[j]); \
printf("\"\n"); \
printf("}"); \
} \
} while (0)
#define MAKE_KEYED_KAT(name, size_prefix, first) \
do { \
for (i = 0; i < LENGTH; ++i) { \
printf("%s\n{\n", i == 0 && first ? "" : ","); \
\
printf(" \"hash\": \"" #name "\",\n"); \
printf(" \"in\": \""); \
for (j = 0; j < i; ++j) \
printf("%02x", in[j]); \
\
printf("\",\n"); \
printf(" \"key\": \""); \
for (j = 0; j < size_prefix##_KEYBYTES; ++j) \
printf("%02x", key[j]); \
printf("\",\n"); \
printf(" \"out\": \""); \
\
name(hash, size_prefix##_OUTBYTES, in, i, key, size_prefix##_KEYBYTES); \
\
for (j = 0; j < size_prefix##_OUTBYTES; ++j) \
printf("%02x", hash[j]); \
printf("\"\n"); \
printf("}"); \
} \
} while (0)
#define MAKE_XOF_KAT(name, first) \
do { \
for (i = 1; i <= LENGTH; ++i) { \
printf("%s\n{\n", i == 1 && first ? "" : ","); \
\
printf(" \"hash\": \"" #name "\",\n"); \
printf(" \"in\": \""); \
for (j = 0; j < LENGTH; ++j) \
printf("%02x", in[j]); \
\
printf("\",\n"); \
printf(" \"key\": \"\",\n"); \
printf(" \"out\": \""); \
\
name(hash, i, in, LENGTH, NULL, 0); \
\
for (j = 0; j < i; ++j) \
printf("%02x", hash[j]); \
printf("\"\n"); \
printf("}"); \
} \
} while (0)
#define MAKE_XOF_KEYED_KAT(name, size_prefix, first) \
do { \
for (i = 1; i <= LENGTH; ++i) { \
printf("%s\n{\n", i == 1 && first ? "" : ","); \
\
printf(" \"hash\": \"" #name "\",\n"); \
printf(" \"in\": \""); \
for (j = 0; j < LENGTH; ++j) \
printf("%02x", in[j]); \
\
printf("\",\n"); \
printf(" \"key\": \""); \
for (j = 0; j < size_prefix##_KEYBYTES; ++j) \
printf("%02x", key[j]); \
printf("\",\n"); \
printf(" \"out\": \""); \
\
name(hash, i, in, LENGTH, key, size_prefix##_KEYBYTES); \
\
for (j = 0; j < i; ++j) \
printf("%02x", hash[j]); \
printf("\"\n"); \
printf("}"); \
} \
} while (0)
int main() {
uint8_t key[64] = {0};
uint8_t in[LENGTH] = {0};
uint8_t hash[LENGTH] = {0};
size_t i, j;
for (i = 0; i < sizeof(in); ++i)
in[i] = i;
for (i = 0; i < sizeof(key); ++i)
key[i] = i;
printf("[");
MAKE_KAT(blake2s, BLAKE2S, 1);
MAKE_KEYED_KAT(blake2s, BLAKE2S, 0);
MAKE_KAT(blake2b, BLAKE2B, 0);
MAKE_KEYED_KAT(blake2b, BLAKE2B, 0);
MAKE_KAT(blake2sp, BLAKE2S, 0);
MAKE_KEYED_KAT(blake2sp, BLAKE2S, 0);
MAKE_KAT(blake2bp, BLAKE2B, 0);
MAKE_KEYED_KAT(blake2bp, BLAKE2B, 0);
MAKE_XOF_KAT(blake2xs, 0);
MAKE_XOF_KEYED_KAT(blake2xs, BLAKE2S, 0);
MAKE_XOF_KAT(blake2xb, 0);
MAKE_XOF_KEYED_KAT(blake2xb, BLAKE2B, 0);
printf("\n]\n");
fflush(stdout);
return 0;
}

53
power8/makefile Normal file

@ -0,0 +1,53 @@
# IBM XLC compiler options. -qinfo=all is -Wall, but it is noisy
# CC=xlc
# CFLAGS=-O3 -I../testvectors
# POWER8_CFLAG=-qarch=pwr8 -qaltivec
CC=gcc
CFLAGS=-O3 -I../testvectors -Wall -Wextra -std=c89 -pedantic -Wno-long-long
POWER8_CFLAG=-mcpu=power8
BLAKEBINS=blake2s blake2b blake2sp blake2bp blake2xs blake2xb
all: $(BLAKEBINS) check
blake2s: blake2s.c
$(CC) blake2s.c -o $@ $(CFLAGS) $(POWER8_CFLAG) -DBLAKE2S_SELFTEST
blake2b: blake2b.c
$(CC) blake2b.c -o $@ $(CFLAGS) $(POWER8_CFLAG) -DBLAKE2B_SELFTEST
#blake2b: blake2b.c
# $(CC) -E blake2b.c -o blake2b.i.c $(CFLAGS) $(POWER8_CFLAG) -DBLAKE2B_SELFTEST
# sed -e '/^# [0-9]/d' blake2b.i.c > blake2b.i.c.t && mv blake2b.i.c.t blake2b.i.c
# perl -pne "s/while(0);/while(0);\n/g" blake2b.i.c > blake2b.i.c.t && mv blake2b.i.c.t blake2b.i.c
# cat -s blake2b.i.c > blake2b.i.c.t && mv blake2b.i.c.t blake2b.i.c
# $(CC) blake2b.i.c -o $@ $(CFLAGS) $(POWER8_CFLAG) -DBLAKE2B_SELFTEST
blake2sp: blake2sp.c blake2s.c
$(CC) blake2sp.c blake2s.c -o $@ $(CFLAGS) $(POWER8_CFLAG) -DBLAKE2SP_SELFTEST
blake2bp: blake2bp.c blake2b.c
$(CC) blake2bp.c blake2b.c -o $@ $(CFLAGS) $(POWER8_CFLAG) -DBLAKE2BP_SELFTEST
blake2xs: blake2xs.c blake2s.c
$(CC) blake2xs.c blake2s.c -o $@ $(CFLAGS) $(POWER8_CFLAG) -DBLAKE2XS_SELFTEST
blake2xb: blake2xb.c blake2b.c
$(CC) blake2xb.c blake2b.c -o $@ $(CFLAGS) $(POWER8_CFLAG) -DBLAKE2XB_SELFTEST
check: blake2s blake2b blake2sp blake2bp blake2xs blake2xb
./blake2s
./blake2b
./blake2sp
./blake2bp
./blake2xs
./blake2xb
kat:
$(CC) $(CFLAGS) -o genkat-c genkat-c.c blake2b.c blake2s.c blake2sp.c blake2bp.c blake2xs.c blake2xb.c
$(CC) $(CFLAGS) -g -o genkat-json genkat-json.c blake2b.c blake2s.c blake2sp.c blake2bp.c blake2xs.c blake2xb.c
./genkat-c > blake2-kat.h
./genkat-json > blake2-kat.json
clean:
rm -rf *.o *.i.c genkat-c genkat-json blake2-kat.h blake2-kat.json $(BLAKEBINS)