mirror of https://github.com/BLAKE2/BLAKE2 synced 2024-09-07 07:10:35 +02:00

Release 2013-01-31

This commit is contained in:
CodesInChaos 2013-02-01 16:44:36 +01:00
commit fb714d2c83
60 changed files with 42049 additions and 0 deletions

COPYING Normal file
View File

@ -0,0 +1,121 @@
Creative Commons Legal Code
CC0 1.0 Universal
Statement of Purpose
The laws of most jurisdictions throughout the world automatically confer
exclusive Copyright and Related Rights (defined below) upon the creator
and subsequent owner(s) (each and all, an "owner") of an original work of
authorship and/or a database (each, a "Work").
Certain owners wish to permanently relinquish those rights to a Work for
the purpose of contributing to a commons of creative, cultural and
scientific works ("Commons") that the public can reliably and without fear
of later claims of infringement build upon, modify, incorporate in other
works, reuse and redistribute as freely as possible in any form whatsoever
and for any purposes, including without limitation commercial purposes.
These owners may contribute to the Commons to promote the ideal of a free
culture and the further production of creative, cultural and scientific
works, or to gain reputation or greater distribution for their Work in
part through the use and efforts of others.
For these and/or other purposes and motivations, and without any
expectation of additional consideration or compensation, the person
associating CC0 with a Work (the "Affirmer"), to the extent that he or she
is an owner of Copyright and Related Rights in the Work, voluntarily
elects to apply CC0 to the Work and publicly distribute the Work under its
terms, with knowledge of his or her Copyright and Related Rights in the
Work and the meaning and intended legal effect of CC0 on those rights.
1. Copyright and Related Rights. A Work made available under CC0 may be
protected by copyright and related or neighboring rights ("Copyright and
Related Rights"). Copyright and Related Rights include, but are not
limited to, the following:
i. the right to reproduce, adapt, distribute, perform, display,
communicate, and translate a Work;
ii. moral rights retained by the original author(s) and/or performer(s);
iii. publicity and privacy rights pertaining to a person's image or
likeness depicted in a Work;
iv. rights protecting against unfair competition in regards to a Work,
subject to the limitations in paragraph 4(a), below;
v. rights protecting the extraction, dissemination, use and reuse of data
in a Work;
vi. database rights (such as those arising under Directive 96/9/EC of the
European Parliament and of the Council of 11 March 1996 on the legal
protection of databases, and under any national implementation
thereof, including any amended or successor version of such
directive); and
vii. other similar, equivalent or corresponding rights throughout the
world based on applicable law or treaty, and any national
implementations thereof.
2. Waiver. To the greatest extent permitted by, but not in contravention
of, applicable law, Affirmer hereby overtly, fully, permanently,
irrevocably and unconditionally waives, abandons, and surrenders all of
Affirmer's Copyright and Related Rights and associated claims and causes
of action, whether now known or unknown (including existing as well as
future claims and causes of action), in the Work (i) in all territories
worldwide, (ii) for the maximum duration provided by applicable law or
treaty (including future time extensions), (iii) in any current or future
medium and for any number of copies, and (iv) for any purpose whatsoever,
including without limitation commercial, advertising or promotional
purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
member of the public at large and to the detriment of Affirmer's heirs and
successors, fully intending that such Waiver shall not be subject to
revocation, rescission, cancellation, termination, or any other legal or
equitable action to disrupt the quiet enjoyment of the Work by the public
as contemplated by Affirmer's express Statement of Purpose.
3. Public License Fallback. Should any part of the Waiver for any reason
be judged legally invalid or ineffective under applicable law, then the
Waiver shall be preserved to the maximum extent permitted taking into
account Affirmer's express Statement of Purpose. In addition, to the
extent the Waiver is so judged Affirmer hereby grants to each affected
person a royalty-free, non transferable, non sublicensable, non exclusive,
irrevocable and unconditional license to exercise Affirmer's Copyright and
Related Rights in the Work (i) in all territories worldwide, (ii) for the
maximum duration provided by applicable law or treaty (including future
time extensions), (iii) in any current or future medium and for any number
of copies, and (iv) for any purpose whatsoever, including without
limitation commercial, advertising or promotional purposes (the
"License"). The License shall be deemed effective as of the date CC0 was
applied by Affirmer to the Work. Should any part of the License for any
reason be judged legally invalid or ineffective under applicable law, such
partial invalidity or ineffectiveness shall not invalidate the remainder
of the License, and in such case Affirmer hereby affirms that he or she
will not (i) exercise any of his or her remaining Copyright and Related
Rights in the Work or (ii) assert any associated claims and causes of
action with respect to the Work, in either case contrary to Affirmer's
express Statement of Purpose.
4. Limitations and Disclaimers.
a. No trademark or patent rights held by Affirmer are waived, abandoned,
surrendered, licensed or otherwise affected by this document.
b. Affirmer offers the Work as-is and makes no representations or
warranties of any kind concerning the Work, express, implied,
statutory or otherwise, including without limitation warranties of
title, merchantability, fitness for a particular purpose, non
infringement, or the absence of latent or other defects, accuracy, or
the present or absence of errors, whether or not discoverable, all to
the greatest extent permissible under applicable law.
c. Affirmer disclaims responsibility for clearing rights of other persons
that may apply to the Work or any use thereof, including without
limitation any person's Copyright and Related Rights in the Work.
Further, Affirmer disclaims responsibility for obtaining any necessary
consents, permissions or other rights required for any use of the
Work.
d. Affirmer understands and acknowledges that Creative Commons is not a
party to this document and has no duty or obligation with respect to
this CC0 or use of the Work.

README Normal file
View File

@ -0,0 +1,19 @@
This is the reference source code package of BLAKE2, which includes:

ref/: C implementations of blake2b, blake2bp, blake2s, blake2sp, aimed at
portability and simplicity

sse/: C implementations of blake2b, blake2bp, blake2s, blake2sp, optimized
for speed on CPUs supporting SSE2, SSSE3, SSE4.1, AVX, or XOP

csharp/: C# implementation of blake2b

b2sum/: command line tool to hash files, based on the sse/ implementations

bench/: benchmark tool to measure cycles-per-byte speeds and produce graphs

b2sum/b2sum.c Normal file
View File

@ -0,0 +1,312 @@
/*
   BLAKE2 reference source code package - b2sum tool

   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include <ctype.h>
#include <unistd.h>
#include <getopt.h>
#include "blake2.h"
/* This will help compatibility with coreutils */
int blake2s_stream( FILE *stream, void *resstream )
int ret = -1;
size_t sum, n;
blake2s_state S[1];
static const size_t buffer_length = 32768;
uint8_t *buffer = ( uint8_t * )malloc( buffer_length );
if( !buffer ) return -1;
blake2s_init( S, BLAKE2S_OUTBYTES );
while( 1 )
sum = 0;
while( 1 )
n = fread( buffer + sum, 1, buffer_length - sum, stream );
sum += n;
if( buffer_length == sum )
if( 0 == n )
if( ferror( stream ) )
goto cleanup_buffer;
goto final_process;
if( feof( stream ) )
goto final_process;
blake2s_update( S, buffer, buffer_length );
if( sum > 0 ) blake2s_update( S, buffer, sum );
blake2s_final( S, resstream, BLAKE2S_OUTBYTES );
ret = 0;
free( buffer );
return ret;
int blake2b_stream( FILE *stream, void *resstream )
int ret = -1;
size_t sum, n;
blake2b_state S[1];
static const size_t buffer_length = 32768;
uint8_t *buffer = ( uint8_t * )malloc( buffer_length );
if( !buffer ) return -1;
blake2b_init( S, BLAKE2B_OUTBYTES );
while( 1 )
sum = 0;
while( 1 )
n = fread( buffer + sum, 1, buffer_length - sum, stream );
sum += n;
if( buffer_length == sum )
if( 0 == n )
if( ferror( stream ) )
goto cleanup_buffer;
goto final_process;
if( feof( stream ) )
goto final_process;
blake2b_update( S, buffer, buffer_length );
if( sum > 0 ) blake2b_update( S, buffer, sum );
blake2b_final( S, resstream, BLAKE2B_OUTBYTES );
ret = 0;
free( buffer );
return ret;
int blake2sp_stream( FILE *stream, void *resstream )
int ret = -1;
size_t sum, n;
blake2sp_state S[1];
static const size_t buffer_length = 16 * ( 1UL << 20 );
uint8_t *buffer = ( uint8_t * )malloc( buffer_length );
if( !buffer ) return -1;
blake2sp_init( S, BLAKE2S_OUTBYTES );
while( 1 )
sum = 0;
while( 1 )
n = fread( buffer + sum, 1, buffer_length - sum, stream );
sum += n;
if( buffer_length == sum )
if( 0 == n )
if( ferror( stream ) )
goto cleanup_buffer;
goto final_process;
if( feof( stream ) )
goto final_process;
blake2sp_update( S, buffer, buffer_length );
if( sum > 0 ) blake2sp_update( S, buffer, sum );
blake2sp_final( S, resstream, BLAKE2S_OUTBYTES );
ret = 0;
free( buffer );
return ret;
int blake2bp_stream( FILE *stream, void *resstream )
int ret = -1;
size_t sum, n;
blake2bp_state S[1];
static const size_t buffer_length = 16 * ( 1UL << 20 );
uint8_t *buffer = ( uint8_t * )malloc( buffer_length );
if( !buffer ) return -1;
blake2bp_init( S, BLAKE2B_OUTBYTES );
while( 1 )
sum = 0;
while( 1 )
n = fread( buffer + sum, 1, buffer_length - sum, stream );
sum += n;
if( buffer_length == sum )
if( 0 == n )
if( ferror( stream ) )
goto cleanup_buffer;
goto final_process;
if( feof( stream ) )
goto final_process;
blake2bp_update( S, buffer, buffer_length );
if( sum > 0 ) blake2bp_update( S, buffer, sum );
blake2bp_final( S, resstream, BLAKE2B_OUTBYTES );
ret = 0;
free( buffer );
return ret;
typedef int ( *blake2fn )( FILE *, void * );
/*
 * Print the command-line synopsis to stderr and terminate the process with
 * exit status 111. `argv[0]` is used as the program name. Never returns.
 */
static void usage( char **argv )
{
  fprintf( stderr, "Usage: %s [-a HASH] [FILE]...\n", argv[0] );
  fprintf( stderr, "\tHASH in blake2b blake2s blake2bp blake2sp\n" );
  exit( 111 );
}
int main( int argc, char **argv )
blake2fn blake2_stream = blake2b_stream;
size_t outlen = BLAKE2B_OUTBYTES;
unsigned char hash[BLAKE2B_OUTBYTES] = {0};
int c;
opterr = 1;
if ( argc == 1 ) usage( argv ); /* show usage upon no-argument */
while( ( c = getopt( argc, argv, "a:" ) ) != -1 )
switch( c )
case 'a':
if( 0 == strcmp( optarg, "blake2b" ) )
blake2_stream = blake2b_stream;
else if ( 0 == strcmp( optarg, "blake2s" ) )
blake2_stream = blake2s_stream;
else if ( 0 == strcmp( optarg, "blake2bp" ) )
blake2_stream = blake2bp_stream;
else if ( 0 == strcmp( optarg, "blake2sp" ) )
blake2_stream = blake2sp_stream;
printf( "Invalid function name: `%s'\n", optarg );
usage( argv );
for( int i = optind; i < argc; ++i )
f = fopen( argv[i], "rb" );
if( !f )
fprintf( stderr, "Could not open `%s': %s\n", argv[i], strerror( errno ) );
goto end0;
if( blake2_stream( f, hash ) < 0 )
fprintf( stderr, "Failed to hash `%s'\n", argv[i] );
goto end1;
for( int j = 0; j < outlen; ++j )
printf( "%02x", hash[j] );
printf( " %s\n", argv[i] );
fclose( f );
end0: ;
return 0;

b2sum/makefile Normal file
View File

@ -0,0 +1,10 @@
CFLAGS=-std=c99 -O3 -march=native -I../sse -static -fopenmp
#FILES=blake2sum.c ../ref/blake2b-ref.c ../ref/blake2s-ref.c ../ref/blake2bp-ref.c ../ref/blake2sp-ref.c
FILES=b2sum.c ../sse/blake2b.c ../sse/blake2s.c ../sse/blake2bp.c ../sse/blake2sp.c
.PHONY: all clean

# Build the b2sum command-line tool from the sources listed in FILES.
all: $(FILES)
	$(CC) $(FILES) $(CFLAGS) $(LIBS) -o b2sum

# Remove the built binary.
clean:
	rm -f b2sum

bench/amd64cpuinfo.c Normal file
View File

@ -0,0 +1,20 @@
/*
   BLAKE2 reference source code package - benchmark tool

   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
/*
 * Return the 64-bit value produced by the two-byte opcode 0x0f 0x31 (RDTSC).
 *
 * The instruction leaves its result split across EDX:EAX; the inline assembly
 * shifts RDX left by 32 bits and ORs it into RAX, which is bound to `result`.
 * RDX is listed as clobbered because it is modified without being an output.
 *
 * `__asm__ __volatile__` is used instead of `asm volatile` so the file also
 * compiles in strict ISO modes (-std=c99/-std=c11), where `asm` is not a
 * keyword.
 */
unsigned long long cpucycles( void )
{
  unsigned long long result;
  __asm__ __volatile__( ".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax"
                        : "=a" ( result ) :: "%rdx" );
  return result;
}

bench/bench.c Normal file
View File

@ -0,0 +1,68 @@
/*
   BLAKE2 reference source code package - benchmark tool

   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int crypto_hash( unsigned char *out, const unsigned char *in, unsigned long long inlen );
/*
 * qsort() comparison callback ordering two 64-bit values ascending.
 *
 * Returns a negative, zero or positive int when *x is less than, equal to or
 * greater than *y (both read as int64_t).
 *
 * The result is built from two comparisons rather than from `*x - *y`: the
 * 64-bit difference does not fit in the int return type, so returning it
 * directly reports a wrong sign (or equality) for values that differ by more
 * than the range of int.
 */
static int bench_cmp( const void *x, const void *y )
{
  const int64_t a = *( const int64_t * )x;
  const int64_t b = *( const int64_t * )y;
  return ( a > b ) - ( a < b );
}
unsigned long long cpucycles( void );
void bench()
#define BENCH_TRIALS 32
#define BENCH_MAXLEN 1536
static unsigned char in[4096];
static unsigned long long median[4096 + 1];
int i, j;
printf( "#bytes median per byte\n" );
/* 1 ... BENCH_MAXLEN */
for( j = 0; j <= 4096; ++j )
uint64_t cycles[BENCH_TRIALS + 1];
for( i = 0; i <= BENCH_TRIALS; ++i )
cycles[i] = cpucycles();
crypto_hash( in, in, j );
for( i = 0; i < BENCH_TRIALS; ++i )
cycles[i] = cycles[i + 1] - cycles[i];
qsort( cycles, BENCH_TRIALS, sizeof( uint64_t ), bench_cmp );
median[j] = cycles[BENCH_TRIALS / 2];
for( j = 0; j <= BENCH_MAXLEN; j += 8 )
printf( "%5d, %7.2f\n", j, ( double )median[j] / j );
printf( "#2048 %6llu %7.2f\n", median[2048], ( double )median[2048] / 2048.0 );
printf( "#4096 %6llu %7.2f\n", median[4096], ( double )median[4096] / 4096.0 );
printf( "#long long %7.2f\n", ( double )( median[4096] - median[2048] ) / 2048.0 );
/*
 * Benchmark entry point: run bench(), which prints its measurements to
 * stdout, then exit successfully.
 */
int main( void )
{
  bench();
  return 0;
}

bench/do.gplot Normal file
View File

@ -0,0 +1,16 @@
maxx = 256
set xrange [1:maxx]
set xlabel "bytes "
set ylabel "cycles"
set xtics 0,32,maxx
set grid
set key left
plot "blake2b.data" using 1:2 with lines title "BLAKE2b"
replot "blake2s.data" using 1:2 with lines title "BLAKE2s"
replot "md5.data" using 1:2 with lines title "MD5"
#pause -1 "hit return to continue"
#set terminal png
#set output "plotcycles.png"
set terminal pdfcairo
set output "plotcycles.pdf"

bench/makefile Normal file
View File

@ -0,0 +1,16 @@
# std to gnu99 to support inline asm
CFLAGS=-std=gnu99 -O3 -march=native -DSUPERCOP # -DHAVE_XOP # uncomment on XOP-enabled CPUs
FILES=amd64cpuinfo.c bench.c
.PHONY: all plot clean

# Build one benchmark binary per hash implementation.
all:
	$(CC) $(FILES) $(CFLAGS) ../sse/blake2b.c -o blake2b
	$(CC) $(FILES) $(CFLAGS) ../sse/blake2s.c -o blake2s
	$(CC) $(FILES) $(CFLAGS) md5.c -o md5 -lcrypto -lz

# Run the benchmarks, collect their output and plot it with gnuplot.
plot: all
	./blake2b > blake2b.data
	./blake2s > blake2s.data
	./md5 > md5.data
	gnuplot do.gplot

# Remove binaries, collected data and the generated plot.
clean:
	rm -f blake2b blake2s md5 plotcycles.pdf blake2b.data blake2s.data md5.data

bench/md5.c Normal file
View File

@ -0,0 +1,21 @@
/*
   BLAKE2 reference source code package - benchmark tool

   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
#include <stddef.h>
#include <openssl/md5.h>
//#include "crypto_hash.h"
/*
 * Adapter that lets the benchmark harness time OpenSSL's MD5 through the same
 * crypto_hash() signature used for the BLAKE2 implementations.
 *
 * out   - receives the digest produced by MD5()
 * in    - message bytes
 * inlen - number of message bytes
 *
 * Always returns 0.
 */
int crypto_hash( unsigned char *out, const unsigned char *in, unsigned long long inlen )
{
  MD5( in, inlen, out );
  return 0;
}

bench/x86cpuinfo.c Normal file
View File

@ -0,0 +1,19 @@
/*
   BLAKE2 reference source code package - benchmark tool

   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
/*
 * Return the value produced by the two-byte opcode 0x0f 0x31 (RDTSC).
 *
 * The "=A" output constraint binds `result` to the register pair the
 * instruction writes on 32-bit x86 (see GCC's x86 machine constraints), so no
 * extra shifting is needed here, unlike in the amd64 variant.
 *
 * `__asm__ __volatile__` is used instead of `asm volatile` so the file also
 * compiles in strict ISO modes (-std=c99/-std=c11), where `asm` is not a
 * keyword.
 */
unsigned long long cpucycles( void )
{
  unsigned long long result;
  __asm__ __volatile__( ".byte 15;.byte 49" : "=A" ( result ) );
  return result;
}

View File

@ -0,0 +1,54 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="System.Data" />
<Reference Include="System.Xml" />
<Compile Include="Program.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
<Target Name="AfterBuild">

View File

@ -0,0 +1,93 @@

/// BLAKE2 reference source code package - C# implementation
/// Written in 2012 by Christian Winnerlein <codesinchaos@gmail.com>
/// To the extent possible under law, the author(s) have dedicated all copyright
/// and related and neighboring rights to this software to the public domain
/// worldwide. This software is distributed without any warranty.
/// You should have received a copy of the CC0 Public Domain Dedication along with
/// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace Blake2Sharp.CompressionCodeGen
{
	/// <summary>
	/// Console code generator: prints the fully unrolled statements of the
	/// compression rounds, one G application at a time, to standard output.
	/// </summary>
	class Program
	{
		private const int NumberOfRounds = 12;

		/// <summary>
		/// Message-word permutation table, 16 entries per round, flattened so
		/// that round r starts at index r * 16.
		/// </summary>
		private static readonly int[] Sigma = new int[NumberOfRounds * 16] {
			0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
			14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3,
			11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4,
			7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8,
			9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13,
			2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9,
			12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11,
			13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10,
			6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5,
			10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0,
			0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
			14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3
		};

		/// <summary>
		/// Emits one round: a header comment followed by eight G blocks, the
		/// first four over the columns and the last four over the diagonals
		/// of the 4x4 arrangement of v0..v15.
		/// </summary>
		static void Round(int r)
		{
			Console.WriteLine("// ##### Round({0}) #####", r);
			G(r, 0, 0, 4, 8, 12);
			G(r, 1, 1, 5, 9, 13);
			G(r, 2, 2, 6, 10, 14);
			G(r, 3, 3, 7, 11, 15);
			G(r, 4, 0, 5, 10, 15);
			G(r, 5, 1, 6, 11, 12);
			G(r, 6, 2, 7, 8, 13);
			G(r, 7, 3, 4, 9, 14);
		}

		/// <summary>
		/// Emits the statements of the i-th G application of round r, acting
		/// on the state words v{a}, v{b}, v{c}, v{d}.
		/// </summary>
		static void G(int r, int i, int a, int b, int c, int d)
		{
			// Two message words are consumed per G application.
			int p = (r << 4) + 2 * i;
			int p0 = Sigma[p];
			int p1 = Sigma[p + 1];

			// The template is written with single-letter placeholders that are
			// substituted below. Its lines start at column 0 so that no
			// indentation ends up in the generated text.
			string s = @"// G(r, i, a, b, c, d)
a = a + b + m[" + p0 + @"];
d ^= a;
d = " + RotateRight("d", 32) + @";
c = c + d;
b ^= c;
b = " + RotateRight("b", 24) + @";
a = a + b + m[" + p1 + @"];
d ^= a;
d = " + RotateRight("d", 16) + @";
c = c + d;
b ^= c;
b = " + RotateRight("b", 63) + @";";

			// Order matters: the replacement texts ("v" plus digits, then plain
			// digits) contain none of the placeholder letters still to come.
			s = s.Replace("a", "v" + a);
			s = s.Replace("b", "v" + b);
			s = s.Replace("c", "v" + c);
			s = s.Replace("d", "v" + d);
			s = s.Replace("r", r.ToString());
			s = s.Replace("i", i.ToString());
			s = s.Replace("\t", "");

			// Without this the generated block would be built and then dropped.
			Console.WriteLine(s);
		}

		/// <summary>
		/// Returns the source text of a 64-bit right rotation of
		/// <paramref name="name"/> by <paramref name="offset"/> bits.
		/// </summary>
		static string RotateRight(string name, int offset)
		{
			return "((" + name + " >>" + offset + ")|(" + name + " << (64-" + offset + ")))";
		}

		static void Main(string[] args)
		{
			for (int r = 0; r < NumberOfRounds; r++)
			{
				Round(r);
			}
		}
	}
}

View File

@ -0,0 +1,47 @@

/// BLAKE2 reference source code package - C# implementation
/// Written in 2012 by Christian Winnerlein <codesinchaos@gmail.com>
/// To the extent possible under law, the author(s) have dedicated all copyright
/// and related and neighboring rights to this software to the public domain
/// worldwide. This software is distributed without any warranty.
/// You should have received a copy of the CC0 Public Domain Dedication along with
/// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("Blake2Sharp.CompressionCodeGen")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("Blake2Sharp.CompressionCodeGen")]
[assembly: AssemblyCopyright("Copyright © 2012")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]
// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("4af5636c-d52d-464f-a707-94464397988a")]
// Version information for an assembly consists of the following four values:
// Major Version
// Minor Version
// Build Number
// Revision
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("")]
[assembly: AssemblyFileVersion("")]

View File

@ -0,0 +1,94 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<VisualStudioVersion Condition="'$(VisualStudioVersion)' == ''">10.0</VisualStudioVersion>
<VSToolsPath Condition="'$(VSToolsPath)' == ''">$(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)</VSToolsPath>
<ReferencePath>$(ProgramFiles)\Common Files\microsoft shared\VSTT\$(VisualStudioVersion)\UITestExtensionPackages</ReferencePath>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<Reference Include="System" />
<Reference Include="System.Core">
<When Condition="('$(VisualStudioVersion)' == '10.0' or '$(VisualStudioVersion)' == '') and '$(TargetFrameworkVersion)' == 'v3.5'">
<Reference Include="Microsoft.VisualStudio.QualityTools.UnitTestFramework, Version=, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL" />
<Reference Include="Microsoft.VisualStudio.QualityTools.UnitTestFramework" />
<Compile Include="DebugNodeHasher.cs" />
<Compile Include="SequentialTests.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="TestVectors.cs" />
<ProjectReference Include="..\Blake2Sharp\Blake2Sharp.csproj">
<When Condition="'$(VisualStudioVersion)' == '10.0' And '$(IsCodedUITest)' == 'True'">
<Reference Include="Microsoft.VisualStudio.QualityTools.CodedUITestFramework, Version=, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
<Reference Include="Microsoft.VisualStudio.TestTools.UITest.Common, Version=, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
<Reference Include="Microsoft.VisualStudio.TestTools.UITest.Extension, Version=, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
<Reference Include="Microsoft.VisualStudio.TestTools.UITesting, Version=, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
<Import Project="$(VSToolsPath)\TeamTest\Microsoft.TestTools.targets" Condition="Exists('$(VSToolsPath)\TeamTest\Microsoft.TestTools.targets')" />
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
<Target Name="AfterBuild">

View File

@ -0,0 +1,39 @@

/// BLAKE2 reference source code package - C# implementation
/// Written in 2012 by Christian Winnerlein <codesinchaos@gmail.com>
/// To the extent possible under law, the author(s) have dedicated all copyright
/// and related and neighboring rights to this software to the public domain
/// worldwide. This software is distributed without any warranty.
/// You should have received a copy of the CC0 Public Domain Dedication along with
/// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace Blake2Sharp.Tests
/*class DebugNodeHasher : NodeHasher
StringBuilder data = new StringBuilder();
public override void Init(int depth, long nodeOffset)
data.AppendFormat("({0}-{1}", depth, nodeOffset);
public override byte[] Finish(bool isEndOfLayer)
return Encoding.ASCII.GetBytes(data.ToString());
public override void Update(byte[] data, int start, int count)
data.Append(Encoding.ASCII.GetString(data, start, count));

View File

@ -0,0 +1,47 @@

/// BLAKE2 reference source code package - C# implementation
/// Written in 2012 by Christian Winnerlein <codesinchaos@gmail.com>
/// To the extent possible under law, the author(s) have dedicated all copyright
/// and related and neighboring rights to this software to the public domain
/// worldwide. This software is distributed without any warranty.
/// You should have received a copy of the CC0 Public Domain Dedication along with
/// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("Blake2Sharp.Tests")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("Blake2Sharp.Tests")]
[assembly: AssemblyCopyright("Copyright © 2012")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]
// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("4e74ef44-28bc-4b91-9ae9-355e132081ad")]
// Version information for an assembly consists of the following four values:
// Major Version
// Minor Version
// Build Number
// Revision
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("")]
[assembly: AssemblyFileVersion("")]

View File

@ -0,0 +1,76 @@

/// BLAKE2 reference source code package - C# implementation
/// Written in 2012 by Christian Winnerlein <codesinchaos@gmail.com>
/// To the extent possible under law, the author(s) have dedicated all copyright
/// and related and neighboring rights to this software to the public domain
/// worldwide. This software is distributed without any warranty.
/// You should have received a copy of the CC0 Public Domain Dedication along with
/// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
using System;
using System.Linq;
using Microsoft.VisualStudio.TestTools.UnitTesting;
namespace Blake2Sharp.Tests
{
	/// <summary>
	/// Tests for the sequential Blake2B implementation: known-answer vectors
	/// (unkeyed and keyed) and consistency across differently split updates.
	/// </summary>
	[TestClass]
	public class SequentialTests
	{
		// Bytes 0x00..0xFF, used as the message in Splits().
		byte[] input = Enumerable.Range(0, 256).Select(i => (byte)i).ToArray();

		/// <summary>
		/// Hashes the messages 00, 00 01, 00 01 02, ... (one per table entry)
		/// and compares each digest with TestVectors.UnkeyedBlake2B.
		/// </summary>
		[TestMethod]
		public void CheckTestVectors()
		{
			for (int len = 0; len < TestVectors.UnkeyedBlake2B.Length; len++)
			{
				var input = Enumerable.Range(0, len).Select(i => (byte)i).ToArray();
				var hash = Blake2B.ComputeHash(input);
				string actual = BitConverter.ToString(hash).Replace("-", "");
				string expected = TestVectors.UnkeyedBlake2B[len];
				Assert.AreEqual(expected, actual);
			}
		}

		/// <summary>
		/// Same as <see cref="CheckTestVectors"/>, but hashing with the
		/// 64-byte key 00..3F and comparing with TestVectors.KeyedBlake2B.
		/// </summary>
		[TestMethod]
		public void CheckKeyedTestVectors()
		{
			var key = Enumerable.Range(0, 64).Select(i => (byte)i).ToArray();
			for (int len = 0; len < TestVectors.KeyedBlake2B.Length; len++)
			{
				var input = Enumerable.Range(0, len).Select(i => (byte)i).ToArray();
				var hash = Blake2B.ComputeHash(input, new Blake2BConfig { Key = key });
				string actual = BitConverter.ToString(hash).Replace("-", "");
				string expected = TestVectors.KeyedBlake2B[len];
				Assert.AreEqual(expected, actual);
			}
		}

		/// <summary>
		/// For every message length up to 256, checks that feeding the
		/// message in three pieces (for every pair of split points) yields
		/// the same digest as feeding it in one piece.
		/// </summary>
		[TestMethod]
		public void Splits()
		{
			var hasher = Blake2B.Create();
			// NOTE(review): the same hasher instance is reused after every
			// Finish(). Confirm that Finish() leaves it ready for a new
			// message, or add the hasher's re-initialisation call before each
			// sequence of Update() calls.
			for (int len = 0; len <= 256; len++)
			{
				hasher.Update(input, 0, len);
				string hash0 = BitConverter.ToString(hasher.Finish());

				for (int split1 = 0; split1 <= len; split1++)
				{
					for (int split2 = split1; split2 <= len; split2++)
					{
						hasher.Update(input, 0, split1);
						hasher.Update(input, split1, split2 - split1);
						hasher.Update(input, split2, len - split2);
						string hash1 = BitConverter.ToString(hasher.Finish());
						Assert.AreEqual(hash0, hash1);
					}
				}
			}
		}
	}
}

View File

@ -0,0 +1,539 @@

/// BLAKE2 reference source code package - C# implementation
/// Written in 2012 by Christian Winnerlein <codesinchaos@gmail.com>
/// To the extent possible under law, the author(s) have dedicated all copyright
/// and related and neighboring rights to this software to the public domain
/// worldwide. This software is distributed without any warranty.
/// You should have received a copy of the CC0 Public Domain Dedication along with
/// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace Blake2Sharp.Tests
internal static class TestVectors
public static string[] UnkeyedBlake2B = new string[]{
public static string[] KeyedBlake2B = new string[]{

csharp/Blake2Sharp.sln Normal file
View File

@ -0,0 +1,34 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 2012
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{98C3F5AC-1FDF-4AAF-B067-A9E9C663D87B}"
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Blake2Sharp", "Blake2Sharp\Blake2Sharp.csproj", "{E21AB364-9130-4F14-ABE1-18FA0C089130}"
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Blake2Sharp.Tests", "Blake2Sharp.Tests\Blake2Sharp.Tests.csproj", "{A32451B3-03A3-4CB3-AD9F-1408143D6AB7}"
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Blake2Sharp.CompressionCodeGen", "Blake2Sharp.CompressionCodeGen\Blake2Sharp.CompressionCodeGen.csproj", "{17466328-5736-4EA1-A88D-CE016CCA2E80}"
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{E21AB364-9130-4F14-ABE1-18FA0C089130}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E21AB364-9130-4F14-ABE1-18FA0C089130}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E21AB364-9130-4F14-ABE1-18FA0C089130}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E21AB364-9130-4F14-ABE1-18FA0C089130}.Release|Any CPU.Build.0 = Release|Any CPU
{A32451B3-03A3-4CB3-AD9F-1408143D6AB7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A32451B3-03A3-4CB3-AD9F-1408143D6AB7}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A32451B3-03A3-4CB3-AD9F-1408143D6AB7}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A32451B3-03A3-4CB3-AD9F-1408143D6AB7}.Release|Any CPU.Build.0 = Release|Any CPU
{17466328-5736-4EA1-A88D-CE016CCA2E80}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{17466328-5736-4EA1-A88D-CE016CCA2E80}.Debug|Any CPU.Build.0 = Debug|Any CPU
{17466328-5736-4EA1-A88D-CE016CCA2E80}.Release|Any CPU.ActiveCfg = Release|Any CPU
{17466328-5736-4EA1-A88D-CE016CCA2E80}.Release|Any CPU.Build.0 = Release|Any CPU
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View File

@ -0,0 +1,72 @@
// BLAKE2 reference source code package - C# implementation
// Written in 2012 by Christian Winnerlein <codesinchaos@gmail.com>
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// You should have received a copy of the CC0 Public Domain Dedication along with
// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
using System;
namespace Blake2Sharp
/// <summary>
/// Static entry points for BLAKE2b: factory methods returning a <see cref="Hasher"/>
/// and one-shot ComputeHash helpers built on top of them.
/// </summary>
public static class Blake2B
/// <summary>Creates a hasher configured with a default-constructed <see cref="Blake2BConfig"/>.</summary>
public static Hasher Create()
return Create(new Blake2BConfig());
/// <summary>Creates a hasher by passing <paramref name="config"/> to the Blake2BHasher constructor.</summary>
/// <param name="config">
/// Hash configuration. The ComputeHash overloads that take no config pass null here.
/// NOTE(review): presumably Blake2BHasher substitutes defaults for null; confirm.
/// </param>
public static Hasher Create(Blake2BConfig config)
return new Blake2BHasher(config);
// The following region is a disabled sketch of a parallel / tree hashing API.
// NOTE(review): the block-comment terminator for this region is not visible in this
// view; the ComputeHash overloads further down are called by the tests and so are
// presumably live code. Confirm against the original file.
/*public static Hasher CreateParallel(int parallelism = 4)
return CreateParallel(null, parallelism);
public static Hasher CreateParallel(Blake2Config config, int parallelism = 4)
if (parallelism < 2)
throw new ArgumentOutOfRangeException("parallelism", "parallism must be at least 2");
throw new NotImplementedException();
public static Hasher CreateTreeHasher(Blake2BConfig config, Blake2TreeConfig treeConfig)
public static NodeHasher CreateNodeHasher(Blake2BConfig config, Blake2TreeConfig treeConfig)
/// <summary>Hashes a slice of <paramref name="data"/> with a null configuration.</summary>
/// <param name="data">Buffer containing the message.</param>
/// <param name="start">Offset into <paramref name="data"/> forwarded to Update.</param>
/// <param name="count">Byte count forwarded to Update.</param>
/// <returns>The digest returned by the hasher's Finish().</returns>
public static byte[] ComputeHash(byte[] data, int start, int count)
return ComputeHash(data, start, count, null);
/// <summary>Hashes the whole of <paramref name="data"/> with a null configuration.</summary>
/// <remarks>Reads data.Length, so a null <paramref name="data"/> fails before any hashing happens.</remarks>
public static byte[] ComputeHash(byte[] data)
return ComputeHash(data, 0, data.Length, null);
/// <summary>Hashes the whole of <paramref name="data"/> using <paramref name="config"/>.</summary>
public static byte[] ComputeHash(byte[] data, Blake2BConfig config)
return ComputeHash(data, 0, data.Length, config);
/// <summary>
/// One-shot hash: creates a hasher for <paramref name="config"/>, feeds it the requested
/// slice with a single Update call and returns the result of Finish().
/// </summary>
public static byte[] ComputeHash(byte[] data, int start, int count, Blake2BConfig config)
var hasher = Create(config);
hasher.Update(data, start, count);
return hasher.Finish();
// Placeholders for one-shot parallel hashing; not implemented.
//public static byte[] ComputeParallelHash(byte[] data);
//public static byte[] ComputeParallelHash(byte[] data, Blake2Config config);

View File

@ -0,0 +1,57 @@
// BLAKE2 reference source code package - C# implementation
// Written in 2012 by Christian Winnerlein <codesinchaos@gmail.com>
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// You should have received a copy of the CC0 Public Domain Dedication along with
// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
using System;
using System.Text;
namespace Blake2Sharp
/// <summary>
/// Parameters for a BLAKE2b hash instance: optional key, salt and personalization
/// bytes, plus the digest length. A new instance defaults to a 64-byte digest with
/// no key, salt or personalization.
/// </summary>
public sealed class Blake2BConfig : ICloneable
{
    /// <summary>Optional personalization bytes; null when unset.</summary>
    public byte[] Personalization { get; set; }

    /// <summary>Optional salt bytes; null when unset.</summary>
    public byte[] Salt { get; set; }

    /// <summary>Optional key bytes for keyed hashing; null when unset.</summary>
    public byte[] Key { get; set; }

    /// <summary>Digest length in bytes.</summary>
    public int OutputSizeInBytes { get; set; }

    /// <summary>
    /// Digest length in bits; a view over <see cref="OutputSizeInBytes"/>.
    /// </summary>
    /// <exception cref="ArgumentException">
    /// Thrown by the setter when the value is not a multiple of 8.
    /// </exception>
    public int OutputSizeInBits
    {
        get { return OutputSizeInBytes * 8; }
        set
        {
            // Only whole-byte lengths are representable. The original test was
            // inverted (== 0): it rejected every valid size and silently
            // truncated invalid ones.
            if (value % 8 != 0)
                throw new ArgumentException("Output size must be a multiple of 8 bits");
            OutputSizeInBytes = value / 8;
        }
    }

    /// <summary>Creates a configuration with a 64-byte (512-bit) digest length.</summary>
    public Blake2BConfig()
    {
        OutputSizeInBytes = 64;
    }

    /// <summary>
    /// Returns an independent copy: the digest length is copied and each non-null
    /// byte array is duplicated, so later changes to one instance's arrays do not
    /// affect the other.
    /// </summary>
    public Blake2BConfig Clone()
    {
        var result = new Blake2BConfig();
        result.OutputSizeInBytes = OutputSizeInBytes;
        if (Key != null)
            result.Key = (byte[])Key.Clone();
        if (Personalization != null)
            result.Personalization = (byte[])Personalization.Clone();
        if (Salt != null)
            result.Salt = (byte[])Salt.Clone();
        return result;
    }

    /// <summary>Explicit <see cref="ICloneable"/> implementation; forwards to <see cref="Clone"/>.</summary>
    object ICloneable.Clone()
    {
        return Clone();
    }
}

View File

@ -0,0 +1,1455 @@
// BLAKE2 reference source code package - C# implementation
// Written in 2012 by Christian Winnerlein <codesinchaos@gmail.com>
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// You should have received a copy of the CC0 Public Domain Dedication along with
// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
using System;
namespace Blake2Sharp
#if true
public sealed partial class Blake2BCore
partial void Compress(byte[] block, int start)
var h = _h;
var m = _m;
if (BitConverter.IsLittleEndian)
Buffer.BlockCopy(block, start, m, 0, BlockSizeInBytes);
for (int i = 0; i < 16; ++i)
m[i] = BytesToUInt64(block, start + (i << 3));
/*var m0 = m[0];
var m1 = m[1];
var m2 = m[2];
var m3 = m[3];
var m4 = m[4];
var m5 = m[5];
var m6 = m[6];
var m7 = m[7];
var m8 = m[8];
var m9 = m[9];
var m10 = m[10];
var m11 = m[11];
var m12 = m[12];
var m13 = m[13];
var m14 = m[14];
var m15 = m[15];*/
var v0 = h[0];
var v1 = h[1];
var v2 = h[2];
var v3 = h[3];
var v4 = h[4];
var v5 = h[5];
var v6 = h[6];
var v7 = h[7];
var v8 = IV0;
var v9 = IV1;
var v10 = IV2;
var v11 = IV3;
var v12 = IV4 ^ _counter0;
var v13 = IV5 ^ _counter1;
var v14 = IV6 ^ _finalizationFlag0;
var v15 = IV7 ^ _finalizationFlag1;
// Rounds
// ##### Round(0) #####
// G(0, 0, v0, v4, v8, v12)
v0 = v0 + v4 + m[0];
v12 ^= v0;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v0 = v0 + v4 + m[1];
v12 ^= v0;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// G(0, 1, v1, v5, v9, v13)
v1 = v1 + v5 + m[2];
v13 ^= v1;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v1 = v1 + v5 + m[3];
v13 ^= v1;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(0, 2, v2, v6, v10, v14)
v2 = v2 + v6 + m[4];
v14 ^= v2;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v2 = v2 + v6 + m[5];
v14 ^= v2;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(0, 3, v3, v7, v11, v15)
v3 = v3 + v7 + m[6];
v15 ^= v3;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v3 = v3 + v7 + m[7];
v15 ^= v3;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(0, 4, v0, v5, v10, v15)
v0 = v0 + v5 + m[8];
v15 ^= v0;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v0 = v0 + v5 + m[9];
v15 ^= v0;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(0, 5, v1, v6, v11, v12)
v1 = v1 + v6 + m[10];
v12 ^= v1;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v1 = v1 + v6 + m[11];
v12 ^= v1;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(0, 6, v2, v7, v8, v13)
v2 = v2 + v7 + m[12];
v13 ^= v2;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v2 = v2 + v7 + m[13];
v13 ^= v2;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(0, 7, v3, v4, v9, v14)
v3 = v3 + v4 + m[14];
v14 ^= v3;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v3 = v3 + v4 + m[15];
v14 ^= v3;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// ##### Round(1) #####
// G(1, 0, v0, v4, v8, v12)
v0 = v0 + v4 + m[14];
v12 ^= v0;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v0 = v0 + v4 + m[10];
v12 ^= v0;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// G(1, 1, v1, v5, v9, v13)
v1 = v1 + v5 + m[4];
v13 ^= v1;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v1 = v1 + v5 + m[8];
v13 ^= v1;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(1, 2, v2, v6, v10, v14)
v2 = v2 + v6 + m[9];
v14 ^= v2;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v2 = v2 + v6 + m[15];
v14 ^= v2;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(1, 3, v3, v7, v11, v15)
v3 = v3 + v7 + m[13];
v15 ^= v3;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v3 = v3 + v7 + m[6];
v15 ^= v3;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(1, 4, v0, v5, v10, v15)
v0 = v0 + v5 + m[1];
v15 ^= v0;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v0 = v0 + v5 + m[12];
v15 ^= v0;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(1, 5, v1, v6, v11, v12)
v1 = v1 + v6 + m[0];
v12 ^= v1;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v1 = v1 + v6 + m[2];
v12 ^= v1;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(1, 6, v2, v7, v8, v13)
v2 = v2 + v7 + m[11];
v13 ^= v2;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v2 = v2 + v7 + m[7];
v13 ^= v2;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(1, 7, v3, v4, v9, v14)
v3 = v3 + v4 + m[5];
v14 ^= v3;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v3 = v3 + v4 + m[3];
v14 ^= v3;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// ##### Round(2) #####
// G(2, 0, v0, v4, v8, v12)
v0 = v0 + v4 + m[11];
v12 ^= v0;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v0 = v0 + v4 + m[8];
v12 ^= v0;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// G(2, 1, v1, v5, v9, v13)
v1 = v1 + v5 + m[12];
v13 ^= v1;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v1 = v1 + v5 + m[0];
v13 ^= v1;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(2, 2, v2, v6, v10, v14)
v2 = v2 + v6 + m[5];
v14 ^= v2;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v2 = v2 + v6 + m[2];
v14 ^= v2;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(2, 3, v3, v7, v11, v15)
v3 = v3 + v7 + m[15];
v15 ^= v3;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v3 = v3 + v7 + m[13];
v15 ^= v3;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(2, 4, v0, v5, v10, v15)
v0 = v0 + v5 + m[10];
v15 ^= v0;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v0 = v0 + v5 + m[14];
v15 ^= v0;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(2, 5, v1, v6, v11, v12)
v1 = v1 + v6 + m[3];
v12 ^= v1;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v1 = v1 + v6 + m[6];
v12 ^= v1;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(2, 6, v2, v7, v8, v13)
v2 = v2 + v7 + m[7];
v13 ^= v2;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v2 = v2 + v7 + m[1];
v13 ^= v2;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(2, 7, v3, v4, v9, v14)
v3 = v3 + v4 + m[9];
v14 ^= v3;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v3 = v3 + v4 + m[4];
v14 ^= v3;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// ##### Round(3) #####
// G(3, 0, v0, v4, v8, v12)
v0 = v0 + v4 + m[7];
v12 ^= v0;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v0 = v0 + v4 + m[9];
v12 ^= v0;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// G(3, 1, v1, v5, v9, v13)
v1 = v1 + v5 + m[3];
v13 ^= v1;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v1 = v1 + v5 + m[1];
v13 ^= v1;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(3, 2, v2, v6, v10, v14)
v2 = v2 + v6 + m[13];
v14 ^= v2;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v2 = v2 + v6 + m[12];
v14 ^= v2;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(3, 3, v3, v7, v11, v15)
v3 = v3 + v7 + m[11];
v15 ^= v3;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v3 = v3 + v7 + m[14];
v15 ^= v3;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(3, 4, v0, v5, v10, v15)
v0 = v0 + v5 + m[2];
v15 ^= v0;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v0 = v0 + v5 + m[6];
v15 ^= v0;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(3, 5, v1, v6, v11, v12)
v1 = v1 + v6 + m[5];
v12 ^= v1;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v1 = v1 + v6 + m[10];
v12 ^= v1;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(3, 6, v2, v7, v8, v13)
v2 = v2 + v7 + m[4];
v13 ^= v2;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v2 = v2 + v7 + m[0];
v13 ^= v2;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(3, 7, v3, v4, v9, v14)
v3 = v3 + v4 + m[15];
v14 ^= v3;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v3 = v3 + v4 + m[8];
v14 ^= v3;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// ##### Round(4) #####
// G(4, 0, v0, v4, v8, v12)
v0 = v0 + v4 + m[9];
v12 ^= v0;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v0 = v0 + v4 + m[0];
v12 ^= v0;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// G(4, 1, v1, v5, v9, v13)
v1 = v1 + v5 + m[5];
v13 ^= v1;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v1 = v1 + v5 + m[7];
v13 ^= v1;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(4, 2, v2, v6, v10, v14)
v2 = v2 + v6 + m[2];
v14 ^= v2;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v2 = v2 + v6 + m[4];
v14 ^= v2;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(4, 3, v3, v7, v11, v15)
v3 = v3 + v7 + m[10];
v15 ^= v3;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v3 = v3 + v7 + m[15];
v15 ^= v3;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(4, 4, v0, v5, v10, v15)
v0 = v0 + v5 + m[14];
v15 ^= v0;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v0 = v0 + v5 + m[1];
v15 ^= v0;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(4, 5, v1, v6, v11, v12)
v1 = v1 + v6 + m[11];
v12 ^= v1;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v1 = v1 + v6 + m[12];
v12 ^= v1;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(4, 6, v2, v7, v8, v13)
v2 = v2 + v7 + m[6];
v13 ^= v2;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v2 = v2 + v7 + m[8];
v13 ^= v2;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(4, 7, v3, v4, v9, v14)
v3 = v3 + v4 + m[3];
v14 ^= v3;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v3 = v3 + v4 + m[13];
v14 ^= v3;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// ##### Round(5) #####
// G(5, 0, v0, v4, v8, v12)
v0 = v0 + v4 + m[2];
v12 ^= v0;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v0 = v0 + v4 + m[12];
v12 ^= v0;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// G(5, 1, v1, v5, v9, v13)
v1 = v1 + v5 + m[6];
v13 ^= v1;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v1 = v1 + v5 + m[10];
v13 ^= v1;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(5, 2, v2, v6, v10, v14)
v2 = v2 + v6 + m[0];
v14 ^= v2;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v2 = v2 + v6 + m[11];
v14 ^= v2;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(5, 3, v3, v7, v11, v15)
v3 = v3 + v7 + m[8];
v15 ^= v3;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v3 = v3 + v7 + m[3];
v15 ^= v3;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(5, 4, v0, v5, v10, v15)
v0 = v0 + v5 + m[4];
v15 ^= v0;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v0 = v0 + v5 + m[13];
v15 ^= v0;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(5, 5, v1, v6, v11, v12)
v1 = v1 + v6 + m[7];
v12 ^= v1;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v1 = v1 + v6 + m[5];
v12 ^= v1;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(5, 6, v2, v7, v8, v13)
v2 = v2 + v7 + m[15];
v13 ^= v2;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v2 = v2 + v7 + m[14];
v13 ^= v2;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(5, 7, v3, v4, v9, v14)
v3 = v3 + v4 + m[1];
v14 ^= v3;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v3 = v3 + v4 + m[9];
v14 ^= v3;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// ##### Round(6) #####
// G(6, 0, v0, v4, v8, v12)
v0 = v0 + v4 + m[12];
v12 ^= v0;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v0 = v0 + v4 + m[5];
v12 ^= v0;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// G(6, 1, v1, v5, v9, v13)
v1 = v1 + v5 + m[1];
v13 ^= v1;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v1 = v1 + v5 + m[15];
v13 ^= v1;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(6, 2, v2, v6, v10, v14)
v2 = v2 + v6 + m[14];
v14 ^= v2;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v2 = v2 + v6 + m[13];
v14 ^= v2;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(6, 3, v3, v7, v11, v15)
v3 = v3 + v7 + m[4];
v15 ^= v3;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v3 = v3 + v7 + m[10];
v15 ^= v3;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(6, 4, v0, v5, v10, v15)
v0 = v0 + v5 + m[0];
v15 ^= v0;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v0 = v0 + v5 + m[7];
v15 ^= v0;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(6, 5, v1, v6, v11, v12)
v1 = v1 + v6 + m[6];
v12 ^= v1;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v1 = v1 + v6 + m[3];
v12 ^= v1;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(6, 6, v2, v7, v8, v13)
v2 = v2 + v7 + m[9];
v13 ^= v2;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v2 = v2 + v7 + m[2];
v13 ^= v2;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(6, 7, v3, v4, v9, v14)
v3 = v3 + v4 + m[8];
v14 ^= v3;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v3 = v3 + v4 + m[11];
v14 ^= v3;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// ##### Round(7) #####
// G(7, 0, v0, v4, v8, v12)
v0 = v0 + v4 + m[13];
v12 ^= v0;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v0 = v0 + v4 + m[11];
v12 ^= v0;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// G(7, 1, v1, v5, v9, v13)
v1 = v1 + v5 + m[7];
v13 ^= v1;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v1 = v1 + v5 + m[14];
v13 ^= v1;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(7, 2, v2, v6, v10, v14)
v2 = v2 + v6 + m[12];
v14 ^= v2;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v2 = v2 + v6 + m[1];
v14 ^= v2;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(7, 3, v3, v7, v11, v15)
v3 = v3 + v7 + m[3];
v15 ^= v3;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v3 = v3 + v7 + m[9];
v15 ^= v3;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(7, 4, v0, v5, v10, v15)
v0 = v0 + v5 + m[5];
v15 ^= v0;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v0 = v0 + v5 + m[0];
v15 ^= v0;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(7, 5, v1, v6, v11, v12)
v1 = v1 + v6 + m[15];
v12 ^= v1;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v1 = v1 + v6 + m[4];
v12 ^= v1;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(7, 6, v2, v7, v8, v13)
v2 = v2 + v7 + m[8];
v13 ^= v2;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v2 = v2 + v7 + m[6];
v13 ^= v2;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(7, 7, v3, v4, v9, v14)
v3 = v3 + v4 + m[2];
v14 ^= v3;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v3 = v3 + v4 + m[10];
v14 ^= v3;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// ##### Round(8) #####
// G(8, 0, v0, v4, v8, v12)
v0 = v0 + v4 + m[6];
v12 ^= v0;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v0 = v0 + v4 + m[15];
v12 ^= v0;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// G(8, 1, v1, v5, v9, v13)
v1 = v1 + v5 + m[14];
v13 ^= v1;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v1 = v1 + v5 + m[9];
v13 ^= v1;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(8, 2, v2, v6, v10, v14)
v2 = v2 + v6 + m[11];
v14 ^= v2;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v2 = v2 + v6 + m[3];
v14 ^= v2;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(8, 3, v3, v7, v11, v15)
v3 = v3 + v7 + m[0];
v15 ^= v3;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v3 = v3 + v7 + m[8];
v15 ^= v3;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(8, 4, v0, v5, v10, v15)
v0 = v0 + v5 + m[12];
v15 ^= v0;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v0 = v0 + v5 + m[2];
v15 ^= v0;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(8, 5, v1, v6, v11, v12)
v1 = v1 + v6 + m[13];
v12 ^= v1;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v1 = v1 + v6 + m[7];
v12 ^= v1;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(8, 6, v2, v7, v8, v13)
v2 = v2 + v7 + m[1];
v13 ^= v2;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v2 = v2 + v7 + m[4];
v13 ^= v2;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(8, 7, v3, v4, v9, v14)
v3 = v3 + v4 + m[10];
v14 ^= v3;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v3 = v3 + v4 + m[5];
v14 ^= v3;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// ##### Round(9) #####
// G(9, 0, v0, v4, v8, v12)
v0 = v0 + v4 + m[10];
v12 ^= v0;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v0 = v0 + v4 + m[2];
v12 ^= v0;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// G(9, 1, v1, v5, v9, v13)
v1 = v1 + v5 + m[8];
v13 ^= v1;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v1 = v1 + v5 + m[4];
v13 ^= v1;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(9, 2, v2, v6, v10, v14)
v2 = v2 + v6 + m[7];
v14 ^= v2;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v2 = v2 + v6 + m[6];
v14 ^= v2;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(9, 3, v3, v7, v11, v15)
v3 = v3 + v7 + m[1];
v15 ^= v3;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v3 = v3 + v7 + m[5];
v15 ^= v3;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(9, 4, v0, v5, v10, v15)
v0 = v0 + v5 + m[15];
v15 ^= v0;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v0 = v0 + v5 + m[11];
v15 ^= v0;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(9, 5, v1, v6, v11, v12)
v1 = v1 + v6 + m[9];
v12 ^= v1;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v1 = v1 + v6 + m[14];
v12 ^= v1;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(9, 6, v2, v7, v8, v13)
v2 = v2 + v7 + m[3];
v13 ^= v2;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v2 = v2 + v7 + m[12];
v13 ^= v2;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(9, 7, v3, v4, v9, v14)
v3 = v3 + v4 + m[13];
v14 ^= v3;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v3 = v3 + v4 + m[0];
v14 ^= v3;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// ##### Round(10) #####
// G(10, 0, v0, v4, v8, v12)
v0 = v0 + v4 + m[0];
v12 ^= v0;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v0 = v0 + v4 + m[1];
v12 ^= v0;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// G(10, 1, v1, v5, v9, v13)
v1 = v1 + v5 + m[2];
v13 ^= v1;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v1 = v1 + v5 + m[3];
v13 ^= v1;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(10, 2, v2, v6, v10, v14)
v2 = v2 + v6 + m[4];
v14 ^= v2;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v2 = v2 + v6 + m[5];
v14 ^= v2;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(10, 3, v3, v7, v11, v15)
v3 = v3 + v7 + m[6];
v15 ^= v3;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v3 = v3 + v7 + m[7];
v15 ^= v3;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(10, 4, v0, v5, v10, v15)
v0 = v0 + v5 + m[8];
v15 ^= v0;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v0 = v0 + v5 + m[9];
v15 ^= v0;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(10, 5, v1, v6, v11, v12)
v1 = v1 + v6 + m[10];
v12 ^= v1;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v1 = v1 + v6 + m[11];
v12 ^= v1;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(10, 6, v2, v7, v8, v13)
v2 = v2 + v7 + m[12];
v13 ^= v2;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v2 = v2 + v7 + m[13];
v13 ^= v2;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(10, 7, v3, v4, v9, v14)
v3 = v3 + v4 + m[14];
v14 ^= v3;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v3 = v3 + v4 + m[15];
v14 ^= v3;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// ##### Round(11) #####
// G(11, 0, v0, v4, v8, v12)
v0 = v0 + v4 + m[14];
v12 ^= v0;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v0 = v0 + v4 + m[10];
v12 ^= v0;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v8 = v8 + v12;
v4 ^= v8;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
// G(11, 1, v1, v5, v9, v13)
v1 = v1 + v5 + m[4];
v13 ^= v1;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v1 = v1 + v5 + m[8];
v13 ^= v1;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v9 = v9 + v13;
v5 ^= v9;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(11, 2, v2, v6, v10, v14)
v2 = v2 + v6 + m[9];
v14 ^= v2;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v2 = v2 + v6 + m[15];
v14 ^= v2;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v10 = v10 + v14;
v6 ^= v10;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(11, 3, v3, v7, v11, v15)
v3 = v3 + v7 + m[13];
v15 ^= v3;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v3 = v3 + v7 + m[6];
v15 ^= v3;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v11 = v11 + v15;
v7 ^= v11;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(11, 4, v0, v5, v10, v15)
v0 = v0 + v5 + m[1];
v15 ^= v0;
v15 = ((v15 >> 32) | (v15 << (64 - 32)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 24) | (v5 << (64 - 24)));
v0 = v0 + v5 + m[12];
v15 ^= v0;
v15 = ((v15 >> 16) | (v15 << (64 - 16)));
v10 = v10 + v15;
v5 ^= v10;
v5 = ((v5 >> 63) | (v5 << (64 - 63)));
// G(11, 5, v1, v6, v11, v12)
v1 = v1 + v6 + m[0];
v12 ^= v1;
v12 = ((v12 >> 32) | (v12 << (64 - 32)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 24) | (v6 << (64 - 24)));
v1 = v1 + v6 + m[2];
v12 ^= v1;
v12 = ((v12 >> 16) | (v12 << (64 - 16)));
v11 = v11 + v12;
v6 ^= v11;
v6 = ((v6 >> 63) | (v6 << (64 - 63)));
// G(11, 6, v2, v7, v8, v13)
v2 = v2 + v7 + m[11];
v13 ^= v2;
v13 = ((v13 >> 32) | (v13 << (64 - 32)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 24) | (v7 << (64 - 24)));
v2 = v2 + v7 + m[7];
v13 ^= v2;
v13 = ((v13 >> 16) | (v13 << (64 - 16)));
v8 = v8 + v13;
v7 ^= v8;
v7 = ((v7 >> 63) | (v7 << (64 - 63)));
// G(11, 7, v3, v4, v9, v14)
v3 = v3 + v4 + m[5];
v14 ^= v3;
v14 = ((v14 >> 32) | (v14 << (64 - 32)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 24) | (v4 << (64 - 24)));
v3 = v3 + v4 + m[3];
v14 ^= v3;
v14 = ((v14 >> 16) | (v14 << (64 - 16)));
v9 = v9 + v14;
v4 ^= v9;
v4 = ((v4 >> 63) | (v4 << (64 - 63)));
h[0] ^= v0 ^ v8;
h[1] ^= v1 ^ v9;
h[2] ^= v2 ^ v10;
h[3] ^= v3 ^ v11;
h[4] ^= v4 ^ v12;
h[5] ^= v5 ^ v13;
h[6] ^= v6 ^ v14;
h[7] ^= v7 ^ v15;

View File

@ -0,0 +1,177 @@
// BLAKE2 reference source code package - C# implementation
// Written in 2012 by Christian Winnerlein <codesinchaos@gmail.com>
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// You should have received a copy of the CC0 Public Domain Dedication along with
// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
using System;
namespace Blake2Sharp
#if false
public sealed partial class Blake2BCore
{
    /// <summary>
    /// Inlined variant of the compression function: loads sixteen 64-bit
    /// message words from <paramref name="block"/> and mixes them into the
    /// chaining value <c>_h</c> over <c>NumberOfRounds</c> rounds.
    /// The eight G steps of each round are written out by hand; only the
    /// round loop itself is kept.
    /// </summary>
    /// <param name="block">Buffer containing the message block.</param>
    /// <param name="start">Offset of the first byte of the block inside <paramref name="block"/>.</param>
    partial void Compress(byte[] block, int start)
    {
        var h = _h;
        var m = _m;

        // Message words are little-endian. On little-endian hosts the bytes
        // can be block-copied straight into the ulong array; otherwise each
        // word is assembled byte by byte.
        if (BitConverter.IsLittleEndian)
        {
            Buffer.BlockCopy(block, start, m, 0, BlockSizeInBytes);
        }
        else
        {
            for (int i = 0; i < 16; ++i)
                m[i] = BytesToUInt64(block, start + (i << 3));
        }

        // Working state: first half is the chaining value, second half is the
        // IV combined with the byte counter and the finalization flags.
        var v0 = h[0];
        var v1 = h[1];
        var v2 = h[2];
        var v3 = h[3];
        var v4 = h[4];
        var v5 = h[5];
        var v6 = h[6];
        var v7 = h[7];

        var v8 = IV0;
        var v9 = IV1;
        var v10 = IV2;
        var v11 = IV3;
        var v12 = IV4 ^ _counter0;
        var v13 = IV5 ^ _counter1;
        var v14 = IV6 ^ _finalizationFlag0;
        var v15 = IV7 ^ _finalizationFlag1;

        for (int r = 0; r < NumberOfRounds; ++r)
        {
            // G(r,0,v0,v4,v8,v12)
            v0 = v0 + v4 + m[Sigma[16 * r + 2 * 0 + 0]];
            v12 ^= v0;
            v12 = ((v12 >> 32) | (v12 << (64 - 32)));
            v8 = v8 + v12;
            v4 ^= v8;
            v4 = ((v4 >> 24) | (v4 << (64 - 24)));
            v0 = v0 + v4 + m[Sigma[16 * r + 2 * 0 + 1]];
            v12 ^= v0;
            v12 = ((v12 >> 16) | (v12 << (64 - 16)));
            v8 = v8 + v12;
            v4 ^= v8;
            v4 = ((v4 >> 63) | (v4 << (64 - 63)));

            // G(r,1,v1,v5,v9,v13)
            v1 = v1 + v5 + m[Sigma[16 * r + 2 * 1 + 0]];
            v13 ^= v1;
            v13 = ((v13 >> 32) | (v13 << (64 - 32)));
            v9 = v9 + v13;
            v5 ^= v9;
            v5 = ((v5 >> 24) | (v5 << (64 - 24)));
            v1 = v1 + v5 + m[Sigma[16 * r + 2 * 1 + 1]];
            v13 ^= v1;
            v13 = ((v13 >> 16) | (v13 << (64 - 16)));
            v9 = v9 + v13;
            v5 ^= v9;
            v5 = ((v5 >> 63) | (v5 << (64 - 63)));

            // G(r,2,v2,v6,v10,v14)
            v2 = v2 + v6 + m[Sigma[16 * r + 2 * 2 + 0]];
            v14 ^= v2;
            v14 = ((v14 >> 32) | (v14 << (64 - 32)));
            v10 = v10 + v14;
            v6 ^= v10;
            v6 = ((v6 >> 24) | (v6 << (64 - 24)));
            v2 = v2 + v6 + m[Sigma[16 * r + 2 * 2 + 1]];
            v14 ^= v2;
            v14 = ((v14 >> 16) | (v14 << (64 - 16)));
            v10 = v10 + v14;
            v6 ^= v10;
            v6 = ((v6 >> 63) | (v6 << (64 - 63)));

            // G(r,3,v3,v7,v11,v15)
            v3 = v3 + v7 + m[Sigma[16 * r + 2 * 3 + 0]];
            v15 ^= v3;
            v15 = ((v15 >> 32) | (v15 << (64 - 32)));
            v11 = v11 + v15;
            v7 ^= v11;
            v7 = ((v7 >> 24) | (v7 << (64 - 24)));
            v3 = v3 + v7 + m[Sigma[16 * r + 2 * 3 + 1]];
            v15 ^= v3;
            v15 = ((v15 >> 16) | (v15 << (64 - 16)));
            v11 = v11 + v15;
            v7 ^= v11;
            v7 = ((v7 >> 63) | (v7 << (64 - 63)));

            // G(r,4,v0,v5,v10,v15)
            v0 = v0 + v5 + m[Sigma[16 * r + 2 * 4 + 0]];
            v15 ^= v0;
            v15 = ((v15 >> 32) | (v15 << (64 - 32)));
            v10 = v10 + v15;
            v5 ^= v10;
            v5 = ((v5 >> 24) | (v5 << (64 - 24)));
            v0 = v0 + v5 + m[Sigma[16 * r + 2 * 4 + 1]];
            v15 ^= v0;
            v15 = ((v15 >> 16) | (v15 << (64 - 16)));
            v10 = v10 + v15;
            v5 ^= v10;
            v5 = ((v5 >> 63) | (v5 << (64 - 63)));

            // G(r,5,v1,v6,v11,v12)
            v1 = v1 + v6 + m[Sigma[16 * r + 2 * 5 + 0]];
            v12 ^= v1;
            v12 = ((v12 >> 32) | (v12 << (64 - 32)));
            v11 = v11 + v12;
            v6 ^= v11;
            v6 = ((v6 >> 24) | (v6 << (64 - 24)));
            v1 = v1 + v6 + m[Sigma[16 * r + 2 * 5 + 1]];
            v12 ^= v1;
            v12 = ((v12 >> 16) | (v12 << (64 - 16)));
            v11 = v11 + v12;
            v6 ^= v11;
            v6 = ((v6 >> 63) | (v6 << (64 - 63)));

            // G(r,6,v2,v7,v8,v13)
            v2 = v2 + v7 + m[Sigma[16 * r + 2 * 6 + 0]];
            v13 ^= v2;
            v13 = ((v13 >> 32) | (v13 << (64 - 32)));
            v8 = v8 + v13;
            v7 ^= v8;
            v7 = ((v7 >> 24) | (v7 << (64 - 24)));
            v2 = v2 + v7 + m[Sigma[16 * r + 2 * 6 + 1]];
            v13 ^= v2;
            v13 = ((v13 >> 16) | (v13 << (64 - 16)));
            v8 = v8 + v13;
            v7 ^= v8;
            v7 = ((v7 >> 63) | (v7 << (64 - 63)));

            // G(r,7,v3,v4,v9,v14)
            v3 = v3 + v4 + m[Sigma[16 * r + 2 * 7 + 0]];
            v14 ^= v3;
            v14 = ((v14 >> 32) | (v14 << (64 - 32)));
            v9 = v9 + v14;
            v4 ^= v9;
            v4 = ((v4 >> 24) | (v4 << (64 - 24)));
            v3 = v3 + v4 + m[Sigma[16 * r + 2 * 7 + 1]];
            v14 ^= v3;
            v14 = ((v14 >> 16) | (v14 << (64 - 16)));
            v9 = v9 + v14;
            v4 ^= v9;
            v4 = ((v4 >> 63) | (v4 << (64 - 63)));
        }

        // Feed-forward: fold both halves of the working state back into h.
        h[0] ^= v0 ^ v8;
        h[1] ^= v1 ^ v9;
        h[2] ^= v2 ^ v10;
        h[3] ^= v3 ^ v11;
        h[4] ^= v4 ^ v12;
        h[5] ^= v5 ^ v13;
        h[6] ^= v6 ^ v14;
        h[7] ^= v7 ^ v15;
    }
}

View File

@ -0,0 +1,88 @@
// BLAKE2 reference source code package - C# implementation
// Written in 2012 by Christian Winnerlein <codesinchaos@gmail.com>
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// You should have received a copy of the CC0 Public Domain Dedication along with
// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
using System;
namespace Blake2Sharp
#if false
public sealed partial class Blake2BCore
{
    // Sixteen-word working state used by this straightforward variant.
    private ulong[] _v = new ulong[16];

    /// <summary>Rotates a 64-bit value right by <paramref name="nBits"/> bits.</summary>
    private static ulong RotateRight(ulong value, int nBits)
    {
        return (value >> nBits) | (value << (64 - nBits));
    }

    /// <summary>
    /// Mixing step on the four state words <c>_v[a]</c>, <c>_v[b]</c>,
    /// <c>_v[c]</c>, <c>_v[d]</c>, using the two message words selected by
    /// <c>Sigma</c> for round <paramref name="r"/> at position <paramref name="i"/>.
    /// </summary>
    /// <param name="a">Index of the first state word.</param>
    /// <param name="b">Index of the second state word.</param>
    /// <param name="c">Index of the third state word.</param>
    /// <param name="d">Index of the fourth state word.</param>
    /// <param name="r">Round number; selects a 16-entry row of <c>Sigma</c>.</param>
    /// <param name="i">Even offset inside that row; entries i and i+1 are used.</param>
    private void G(int a, int b, int c, int d, int r, int i)
    {
        int p = (r << 4) + i;
        int p0 = Sigma[p];
        int p1 = Sigma[p + 1];
        var v = _v;
        var m = _m;

        v[a] += v[b] + m[p0];
        v[d] = RotateRight(v[d] ^ v[a], 32);
        v[c] += v[d];
        v[b] = RotateRight(v[b] ^ v[c], 24);
        v[a] += v[b] + m[p1];
        v[d] = RotateRight(v[d] ^ v[a], 16);
        v[c] += v[d];
        v[b] = RotateRight(v[b] ^ v[c], 63);
    }

    /// <summary>
    /// Loads sixteen little-endian 64-bit message words from
    /// <paramref name="block"/> and mixes them into the chaining value
    /// <c>_h</c> over <c>NumberOfRounds</c> rounds.
    /// </summary>
    /// <param name="block">Buffer containing the message block.</param>
    /// <param name="start">Offset of the first byte of the block inside <paramref name="block"/>.</param>
    partial void Compress(byte[] block, int start)
    {
        var v = _v;
        var h = _h;
        var m = _m;

        for (int i = 0; i < 16; ++i)
            m[i] = BytesToUInt64(block, start + (i << 3));

        // First half of the working state: current chaining value.
        v[0] = h[0];
        v[1] = h[1];
        v[2] = h[2];
        v[3] = h[3];
        v[4] = h[4];
        v[5] = h[5];
        v[6] = h[6];
        v[7] = h[7];

        // Second half: IV combined with the counter and finalization flags.
        v[8] = IV0;
        v[9] = IV1;
        v[10] = IV2;
        v[11] = IV3;
        v[12] = IV4 ^ _counter0;
        v[13] = IV5 ^ _counter1;
        v[14] = IV6 ^ _finalizationFlag0;
        v[15] = IV7 ^ _finalizationFlag1;

        for (int r = 0; r < NumberOfRounds; ++r)
        {
            // Four steps on disjoint word sets (0,4,8,12), (1,5,9,13), ...
            G(0, 4, 8, 12, r, 0);
            G(1, 5, 9, 13, r, 2);
            G(2, 6, 10, 14, r, 4);
            G(3, 7, 11, 15, r, 6);
            // Four further steps; each touches a disjoint set of words, so
            // their relative order does not affect the result.
            G(3, 4, 9, 14, r, 14);
            G(2, 7, 8, 13, r, 12);
            G(0, 5, 10, 15, r, 8);
            G(1, 6, 11, 12, r, 10);
        }

        // Feed-forward: fold both halves of the working state back into h.
        for (int i = 0; i < 8; ++i)
            h[i] ^= v[i] ^ v[i + 8];
    }
}

View File

@ -0,0 +1,198 @@
// BLAKE2 reference source code package - C# implementation
// Written in 2012 by Christian Winnerlein <codesinchaos@gmail.com>
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// You should have received a copy of the CC0 Public Domain Dedication along with
// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
Based on BlakeSharp
by Dominik Reichl <dominik.reichl@t-online.de>
Web: http://www.dominik-reichl.de/
If you're using this class, it would be nice if you'd mention
me somewhere in the documentation of your program, but it's
not required.
BLAKE was designed by Jean-Philippe Aumasson, Luca Henzen,
Willi Meier and Raphael C.-W. Phan.
BlakeSharp was derived from the reference C implementation.
using System;
namespace Blake2Sharp
/// <summary>
/// Core BLAKE2b state machine: buffers input into 128-byte blocks, maintains
/// the 128-bit byte counter and finalization flags, and hands complete blocks
/// to the partial <c>Compress</c> method. Always produces a 64-byte digest.
/// </summary>
public sealed partial class Blake2BCore
{
    private bool _isInitialized = false;

    // Number of valid bytes currently held in _buf.
    private int _bufferFilled;
    private byte[] _buf = new byte[128];

    private ulong[] _m = new ulong[16];
    private ulong[] _h = new ulong[8];
    // Byte counter, low and high 64-bit halves.
    private ulong _counter0;
    private ulong _counter1;
    private ulong _finalizationFlag0;
    private ulong _finalizationFlag1;

    private const int NumberOfRounds = 12;
    private const int BlockSizeInBytes = 128;

    const ulong IV0 = 0x6A09E667F3BCC908UL;
    const ulong IV1 = 0xBB67AE8584CAA73BUL;
    const ulong IV2 = 0x3C6EF372FE94F82BUL;
    const ulong IV3 = 0xA54FF53A5F1D36F1UL;
    const ulong IV4 = 0x510E527FADE682D1UL;
    const ulong IV5 = 0x9B05688C2B3E6C1FUL;
    const ulong IV6 = 0x1F83D9ABFB41BD6BUL;
    const ulong IV7 = 0x5BE0CD19137E2179UL;

    // Message word schedule: one 16-entry row per round.
    private static readonly int[] Sigma = new int[NumberOfRounds * 16] {
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3,
        11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4,
        7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8,
        9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13,
        2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9,
        12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11,
        13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10,
        6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5,
        10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0,
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3
    };

    /// <summary>
    /// Reads eight bytes starting at <paramref name="offset"/> as a
    /// little-endian 64-bit unsigned integer.
    /// </summary>
    internal static ulong BytesToUInt64(byte[] buf, int offset)
    {
        return
            ((ulong)buf[offset + 7] << 7 * 8) |
            ((ulong)buf[offset + 6] << 6 * 8) |
            ((ulong)buf[offset + 5] << 5 * 8) |
            ((ulong)buf[offset + 4] << 4 * 8) |
            ((ulong)buf[offset + 3] << 3 * 8) |
            ((ulong)buf[offset + 2] << 2 * 8) |
            ((ulong)buf[offset + 1] << 1 * 8) |
            (ulong)buf[offset];
    }

    /// <summary>
    /// Writes <paramref name="value"/> as eight little-endian bytes into
    /// <paramref name="buf"/> starting at <paramref name="offset"/>.
    /// </summary>
    private static void UInt64ToBytes(ulong value, byte[] buf, int offset)
    {
        buf[offset + 7] = (byte)(value >> 7 * 8);
        buf[offset + 6] = (byte)(value >> 6 * 8);
        buf[offset + 5] = (byte)(value >> 5 * 8);
        buf[offset + 4] = (byte)(value >> 4 * 8);
        buf[offset + 3] = (byte)(value >> 3 * 8);
        buf[offset + 2] = (byte)(value >> 2 * 8);
        buf[offset + 1] = (byte)(value >> 1 * 8);
        buf[offset] = (byte)value;
    }

    partial void Compress(byte[] block, int start);

    /// <summary>
    /// Adds <paramref name="amount"/> to the 128-bit byte counter, carrying
    /// from the low half into the high half on wrap-around.
    /// </summary>
    private void IncrementCounter(ulong amount)
    {
        unchecked
        {
            _counter0 += amount;
            if (_counter0 < amount)
                _counter1++;
        }
    }

    /// <summary>
    /// Resets the state and XORs the eight-word parameter block into the IV.
    /// </summary>
    /// <param name="config">Exactly eight 64-bit parameter words.</param>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="config"/> does not have eight elements.</exception>
    public void Initialize(ulong[] config)
    {
        if (config == null)
            throw new ArgumentNullException("config");
        if (config.Length != 8)
            throw new ArgumentException("config length must be 8 words");
        _isInitialized = true;

        _h[0] = IV0;
        _h[1] = IV1;
        _h[2] = IV2;
        _h[3] = IV3;
        _h[4] = IV4;
        _h[5] = IV5;
        _h[6] = IV6;
        _h[7] = IV7;

        _counter0 = 0;
        _counter1 = 0;
        _finalizationFlag0 = 0;
        _finalizationFlag1 = 0;

        _bufferFilled = 0;

        Array.Clear(_buf, 0, _buf.Length);

        for (int i = 0; i < 8; i++)
            _h[i] ^= config[i];
    }

    /// <summary>
    /// Absorbs <paramref name="count"/> bytes of <paramref name="array"/>
    /// starting at <paramref name="start"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">The core has not been initialized.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">The range lies outside <paramref name="array"/>.</exception>
    public void HashCore(byte[] array, int start, int count)
    {
        if (!_isInitialized)
            throw new InvalidOperationException("Not initialized");
        if (array == null)
            throw new ArgumentNullException("array");
        if (start < 0)
            throw new ArgumentOutOfRangeException("start");
        if (count < 0)
            throw new ArgumentOutOfRangeException("count");
        if ((long)start + (long)count > array.Length)
            throw new ArgumentOutOfRangeException("start+count");

        int offset = start;
        int bufferRemaining = BlockSizeInBytes - _bufferFilled;

        // Top up and flush a partially filled buffer, but only when more
        // input follows: the last block must stay buffered for HashFinal.
        if ((_bufferFilled > 0) && (count > bufferRemaining))
        {
            Array.Copy(array, offset, _buf, _bufferFilled, bufferRemaining);
            IncrementCounter(BlockSizeInBytes);
            Compress(_buf, 0);
            offset += bufferRemaining;
            count -= bufferRemaining;
            _bufferFilled = 0;
        }

        // Strictly greater-than: a trailing full block is kept in the buffer
        // so that it can be compressed with the finalization flag set.
        while (count > BlockSizeInBytes)
        {
            IncrementCounter(BlockSizeInBytes);
            Compress(array, offset);
            offset += BlockSizeInBytes;
            count -= BlockSizeInBytes;
        }

        if (count > 0)
        {
            Array.Copy(array, offset, _buf, _bufferFilled, count);
            _bufferFilled += count;
        }
    }

    /// <summary>Finishes hashing without marking the end of a tree layer.</summary>
    public byte[] HashFinal()
    {
        return HashFinal(false);
    }

    /// <summary>
    /// Pads and compresses the buffered final block and returns the 64-byte
    /// state. The core must be re-initialized before further use.
    /// </summary>
    /// <param name="isEndOfLayer">When true, the second finalization flag is set as well.</param>
    /// <exception cref="InvalidOperationException">The core has not been initialized.</exception>
    public byte[] HashFinal(bool isEndOfLayer)
    {
        if (!_isInitialized)
            throw new InvalidOperationException("Not initialized");
        _isInitialized = false;

        //Last compression
        IncrementCounter((uint)_bufferFilled);
        _finalizationFlag0 = ulong.MaxValue;
        if (isEndOfLayer)
            _finalizationFlag1 = ulong.MaxValue;
        // Zero-pad the unused tail of the final block.
        for (int i = _bufferFilled; i < _buf.Length; i++)
            _buf[i] = 0;
        Compress(_buf, 0);

        //Output
        byte[] hash = new byte[64];
        for (int i = 0; i < 8; ++i)
            UInt64ToBytes(_h[i], hash, i << 3);
        return hash;
    }
}

View File

@ -0,0 +1,66 @@
// BLAKE2 reference source code package - C# implementation
// Written in 2012 by Christian Winnerlein <codesinchaos@gmail.com>
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// You should have received a copy of the CC0 Public Domain Dedication along with
// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
using System;
using System.Collections.Generic;
using System.Text;
namespace Blake2Sharp
/// <summary>
/// Sequential BLAKE2b hasher built on <see cref="Blake2BCore"/>. Supports an
/// optional key and truncates the core's output to the configured size.
/// </summary>
internal class Blake2BHasher : Hasher
{
    private readonly Blake2BCore core = new Blake2BCore();
    private readonly ulong[] rawConfig;
    // Key zero-padded to one full 128-byte block, or null when unkeyed.
    private readonly byte[] key;
    private readonly int outputSizeInBytes;
    private static readonly Blake2BConfig DefaultConfig = new Blake2BConfig();

    /// <summary>
    /// Resets the hasher: loads the parameter words into the core and, when a
    /// key is configured, absorbs the padded key block first.
    /// </summary>
    public override void Init()
    {
        // The core rejects HashCore/HashFinal until Initialize has run.
        core.Initialize(rawConfig);
        if (key != null)
        {
            core.HashCore(key, 0, key.Length);
        }
    }

    /// <summary>
    /// Finalizes the hash and returns the first <c>outputSizeInBytes</c>
    /// bytes of the core's output.
    /// </summary>
    public override byte[] Finish()
    {
        var fullResult = core.HashFinal();
        if (outputSizeInBytes != fullResult.Length)
        {
            var result = new byte[outputSizeInBytes];
            Array.Copy(fullResult, result, result.Length);
            return result;
        }
        else return fullResult;
    }

    /// <summary>
    /// Creates a hasher for <paramref name="config"/>; a null configuration
    /// selects the default one. The hasher is ready for <see cref="Update"/>
    /// as soon as the constructor returns.
    /// </summary>
    public Blake2BHasher(Blake2BConfig config)
    {
        if (config == null)
            config = DefaultConfig;
        rawConfig = Blake2IvBuilder.ConfigB(config, null);
        if (config.Key != null && config.Key.Length != 0)
        {
            key = new byte[128];
            Array.Copy(config.Key, key, config.Key.Length);
        }
        outputSizeInBytes = config.OutputSizeInBytes;
        // Init is idempotent, so an additional explicit Init() by the caller
        // simply resets to the same starting state.
        Init();
    }

    /// <summary>Absorbs <paramref name="count"/> bytes of <paramref name="data"/> starting at <paramref name="start"/>.</summary>
    public override void Update(byte[] data, int start, int count)
    {
        core.HashCore(data, start, count);
    }
}

View File

@ -0,0 +1,52 @@
// BLAKE2 reference source code package - C# implementation
// Written in 2012 by Christian Winnerlein <codesinchaos@gmail.com>
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// You should have received a copy of the CC0 Public Domain Dedication along with
// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
using System;
using System.Collections.Generic;
using System.Text;
namespace Blake2Sharp
/*public class Blake2BNodeHasher : NodeHasher
ulong[] rawConfig;
byte[] key;
Blake2BCore core = new Blake2BCore();
public override void Init(int depth, long nodeOffset)
throw new NotImplementedException();
public override byte[] Finish(bool isEndOfLayer)
throw new NotImplementedException();
public override void Update(byte[] data, int start, int count)
throw new NotImplementedException();
public Blake2BNodeHasher(Blake2BConfig config, Blake2BTreeConfig treeConfig)
if (config == null)
config = DefaultConfig;
rawConfig = Blake2IvBuilder.ConfigB(config, null);
if (config.Key != null && config.Key.Length != 0)
key = new byte[128];
Array.Copy(config.Key, key, config.Key.Length);
Init(0, 0);

View File

@ -0,0 +1,52 @@
// BLAKE2 reference source code package - C# implementation
// Written in 2012 by Christian Winnerlein <codesinchaos@gmail.com>
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// You should have received a copy of the CC0 Public Domain Dedication along with
// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
using System;
namespace Blake2Sharp
/// <summary>
/// Tree-hashing parameters: intermediate hash size, maximum height, leaf
/// size and fan-out. A new instance starts with an intermediate hash size
/// of 64 and every other value at zero.
/// </summary>
public sealed class Blake2BTreeConfig : ICloneable
{
    public int IntermediateHashSize { get; set; }
    public int MaxHeight { get; set; }
    public long LeafSize { get; set; }
    public int FanOut { get; set; }

    public Blake2BTreeConfig()
    {
        IntermediateHashSize = 64;
    }

    /// <summary>Returns an independent copy carrying the same four values.</summary>
    public Blake2BTreeConfig Clone()
    {
        return new Blake2BTreeConfig
        {
            IntermediateHashSize = this.IntermediateHashSize,
            MaxHeight = this.MaxHeight,
            LeafSize = this.LeafSize,
            FanOut = this.FanOut
        };
    }

    /// <summary>
    /// Builds a configuration with <paramref name="parallelism"/> as fan-out,
    /// a maximum height of 2 and an intermediate hash size of 64.
    /// </summary>
    public static Blake2BTreeConfig CreateInterleaved(int parallelism)
    {
        return new Blake2BTreeConfig
        {
            FanOut = parallelism,
            MaxHeight = 2,
            IntermediateHashSize = 64
        };
    }

    object ICloneable.Clone()
    {
        return this.Clone();
    }
}

View File

@ -0,0 +1,76 @@
// BLAKE2 reference source code package - C# implementation
// Written in 2012 by Christian Winnerlein <codesinchaos@gmail.com>
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// You should have received a copy of the CC0 Public Domain Dedication along with
// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
using System;
namespace Blake2Sharp
/// <summary>
/// Packs a <c>Blake2BConfig</c> / <c>Blake2BTreeConfig</c> pair into the
/// eight 64-bit parameter words consumed by <c>Blake2BCore.Initialize</c>.
/// </summary>
internal static class Blake2IvBuilder
{
    // Tree parameters used when no tree configuration is supplied.
    private static readonly Blake2BTreeConfig SequentialTreeConfig = new Blake2BTreeConfig() { IntermediateHashSize = 0, LeafSize = 0, FanOut = 1, MaxHeight = 1 };

    /// <summary>
    /// Builds the parameter words for <paramref name="config"/>.
    /// </summary>
    /// <param name="config">Output size, key, salt and personalization.</param>
    /// <param name="treeConfig">Tree parameters, or null for sequential hashing.</param>
    /// <returns>A new eight-element array of parameter words.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">Output size or intermediate hash size is outside 1..64.</exception>
    /// <exception cref="ArgumentException">Key longer than 64 bytes, or salt/personalization not exactly 16 bytes.</exception>
    public static ulong[] ConfigB(Blake2BConfig config, Blake2BTreeConfig treeConfig)
    {
        if (config == null)
            throw new ArgumentNullException("config");

        bool isSequential = treeConfig == null;
        if (isSequential)
            treeConfig = SequentialTreeConfig;
        var rawConfig = new ulong[8];

        //digest length
        if (config.OutputSizeInBytes <= 0 || config.OutputSizeInBytes > 64)
            throw new ArgumentOutOfRangeException("config.OutputSize");
        rawConfig[0] |= (ulong)(uint)config.OutputSizeInBytes;

        //Key length
        if (config.Key != null)
        {
            if (config.Key.Length > 64)
                throw new ArgumentException("Key too long", "config.Key");
            rawConfig[0] |= (ulong)((uint)config.Key.Length << 8);
        }
        // FanOut
        rawConfig[0] |= (uint)treeConfig.FanOut << 16;
        // Depth
        rawConfig[0] |= (uint)treeConfig.MaxHeight << 24;
        // Leaf length
        rawConfig[0] |= ((ulong)(uint)treeConfig.LeafSize) << 32;
        // Inner length
        if (!isSequential && (treeConfig.IntermediateHashSize <= 0 || treeConfig.IntermediateHashSize > 64))
            throw new ArgumentOutOfRangeException("treeConfig.TreeIntermediateHashSize");
        rawConfig[2] |= (uint)treeConfig.IntermediateHashSize << 8;
        // Salt
        if (config.Salt != null)
        {
            if (config.Salt.Length != 16)
                throw new ArgumentException("config.Salt has invalid length");
            rawConfig[4] = Blake2BCore.BytesToUInt64(config.Salt, 0);
            rawConfig[5] = Blake2BCore.BytesToUInt64(config.Salt, 8);
        }
        // Personalization
        if (config.Personalization != null)
        {
            if (config.Personalization.Length != 16)
                throw new ArgumentException("config.Personalization has invalid length");
            // The 16 bytes occupy two words: bytes 0-7 in word 6, bytes 8-15 in word 7.
            rawConfig[6] = Blake2BCore.BytesToUInt64(config.Personalization, 0);
            rawConfig[7] = Blake2BCore.BytesToUInt64(config.Personalization, 8);
        }

        return rawConfig;
    }

    /// <summary>
    /// Stores the node offset in word 1 and replaces the low byte of word 2
    /// with <paramref name="depth"/>, leaving the other bits of word 2 intact.
    /// </summary>
    public static void ConfigBSetNode(ulong[] rawConfig, byte depth, ulong nodeOffset)
    {
        rawConfig[1] = nodeOffset;
        rawConfig[2] = (rawConfig[2] & ~0xFFul) | depth;
    }
}

View File

@ -0,0 +1,60 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<TargetFrameworkProfile />
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<Reference Include="System" />
<Compile Include="Blake2B.cs" />
<Compile Include="Blake2BCore.cs" />
<Compile Include="Blake2BNodeHasher.cs" />
<Compile Include="Blake2BConfig.cs" />
<Compile Include="Blake2BCore-FullyUnrolled.cs" />
<Compile Include="Blake2IvBuilder.cs" />
<Compile Include="Blake2BTreeConfig.cs" />
<Compile Include="Blake2BCore-Simple.cs" />
<Compile Include="Blake2BCore-Inline.cs" />
<Compile Include="Blake2BHasher.cs" />
<Compile Include="NodeHasher.cs" />
<Compile Include="Hasher.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="TreeHasher.cs" />
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
Other similar extension points exist, see Microsoft.Common.targets.
<Target Name="BeforeBuild">
<Target Name="AfterBuild">

View File

@ -0,0 +1,60 @@
// BLAKE2 reference source code package - C# implementation
// Written in 2012 by Christian Winnerlein <codesinchaos@gmail.com>
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// You should have received a copy of the CC0 Public Domain Dedication along with
// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
using System;
using System.Collections.Generic;
using System.Security.Cryptography;
using System.Text;
namespace Blake2Sharp
/// <summary>
/// Base class for incremental hashers: <see cref="Init"/> starts a
/// computation, <c>Update</c> feeds data and <see cref="Finish"/> returns the
/// digest.
/// </summary>
public abstract class Hasher
{
    public abstract void Init();
    public abstract byte[] Finish();
    public abstract void Update(byte[] data, int start, int count);

    /// <summary>Feeds the whole of <paramref name="data"/> to the hasher.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    public void Update(byte[] data)
    {
        if (data == null)
            throw new ArgumentNullException("data");
        Update(data, 0, data.Length);
    }

    /// <summary>Wraps this hasher in a <see cref="HashAlgorithm"/> adapter.</summary>
    public HashAlgorithm AsHashAlgorithm()
    {
        return new HashAlgorithmAdapter(this);
    }

    /// <summary>
    /// Forwards the <see cref="HashAlgorithm"/> template methods to a
    /// wrapped <see cref="Hasher"/>.
    /// </summary>
    internal class HashAlgorithmAdapter : HashAlgorithm
    {
        private readonly Hasher _hasher;

        protected override void HashCore(byte[] array, int ibStart, int cbSize)
        {
            _hasher.Update(array, ibStart, cbSize);
        }

        protected override byte[] HashFinal()
        {
            return _hasher.Finish();
        }

        // Resetting the adapter must restart the wrapped hasher; otherwise
        // the adapter cannot be used for a second hash computation.
        public override void Initialize()
        {
            _hasher.Init();
        }

        public HashAlgorithmAdapter(Hasher hasher)
        {
            if (hasher == null)
                throw new ArgumentNullException("hasher");
            _hasher = hasher;
        }
    }
}

View File

@ -0,0 +1,30 @@
// BLAKE2 reference source code package - C# implementation
// Written in 2012 by Christian Winnerlein <codesinchaos@gmail.com>
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// You should have received a copy of the CC0 Public Domain Dedication along with
// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
using System;
using System.Collections.Generic;
using System.Security.Cryptography;
using System.Text;
namespace Blake2Sharp
/// <summary>
/// Abstract contract for hashing a single node: <c>Init</c> receives the
/// node's depth and offset, <c>Update</c> feeds data, and <c>Finish</c>
/// returns the digest, told whether the node ends its layer.
/// </summary>
public abstract class NodeHasher
{
    public abstract void Update(byte[] data, int start, int count);
    public abstract byte[] Finish(bool isEndOfLayer);
    public abstract void Init(int depth, long nodeOffset);

    /// <summary>Convenience overload that feeds the entire array.</summary>
    public void Update(byte[] data)
    {
        int length = data.Length;
        this.Update(data, 0, length);
    }
}

View File

@ -0,0 +1,36 @@
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("Blake2Sharp")]
[assembly: AssemblyDescription("Blake2 Hashfunction")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("CodesInChaos")]
[assembly: AssemblyProduct("Blake2Sharp")]
[assembly: AssemblyCopyright("Public Domain")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]

// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]

// The following GUID is for the ID of the typelib if this project is exposed to COM
[assembly: Guid("b7361e6c-1a16-4653-9afb-134066503c8f")]

// Version information for an assembly consists of the following four values:
//
//      Major Version
//      Minor Version
//      Build Number
//      Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
// An empty string is not a valid version; a four-part value is required.
// Adjust per release.
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]

View File

@ -0,0 +1,77 @@
// BLAKE2 reference source code package - C# implementation
// Written in 2012 by Christian Winnerlein <codesinchaos@gmail.com>
// To the extent possible under law, the author(s) have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// You should have received a copy of the CC0 Public Domain Dedication along with
// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
using System;
using System.Collections.Generic;
using System.Text;
namespace Blake2Sharp
/*public class TreeHasher : Hasher
NodeHasher nodeHasher;
int maxDepth;
int maxLeafSize;
int currentLeafSize;
int fanOut;
List<byte[]>[] intermediateHashes;
long[] counts;
public override void Init()
intermediateHashes = new List<byte[]>[maxDepth];
counts = new long[maxDepth];
public override byte[] Finish()
for (int layer = 0; layer < intermediateHashes.Length; layer++)
if (intermediateHashes[layer].Count > 0)
nodeHasher.Init(layer, counts[layer]);
foreach (var hash in intermediateHashes[layer])
intermediateHashes = null;
public override void Update(byte[] data, int start, int count)
while (count > 0)
int toHash = Math.Min(maxLeafSize - currentLeafSize, count);
nodeHasher.Update(data, start, toHash);
start += toHash;
count -= toHash;
if (count > 0)
for (int layer = 0; layer < intermediateHashes.Length; layer++)
if ((layer + 1 < maxDepth) && (intermediateHashes[layer].Count == fanOut))
nodeHasher.Init(layer, counts[layer]);
foreach (var hash in intermediateHashes[layer])
intermediateHashes[layer + 1].Add(nodeHasher.Finish);
counts[layer + 1]++;
nodeHasher.Init(0, counts[0]);

View File

@ -0,0 +1,26 @@
/*
 * Preprocessor template for the body of one mixing round.
 *
 * ROT(x, y) expands to a right-rotation of x by y bits in a 64-bit word.
 * G(r,i,a,b,c,d) expands to the twelve statements of one mixing step on the
 * variables a, b, c, d, using the message words m[Sigma[16*r+2*i]] and
 * m[Sigma[16*r+2*i+1]].
 *
 * NOTE(review): YY and XXX are not defined in this file; presumably they are
 * placeholder tokens that a later text-processing step replaces (comment
 * marker and line break) -- confirm against the script that consumes the
 * preprocessed output.
 */
#define ROT(x, y)\
((x >> y)|(x << (64-y)))
#define G(r,i,a,b,c,d) \
YY G(r,i,a,b,c,d) XXX\
a = a + b + m[Sigma[16*r+2*i+0]]; XXX\
d ^= a; XXX\
d = ROT(d, 32); XXX\
c = c + d; XXX\
b ^= c; XXX\
b = ROT(b, 24); XXX\
a = a + b + m[Sigma[16*r+2*i+1]]; XXX\
d ^= a; XXX\
d = ROT(d, 16); XXX\
c = c + d; XXX\
b ^= c; XXX\
b = ROT(b, 63); XXX
/* Steps 0-3 use the word sets (v0,v4,v8,v12) ... (v3,v7,v11,v15). */
G( r, 0, v0, v4, v8, v12 )
G( r, 1, v1, v5, v9, v13 )
G( r, 2, v2, v6, v10, v14 )
G( r, 3, v3, v7, v11, v15 )
/* Steps 4-7 use the shifted sets (v0,v5,v10,v15) ... (v3,v4,v9,v14). */
G( r, 4, v0, v5, v10, v15 )
G( r, 5, v1, v6, v11, v12 )
G( r, 6, v2, v7, v8, v13 )
G( r, 7, v3, v4, v9, v14 )

ref/blake2-impl.h Normal file
View File

@ -0,0 +1,133 @@
BLAKE2 reference source code package - reference C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#pragma once
#ifndef __BLAKE2_IMPL_H__
#define __BLAKE2_IMPL_H__
#include <stddef.h>
#include <stdint.h>
/*
 * Reads four bytes at src as a little-endian 32-bit value.
 *
 * The value is assembled byte by byte, so src needs no particular alignment
 * and the result does not depend on the host byte order. A direct
 * *(uint32_t *)src read would be undefined for misaligned or differently
 * typed storage and would also discard the const qualifier.
 */
static inline uint32_t load32( const void *src )
{
  const uint8_t *p = ( const uint8_t * )src;
  uint32_t w = *p++;
  w |= ( uint32_t )( *p++ ) << 8;
  w |= ( uint32_t )( *p++ ) << 16;
  w |= ( uint32_t )( *p++ ) << 24;
  return w;
}
/*
 * Reads eight bytes at src as a little-endian 64-bit value.
 *
 * The value is assembled byte by byte, so src needs no particular alignment
 * and the result does not depend on the host byte order. A direct
 * *(uint64_t *)src read would be undefined for misaligned or differently
 * typed storage and would also discard the const qualifier.
 */
static inline uint64_t load64( const void *src )
{
  const uint8_t *p = ( const uint8_t * )src;
  uint64_t w = *p++;
  w |= ( uint64_t )( *p++ ) << 8;
  w |= ( uint64_t )( *p++ ) << 16;
  w |= ( uint64_t )( *p++ ) << 24;
  w |= ( uint64_t )( *p++ ) << 32;
  w |= ( uint64_t )( *p++ ) << 40;
  w |= ( uint64_t )( *p++ ) << 48;
  w |= ( uint64_t )( *p++ ) << 56;
  return w;
}
/*
 * Writes w to the four bytes at dst in little-endian order.
 *
 * The bytes are written one at a time, so dst needs no particular alignment
 * and the stored layout does not depend on the host byte order. A direct
 * *(uint32_t *)dst store would be undefined for misaligned or differently
 * typed storage.
 */
static inline void store32( void *dst, uint32_t w )
{
  uint8_t *p = ( uint8_t * )dst;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w;
}
/*
 * Write w to dst as eight bytes, least significant byte first.
 * Byte-wise stores make the output layout independent of host byte order
 * and place no alignment requirement on dst.
 */
static inline void store64( void *dst, uint64_t w )
{
  uint8_t *p = ( uint8_t * )dst;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w;
}
/* Read a 48-bit little-endian value (six bytes) from src into the low bits of the result. */
static inline uint64_t load48( const void *src )
{
  const uint8_t *p = ( const uint8_t * )src;
  uint64_t w = *p++;
  w |= ( uint64_t )( *p++ ) << 8;
  w |= ( uint64_t )( *p++ ) << 16;
  w |= ( uint64_t )( *p++ ) << 24;
  w |= ( uint64_t )( *p++ ) << 32;
  w |= ( uint64_t )( *p++ ) << 40;
  return w;
}
/* Write the low 48 bits of w to dst as six bytes, least significant byte first. */
static inline void store48( void *dst, uint64_t w )
{
  uint8_t *p = ( uint8_t * )dst;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w; w >>= 8;
  *p++ = ( uint8_t )w;
}
/*
 * Rotate w left by c bits.
 * Both shift counts are reduced modulo 32 so that c == 0 (or any multiple
 * of 32) never shifts by the full width, which C leaves undefined.
 */
static inline uint32_t rotl32( const uint32_t w, const unsigned c )
{
  return ( w << ( c & 31 ) ) | ( w >> ( ( 32 - c ) & 31 ) );
}
/*
 * Rotate w left by c bits.
 * Both shift counts are reduced modulo 64 so that c == 0 (or any multiple
 * of 64) never shifts by the full width, which C leaves undefined.
 */
static inline uint64_t rotl64( const uint64_t w, const unsigned c )
{
  return ( w << ( c & 63 ) ) | ( w >> ( ( 64 - c ) & 63 ) );
}
/*
 * Rotate w right by c bits.
 * Both shift counts are reduced modulo 32 so that c == 0 (or any multiple
 * of 32) never shifts by the full width, which C leaves undefined.
 */
static inline uint32_t rotr32( const uint32_t w, const unsigned c )
{
  return ( w >> ( c & 31 ) ) | ( w << ( ( 32 - c ) & 31 ) );
}
/*
 * Rotate w right by c bits.
 * Both shift counts are reduced modulo 64 so that c == 0 (or any multiple
 * of 64) never shifts by the full width, which C leaves undefined.
 */
static inline uint64_t rotr64( const uint64_t w, const unsigned c )
{
  return ( w >> ( c & 63 ) ) | ( w << ( ( 64 - c ) & 63 ) );
}
/*
 * Overwrite n bytes at v with zeros.
 * The stores go through a volatile pointer so the compiler cannot drop them
 * the way it may drop a plain memset() on memory that is not read again.
 */
static inline void secure_zero_memory( void *v, size_t n )
{
  volatile uint8_t *p = ( volatile uint8_t * )v;

  while( n-- ) *p++ = 0;
}

ref/blake2-kat.h Normal file

File diff suppressed because it is too large Load Diff

ref/blake2.h Normal file
View File

@ -0,0 +1,156 @@
BLAKE2 reference source code package - reference C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#pragma once
#ifndef __BLAKE2_H__
#define __BLAKE2_H__
#include <stddef.h>
#include <stdint.h>
/* ALIGN(x): request x-byte alignment for the declaration that follows. */
#if defined(_MSC_VER)
#define ALIGN(x) __declspec(align(x))
#else
#define ALIGN(x) __attribute__((aligned(x)))
#endif
#if defined(__cplusplus)
extern "C" {
/* BLAKE2s sizes, in bytes. */
enum blake2s_constant
{
  BLAKE2S_BLOCKBYTES = 64,    /* one compression block: 16 words of 32 bits */
  BLAKE2S_OUTBYTES   = 32,    /* maximum digest length: 8 words of 32 bits */
  BLAKE2S_KEYBYTES   = 32,    /* maximum key length */
  BLAKE2S_SALTBYTES  = 8,     /* salt field of the parameter block */
  BLAKE2S_PERSONALBYTES = 8   /* personalization field of the parameter block */
};
/* BLAKE2b sizes, in bytes. */
enum blake2b_constant
{
  BLAKE2B_BLOCKBYTES = 128,   /* one compression block: 16 words of 64 bits */
  BLAKE2B_OUTBYTES   = 64,    /* maximum digest length: 8 words of 64 bits */
  BLAKE2B_KEYBYTES   = 64,    /* maximum key length */
  BLAKE2B_SALTBYTES  = 16,    /* salt field of the parameter block */
  BLAKE2B_PERSONALBYTES = 16  /* personalization field of the parameter block */
};
#pragma pack(push, 1)
/*
 * BLAKE2s parameter block.
 * blake2s_init_param() reads this object as eight little-endian 32-bit words
 * and XORs them into the IV, so the fields must be laid out exactly as
 * declared, without padding (see the enclosing #pragma pack).
 * The number after each field is the running size in bytes.
 */
typedef struct __blake2s_param
{
  uint8_t  digest_length; // 1
  uint8_t  key_length;    // 2
  uint8_t  fanout;        // 3
  uint8_t  depth;         // 4
  uint32_t leaf_length;   // 8
  uint8_t  node_offset[6];// 14  (48-bit value, written with store48)
  uint8_t  node_depth;    // 15
  uint8_t  inner_length;  // 16
  // uint8_t  reserved[0];
  uint8_t  salt[BLAKE2S_SALTBYTES]; // 24
  uint8_t  personal[BLAKE2S_PERSONALBYTES];  // 32
} blake2s_param;
/* Streaming state of one BLAKE2s instance. */
ALIGN( 64 ) typedef struct __blake2s_state
{
  uint32_t h[8];                         /* chaining value */
  uint32_t t[2];                         /* byte counter, low word in t[0] */
  uint32_t f[2];                         /* finalization flags: f[0] last block, f[1] last node */
  uint8_t  buf[2 * BLAKE2S_BLOCKBYTES];  /* input bytes not yet compressed */
  size_t   buflen;                       /* number of valid bytes in buf */
  uint8_t  last_node;                    /* nonzero: also set f[1] when finalizing */
} blake2s_state ;
/*
 * BLAKE2b parameter block.
 * blake2b_init_param() reads this object as eight little-endian 64-bit words
 * and XORs them into the IV, so the fields must be laid out exactly as
 * declared, without padding (see the enclosing #pragma pack).
 * The number after each field is the running size in bytes.
 */
typedef struct __blake2b_param
{
  uint8_t  digest_length; // 1
  uint8_t  key_length;    // 2
  uint8_t  fanout;        // 3
  uint8_t  depth;         // 4
  uint32_t leaf_length;   // 8
  uint64_t node_offset;   // 16
  uint8_t  node_depth;    // 17
  uint8_t  inner_length;  // 18
  uint8_t  reserved[14];  // 32
  uint8_t  salt[BLAKE2B_SALTBYTES]; // 48
  uint8_t  personal[BLAKE2B_PERSONALBYTES];  // 64
} blake2b_param;
/* Streaming state of one BLAKE2b instance. */
ALIGN( 64 ) typedef struct __blake2b_state
{
  uint64_t h[8];                         /* chaining value */
  uint64_t t[2];                         /* byte counter, low word in t[0] */
  uint64_t f[2];                         /* finalization flags: f[0] last block, f[1] last node */
  uint8_t  buf[2 * BLAKE2B_BLOCKBYTES];  /* input bytes not yet compressed */
  size_t   buflen;                       /* number of valid bytes in buf */
  uint8_t  last_node;                    /* nonzero: also set f[1] when finalizing */
} blake2b_state;
/* Streaming state of BLAKE2sp: eight BLAKE2s leaves plus one root node. */
typedef struct __blake2sp_state
{
  blake2s_state S[8][1];                 /* leaf states */
  blake2s_state R[1];                    /* root state */
  uint8_t buf[8 * BLAKE2S_BLOCKBYTES];   /* input not yet handed to the leaves */
  size_t  buflen;                        /* number of valid bytes in buf */
} blake2sp_state;
/* Streaming state of BLAKE2bp: four BLAKE2b leaves plus one root node. */
typedef struct __blake2bp_state
{
  blake2b_state S[4][1];                 /* leaf states */
  blake2b_state R[1];                    /* root state */
  uint8_t buf[4 * BLAKE2B_BLOCKBYTES];   /* input not yet handed to the leaves */
  size_t  buflen;                        /* number of valid bytes in buf */
} blake2bp_state;
#pragma pack(pop)
// Streaming API
// Initialize a state with one of the *_init functions, feed input with
// *_update() any number of times, then write the digest with *_final().
// *_init() and *_init_key() return -1 when outlen is 0 or larger than the
// variant's OUTBYTES; *_init_key() also returns -1 for a NULL key, a key
// length of 0, or a key length above the variant's KEYBYTES.
int blake2s_init( blake2s_state *S, const uint8_t outlen );
int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen );
int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen );
int blake2b_init( blake2b_state *S, const uint8_t outlen );
int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen );
int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen );
int blake2sp_init( blake2sp_state *S, const uint8_t outlen );
int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen );
int blake2sp_final( blake2sp_state *S, uint8_t *out, uint8_t outlen );
int blake2bp_init( blake2bp_state *S, const uint8_t outlen );
int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen );
int blake2bp_final( blake2bp_state *S, uint8_t *out, uint8_t outlen );
// Simple API
// One call hashes inlen bytes at in and writes outlen digest bytes to out.
// Note the argument order: outlen comes before inlen.
// NOTE(review): the reference bodies return -1 for a NULL in or out and treat
// a NULL key as "no key" -- confirm for each implementation that is linked.
int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
int blake2sp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
int blake2bp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
/* Default hash: blake2() forwards every argument to blake2b() and returns its result. */
static inline int blake2( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen )
{
  return blake2b( out, in, key, outlen, inlen, keylen );
}
#if defined(__cplusplus)

ref/blake2b-ref.c Normal file
View File

@ -0,0 +1,391 @@
BLAKE2 reference source code package - reference C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2.h"
#include "blake2-impl.h"
/* Initialization vector: blake2b_init0() copies these eight words into h[]. */
static const uint64_t blake2b_IV[8] =
{
  0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
  0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
  0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
  0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};
/*
 * Message word schedule: row r lists the order in which the 16 message words
 * are fed to the G function during round r.  Rows 10 and 11 repeat rows 0
 * and 1, giving the twelve rounds used by blake2b_compress().
 */
static const uint8_t blake2b_sigma[12][16] =
{
  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 } ,
  { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 } ,
  {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 } ,
  {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 } ,
  {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 } ,
  { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 } ,
  { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 } ,
  {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 } ,
  { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 , 0 } ,
  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 }
};
/* Set the last-node flag f[1] to all ones.  Always returns 0. */
static inline int blake2b_set_lastnode( blake2b_state *S )
{
  S->f[1] = ~0ULL;
  return 0;
}

/* Clear the last-node flag f[1].  Always returns 0. */
static inline int blake2b_clear_lastnode( blake2b_state *S )
{
  S->f[1] = 0ULL;
  return 0;
}

/* Some helper functions, not necessarily useful */

/*
 * Set the last-block flag f[0] to all ones; when the state is marked
 * last_node, set the last-node flag as well.  Always returns 0.
 */
static inline int blake2b_set_lastblock( blake2b_state *S )
{
  if( S->last_node ) blake2b_set_lastnode( S );

  S->f[0] = ~0ULL;
  return 0;
}

/*
 * Clear the last-block flag f[0]; when the state is marked last_node, clear
 * the last-node flag as well.  Always returns 0.
 */
static inline int blake2b_clear_lastblock( blake2b_state *S )
{
  if( S->last_node ) blake2b_clear_lastnode( S );

  S->f[0] = 0ULL;
  return 0;
}
/*
 * Add inc to the 128-bit counter held in t[1]:t[0].
 * After the addition, t[0] < inc exactly when the low word wrapped, so the
 * comparison yields the carry into t[1].  Always returns 0.
 */
static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
{
  S->t[0] += inc;
  S->t[1] += ( S->t[0] < inc );
  return 0;
}
// Parameter-related functions
// Each setter writes one field of the parameter block P and returns 0.
// Multi-byte integers are stored little-endian through store32()/store64().

static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length )
{
  P->digest_length = digest_length;
  return 0;
}

static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout )
{
  P->fanout = fanout;
  return 0;
}

static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth )
{
  P->depth = depth;
  return 0;
}

static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length )
{
  store32( &P->leaf_length, leaf_length );
  return 0;
}

static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset )
{
  store64( &P->node_offset, node_offset );
  return 0;
}

static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth )
{
  P->node_depth = node_depth;
  return 0;
}

static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length )
{
  P->inner_length = inner_length;
  return 0;
}

/* Copies exactly BLAKE2B_SALTBYTES bytes from salt. */
static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] )
{
  memcpy( P->salt, salt, BLAKE2B_SALTBYTES );
  return 0;
}

/* Copies exactly BLAKE2B_PERSONALBYTES bytes from personal. */
static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] )
{
  memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES );
  return 0;
}
/* Zero the whole state, then load the IV into h[].  Always returns 0. */
static inline int blake2b_init0( blake2b_state *S )
{
  memset( S, 0, sizeof( blake2b_state ) );

  for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i];

  return 0;
}
/*
 * Initialize S from an explicit parameter block: the state is reset to the
 * IV and each h[i] is XORed with the i-th little-endian 64-bit word of *P.
 * P is only read.  Always returns 0.
 */
int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
{
  const uint8_t *p = ( const uint8_t * )( P );

  blake2b_init0( S );

  /* IV XOR ParamBlock */
  for( size_t i = 0; i < 8; ++i )
    S->h[i] ^= load64( p + sizeof( S->h[i] ) * i );

  return 0;
}
/*
 * Initialize S for unkeyed, sequential hashing with an outlen-byte digest.
 * Returns -1 if outlen is 0 or exceeds BLAKE2B_OUTBYTES, otherwise the
 * result of blake2b_init_param().
 */
int blake2b_init( blake2b_state *S, const uint8_t outlen )
{
  blake2b_param P[1];

  if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;

  /* Sequential mode: fanout 1, depth 1, every tree field zero. */
  P->digest_length = outlen;
  P->key_length    = 0;
  P->fanout        = 1;
  P->depth         = 1;
  store32( &P->leaf_length, 0 );
  store64( &P->node_offset, 0 );
  P->node_depth    = 0;
  P->inner_length  = 0;
  memset( P->reserved, 0, sizeof( P->reserved ) );
  memset( P->salt,     0, sizeof( P->salt ) );
  memset( P->personal, 0, sizeof( P->personal ) );
  return blake2b_init_param( S, P );
}
/*
 * Initialize S for keyed, sequential hashing.
 * Returns -1 if outlen is 0 or exceeds BLAKE2B_OUTBYTES, if key is NULL, or
 * if keylen is 0 or exceeds BLAKE2B_KEYBYTES; returns 0 otherwise.
 * The key, zero-padded to one full block, is absorbed as the first input.
 */
int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen )
{
  blake2b_param P[1];

  if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;

  if ( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1;

  P->digest_length = outlen;
  P->key_length    = keylen;
  P->fanout        = 1;
  P->depth         = 1;
  store32( &P->leaf_length, 0 );
  store64( &P->node_offset, 0 );
  P->node_depth    = 0;
  P->inner_length  = 0;
  memset( P->reserved, 0, sizeof( P->reserved ) );
  memset( P->salt,     0, sizeof( P->salt ) );
  memset( P->personal, 0, sizeof( P->personal ) );

  if( blake2b_init_param( S, P ) < 0 ) return -1;

  {
    uint8_t block[BLAKE2B_BLOCKBYTES];
    memset( block, 0, BLAKE2B_BLOCKBYTES );
    memcpy( block, key, keylen );
    blake2b_update( S, block, BLAKE2B_BLOCKBYTES );
    secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
  }
  return 0;
}
/*
 * Compress one BLAKE2B_BLOCKBYTES-byte block into the chaining value S->h.
 * The 16-word working vector v starts as h[0..7] followed by the IV, with
 * the counter t and the flags f XORed into its last four words.  Twelve
 * rounds of G are applied, then both halves of v are folded back into h.
 * S->t and S->f are read but not modified.  Always returns 0.
 */
static int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
{
  uint64_t m[16];
  uint64_t v[16];
  int i;

  /* Message words are little-endian in the block. */
  for( i = 0; i < 16; ++i )
    m[i] = load64( block + i * sizeof( m[i] ) );

  for( i = 0; i < 8; ++i )
    v[i] = S->h[i];

  v[ 8] = blake2b_IV[0];
  v[ 9] = blake2b_IV[1];
  v[10] = blake2b_IV[2];
  v[11] = blake2b_IV[3];
  v[12] = S->t[0] ^ blake2b_IV[4];
  v[13] = S->t[1] ^ blake2b_IV[5];
  v[14] = S->f[0] ^ blake2b_IV[6];
  v[15] = S->f[1] ^ blake2b_IV[7];
  /* G mixes two message words, selected by sigma for round r, into a,b,c,d. */
#define G(r,i,a,b,c,d) \
  do { \
    a = a + b + m[blake2b_sigma[r][2*i+0]]; \
    d = rotr64(d ^ a, 32); \
    c = c + d; \
    b = rotr64(b ^ c, 24); \
    a = a + b + m[blake2b_sigma[r][2*i+1]]; \
    d = rotr64(d ^ a, 16); \
    c = c + d; \
    b = rotr64(b ^ c, 63); \
  } while(0)
  /* One round: G on the four columns of v, then on its four diagonals. */
#define ROUND(r)  \
  do { \
    G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
    G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
    G(r,2,v[ 2],v[ 6],v[10],v[14]); \
    G(r,3,v[ 3],v[ 7],v[11],v[15]); \
    G(r,4,v[ 0],v[ 5],v[10],v[15]); \
    G(r,5,v[ 1],v[ 6],v[11],v[12]); \
    G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
    G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
  } while(0)
  ROUND( 0 );
  ROUND( 1 );
  ROUND( 2 );
  ROUND( 3 );
  ROUND( 4 );
  ROUND( 5 );
  ROUND( 6 );
  ROUND( 7 );
  ROUND( 8 );
  ROUND( 9 );
  ROUND( 10 );
  ROUND( 11 );

  for( i = 0; i < 8; ++i )
    S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];

#undef G
#undef ROUND
  return 0;
}
/*
 * Absorb inlen bytes from in.  (inlen is in bytes.)
 * S->buf holds up to two blocks.  A block is compressed only when more input
 * is known to follow, so the final block always remains buffered for
 * blake2b_final().  Always returns 0.
 */
int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen )
{
  while( inlen > 0 )
  {
    size_t left = S->buflen;
    size_t fill = 2 * BLAKE2B_BLOCKBYTES - left;

    if( inlen > fill )
    {
      memcpy( S->buf + left, in, fill ); // Fill buffer
      S->buflen += fill;
      blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
      blake2b_compress( S, S->buf ); // Compress
      memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left
      /* One block was consumed.  Without this the buffer stays "full",
         fill becomes 0 and the loop never makes progress. */
      S->buflen -= BLAKE2B_BLOCKBYTES;
      in += fill;
      inlen -= fill;
    }
    else // inlen <= fill
    {
      memcpy( S->buf + left, in, ( size_t )inlen );
      S->buflen += ( size_t )inlen; // Be lazy, do not compress
      in += inlen;
      inlen -= inlen;
    }
  }

  return 0;
}
/*
 * Finish hashing and write the first outlen bytes of the digest to out.
 * Returns -1 if outlen exceeds BLAKE2B_OUTBYTES (the copy would read past
 * the local digest buffer), 0 otherwise.
 */
int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen )
{
  uint8_t buffer[BLAKE2B_OUTBYTES];

  if( outlen > BLAKE2B_OUTBYTES ) return -1;

  /* More than one block buffered: compress the first, keep the rest. */
  if( S->buflen > BLAKE2B_BLOCKBYTES )
  {
    blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
    blake2b_compress( S, S->buf );
    /* Drop the consumed block before moving the remainder down; copying
       the un-reduced length would read past the end of S->buf. */
    S->buflen -= BLAKE2B_BLOCKBYTES;
    memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen );
  }

  blake2b_increment_counter( S, S->buflen );
  blake2b_set_lastblock( S );
  memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
  blake2b_compress( S, S->buf );

  for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
    store64( buffer + sizeof( S->h[i] ) * i, S->h[i] );

  memcpy( out, buffer, outlen );
  return 0;
}
/*
 * One-shot BLAKE2b: hash inlen bytes at in, optionally keyed, and write
 * outlen digest bytes to out.
 * inlen, at least, should be uint64_t. Others can be size_t.
 * Returns -1 if out is NULL, if in is NULL while inlen is nonzero, or if
 * initialization rejects outlen/keylen; returns 0 on success.
 * A NULL key selects unkeyed hashing regardless of keylen.
 */
int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen )
{
  blake2b_state S[1];

  /* Verify parameters */
  if ( NULL == in && inlen > 0 ) return -1;

  if ( NULL == out ) return -1;

  if( NULL == key ) keylen = 0;

  /* Exactly one initializer must run: running the unkeyed one after the
     keyed one would reset the state and discard the key. */
  if( keylen > 0 )
  {
    if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1;
  }
  else
  {
    if( blake2b_init( S, outlen ) < 0 ) return -1;
  }

  blake2b_update( S, ( const uint8_t * )in, inlen );
  blake2b_final( S, out, outlen );
  return 0;
}
#if defined(BLAKE2B_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
/*
 * Self-test: hash every prefix of the byte sequence 0,1,2,... with the key
 * 0,1,2,... and compare against the keyed known-answer table.
 * Prints "ok" and returns 0 on success; prints "error" and returns -1 at the
 * first mismatch.
 */
int main( int argc, char **argv )
{
  uint8_t key[BLAKE2B_KEYBYTES];
  uint8_t buf[KAT_LENGTH];

  for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i )
    key[i] = ( uint8_t )i;

  for( size_t i = 0; i < KAT_LENGTH; ++i )
    buf[i] = ( uint8_t )i;

  for( size_t i = 0; i < KAT_LENGTH; ++i )
  {
    uint8_t hash[BLAKE2B_OUTBYTES];
    blake2b( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES );

    if( 0 != memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) )
    {
      puts( "error" );
      return -1;
    }
  }

  puts( "ok" );
  return 0;
}
#endif

ref/blake2bp-ref.c Normal file
View File

@ -0,0 +1,288 @@
BLAKE2 reference source code package - reference C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#if defined(_OPENMP)
#include <omp.h>
#include "blake2.h"
#include "blake2-impl.h"
/*
 * Initialize S as leaf number `offset` of the two-level BLAKE2bp tree
 * (node depth 0, inner length equal to outlen).
 * Returns the result of blake2b_init_param().
 */
static inline int blake2bp_init_leaf( blake2b_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset )
{
  blake2b_param P[1];
  P->digest_length = outlen;
  P->key_length = keylen;
  /* Every byte of P is hashed into the state, so fanout must be set too. */
  P->fanout = PARALLELISM_DEGREE;
  P->depth = 2;
  store32( &P->leaf_length, 0 );
  store64( &P->node_offset, offset );
  P->node_depth = 0;
  P->inner_length = outlen;
  memset( P->reserved, 0, sizeof( P->reserved ) );
  memset( P->salt, 0, sizeof( P->salt ) );
  memset( P->personal, 0, sizeof( P->personal ) );
  return blake2b_init_param( S, P );
}
/*
 * Initialize S as the root node of the two-level BLAKE2bp tree
 * (node depth 1, node offset 0, inner length equal to outlen).
 * Returns the result of blake2b_init_param().
 */
static inline int blake2bp_init_root( blake2b_state *S, uint8_t outlen, uint8_t keylen )
{
  blake2b_param P[1];
  P->digest_length = outlen;
  P->key_length = keylen;
  /* Every byte of P is hashed into the state, so fanout must be set too. */
  P->fanout = PARALLELISM_DEGREE;
  P->depth = 2;
  store32( &P->leaf_length, 0 );
  store64( &P->node_offset, 0 );
  P->node_depth = 1;
  P->inner_length = outlen;
  memset( P->reserved, 0, sizeof( P->reserved ) );
  memset( P->salt, 0, sizeof( P->salt ) );
  memset( P->personal, 0, sizeof( P->personal ) );
  return blake2b_init_param( S, P );
}
/*
 * Initialize an unkeyed BLAKE2bp state: clear the input buffer, set up the
 * root and all leaves, and mark the root and the last leaf as last nodes.
 * Returns -1 if outlen is 0 or exceeds BLAKE2B_OUTBYTES, or if a node fails
 * to initialize; 0 otherwise.
 */
int blake2bp_init( blake2bp_state *S, const uint8_t outlen )
{
  if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;

  memset( S->buf, 0, sizeof( S->buf ) );
  S->buflen = 0;

  if( blake2bp_init_root( S->R, outlen, 0 ) < 0 )
    return -1;

  for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
    if( blake2bp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1;

  S->R->last_node = 1;
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
  return 0;
}
/*
 * Initialize a keyed BLAKE2bp state.
 * Returns -1 if outlen is 0 or exceeds BLAKE2B_OUTBYTES, if key is NULL, if
 * keylen is 0 or exceeds BLAKE2B_KEYBYTES, or if a node fails to initialize;
 * 0 otherwise.  The key, zero-padded to one block, is fed to every leaf.
 */
int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen )
{
  if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;

  if( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1;

  memset( S->buf, 0, sizeof( S->buf ) );
  S->buflen = 0;

  if( blake2bp_init_root( S->R, outlen, keylen ) < 0 )
    return -1;

  for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
    if( blake2bp_init_leaf( S->S[i], outlen, keylen, i ) < 0 ) return -1;

  S->R->last_node = 1;
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
  {
    uint8_t block[BLAKE2B_BLOCKBYTES];
    memset( block, 0, BLAKE2B_BLOCKBYTES );
    memcpy( block, key, keylen );

    for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
      blake2b_update( S->S[i], block, BLAKE2B_BLOCKBYTES );

    secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
  }
  return 0;
}
/*
 * Absorb inlen bytes from in.
 * Input is dealt to the leaves block by block in round-robin order: leaf i
 * receives blocks i, i + PARALLELISM_DEGREE, i + 2 * PARALLELISM_DEGREE, ...
 * Whatever does not fill a complete stripe of PARALLELISM_DEGREE blocks is
 * kept in S->buf for the next call or for blake2bp_final().
 * Always returns 0.
 */
int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen )
{
  size_t left = S->buflen;
  size_t fill = sizeof( S->buf ) - left;

  /* Complete a partially buffered stripe first and hand it to the leaves. */
  if( left && inlen >= fill )
  {
    memcpy( S->buf + left, in, fill );

    for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
      blake2b_update( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES );

    in += fill;
    inlen -= fill;
    left = 0;
  }

#if defined(_OPENMP)
  #pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE)
#else

  for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
#endif
  {
#if defined(_OPENMP)
    size_t      id__ = omp_get_thread_num();
#endif
    uint64_t inlen__ = inlen;
    const uint8_t *in__ = ( const uint8_t * )in;
    in__ += id__ * BLAKE2B_BLOCKBYTES;

    /* Consume only whole stripes; each leaf takes its own block of each. */
    while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES )
    {
      blake2b_update( S->S[id__], in__, BLAKE2B_BLOCKBYTES );
      in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
      inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
    }
  }

  /* Skip what the leaves consumed; the remainder is less than one stripe,
     so it always fits in S->buf. */
  in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES );
  inlen %= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;

  if( inlen > 0 )
    memcpy( S->buf + left, in, ( size_t )inlen );

  S->buflen = left + ( size_t )inlen;
  return 0;
}
/*
 * Finish hashing: give each leaf its share of the buffered tail, finalize
 * the leaves, feed their full-length digests to the root in leaf order, and
 * write outlen bytes of the root digest to out.  Always returns 0.
 */
int blake2bp_final( blake2bp_state *S, uint8_t *out, const uint8_t outlen )
{
  uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES];

  for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
  {
    if( S->buflen > i * BLAKE2B_BLOCKBYTES )
    {
      size_t left = S->buflen - i * BLAKE2B_BLOCKBYTES;

      /* Leaf i owns at most one block of the buffered tail. */
      if( left > BLAKE2B_BLOCKBYTES ) left = BLAKE2B_BLOCKBYTES;

      blake2b_update( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, left );
    }

    blake2b_final( S->S[i], hash[i], BLAKE2B_OUTBYTES );
  }

  for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
    blake2b_update( S->R, hash[i], BLAKE2B_OUTBYTES );

  blake2b_final( S->R, out, outlen );
  return 0;
}
int blake2bp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uint64_t inlen, uint8_t keylen )
blake2b_state S[PARALLELISM_DEGREE][1];
blake2b_state FS[1];
/* Verify parameters */
if ( NULL == in ) return -1;
if ( NULL == out ) return -1;
if ( NULL == key ) keylen = 0;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2bp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1;
S[PARALLELISM_DEGREE - 1]->last_node = 1; // mark last node
if( keylen > 0 )
uint8_t block[BLAKE2B_BLOCKBYTES];
memset( block, 0, BLAKE2B_BLOCKBYTES );
memcpy( block, key, keylen );
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2b_update( S[i], block, BLAKE2B_BLOCKBYTES );
secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
#if defined(_OPENMP)
#pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE)
for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
#if defined(_OPENMP)
size_t id__ = omp_get_thread_num();
uint64_t inlen__ = inlen;
const uint8_t *in__ = ( const uint8_t * )in;
in__ += id__ * BLAKE2B_BLOCKBYTES;
blake2b_update( S[id__], in__, BLAKE2B_BLOCKBYTES );
if( inlen__ > id__ * BLAKE2B_BLOCKBYTES )
const size_t left = inlen__ - id__ * BLAKE2B_BLOCKBYTES;
const size_t len = left <= BLAKE2B_BLOCKBYTES ? left : BLAKE2B_BLOCKBYTES;
blake2b_update( S[id__], in__, len );
blake2b_final( S[id__], hash[id__], BLAKE2B_OUTBYTES );
if( blake2bp_init_root( FS, outlen, keylen ) < 0 )
return -1;
FS->last_node = 1; // Mark as last node
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2b_update( FS, hash[i], BLAKE2B_OUTBYTES );
blake2b_final( FS, out, outlen );
return 0;
#if defined(BLAKE2BP_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
/*
 * Self-test: hash every prefix of the byte sequence 0,1,2,... with the key
 * 0,1,2,... and compare against the keyed known-answer table.
 * Prints "ok" and returns 0 on success; prints "error" and returns -1 at the
 * first mismatch.
 */
int main( int argc, char **argv )
{
  uint8_t key[BLAKE2B_KEYBYTES];
  uint8_t buf[KAT_LENGTH];

  for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i )
    key[i] = ( uint8_t )i;

  for( size_t i = 0; i < KAT_LENGTH; ++i )
    buf[i] = ( uint8_t )i;

  for( size_t i = 0; i < KAT_LENGTH; ++i )
  {
    uint8_t hash[BLAKE2B_OUTBYTES];
    blake2bp( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES );

    if( 0 != memcmp( hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES ) )
    {
      puts( "error" );
      return -1;
    }
  }

  puts( "ok" );
  return 0;
}
#endif

ref/blake2s-ref.c Normal file
View File

@ -0,0 +1,380 @@
BLAKE2 reference source code package - reference C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2.h"
#include "blake2-impl.h"
/* Initialization vector: blake2s_init0() copies these eight words into h[]. */
static const uint32_t blake2s_IV[8] =
{
  0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
  0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
/*
 * Message word schedule: row r lists the order in which the 16 message words
 * are fed to the G function during round r of blake2s_compress().
 */
static const uint8_t blake2s_sigma[10][16] =
{
  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 } ,
  { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 } ,
  {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 } ,
  {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 } ,
  {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 } ,
  { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 } ,
  { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 } ,
  {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 } ,
  { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 , 0 } ,
};
/* Set the last-node flag f[1] to all ones.  Always returns 0. */
static inline int blake2s_set_lastnode( blake2s_state *S )
{
  S->f[1] = ~0U;
  return 0;
}

/* Clear the last-node flag f[1].  Always returns 0. */
static inline int blake2s_clear_lastnode( blake2s_state *S )
{
  S->f[1] = 0U;
  return 0;
}

/* Some helper functions, not necessarily useful */

/*
 * Set the last-block flag f[0] to all ones; when the state is marked
 * last_node, set the last-node flag as well.  Always returns 0.
 */
static inline int blake2s_set_lastblock( blake2s_state *S )
{
  if( S->last_node ) blake2s_set_lastnode( S );

  S->f[0] = ~0U;
  return 0;
}

/*
 * Clear the last-block flag f[0]; when the state is marked last_node, clear
 * the last-node flag as well.  Always returns 0.
 */
static inline int blake2s_clear_lastblock( blake2s_state *S )
{
  if( S->last_node ) blake2s_clear_lastnode( S );

  S->f[0] = 0U;
  return 0;
}
/*
 * Add inc to the 64-bit counter held in t[1]:t[0].
 * After the addition, t[0] < inc exactly when the low word wrapped, so the
 * comparison yields the carry into t[1].  Always returns 0.
 */
static inline int blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
{
  S->t[0] += inc;
  S->t[1] += ( S->t[0] < inc );
  return 0;
}
// Parameter-related functions
// Each setter writes one field of the parameter block P and returns 0.
// Multi-byte integers are stored little-endian through store32()/store48().

static inline int blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length )
{
  P->digest_length = digest_length;
  return 0;
}

static inline int blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout )
{
  P->fanout = fanout;
  return 0;
}

static inline int blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth )
{
  P->depth = depth;
  return 0;
}

static inline int blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length )
{
  store32( &P->leaf_length, leaf_length );
  return 0;
}

/* Only the low 48 bits of node_offset are stored. */
static inline int blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset )
{
  store48( P->node_offset, node_offset );
  return 0;
}

static inline int blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth )
{
  P->node_depth = node_depth;
  return 0;
}

static inline int blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length )
{
  P->inner_length = inner_length;
  return 0;
}

/* Copies exactly BLAKE2S_SALTBYTES bytes from salt. */
static inline int blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] )
{
  memcpy( P->salt, salt, BLAKE2S_SALTBYTES );
  return 0;
}

/* Copies exactly BLAKE2S_PERSONALBYTES bytes from personal. */
static inline int blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] )
{
  memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES );
  return 0;
}
/* Zero the whole state, then load the IV into h[].  Always returns 0. */
static inline int blake2s_init0( blake2s_state *S )
{
  memset( S, 0, sizeof( blake2s_state ) );

  for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i];

  return 0;
}
/*
 * Initialize S from an explicit parameter block: the state is reset to the
 * IV and each h[i] is XORed with the i-th little-endian 32-bit word of *P.
 * P is only read, through a byte pointer, so no alignment is assumed.
 * Always returns 0.
 */
int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
{
  const uint8_t *p = ( const uint8_t * )( P );

  blake2s_init0( S );

  /* IV XOR ParamBlock */
  for( size_t i = 0; i < 8; ++i )
    S->h[i] ^= load32( p + sizeof( S->h[i] ) * i );

  return 0;
}
// Sequential blake2s initialization
/*
 * Initialize S for unkeyed, sequential hashing with an outlen-byte digest.
 * Returns -1 if outlen is 0 or exceeds BLAKE2S_OUTBYTES, otherwise the
 * result of blake2s_init_param().
 */
int blake2s_init( blake2s_state *S, const uint8_t outlen )
{
  blake2s_param P[1];

  /* Move interval verification here? */
  if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;

  /* Sequential mode: fanout 1, depth 1, every tree field zero. */
  P->digest_length = outlen;
  P->key_length    = 0;
  P->fanout        = 1;
  P->depth         = 1;
  store32( &P->leaf_length, 0 );
  store48( P->node_offset, 0 );
  P->node_depth    = 0;
  P->inner_length  = 0;
  // memset(P->reserved, 0, sizeof(P->reserved) );
  memset( P->salt,     0, sizeof( P->salt ) );
  memset( P->personal, 0, sizeof( P->personal ) );
  return blake2s_init_param( S, P );
}
/*
 * Initialize S for keyed, sequential hashing.
 * Returns -1 if outlen is 0 or exceeds BLAKE2S_OUTBYTES, if key is NULL, or
 * if keylen is 0 or exceeds BLAKE2S_KEYBYTES; returns 0 otherwise.
 * The key, zero-padded to one full block, is absorbed as the first input.
 */
int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen )
{
  blake2s_param P[1];

  if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;

  if ( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1;

  P->digest_length = outlen;
  P->key_length    = keylen;
  P->fanout        = 1;
  P->depth         = 1;
  store32( &P->leaf_length, 0 );
  store48( P->node_offset, 0 );
  P->node_depth    = 0;
  P->inner_length  = 0;
  // memset(P->reserved, 0, sizeof(P->reserved) );
  memset( P->salt,     0, sizeof( P->salt ) );
  memset( P->personal, 0, sizeof( P->personal ) );

  if( blake2s_init_param( S, P ) < 0 ) return -1;

  {
    uint8_t block[BLAKE2S_BLOCKBYTES];
    memset( block, 0, BLAKE2S_BLOCKBYTES );
    memcpy( block, key, keylen );
    blake2s_update( S, block, BLAKE2S_BLOCKBYTES );
    secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
  }
  return 0;
}
/*
 * Compress one BLAKE2S_BLOCKBYTES-byte block into the chaining value S->h.
 * The 16-word working vector v starts as h[0..7] followed by the IV, with
 * the counter t and the flags f XORed into its last four words.  Ten rounds
 * of G are applied, then both halves of v are folded back into h.
 * S->t and S->f are read but not modified.  Always returns 0.
 */
static int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] )
{
  uint32_t m[16];
  uint32_t v[16];

  /* Message words are little-endian in the block. */
  for( size_t i = 0; i < 16; ++i )
    m[i] = load32( block + i * sizeof( m[i] ) );

  for( size_t i = 0; i < 8; ++i )
    v[i] = S->h[i];

  v[ 8] = blake2s_IV[0];
  v[ 9] = blake2s_IV[1];
  v[10] = blake2s_IV[2];
  v[11] = blake2s_IV[3];
  v[12] = S->t[0] ^ blake2s_IV[4];
  v[13] = S->t[1] ^ blake2s_IV[5];
  v[14] = S->f[0] ^ blake2s_IV[6];
  v[15] = S->f[1] ^ blake2s_IV[7];
  /* G mixes two message words, selected by sigma for round r, into a,b,c,d. */
#define G(r,i,a,b,c,d) \
  do { \
    a = a + b + m[blake2s_sigma[r][2*i+0]]; \
    d = rotr32(d ^ a, 16); \
    c = c + d; \
    b = rotr32(b ^ c, 12); \
    a = a + b + m[blake2s_sigma[r][2*i+1]]; \
    d = rotr32(d ^ a, 8); \
    c = c + d; \
    b = rotr32(b ^ c, 7); \
  } while(0)
  /* One round: G on the four columns of v, then on its four diagonals. */
#define ROUND(r)  \
  do { \
    G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
    G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
    G(r,2,v[ 2],v[ 6],v[10],v[14]); \
    G(r,3,v[ 3],v[ 7],v[11],v[15]); \
    G(r,4,v[ 0],v[ 5],v[10],v[15]); \
    G(r,5,v[ 1],v[ 6],v[11],v[12]); \
    G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
    G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
  } while(0)
  ROUND( 0 );
  ROUND( 1 );
  ROUND( 2 );
  ROUND( 3 );
  ROUND( 4 );
  ROUND( 5 );
  ROUND( 6 );
  ROUND( 7 );
  ROUND( 8 );
  ROUND( 9 );

  for( size_t i = 0; i < 8; ++i )
    S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];

#undef G
#undef ROUND
  return 0;
}
/*
 * Absorb inlen bytes from in.
 * S->buf holds up to two blocks.  A block is compressed only when more input
 * is known to follow, so the final block always remains buffered for
 * blake2s_final().  Always returns 0.
 */
int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen )
{
  while( inlen > 0 )
  {
    size_t left = S->buflen;
    size_t fill = 2 * BLAKE2S_BLOCKBYTES - left;

    if( inlen > fill )
    {
      memcpy( S->buf + left, in, fill ); // Fill buffer
      S->buflen += fill;
      blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
      blake2s_compress( S, S->buf ); // Compress
      memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left
      /* One block was consumed.  Without this the buffer stays "full",
         fill becomes 0 and the loop never makes progress. */
      S->buflen -= BLAKE2S_BLOCKBYTES;
      in += fill;
      inlen -= fill;
    }
    else // inlen <= fill
    {
      memcpy( S->buf + left, in, ( size_t )inlen );
      S->buflen += ( size_t )inlen; // Be lazy, do not compress
      in += inlen;
      inlen -= inlen;
    }
  }

  return 0;
}
/*
 * Finish hashing and write the first outlen bytes of the digest to out.
 * Returns -1 if outlen exceeds BLAKE2S_OUTBYTES (the copy would read past
 * the local digest buffer), 0 otherwise.
 */
int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen )
{
  uint8_t buffer[BLAKE2S_OUTBYTES];

  if( outlen > BLAKE2S_OUTBYTES ) return -1;

  /* More than one block buffered: compress the first, keep the rest. */
  if( S->buflen > BLAKE2S_BLOCKBYTES )
  {
    blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
    blake2s_compress( S, S->buf );
    /* Drop the consumed block before moving the remainder down; copying
       the un-reduced length would read past the end of S->buf. */
    S->buflen -= BLAKE2S_BLOCKBYTES;
    memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen );
  }

  blake2s_increment_counter( S, ( uint32_t )S->buflen );
  blake2s_set_lastblock( S );
  memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
  blake2s_compress( S, S->buf );

  for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
    store32( buffer + sizeof( S->h[i] ) * i, S->h[i] );

  memcpy( out, buffer, outlen );
  return 0;
}
/*
 * One-shot BLAKE2s: hash inlen bytes at in, optionally keyed, and write
 * outlen digest bytes to out.
 * Returns -1 if out is NULL, if in is NULL while inlen is nonzero, or if
 * initialization rejects outlen/keylen; returns 0 on success.
 * A NULL key selects unkeyed hashing regardless of keylen.
 */
int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen )
{
  blake2s_state S[1];

  /* Verify parameters */
  if ( NULL == in && inlen > 0 ) return -1;

  if ( NULL == out ) return -1;

  if ( NULL == key ) keylen = 0; /* Fail here instead if keylen != 0 and key == NULL? */

  /* Exactly one initializer must run: running the unkeyed one after the
     keyed one would reset the state and discard the key. */
  if( keylen > 0 )
  {
    if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
  }
  else
  {
    if( blake2s_init( S, outlen ) < 0 ) return -1;
  }

  blake2s_update( S, ( const uint8_t * )in, inlen );
  blake2s_final( S, out, outlen );
  return 0;
}
#if defined(BLAKE2S_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
/*
 * Self-test: hash every prefix of the byte sequence 0,1,2,... with the key
 * 0,1,2,... and compare against the keyed known-answer table.
 * Prints "ok" and returns 0 on success; prints "error" and returns -1 at the
 * first mismatch.
 */
int main( int argc, char **argv )
{
  uint8_t key[BLAKE2S_KEYBYTES];
  uint8_t buf[KAT_LENGTH];

  for( size_t i = 0; i < BLAKE2S_KEYBYTES; ++i )
    key[i] = ( uint8_t )i;

  for( size_t i = 0; i < KAT_LENGTH; ++i )
    buf[i] = ( uint8_t )i;

  for( size_t i = 0; i < KAT_LENGTH; ++i )
  {
    uint8_t hash[BLAKE2S_OUTBYTES];
    blake2s( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES );

    if( 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) )
    {
      puts( "error" );
      return -1;
    }
  }

  puts( "ok" );
  return 0;
}
#endif

ref/blake2sp-ref.c Normal file
View File

@ -0,0 +1,295 @@
BLAKE2 reference source code package - reference C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#if defined(_OPENMP)
#include <omp.h>
#include "blake2.h"
#include "blake2-impl.h"
/*
 * Initialize S as leaf number `offset` of the two-level BLAKE2sp tree
 * (node depth 0, inner length equal to outlen).
 * Returns the result of blake2s_init_param().
 */
static inline int blake2sp_init_leaf( blake2s_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset )
{
  blake2s_param P[1];
  P->digest_length = outlen;
  P->key_length = keylen;
  /* Every byte of P is hashed into the state, so fanout must be set too. */
  P->fanout = PARALLELISM_DEGREE;
  P->depth = 2;
  store32( &P->leaf_length, 0 );
  store48( P->node_offset, offset );
  P->node_depth = 0;
  P->inner_length = outlen;
  memset( P->salt, 0, sizeof( P->salt ) );
  memset( P->personal, 0, sizeof( P->personal ) );
  return blake2s_init_param( S, P );
}
/*
 * Initialize S as the root node of the two-level BLAKE2sp tree
 * (node depth 1, node offset 0, inner length equal to outlen).
 * Returns the result of blake2s_init_param().
 */
static inline int blake2sp_init_root( blake2s_state *S, uint8_t outlen, uint8_t keylen )
{
  blake2s_param P[1];
  P->digest_length = outlen;
  P->key_length = keylen;
  /* Every byte of P is hashed into the state, so fanout must be set too. */
  P->fanout = PARALLELISM_DEGREE;
  P->depth = 2;
  store32( &P->leaf_length, 0 );
  store48( P->node_offset, 0ULL );
  P->node_depth = 1;
  P->inner_length = outlen;
  memset( P->salt, 0, sizeof( P->salt ) );
  memset( P->personal, 0, sizeof( P->personal ) );
  return blake2s_init_param( S, P );
}
/*
 * Initialize an unkeyed BLAKE2sp state: clear the input buffer, set up the
 * root and all leaves, and mark the root and the last leaf as last nodes.
 * Returns -1 if outlen is 0 or exceeds BLAKE2S_OUTBYTES, or if a node fails
 * to initialize; 0 otherwise.
 */
int blake2sp_init( blake2sp_state *S, const uint8_t outlen )
{
  if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;

  memset( S->buf, 0, sizeof( S->buf ) );
  S->buflen = 0;

  if( blake2sp_init_root( S->R, outlen, 0 ) < 0 )
    return -1;

  for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
    if( blake2sp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1;

  S->R->last_node = 1;
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
  return 0;
}
/*
 * Prepare S for keyed streaming BLAKE2sp hashing.
 *
 * The key is zero-padded to one BLAKE2s block and fed to every leaf before
 * any message data.
 *
 * Returns 0 on success, -1 if outlen is 0 or exceeds BLAKE2S_OUTBYTES, if key
 * is NULL, if keylen is 0 or exceeds BLAKE2S_KEYBYTES, or if initialising the
 * root or any leaf fails.
 */
int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen )
{
  size_t leaf;

  if( outlen == 0 || outlen > BLAKE2S_OUTBYTES )
    return -1;

  if( key == NULL || keylen == 0 || keylen > BLAKE2S_KEYBYTES )
    return -1;

  /* Start with an empty staging buffer. */
  S->buflen = 0;
  memset( S->buf, 0, sizeof( S->buf ) );

  if( blake2sp_init_root( S->R, outlen, keylen ) < 0 )
    return -1;

  for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
  {
    if( blake2sp_init_leaf( S->S[leaf], outlen, keylen, leaf ) < 0 )
      return -1;
  }

  /* Flag the root and the highest-numbered leaf as last nodes. */
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
  S->R->last_node = 1;

  {
    uint8_t padded_key[BLAKE2S_BLOCKBYTES];
    memset( padded_key, 0, BLAKE2S_BLOCKBYTES );
    memcpy( padded_key, key, keylen );

    for( leaf = 0; leaf < PARALLELISM_DEGREE; ++leaf )
      blake2s_update( S->S[leaf], padded_key, BLAKE2S_BLOCKBYTES );

    secure_zero_memory( padded_key, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
  }
  return 0;
}
/*
 * Absorb inlen bytes from in into the streaming BLAKE2sp state S.
 *
 * Input is striped across the leaves one BLAKE2S_BLOCKBYTES block at a time:
 * leaf k receives blocks k, k + PARALLELISM_DEGREE, k + 2*PARALLELISM_DEGREE,
 * and so on. Only whole stripes (PARALLELISM_DEGREE blocks) are hashed here;
 * the remainder is kept in S->buf for the next call or for blake2sp_final().
 *
 * Always returns 0.
 */
int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen )
{
  size_t left = S->buflen;
  size_t fill = sizeof( S->buf ) - left;

  /* Complete and flush a partially filled stripe first. */
  if( left && inlen >= fill )
  {
    memcpy( S->buf + left, in, fill );

    for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
      blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES );

    in += fill;
    inlen -= fill;
    left = 0;
  }

#if defined(_OPENMP)
  #pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE)
#else
  for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
#endif
  {
#if defined(_OPENMP)
    size_t id__ = omp_get_thread_num();
#endif
    uint64_t inlen__ = inlen;
    const uint8_t *in__ = ( const uint8_t * )in;
    in__ += id__ * BLAKE2S_BLOCKBYTES;

    /* Walk every complete stripe, taking this leaf's block from each one.
       Without this loop only the first block per leaf was ever hashed. */
    while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES )
    {
      blake2s_update( S->S[id__], in__, BLAKE2S_BLOCKBYTES );
      in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
      inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
    }
  }

  /* Skip what the leaves consumed; what remains is shorter than one stripe,
     so the copy below cannot run past the end of S->buf. */
  in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES );
  inlen %= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;

  if( inlen > 0 )
    memcpy( S->buf + left, in, ( size_t )inlen );

  S->buflen = left + ( size_t )inlen;
  return 0;
}
/*
 * Finish a streaming BLAKE2sp computation and write outlen bytes to out.
 *
 * Each leaf absorbs its share of the buffered tail (at most one block) and is
 * finalised to a BLAKE2S_OUTBYTES digest; the leaf digests are then fed, in
 * leaf order, to the root state, whose digest is the result.
 *
 * Always returns 0.
 */
int blake2sp_final( blake2sp_state *S, uint8_t *out, const uint8_t outlen )
{
  uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];

  for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
  {
    if( S->buflen > i * BLAKE2S_BLOCKBYTES )
    {
      size_t left = S->buflen - i * BLAKE2S_BLOCKBYTES;

      /* Leaf i owns only block i of the buffer; without this clamp it would
         also absorb the blocks that belong to the leaves after it. */
      if( left > BLAKE2S_BLOCKBYTES ) left = BLAKE2S_BLOCKBYTES;

      blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, left );
    }

    blake2s_final( S->S[i], hash[i], BLAKE2S_OUTBYTES );
  }

  for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
    blake2s_update( S->R, hash[i], BLAKE2S_OUTBYTES );

  blake2s_final( S->R, out, outlen );
  return 0;
}
int blake2sp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uint64_t inlen, uint8_t keylen )
blake2s_state S[PARALLELISM_DEGREE][1];
blake2s_state FS[1];
/* Verify parameters */
if ( NULL == in ) return -1;
if ( NULL == out ) return -1;
if ( NULL == key ) keylen = 0;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2sp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1;
S[PARALLELISM_DEGREE - 1]->last_node = 1; // mark last node
if( keylen > 0 )
uint8_t block[BLAKE2S_BLOCKBYTES];
memset( block, 0, BLAKE2S_BLOCKBYTES );
memcpy( block, key, keylen );
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( S[i], block, BLAKE2S_BLOCKBYTES );
secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
#if defined(_OPENMP)
#pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE)
for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
#if defined(_OPENMP)
size_t id__ = omp_get_thread_num();
uint64_t inlen__ = inlen;
const uint8_t *in__ = ( const uint8_t * )in;
in__ += id__ * BLAKE2S_BLOCKBYTES;
blake2s_update( S[id__], in__, BLAKE2S_BLOCKBYTES );
if( inlen__ > id__ * BLAKE2S_BLOCKBYTES )
const size_t left = inlen__ - id__ * BLAKE2S_BLOCKBYTES;
const size_t len = left <= BLAKE2S_BLOCKBYTES ? left : BLAKE2S_BLOCKBYTES;
blake2s_update( S[id__], in__, len );
blake2s_final( S[id__], hash[id__], BLAKE2S_OUTBYTES );
if( blake2sp_init_root( FS, outlen, keylen ) < 0 )
return -1;
FS->last_node = 1;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( FS, hash[i], BLAKE2S_OUTBYTES );
blake2s_final( FS, out, outlen );
return 0;
#if defined(BLAKE2SP_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
int main( int argc, char **argv )
uint8_t key[BLAKE2S_KEYBYTES];
uint8_t buf[KAT_LENGTH];
for( size_t i = 0; i < BLAKE2S_KEYBYTES; ++i )
key[i] = ( uint8_t )i;
for( size_t i = 0; i < KAT_LENGTH; ++i )
buf[i] = ( uint8_t )i;
for( size_t i = 0; i < KAT_LENGTH; ++i )
uint8_t hash[BLAKE2S_OUTBYTES];
blake2sp( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES );
if( 0 != memcmp( hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES ) )
puts( "error" );
return -1;
puts( "ok" );
return 0;

ref/genkat.c Normal file
View File

@ -0,0 +1,210 @@
BLAKE2 reference source code package - reference C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "blake2.h"
#define STR_(x) #x
#define STR(x) STR_(x)
#define LENGTH 256
/*
 * Print the unkeyed known-answer table for one hash function as C source.
 *
 *   name         hash function to call; also the table's name prefix (<name>_kat)
 *   size_prefix  BLAKE2S or BLAKE2B; pasted with _OUTBYTES for the digest size
 *
 * For every i in 0..LENGTH-1 the macro calls name( hash, in, NULL, OUTBYTES, i, 0 )
 * and prints the digest as hex bytes, eight per row. It expects arrays named
 * `hash` and `in` to be in scope at the expansion site.
 */
#define MAKE_KAT(name,size_prefix) \
do \
{ \
printf( "static const uint8_t " #name "_kat[KAT_LENGTH][" #size_prefix "_OUTBYTES] = \n{\n" ); \
for( size_t i = 0; i < LENGTH; ++i ) \
{ \
name( hash, in, NULL, size_prefix ## _OUTBYTES, i, 0 ); \
printf( "\t{\n\t\t" ); \
for( int j = 0; j < size_prefix ## _OUTBYTES; ++j ) \
printf( "0x%02X%s", hash[j], ( j + 1 ) == size_prefix ## _OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " ); \
printf( "\t},\n" ); \
} \
printf( "};\n\n\n\n\n" ); \
} while (0)
/*
 * Print the keyed known-answer table for one hash function as C source.
 *
 *   name         hash function to call; also the table's name prefix (<name>_keyed_kat)
 *   size_prefix  BLAKE2S or BLAKE2B; pasted with _OUTBYTES and _KEYBYTES
 *
 * For every i in 0..LENGTH-1 the macro calls
 * name( hash, in, key, OUTBYTES, i, KEYBYTES ) and prints the digest as hex
 * bytes, eight per row. It expects arrays named `hash`, `in` and `key` to be
 * in scope at the expansion site.
 */
#define MAKE_KEYED_KAT(name,size_prefix) \
do \
{ \
printf( "static const uint8_t " #name "_keyed_kat[KAT_LENGTH][" #size_prefix "_OUTBYTES] = \n{\n" ); \
for( size_t i = 0; i < LENGTH; ++i ) \
{ \
name( hash, in, key, size_prefix ## _OUTBYTES, i, size_prefix ## _KEYBYTES ); \
printf( "\t{\n\t\t" ); \
for( int j = 0; j < size_prefix ## _OUTBYTES; ++j ) \
printf( "0x%02X%s", hash[j], ( j + 1 ) == size_prefix ## _OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " ); \
printf( "\t},\n" ); \
} \
printf( "};\n\n\n\n\n" ); \
} while (0)
int main( int argc, char **argv )
uint8_t key[64] = {0};
uint8_t in[LENGTH] = {0};
uint8_t hash[64] = {0};
for( size_t i = 0; i < sizeof( in ); ++i )
in[i] = i;
for( size_t i = 0; i < sizeof( key ); ++i )
key[i] = i;
puts( "#pragma once\n"
"#ifndef __BLAKE2_KAT_H__\n"
"#define __BLAKE2_KAT_H__\n\n\n"
"#include <stdint.h>\n\n"
"#define KAT_LENGTH " STR( LENGTH ) "\n\n\n" );
MAKE_KAT( blake2s, BLAKE2S );
MAKE_KAT( blake2b, BLAKE2B );
MAKE_KAT( blake2sp, BLAKE2S );
MAKE_KEYED_KAT( blake2sp, BLAKE2S );
MAKE_KAT( blake2bp, BLAKE2B );
MAKE_KEYED_KAT( blake2bp, BLAKE2B );
/*printf( "static const uint8_t blake2s_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = \n{\n" );
for( size_t i = 0; i < LENGTH; ++i )
blake2s( hash, in, NULL, BLAKE2S_OUTBYTES, i, 0 );
printf( "\t{\n\t\t" );
for( int j = 0; j < BLAKE2S_OUTBYTES; ++j )
printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2S_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " );
printf( "\t},\n" );
printf( "};\n\n\n\n\n" );
printf( "static const uint8_t blake2s_keyed_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = \n{\n" );
for( size_t i = 0; i < LENGTH; ++i )
blake2s( hash, in, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES );
printf( "\t{\n\t\t" );
for( int j = 0; j < BLAKE2S_OUTBYTES; ++j )
printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2S_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " );
printf( "\t},\n" );
printf( "};\n\n\n\n\n" );
printf( "static const uint8_t blake2b_kat[KAT_LENGTH][BLAKE2B_OUTBYTES] = \n{\n" );
for( size_t i = 0; i < LENGTH; ++i )
blake2b( hash, in, NULL, BLAKE2B_OUTBYTES, i, 0 );
printf( "\t{\n\t\t" );
for( int j = 0; j < BLAKE2B_OUTBYTES; ++j )
printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2B_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " );
printf( "\t},\n" );
printf( "};\n\n\n\n\n" );
printf( "static const uint8_t blake2b_keyed_kat[KAT_LENGTH][BLAKE2B_OUTBYTES] = \n{\n" );
for( size_t i = 0; i < LENGTH; ++i )
blake2b( hash, in, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES );
printf( "\t{\n\t\t" );
for( int j = 0; j < BLAKE2B_OUTBYTES; ++j )
printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2B_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " );
printf( "\t},\n" );
printf( "};\n\n\n\n\n" );
printf( "static const uint8_t blake2sp_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = \n{\n" );
for( size_t i = 0; i < LENGTH; ++i )
blake2sp( hash, in, NULL, BLAKE2S_OUTBYTES, i, 0 );
printf( "\t{\n\t\t" );
for( int j = 0; j < BLAKE2S_OUTBYTES; ++j )
printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2S_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " );
printf( "\t},\n" );
printf( "};\n\n\n\n\n" );
printf( "static const uint8_t blake2sp_keyed_kat[KAT_LENGTH][BLAKE2S_OUTBYTES] = \n{\n" );
for( size_t i = 0; i < LENGTH; ++i )
blake2sp( hash, in, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES );
printf( "\t{\n\t\t" );
for( int j = 0; j < BLAKE2S_OUTBYTES; ++j )
printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2S_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " );
printf( "\t},\n" );
printf( "};\n\n\n\n\n" );
printf( "static const uint8_t blake2bp_kat[KAT_LENGTH][BLAKE2B_OUTBYTES] = \n{\n" );
for( size_t i = 0; i < LENGTH; ++i )
blake2bp( hash, in, NULL, BLAKE2B_OUTBYTES, i, 0 );
printf( "\t{\n\t\t" );
for( int j = 0; j < BLAKE2B_OUTBYTES; ++j )
printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2B_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " );
printf( "\t},\n" );
printf( "};\n\n\n\n\n" );
printf( "static const uint8_t blake2bp_keyed_kat[KAT_LENGTH][BLAKE2B_OUTBYTES] = \n{\n" );
for( size_t i = 0; i < LENGTH; ++i )
blake2bp( hash, in, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES );
printf( "\t{\n\t\t" );
for( int j = 0; j < BLAKE2B_OUTBYTES; ++j )
printf( "0x%02X%s", hash[j], ( j + 1 ) == BLAKE2B_OUTBYTES ? "\n" : j && !( ( j + 1 ) % 8 ) ? ",\n\t\t" : ", " );
printf( "\t},\n" );
printf( "};\n\n\n\n\n" );*/
puts( "#endif\n\n\n" );
return 0;

ref/makefile Normal file
View File

@ -0,0 +1,23 @@
# Builds the self-test binary for each reference implementation.
#   make / make all   build blake2s, blake2b, blake2sp and blake2bp
#   make kat          rebuild genkat and regenerate blake2-kat.h
#   make clean        remove build products
CFLAGS=-std=c99 -Wall -pedantic

.PHONY: all kat clean

all: blake2s blake2b blake2sp blake2bp

blake2s: blake2s-ref.c
	$(CC) blake2s-ref.c -o $@ $(CFLAGS) -DBLAKE2S_SELFTEST

blake2b: blake2b-ref.c
	$(CC) blake2b-ref.c -o $@ $(CFLAGS) -DBLAKE2B_SELFTEST

# The parallel variants call into the sequential implementation, so the
# sequential source listed as a prerequisite has to be compiled in as well.
blake2sp: blake2sp-ref.c blake2s-ref.c
	$(CC) blake2sp-ref.c blake2s-ref.c -o $@ $(CFLAGS) -DBLAKE2SP_SELFTEST

blake2bp: blake2bp-ref.c blake2b-ref.c
	$(CC) blake2bp-ref.c blake2b-ref.c -o $@ $(CFLAGS) -DBLAKE2BP_SELFTEST

kat:
	$(CC) $(CFLAGS) -o genkat genkat.c blake2b-ref.c blake2s-ref.c blake2sp-ref.c blake2bp-ref.c
	./genkat > blake2-kat.h

clean:
	rm -rf *.o genkat blake2s blake2b blake2sp blake2bp

sse/blake2-config.h Normal file
View File

@ -0,0 +1,72 @@
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#pragma once
#ifndef __BLAKE2_CONFIG_H__
#define __BLAKE2_CONFIG_H__

/*
 * Instruction-set selection for the optimized implementations.
 *
 * Step 1 maps the compiler's predefined macros to HAVE_* switches.
 * Step 2 makes each switch imply the older extensions below it, so code only
 * needs to test the lowest level it requires. A HAVE_* macro may also be
 * defined by the build before this header is included.
 */

// These don't work everywhere
#if defined(__SSE2__)
#define HAVE_SSE2
#endif

#if defined(__SSSE3__)
#define HAVE_SSSE3
#endif

#if defined(__SSE4_1__)
#define HAVE_SSE41
#endif

#if defined(__AVX__)
#define HAVE_AVX
#endif

#if defined(__XOP__)
#define HAVE_XOP
#endif

/* Implication chain: AVX2 -> AVX, XOP -> AVX, AVX -> SSE4.1 -> SSSE3 -> SSE2. */
#ifdef HAVE_AVX2
#ifndef HAVE_AVX
#define HAVE_AVX
#endif
#endif

#ifdef HAVE_XOP
#ifndef HAVE_AVX
#define HAVE_AVX
#endif
#endif

#ifdef HAVE_AVX
#ifndef HAVE_SSE41
#define HAVE_SSE41
#endif
#endif

#ifdef HAVE_SSE41
#ifndef HAVE_SSSE3
#define HAVE_SSSE3
#endif
#endif

#ifdef HAVE_SSSE3
#ifndef HAVE_SSE2
#define HAVE_SSE2
#endif
#endif

#if !defined(HAVE_SSE2)
#error "This code requires at least SSE2."
#endif

#endif

sse/blake2-impl.h Normal file
View File

@ -0,0 +1,133 @@
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#pragma once
#ifndef __BLAKE2_IMPL_H__
#define __BLAKE2_IMPL_H__
#include <stdint.h>
/*
 * Read a 32-bit little-endian value from src.
 *
 * The value is assembled byte by byte, so src needs no particular alignment
 * and the result does not depend on host byte order. The earlier pointer-cast
 * read was undefined for unaligned addresses and returned the native byte
 * order.
 */
static inline uint32_t load32( const void *src )
{
  const uint8_t *p = ( const uint8_t * )src;
  uint32_t w = p[0];
  w |= ( uint32_t )p[1] <<  8;
  w |= ( uint32_t )p[2] << 16;
  w |= ( uint32_t )p[3] << 24;
  return w;
}
/*
 * Read a 64-bit little-endian value from src.
 *
 * The value is assembled byte by byte, so src needs no particular alignment
 * and the result does not depend on host byte order. The earlier pointer-cast
 * read was undefined for unaligned addresses and returned the native byte
 * order.
 */
static inline uint64_t load64( const void *src )
{
  const uint8_t *p = ( const uint8_t * )src;
  uint64_t w = p[0];
  w |= ( uint64_t )p[1] <<  8;
  w |= ( uint64_t )p[2] << 16;
  w |= ( uint64_t )p[3] << 24;
  w |= ( uint64_t )p[4] << 32;
  w |= ( uint64_t )p[5] << 40;
  w |= ( uint64_t )p[6] << 48;
  w |= ( uint64_t )p[7] << 56;
  return w;
}
/*
 * Write w to dst as four bytes, least significant byte first.
 *
 * Bytes are written individually, so dst needs no particular alignment and
 * the stored layout does not depend on host byte order. The earlier
 * pointer-cast write was undefined for unaligned addresses.
 */
static inline void store32( void *dst, uint32_t w )
{
  uint8_t *p = ( uint8_t * )dst;
  p[0] = ( uint8_t )( w       );
  p[1] = ( uint8_t )( w >>  8 );
  p[2] = ( uint8_t )( w >> 16 );
  p[3] = ( uint8_t )( w >> 24 );
}
/*
 * Write w to dst as eight bytes, least significant byte first.
 *
 * Bytes are written individually, so dst needs no particular alignment and
 * the stored layout does not depend on host byte order. The earlier
 * pointer-cast write was undefined for unaligned addresses.
 */
static inline void store64( void *dst, uint64_t w )
{
  uint8_t *p = ( uint8_t * )dst;
  p[0] = ( uint8_t )( w       );
  p[1] = ( uint8_t )( w >>  8 );
  p[2] = ( uint8_t )( w >> 16 );
  p[3] = ( uint8_t )( w >> 24 );
  p[4] = ( uint8_t )( w >> 32 );
  p[5] = ( uint8_t )( w >> 40 );
  p[6] = ( uint8_t )( w >> 48 );
  p[7] = ( uint8_t )( w >> 56 );
}
/* Read six bytes from src as a little-endian value; the top 16 bits of the
   result are zero. */
static inline uint64_t load48( const void *src )
{
  const uint8_t *bytes = ( const uint8_t * )src;
  uint64_t value = 0;

  for( unsigned k = 0; k < 6; ++k )
    value |= ( uint64_t )bytes[k] << ( 8 * k );

  return value;
}
/* Write the low 48 bits of w to dst as six bytes, least significant first. */
static inline void store48( void *dst, uint64_t w )
{
  uint8_t *bytes = ( uint8_t * )dst;

  for( unsigned k = 0; k < 6; ++k )
  {
    bytes[k] = ( uint8_t )w;
    w >>= 8;
  }
}
/* Rotate w left by c bits. Both shift counts are reduced modulo 32, so c == 0
   (formerly a shift by the full word width, which is undefined) returns w. */
static inline uint32_t rotl32( const uint32_t w, const unsigned c )
{
  return ( w << ( c & 31 ) ) | ( w >> ( ( 32 - c ) & 31 ) );
}
/* Rotate w left by c bits. Both shift counts are reduced modulo 64, so c == 0
   (formerly a shift by the full word width, which is undefined) returns w. */
static inline uint64_t rotl64( const uint64_t w, const unsigned c )
{
  return ( w << ( c & 63 ) ) | ( w >> ( ( 64 - c ) & 63 ) );
}
/* Rotate w right by c bits. Both shift counts are reduced modulo 32, so c == 0
   (formerly a shift by the full word width, which is undefined) returns w. */
static inline uint32_t rotr32( const uint32_t w, const unsigned c )
{
  return ( w >> ( c & 31 ) ) | ( w << ( ( 32 - c ) & 31 ) );
}
/* Rotate w right by c bits. Both shift counts are reduced modulo 64, so c == 0
   (formerly a shift by the full word width, which is undefined) returns w. */
static inline uint64_t rotr64( const uint64_t w, const unsigned c )
{
  return ( w >> ( c & 63 ) ) | ( w << ( ( 64 - c ) & 63 ) );
}
/* Overwrite n bytes at v with zeros. The stores go through a volatile pointer
   so that the compiler does not drop them the way it may drop a memset(). */
static inline void secure_zero_memory( void *v, size_t n )
{
  volatile uint8_t *cursor = ( volatile uint8_t * )v;

  for( size_t k = 0; k < n; ++k )
    cursor[k] = 0;
}

sse/blake2-kat.h Normal file

File diff suppressed because it is too large Load Diff

sse/blake2.h Normal file
View File

@ -0,0 +1,156 @@
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#pragma once
#ifndef __BLAKE2_H__
#define __BLAKE2_H__
#include <stddef.h>
#include <stdint.h>
/* ALIGN(x): request x-byte alignment, spelled for MSVC or for GCC-compatible
   compilers. Exactly one definition is selected. */
#if defined(_MSC_VER)
#define ALIGN(x) __declspec(align(x))
#else
#define ALIGN(x) __attribute__ ((__aligned__(x)))
#endif
#if defined(__cplusplus)
extern "C" {
/* Size limits of BLAKE2s, in bytes. */
enum blake2s_constant
{
  BLAKE2S_BLOCKBYTES = 64,    /* input block size */
  BLAKE2S_OUTBYTES = 32,      /* maximum digest length */
  BLAKE2S_KEYBYTES = 32,      /* maximum key length */
  BLAKE2S_SALTBYTES = 8,      /* salt field of the parameter block */
  BLAKE2S_PERSONALBYTES = 8   /* personalization field of the parameter block */
};
/* Size limits of BLAKE2b, in bytes. */
enum blake2b_constant
{
  BLAKE2B_BLOCKBYTES = 128,   /* input block size */
  BLAKE2B_OUTBYTES = 64,      /* maximum digest length */
  BLAKE2B_KEYBYTES = 64,      /* maximum key length */
  BLAKE2B_SALTBYTES = 16,     /* salt field of the parameter block */
  BLAKE2B_PERSONALBYTES = 16  /* personalization field of the parameter block */
};
#pragma pack(push, 1)
/* BLAKE2s parameter block, declared after #pragma pack(push, 1).
   The number trailing each field appears to be the running byte total after
   that field - confirm against BLAKE2S_SALTBYTES / BLAKE2S_PERSONALBYTES. */
typedef struct __blake2s_param
uint8_t digest_length; // 1
uint8_t key_length; // 2
uint8_t fanout; // 3
uint8_t depth; // 4
uint32_t leaf_length; // 8
uint8_t node_offset[6];// 14
uint8_t node_depth; // 15
uint8_t inner_length; // 16
// uint8_t reserved[0];
uint8_t salt[BLAKE2S_SALTBYTES]; // 24
uint8_t personal[BLAKE2S_PERSONALBYTES]; // 32
} blake2s_param;
/* Streaming state of one BLAKE2s instance; ALIGN( 64 ) is applied to the typedef.
   NOTE(review): h/t/f presumably hold the chaining value, byte counter and
   finalization flags - confirm against the compression function. */
ALIGN( 64 ) typedef struct __blake2s_state
uint32_t h[8];
uint32_t t[2];
uint32_t f[2];
uint8_t buf[2 * BLAKE2S_BLOCKBYTES]; /* room for two input blocks */
size_t buflen;
uint8_t last_node;
} blake2s_state;
/* BLAKE2b parameter block, declared after #pragma pack(push, 1).
   The number trailing each field appears to be the running byte total after
   that field - confirm against BLAKE2B_SALTBYTES / BLAKE2B_PERSONALBYTES. */
typedef struct __blake2b_param
uint8_t digest_length; // 1
uint8_t key_length; // 2
uint8_t fanout; // 3
uint8_t depth; // 4
uint32_t leaf_length; // 8
uint64_t node_offset; // 16
uint8_t node_depth; // 17
uint8_t inner_length; // 18
uint8_t reserved[14]; // 32
uint8_t salt[BLAKE2B_SALTBYTES]; // 48
uint8_t personal[BLAKE2B_PERSONALBYTES]; // 64
} blake2b_param;
/* Streaming state of one BLAKE2b instance; ALIGN( 64 ) is applied to the typedef.
   NOTE(review): h/t/f presumably hold the chaining value, byte counter and
   finalization flags - confirm against the compression function. */
ALIGN( 64 ) typedef struct __blake2b_state
uint64_t h[8];
uint64_t t[2];
uint64_t f[2];
uint8_t buf[2 * BLAKE2B_BLOCKBYTES]; /* room for two input blocks */
size_t buflen;
uint8_t last_node;
} blake2b_state;
/* Streaming state of BLAKE2sp: eight BLAKE2s leaf states, one root state and
   a staging buffer of eight BLAKE2s blocks. */
ALIGN( 64 ) typedef struct __blake2sp_state
blake2s_state S[8][1]; /* leaves */
blake2s_state R[1]; /* root */
uint8_t buf[8 * BLAKE2S_BLOCKBYTES];
size_t buflen; /* bytes currently held in buf */
} blake2sp_state;
/* Streaming state of BLAKE2bp: four BLAKE2b leaf states, one root state and
   a staging buffer of four BLAKE2b blocks. */
ALIGN( 64 ) typedef struct __blake2bp_state
blake2b_state S[4][1]; /* leaves */
blake2b_state R[1]; /* root */
uint8_t buf[4 * BLAKE2B_BLOCKBYTES];
size_t buflen; /* bytes currently held in buf */
} blake2bp_state;
#pragma pack(pop)
// Streaming API
// Each variant offers init (unkeyed), init_key (keyed), update (absorb input)
// and final (write the digest); the sequential variants also offer init_param
// for a caller-supplied parameter block. All functions return int.
int blake2s_init( blake2s_state *S, const uint8_t outlen );
int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
int blake2s_init_param( blake2s_state *S, const blake2s_param *P );
int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen );
int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen );
int blake2b_init( blake2b_state *S, const uint8_t outlen );
int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
int blake2b_init_param( blake2b_state *S, const blake2b_param *P );
int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen );
int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen );
int blake2sp_init( blake2sp_state *S, const uint8_t outlen );
int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen );
int blake2sp_final( blake2sp_state *S, uint8_t *out, uint8_t outlen );
int blake2bp_init( blake2bp_state *S, const uint8_t outlen );
int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen );
int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen );
int blake2bp_final( blake2bp_state *S, uint8_t *out, uint8_t outlen );
// Simple API
// One-shot hashing: argument order is out, in, key, outlen, inlen, keylen.
int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
int blake2sp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
int blake2bp( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen );
/* Generic entry point: forwards every argument unchanged to blake2b() and
   returns its result. */
static inline int blake2( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen )
{
  const int status = blake2b( out, in, key, outlen, inlen, keylen );
  return status;
}
#if defined(__cplusplus)

sse/blake2b-load-sse2.h Normal file
View File

@ -0,0 +1,68 @@
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#pragma once
#ifndef __BLAKE2B_LOAD_SSE2_H__
#define __BLAKE2B_LOAD_SSE2_H__
/*
 * LOAD_MSG_<r>_<i>(b0, b1), r = 0..11, i = 1..4.
 *
 * Each macro assigns b0 and b1 with _mm_set_epi64x from two of the
 * identifiers m0..m15, which must be in scope at the expansion site
 * (_mm_set_epi64x takes the high element first). Rows 10 and 11 repeat
 * rows 0 and 1.
 * NOTE(review): presumably r is the round and i the quarter-round step of the
 * BLAKE2b message schedule - confirm against the round macro that uses these.
 *
 * The expansion is two statements without a do/while wrapper, so do not use
 * it as the body of an unbraced if/for.
 */
#define LOAD_MSG_0_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
#define LOAD_MSG_0_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
#define LOAD_MSG_0_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_0_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
#define LOAD_MSG_1_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
#define LOAD_MSG_1_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
#define LOAD_MSG_1_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
#define LOAD_MSG_1_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)
#define LOAD_MSG_2_1(b0, b1) b0 = _mm_set_epi64x(m12, m11); b1 = _mm_set_epi64x(m15, m5)
#define LOAD_MSG_2_2(b0, b1) b0 = _mm_set_epi64x(m0, m8); b1 = _mm_set_epi64x(m13, m2)
#define LOAD_MSG_2_3(b0, b1) b0 = _mm_set_epi64x(m3, m10); b1 = _mm_set_epi64x(m9, m7)
#define LOAD_MSG_2_4(b0, b1) b0 = _mm_set_epi64x(m6, m14); b1 = _mm_set_epi64x(m4, m1)
#define LOAD_MSG_3_1(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m13)
#define LOAD_MSG_3_2(b0, b1) b0 = _mm_set_epi64x(m1, m9); b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_3_3(b0, b1) b0 = _mm_set_epi64x(m5, m2); b1 = _mm_set_epi64x(m15, m4)
#define LOAD_MSG_3_4(b0, b1) b0 = _mm_set_epi64x(m10, m6); b1 = _mm_set_epi64x(m8, m0)
#define LOAD_MSG_4_1(b0, b1) b0 = _mm_set_epi64x(m5, m9); b1 = _mm_set_epi64x(m10, m2)
#define LOAD_MSG_4_2(b0, b1) b0 = _mm_set_epi64x(m7, m0); b1 = _mm_set_epi64x(m15, m4)
#define LOAD_MSG_4_3(b0, b1) b0 = _mm_set_epi64x(m11, m14); b1 = _mm_set_epi64x(m3, m6)
#define LOAD_MSG_4_4(b0, b1) b0 = _mm_set_epi64x(m12, m1); b1 = _mm_set_epi64x(m13, m8)
#define LOAD_MSG_5_1(b0, b1) b0 = _mm_set_epi64x(m6, m2); b1 = _mm_set_epi64x(m8, m0)
#define LOAD_MSG_5_2(b0, b1) b0 = _mm_set_epi64x(m10, m12); b1 = _mm_set_epi64x(m3, m11)
#define LOAD_MSG_5_3(b0, b1) b0 = _mm_set_epi64x(m7, m4); b1 = _mm_set_epi64x(m1, m15)
#define LOAD_MSG_5_4(b0, b1) b0 = _mm_set_epi64x(m5, m13); b1 = _mm_set_epi64x(m9, m14)
#define LOAD_MSG_6_1(b0, b1) b0 = _mm_set_epi64x(m1, m12); b1 = _mm_set_epi64x(m4, m14)
#define LOAD_MSG_6_2(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m10, m13)
#define LOAD_MSG_6_3(b0, b1) b0 = _mm_set_epi64x(m6, m0); b1 = _mm_set_epi64x(m8, m9)
#define LOAD_MSG_6_4(b0, b1) b0 = _mm_set_epi64x(m3, m7); b1 = _mm_set_epi64x(m11, m2)
#define LOAD_MSG_7_1(b0, b1) b0 = _mm_set_epi64x(m7, m13); b1 = _mm_set_epi64x(m3, m12)
#define LOAD_MSG_7_2(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m9, m1)
#define LOAD_MSG_7_3(b0, b1) b0 = _mm_set_epi64x(m15, m5); b1 = _mm_set_epi64x(m2, m8)
#define LOAD_MSG_7_4(b0, b1) b0 = _mm_set_epi64x(m4, m0); b1 = _mm_set_epi64x(m10, m6)
#define LOAD_MSG_8_1(b0, b1) b0 = _mm_set_epi64x(m14, m6); b1 = _mm_set_epi64x(m0, m11)
#define LOAD_MSG_8_2(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m8, m3)
#define LOAD_MSG_8_3(b0, b1) b0 = _mm_set_epi64x(m13, m12); b1 = _mm_set_epi64x(m10, m1)
#define LOAD_MSG_8_4(b0, b1) b0 = _mm_set_epi64x(m7, m2); b1 = _mm_set_epi64x(m5, m4)
#define LOAD_MSG_9_1(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m1, m7)
#define LOAD_MSG_9_2(b0, b1) b0 = _mm_set_epi64x(m4, m2); b1 = _mm_set_epi64x(m5, m6)
#define LOAD_MSG_9_3(b0, b1) b0 = _mm_set_epi64x(m9, m15); b1 = _mm_set_epi64x(m13, m3)
#define LOAD_MSG_9_4(b0, b1) b0 = _mm_set_epi64x(m14, m11); b1 = _mm_set_epi64x(m0, m12)
#define LOAD_MSG_10_1(b0, b1) b0 = _mm_set_epi64x(m2, m0); b1 = _mm_set_epi64x(m6, m4)
#define LOAD_MSG_10_2(b0, b1) b0 = _mm_set_epi64x(m3, m1); b1 = _mm_set_epi64x(m7, m5)
#define LOAD_MSG_10_3(b0, b1) b0 = _mm_set_epi64x(m10, m8); b1 = _mm_set_epi64x(m14, m12)
#define LOAD_MSG_10_4(b0, b1) b0 = _mm_set_epi64x(m11, m9); b1 = _mm_set_epi64x(m15, m13)
#define LOAD_MSG_11_1(b0, b1) b0 = _mm_set_epi64x(m4, m14); b1 = _mm_set_epi64x(m13, m9)
#define LOAD_MSG_11_2(b0, b1) b0 = _mm_set_epi64x(m8, m10); b1 = _mm_set_epi64x(m6, m15)
#define LOAD_MSG_11_3(b0, b1) b0 = _mm_set_epi64x(m0, m1); b1 = _mm_set_epi64x(m5, m11)
#define LOAD_MSG_11_4(b0, b1) b0 = _mm_set_epi64x(m2, m12); b1 = _mm_set_epi64x(m3, m7)

sse/blake2b-load-sse41.h Normal file
View File

@ -0,0 +1,402 @@
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#pragma once
#ifndef __BLAKE2B_LOAD_SSE41_H__
#define __BLAKE2B_LOAD_SSE41_H__
/*
 * LOAD_MSG_<r>_<i>(b0, b1), r = 0..11, i = 1..4.
 *
 * Each macro assigns b0 and b1 by combining the vector identifiers m0..m7
 * (which must be in scope at the expansion site) with unpack, alignr, blend
 * and shuffle intrinsics. Rows 10 and 11 repeat rows 0 and 1.
 * NOTE(review): presumably each mk holds two consecutive 64-bit message words
 * and r/i select the round and quarter-round step of the BLAKE2b message
 * schedule - confirm against the code that loads m0..m7.
 *
 * Every expansion is wrapped in do { } while(0) and is safe as a single
 * statement.
 */
#define LOAD_MSG_0_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m1); \
b1 = _mm_unpacklo_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_0_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m0, m1); \
b1 = _mm_unpackhi_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_0_3(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m4, m5); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_0_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m5); \
b1 = _mm_unpackhi_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_1_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m7, m2); \
b1 = _mm_unpackhi_epi64(m4, m6); \
} while(0)
#define LOAD_MSG_1_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_alignr_epi8(m3, m7, 8); \
} while(0)
#define LOAD_MSG_1_3(b0, b1) \
do \
{ \
b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
b1 = _mm_unpackhi_epi64(m5, m2); \
} while(0)
#define LOAD_MSG_1_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m6, m1); \
b1 = _mm_unpackhi_epi64(m3, m1); \
} while(0)
#define LOAD_MSG_2_1(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m6, m5, 8); \
b1 = _mm_unpackhi_epi64(m2, m7); \
} while(0)
#define LOAD_MSG_2_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m4, m0); \
b1 = _mm_blend_epi16(m1, m6, 0xF0); \
} while(0)
#define LOAD_MSG_2_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m5, m1, 0xF0); \
b1 = _mm_unpackhi_epi64(m3, m4); \
} while(0)
#define LOAD_MSG_2_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m7, m3); \
b1 = _mm_alignr_epi8(m2, m0, 8); \
} while(0)
#define LOAD_MSG_3_1(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m3, m1); \
b1 = _mm_unpackhi_epi64(m6, m5); \
} while(0)
#define LOAD_MSG_3_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m0); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_3_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m1, m2, 0xF0); \
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
} while(0)
#define LOAD_MSG_3_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m3, m5); \
b1 = _mm_unpacklo_epi64(m0, m4); \
} while(0)
#define LOAD_MSG_4_1(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m2); \
b1 = _mm_unpacklo_epi64(m1, m5); \
} while(0)
#define LOAD_MSG_4_2(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m0, m3, 0xF0); \
b1 = _mm_blend_epi16(m2, m7, 0xF0); \
} while(0)
#define LOAD_MSG_4_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m7, m5, 0xF0); \
b1 = _mm_blend_epi16(m3, m1, 0xF0); \
} while(0)
#define LOAD_MSG_4_4(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m6, m0, 8); \
b1 = _mm_blend_epi16(m4, m6, 0xF0); \
} while(0)
#define LOAD_MSG_5_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m1, m3); \
b1 = _mm_unpacklo_epi64(m0, m4); \
} while(0)
#define LOAD_MSG_5_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m6, m5); \
b1 = _mm_unpackhi_epi64(m5, m1); \
} while(0)
#define LOAD_MSG_5_3(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m2, m3, 0xF0); \
b1 = _mm_unpackhi_epi64(m7, m0); \
} while(0)
#define LOAD_MSG_5_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m6, m2); \
b1 = _mm_blend_epi16(m7, m4, 0xF0); \
} while(0)
#define LOAD_MSG_6_1(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m6, m0, 0xF0); \
b1 = _mm_unpacklo_epi64(m7, m2); \
} while(0)
#define LOAD_MSG_6_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m2, m7); \
b1 = _mm_alignr_epi8(m5, m6, 8); \
} while(0)
#define LOAD_MSG_6_3(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m3); \
b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1,0,3,2)); \
} while(0)
#define LOAD_MSG_6_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m3, m1); \
b1 = _mm_blend_epi16(m1, m5, 0xF0); \
} while(0)
#define LOAD_MSG_7_1(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m6, m3); \
b1 = _mm_blend_epi16(m6, m1, 0xF0); \
} while(0)
#define LOAD_MSG_7_2(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m7, m5, 8); \
b1 = _mm_unpackhi_epi64(m0, m4); \
} while(0)
#define LOAD_MSG_7_3(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m2, m7); \
b1 = _mm_unpacklo_epi64(m4, m1); \
} while(0)
#define LOAD_MSG_7_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m2); \
b1 = _mm_unpacklo_epi64(m3, m5); \
} while(0)
#define LOAD_MSG_8_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m3, m7); \
b1 = _mm_alignr_epi8(m0, m5, 8); \
} while(0)
#define LOAD_MSG_8_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m7, m4); \
b1 = _mm_alignr_epi8(m4, m1, 8); \
} while(0)
#define LOAD_MSG_8_3(b0, b1) \
do \
{ \
b0 = m6; \
b1 = _mm_alignr_epi8(m5, m0, 8); \
} while(0)
#define LOAD_MSG_8_4(b0, b1) \
do \
{ \
b0 = _mm_blend_epi16(m1, m3, 0xF0); \
b1 = m2; \
} while(0)
#define LOAD_MSG_9_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_unpackhi_epi64(m3, m0); \
} while(0)
#define LOAD_MSG_9_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m1, m2); \
b1 = _mm_blend_epi16(m3, m2, 0xF0); \
} while(0)
#define LOAD_MSG_9_3(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m7, m4); \
b1 = _mm_unpackhi_epi64(m1, m6); \
} while(0)
#define LOAD_MSG_9_4(b0, b1) \
do \
{ \
b0 = _mm_alignr_epi8(m7, m5, 8); \
b1 = _mm_unpacklo_epi64(m6, m0); \
} while(0)
#define LOAD_MSG_10_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m0, m1); \
b1 = _mm_unpacklo_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_10_2(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m0, m1); \
b1 = _mm_unpackhi_epi64(m2, m3); \
} while(0)
#define LOAD_MSG_10_3(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m4, m5); \
b1 = _mm_unpacklo_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_10_4(b0, b1) \
do \
{ \
b0 = _mm_unpackhi_epi64(m4, m5); \
b1 = _mm_unpackhi_epi64(m6, m7); \
} while(0)
#define LOAD_MSG_11_1(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m7, m2); \
b1 = _mm_unpackhi_epi64(m4, m6); \
} while(0)
#define LOAD_MSG_11_2(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m5, m4); \
b1 = _mm_alignr_epi8(m3, m7, 8); \
} while(0)
#define LOAD_MSG_11_3(b0, b1) \
do \
{ \
b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1,0,3,2)); \
b1 = _mm_unpackhi_epi64(m5, m2); \
} while(0)
#define LOAD_MSG_11_4(b0, b1) \
do \
{ \
b0 = _mm_unpacklo_epi64(m6, m1); \
b1 = _mm_unpackhi_epi64(m3, m1); \
} while(0)

sse/blake2b-round.h Normal file
View File

@ -0,0 +1,160 @@
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#pragma once
#ifndef __BLAKE2B_ROUND_H__
#define __BLAKE2B_ROUND_H__
/* 128-bit load/store shorthands: LOAD/STORE use the aligned intrinsics,
   LOADU/STOREU the unaligned ones. */
#define LOAD(p) _mm_load_si128( (__m128i *)(p) )
#define STORE(p,r) _mm_store_si128((__m128i *)(p), r)
#define LOADU(p) _mm_loadu_si128( (__m128i *)(p) )
#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r)
/* Bit-pattern-preserving casts between integer and float vector types. */
#define TOF(reg) _mm_castsi128_ps((reg))
#define TOI(reg) _mm_castps_si128((reg))
/* Branch hint: tells the compiler x is expected to equal 1. */
#define LIKELY(x) __builtin_expect((x),1)
/* Microarchitecture-specific macros */
/*
 * _mm_roti_epi64(x, c): rotate each 64-bit lane of x. Callers pass the
 * rotation as a negative count (-32, -24, -16, -63), i.e. a right rotation
 * by -c bits.
 *
 * Without XOP the rotation is emulated. With SSSE3 the byte-multiple
 * rotations become shuffles (the 24- and 16-bit cases use the shuffle masks
 * r24 and r16, which must be in scope at the expansion site) and the 63-bit
 * case uses an add for the left shift by one; otherwise two shifts are
 * combined. Arguments are evaluated more than once, so pass side-effect-free
 * expressions only.
 */
#ifndef HAVE_XOP
#ifdef HAVE_SSSE3
#define _mm_roti_epi64(x, c) \
( (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \
: (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \
: (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \
: (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c)))) )
#else
#define _mm_roti_epi64(r, c) _mm_xor_si128(_mm_srli_epi64( (r), -(c) ),_mm_slli_epi64( (r), 64-(-(c)) ))
#endif
#else
/* ... */
#endif
/* First half of the G mixing step, applied to all four columns at once
   (low and high halves of each row): a += b + m; d = (d ^ a) >>> 32;
   c += d; b = (b ^ c) >>> 24.  All arguments are modified in place except
   b0/b1, which carry the message words.
   The last line has no continuation backslash, so the macro ends here
   and cannot absorb whatever line follows it. */
#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
  row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
  row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
  \
  row4l = _mm_xor_si128(row4l, row1l); \
  row4h = _mm_xor_si128(row4h, row1h); \
  \
  row4l = _mm_roti_epi64(row4l, -32); \
  row4h = _mm_roti_epi64(row4h, -32); \
  \
  row3l = _mm_add_epi64(row3l, row4l); \
  row3h = _mm_add_epi64(row3h, row4h); \
  \
  row2l = _mm_xor_si128(row2l, row3l); \
  row2h = _mm_xor_si128(row2h, row3h); \
  \
  row2l = _mm_roti_epi64(row2l, -24); \
  row2h = _mm_roti_epi64(row2h, -24);
/* Second half of the G mixing step, applied to all four columns at once:
   a += b + m; d = (d ^ a) >>> 16; c += d; b = (b ^ c) >>> 63.
   Same calling convention as G1.  The last line has no continuation
   backslash, so the macro ends here and cannot absorb the directive that
   follows it. */
#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \
  row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \
  row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \
  \
  row4l = _mm_xor_si128(row4l, row1l); \
  row4h = _mm_xor_si128(row4h, row1h); \
  \
  row4l = _mm_roti_epi64(row4l, -16); \
  row4h = _mm_roti_epi64(row4h, -16); \
  \
  row3l = _mm_add_epi64(row3l, row4l); \
  row3h = _mm_add_epi64(row3h, row4h); \
  \
  row2l = _mm_xor_si128(row2l, row3l); \
  row2h = _mm_xor_si128(row2h, row3h); \
  \
  row2l = _mm_roti_epi64(row2l, -63); \
  row2h = _mm_roti_epi64(row2h, -63);
/* DIAGONALIZE / UNDIAGONALIZE re-arrange rows 2, 3 and 4 of the state (each
   row is split into a low and a high register) between the column step and
   the diagonal step of a round, and back again.  Row 1 is passed for
   symmetry but left untouched.  Both macros use the scratch registers t0
   and t1, which must be declared at the expansion site.
   Exactly one pair of definitions is selected: the SSSE3 pair uses
   _mm_alignr_epi8, the fallback pair uses SSE2 unpack operations only. */
#if defined(HAVE_SSSE3)
#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
  t0 = _mm_alignr_epi8(row2h, row2l, 8); \
  t1 = _mm_alignr_epi8(row2l, row2h, 8); \
  row2l = t0; \
  row2h = t1; \
  \
  t0 = row3l; \
  row3l = row3h; \
  row3h = t0;    \
  \
  t0 = _mm_alignr_epi8(row4h, row4l, 8); \
  t1 = _mm_alignr_epi8(row4l, row4h, 8); \
  row4l = t1; \
  row4h = t0;

#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
  t0 = _mm_alignr_epi8(row2l, row2h, 8); \
  t1 = _mm_alignr_epi8(row2h, row2l, 8); \
  row2l = t0; \
  row2h = t1; \
  \
  t0 = row3l; \
  row3l = row3h; \
  row3h = t0; \
  \
  t0 = _mm_alignr_epi8(row4l, row4h, 8); \
  t1 = _mm_alignr_epi8(row4h, row4l, 8); \
  row4l = t1; \
  row4h = t0;
#else

#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
  t0 = row4l;\
  t1 = row2l;\
  row4l = row3l;\
  row3l = row3h;\
  row3h = row4l;\
  row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); \
  row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); \
  row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); \
  row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1))

#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \
  t0 = row3l;\
  row3l = row3h;\
  row3h = t0;\
  t0 = row2l;\
  t1 = row4l;\
  row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); \
  row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); \
  row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); \
  row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1))

#endif
#if defined(HAVE_SSE41)
#include "blake2b-load-sse41.h"
#include "blake2b-load-sse2.h"
/* One full round r: column step (G1 + G2 on the rows as they are), then
   DIAGONALIZE, diagonal step (G1 + G2 again), then UNDIAGONALIZE to restore
   the row layout for the next round.  The four LOAD_MSG_<r>_<k> macros are
   selected by token pasting, so r must be a literal round number.
   Expects row1l..row4h, b0, b1 (and the scratch registers used by the
   helper macros) to be in scope. */
#define ROUND(r) \
  LOAD_MSG_ ##r ##_1(b0, b1); \
  G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
  LOAD_MSG_ ##r ##_2(b0, b1); \
  G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
  DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \
  LOAD_MSG_ ##r ##_3(b0, b1); \
  G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
  LOAD_MSG_ ##r ##_4(b0, b1); \
  G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \
  UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h);

sse/blake2b.c Normal file
View File

@ -0,0 +1,431 @@
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2.h"
#include "blake2-impl.h"
#include "blake2-config.h"
#include <emmintrin.h>
#if defined(HAVE_SSSE3)
#include <tmmintrin.h>
#if defined(HAVE_SSE41)
#include <smmintrin.h>
#if defined(HAVE_AVX)
#include <immintrin.h>
#if defined(HAVE_XOP)
#include <x86intrin.h>
#include "blake2b-round.h"
/* BLAKE2b initialisation vector: eight 64-bit words, aligned so that the
   aligned vector loads in blake2b_compress can read it directly. */
ALIGN( 64 ) static const uint64_t blake2b_IV[8] =
{
  0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL,
  0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
  0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
  0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL
};
/* Message word permutation, one row of 16 indices per round.  Rows 10 and
   11 repeat rows 0 and 1. */
static const uint8_t blake2b_sigma[12][16] =
{
  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 } ,
  { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 } ,
  {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 } ,
  {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 } ,
  {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 } ,
  { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 } ,
  { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 } ,
  {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 } ,
  { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 , 0 } ,
  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 }
};
/* Some helper functions, not necessarily useful */

/* Set the second finalisation flag word f[1] to all ones.  Always returns 0. */
static inline int blake2b_set_lastnode( blake2b_state *S )
{
  S->f[1] = ~0ULL;
  return 0;
}

/* Clear the second finalisation flag word f[1].  Always returns 0. */
static inline int blake2b_clear_lastnode( blake2b_state *S )
{
  S->f[1] = 0ULL;
  return 0;
}

/* Set the first finalisation flag word f[0] to all ones; when the state is
   marked as a last node, f[1] is set as well.  Always returns 0. */
static inline int blake2b_set_lastblock( blake2b_state *S )
{
  if( S->last_node ) blake2b_set_lastnode( S );

  S->f[0] = ~0ULL;
  return 0;
}

/* Clear f[0]; when the state is marked as a last node, f[1] is cleared as
   well.  Always returns 0. */
static inline int blake2b_clear_lastblock( blake2b_state *S )
{
  if( S->last_node ) blake2b_clear_lastnode( S );

  S->f[0] = 0ULL;
  return 0;
}
/* Add inc to the 128-bit counter held in t[0] (low word) and t[1] (high
   word).  The carry into t[1] is detected by the unsigned wrap-around of
   t[0].  The increment is applied exactly once on every target; no
   compiler-specific 128-bit integer type is needed.  Always returns 0. */
static inline int blake2b_increment_counter( blake2b_state *S, const uint64_t inc )
{
  S->t[0] += inc;
  S->t[1] += ( S->t[0] < inc );
  return 0;
}
// Parameter-related functions
// Each setter stores one field of the parameter block and always returns 0.
// No range checking is performed here.
static inline int blake2b_param_set_digest_length( blake2b_param *P, const uint8_t digest_length )
{
  P->digest_length = digest_length;
  return 0;
}

static inline int blake2b_param_set_fanout( blake2b_param *P, const uint8_t fanout )
{
  P->fanout = fanout;
  return 0;
}

static inline int blake2b_param_set_max_depth( blake2b_param *P, const uint8_t depth )
{
  P->depth = depth;
  return 0;
}

static inline int blake2b_param_set_leaf_length( blake2b_param *P, const uint32_t leaf_length )
{
  P->leaf_length = leaf_length;
  return 0;
}

static inline int blake2b_param_set_node_offset( blake2b_param *P, const uint64_t node_offset )
{
  P->node_offset = node_offset;
  return 0;
}

static inline int blake2b_param_set_node_depth( blake2b_param *P, const uint8_t node_depth )
{
  P->node_depth = node_depth;
  return 0;
}

static inline int blake2b_param_set_inner_length( blake2b_param *P, const uint8_t inner_length )
{
  P->inner_length = inner_length;
  return 0;
}

// Copies exactly BLAKE2B_SALTBYTES bytes from salt.
static inline int blake2b_param_set_salt( blake2b_param *P, const uint8_t salt[BLAKE2B_SALTBYTES] )
{
  memcpy( P->salt, salt, BLAKE2B_SALTBYTES );
  return 0;
}

// Copies exactly BLAKE2B_PERSONALBYTES bytes from personal.
static inline int blake2b_param_set_personal( blake2b_param *P, const uint8_t personal[BLAKE2B_PERSONALBYTES] )
{
  memcpy( P->personal, personal, BLAKE2B_PERSONALBYTES );
  return 0;
}
/* Zero the whole state, then load the chaining value h with the IV.
   Always returns 0. */
static inline int blake2b_init0( blake2b_state *S )
{
  memset( S, 0, sizeof( blake2b_state ) );

  for( int i = 0; i < 8; ++i ) S->h[i] = blake2b_IV[i];

  return 0;
}
/* init xors IV with input parameter block */
/* Zeroes the state and sets h to IV XOR *P, byte by byte.  Reads
   BLAKE2B_OUTBYTES bytes from both the IV table and the parameter block
   (assumes blake2b_param is at least that large - TODO confirm against its
   declaration).  Always returns 0. */
int blake2b_init_param( blake2b_state *S, const blake2b_param *P )
{
  /* Read-only byte views; const is kept because neither object is written. */
  const uint8_t *v = ( const uint8_t * )( blake2b_IV );
  const uint8_t *p = ( const uint8_t * )( P );
  uint8_t *h = ( uint8_t * )( S->h );

  memset( S, 0, sizeof( blake2b_state ) );

  /* IV XOR ParamBlock */
  for( int i = 0; i < BLAKE2B_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];

  return 0;
}
/* Some sort of default parameter block initialization, for sequential blake2b */
/* Returns -1 when outlen is 0 or larger than BLAKE2B_OUTBYTES, otherwise
   the result of blake2b_init_param. */
int blake2b_init( blake2b_state *S, const uint8_t outlen )
{
  if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;

  /* Sequential (non-tree) hashing: fanout 1, depth 1, no key.  Every field
     not named here is zero-initialised. */
  const blake2b_param P =
  {
    .digest_length = outlen,
    .key_length = 0,
    .fanout = 1,
    .depth = 1
  };
  return blake2b_init_param( S, &P );
}
/* Initialise S for keyed sequential hashing.
   Returns -1 when outlen is 0 or too large, when key is NULL, when keylen is
   0 or larger than BLAKE2B_KEYBYTES, or when the parameter-block setup
   fails; returns 0 on success.  The key is absorbed as one zero-padded
   block. */
int blake2b_init_key( blake2b_state *S, const uint8_t outlen, const void *key, const uint8_t keylen )
{
  if ( ( !outlen ) || ( outlen > BLAKE2B_OUTBYTES ) ) return -1;

  if ( ( NULL == key ) || ( !keylen ) || keylen > BLAKE2B_KEYBYTES ) return -1;

  /* Sequential (non-tree) hashing: fanout 1, depth 1.  Every field not
     named here is zero-initialised. */
  const blake2b_param P =
  {
    .digest_length = outlen,
    .key_length = keylen,
    .fanout = 1,
    .depth = 1
  };

  /* A failed setup must be reported as a failure, not as success. */
  if( blake2b_init_param( S, &P ) < 0 )
    return -1;

  {
    uint8_t block[BLAKE2B_BLOCKBYTES];
    memset( block, 0, BLAKE2B_BLOCKBYTES );
    memcpy( block, key, keylen );
    blake2b_update( S, block, BLAKE2B_BLOCKBYTES );
    secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
  }
  return 0;
}
/* Compress one message block into the chaining value S->h.
   The working state is four rows of four 64-bit words, each row split into
   a low and a high 128-bit register: rows 1-2 come from h, row 3 from
   IV[0..3], row 4 from IV[4..7] XOR (t, f).  Twelve rounds are applied and
   the result is folded back into h.
   S->h, S->t, S->f and blake2b_IV are read with aligned loads, so they must
   be 16-byte aligned; block may be unaligned on the SSE4.1 path.
   Always returns 0. */
static inline int blake2b_compress( blake2b_state *S, const uint8_t block[BLAKE2B_BLOCKBYTES] )
{
  __m128i row1l, row1h;
  __m128i row2l, row2h;
  __m128i row3l, row3h;
  __m128i row4l, row4h;
  __m128i b0, b1;
  __m128i t0, t1;
#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
  /* Byte-shuffle masks used by _mm_roti_epi64 for the 16- and 24-bit rotations. */
  const __m128i r16 = _mm_setr_epi8( 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 );
  const __m128i r24 = _mm_setr_epi8( 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 );
#endif
  /* The message words are named m0.. because the LOAD_MSG_* macros refer to
     them by name: eight vectors on the SSE4.1 path, sixteen scalars otherwise. */
#if defined(HAVE_SSE41)
  const __m128i m0 = LOADU( block + 00 );
  const __m128i m1 = LOADU( block + 16 );
  const __m128i m2 = LOADU( block + 32 );
  const __m128i m3 = LOADU( block + 48 );
  const __m128i m4 = LOADU( block + 64 );
  const __m128i m5 = LOADU( block + 80 );
  const __m128i m6 = LOADU( block + 96 );
  const __m128i m7 = LOADU( block + 112 );
#else
  const uint64_t  m0 = ( ( const uint64_t * )block )[ 0];
  const uint64_t  m1 = ( ( const uint64_t * )block )[ 1];
  const uint64_t  m2 = ( ( const uint64_t * )block )[ 2];
  const uint64_t  m3 = ( ( const uint64_t * )block )[ 3];
  const uint64_t  m4 = ( ( const uint64_t * )block )[ 4];
  const uint64_t  m5 = ( ( const uint64_t * )block )[ 5];
  const uint64_t  m6 = ( ( const uint64_t * )block )[ 6];
  const uint64_t  m7 = ( ( const uint64_t * )block )[ 7];
  const uint64_t  m8 = ( ( const uint64_t * )block )[ 8];
  const uint64_t  m9 = ( ( const uint64_t * )block )[ 9];
  const uint64_t m10 = ( ( const uint64_t * )block )[10];
  const uint64_t m11 = ( ( const uint64_t * )block )[11];
  const uint64_t m12 = ( ( const uint64_t * )block )[12];
  const uint64_t m13 = ( ( const uint64_t * )block )[13];
  const uint64_t m14 = ( ( const uint64_t * )block )[14];
  const uint64_t m15 = ( ( const uint64_t * )block )[15];
#endif
  row1l = LOAD( &S->h[0] );
  row1h = LOAD( &S->h[2] );
  row2l = LOAD( &S->h[4] );
  row2h = LOAD( &S->h[6] );
  row3l = LOAD( &blake2b_IV[0] );
  row3h = LOAD( &blake2b_IV[2] );
  row4l = _mm_xor_si128( LOAD( &blake2b_IV[4] ), LOAD( &S->t[0] ) );
  row4h = _mm_xor_si128( LOAD( &blake2b_IV[6] ), LOAD( &S->f[0] ) );
  ROUND( 0 );
  ROUND( 1 );
  ROUND( 2 );
  ROUND( 3 );
  ROUND( 4 );
  ROUND( 5 );
  ROUND( 6 );
  ROUND( 7 );
  ROUND( 8 );
  ROUND( 9 );
  ROUND( 10 );
  ROUND( 11 );
  /* Feed-forward: h ^= (rows 1-2) ^ (rows 3-4). */
  row1l = _mm_xor_si128( row3l, row1l );
  row1h = _mm_xor_si128( row3h, row1h );
  STORE( &S->h[0], _mm_xor_si128( LOAD( &S->h[0] ), row1l ) );
  STORE( &S->h[2], _mm_xor_si128( LOAD( &S->h[2] ), row1h ) );
  row2l = _mm_xor_si128( row4l, row2l );
  row2h = _mm_xor_si128( row4h, row2h );
  STORE( &S->h[4], _mm_xor_si128( LOAD( &S->h[4] ), row2l ) );
  STORE( &S->h[6], _mm_xor_si128( LOAD( &S->h[6] ), row2h ) );
  return 0;
}
/* Absorb inlen bytes from in.
   S->buf holds up to two blocks.  Input is only compressed once it would
   overflow the buffer, so the most recent data always stays buffered for
   blake2b_final.  Always returns 0. */
int blake2b_update( blake2b_state *S, const uint8_t *in, uint64_t inlen )
{
  while( inlen > 0 )
  {
    size_t left = S->buflen;
    size_t fill = 2 * BLAKE2B_BLOCKBYTES - left;

    if( inlen > fill )
    {
      memcpy( S->buf + left, in, fill ); // Fill buffer
      S->buflen += fill;
      blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
      blake2b_compress( S, S->buf ); // Compress
      memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES ); // Shift buffer left
      S->buflen -= BLAKE2B_BLOCKBYTES; // One block consumed; without this fill stays 0 forever
      in += fill;
      inlen -= fill;
    }
    else // inlen <= fill
    {
      memcpy( S->buf + left, in, inlen );
      S->buflen += inlen; // Be lazy, do not compress
      in += inlen;
      inlen -= inlen;
    }
  }

  return 0;
}
/* Finish the hash and copy the first outlen bytes of the chaining value to
   out.  Returns -1 when outlen exceeds BLAKE2B_OUTBYTES (the copy would read
   past S->h), 0 otherwise. */
int blake2b_final( blake2b_state *S, uint8_t *out, uint8_t outlen )
{
  if( outlen > BLAKE2B_OUTBYTES ) return -1;

  /* More than one block buffered: compress the first one as an ordinary
     block and move the remainder to the front. */
  if( S->buflen > BLAKE2B_BLOCKBYTES )
  {
    blake2b_increment_counter( S, BLAKE2B_BLOCKBYTES );
    blake2b_compress( S, S->buf );
    S->buflen -= BLAKE2B_BLOCKBYTES;
    memcpy( S->buf, S->buf + BLAKE2B_BLOCKBYTES, S->buflen );
  }

  blake2b_increment_counter( S, S->buflen );
  blake2b_set_lastblock( S );
  memset( S->buf + S->buflen, 0, 2 * BLAKE2B_BLOCKBYTES - S->buflen ); /* Padding */
  blake2b_compress( S, S->buf );
  memcpy( out, &S->h[0], outlen );
  return 0;
}
/* One-shot BLAKE2b: hash inlen bytes of in into outlen bytes at out,
   optionally keyed with keylen bytes of key.
   Returns -1 on a NULL out, a NULL in with non-zero inlen, or a failed
   initialisation (bad outlen / keylen); 0 on success.  A NULL key selects
   unkeyed hashing. */
int blake2b( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen )
{
  blake2b_state S[1];

  /* Verify parameters */
  if ( NULL == in && inlen > 0 ) return -1;

  if ( NULL == out ) return -1;

  if( NULL == key ) keylen = 0;

  /* Exactly one of the two initialisers runs; calling both would discard
     the key block absorbed by blake2b_init_key. */
  if( keylen )
  {
    if( blake2b_init_key( S, outlen, key, keylen ) < 0 ) return -1;
  }
  else
  {
    if( blake2b_init( S, outlen ) < 0 ) return -1;
  }

  blake2b_update( S, ( const uint8_t * )in, inlen );
  blake2b_final( S, out, outlen );
  return 0;
}
#if defined(SUPERCOP)
/* SUPERCOP entry point: unkeyed BLAKE2b with a BLAKE2B_OUTBYTES-byte digest. */
int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
{
  return blake2b( out, in, NULL, BLAKE2B_OUTBYTES, inlen, 0 );
}
#endif
#if defined(BLAKE2B_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
/* Self-test: hash every prefix of the byte sequence 0,1,2,... under the key
   0,1,2,... and compare with the known-answer table.  Prints "ok" and
   returns 0 on success; prints "error" and returns -1 on the first
   mismatch. */
int main( int argc, char **argv )
{
  uint8_t key[BLAKE2B_KEYBYTES];
  uint8_t buf[KAT_LENGTH];

  ( void )argc;
  ( void )argv;

  for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i )
    key[i] = ( uint8_t )i;

  for( size_t i = 0; i < KAT_LENGTH; ++i )
    buf[i] = ( uint8_t )i;

  for( size_t i = 0; i < KAT_LENGTH; ++i )
  {
    uint8_t hash[BLAKE2B_OUTBYTES];
    blake2b( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES );

    if( 0 != memcmp( hash, blake2b_keyed_kat[i], BLAKE2B_OUTBYTES ) )
    {
      puts( "error" );
      return -1;
    }
  }

  puts( "ok" );
  return 0;
}
#endif

sse/blake2bp.c Normal file
View File

@ -0,0 +1,296 @@
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#if defined(_OPENMP)
#include <omp.h>
#include "blake2.h"
#include "blake2-impl.h"
/* Initialise S as leaf number `offset` of the two-level BLAKE2bp tree.
   Every field of the parameter block is written before it is handed to
   blake2b_init_param, including fanout.  Returns whatever
   blake2b_init_param returns. */
static inline int blake2bp_init_leaf( blake2b_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset )
{
  blake2b_param P[1];
  P->digest_length = outlen;
  P->key_length = keylen;
  P->fanout = PARALLELISM_DEGREE;
  P->depth = 2;
  P->leaf_length = 0;
  P->node_offset = offset;
  P->node_depth = 0;
  P->inner_length = outlen;
  memset( P->reserved, 0, sizeof( P->reserved ) );
  memset( P->salt, 0, sizeof( P->salt ) );
  memset( P->personal, 0, sizeof( P->personal ) );
  return blake2b_init_param( S, P );
}
/* Initialise S as the root node (node depth 1, offset 0) of the two-level
   BLAKE2bp tree.  Every field of the parameter block is written before it is
   handed to blake2b_init_param, including fanout.  Returns whatever
   blake2b_init_param returns. */
static inline int blake2bp_init_root( blake2b_state *S, uint8_t outlen, uint8_t keylen )
{
  blake2b_param P[1];
  P->digest_length = outlen;
  P->key_length = keylen;
  P->fanout = PARALLELISM_DEGREE;
  P->depth = 2;
  P->leaf_length = 0;
  P->node_offset = 0;
  P->node_depth = 1;
  P->inner_length = outlen;
  memset( P->reserved, 0, sizeof( P->reserved ) );
  memset( P->salt, 0, sizeof( P->salt ) );
  memset( P->personal, 0, sizeof( P->personal ) );
  return blake2b_init_param( S, P );
}
/* Initialise an unkeyed BLAKE2bp state: clear the input buffer, set up the
   root and all PARALLELISM_DEGREE leaves, and mark the root and the final
   leaf as last nodes.  Returns -1 on an invalid outlen or a failed node
   setup, 0 on success. */
int blake2bp_init( blake2bp_state *S, const uint8_t outlen )
{
  if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;

  memset( S->buf, 0, sizeof( S->buf ) );
  S->buflen = 0;

  if( blake2bp_init_root( S->R, outlen, 0 ) < 0 )
    return -1;

  for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
    if( blake2bp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1;

  S->R->last_node = 1;
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
  return 0;
}
/* Initialise a keyed BLAKE2bp state.  Same setup as blake2bp_init, then the
   zero-padded key block is absorbed by every leaf.  Returns -1 on an invalid
   outlen, a NULL key, an invalid keylen, or a failed node setup; 0 on
   success. */
int blake2bp_init_key( blake2bp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen )
{
  if( !outlen || outlen > BLAKE2B_OUTBYTES ) return -1;

  if( !key || !keylen || keylen > BLAKE2B_KEYBYTES ) return -1;

  memset( S->buf, 0, sizeof( S->buf ) );
  S->buflen = 0;

  if( blake2bp_init_root( S->R, outlen, keylen ) < 0 )
    return -1;

  for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
    if( blake2bp_init_leaf( S->S[i], outlen, keylen, i ) < 0 ) return -1;

  S->R->last_node = 1;
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
  {
    uint8_t block[BLAKE2B_BLOCKBYTES];
    memset( block, 0, BLAKE2B_BLOCKBYTES );
    memcpy( block, key, keylen );

    for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
      blake2b_update( S->S[i], block, BLAKE2B_BLOCKBYTES );

    secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
  }
  return 0;
}
/* Absorb inlen bytes from in.
   Input is dealt to the leaves block by block in round-robin order: leaf i
   receives blocks i, i + PARALLELISM_DEGREE, ...  Only whole stripes of
   PARALLELISM_DEGREE blocks are forwarded; the remainder stays in S->buf
   until more data arrives or blake2bp_final is called.  Always returns 0. */
int blake2bp_update( blake2bp_state *S, const uint8_t *in, uint64_t inlen )
{
  size_t left = S->buflen;
  size_t fill = sizeof( S->buf ) - left;

  /* Complete and flush a partially filled stripe first. */
  if( left && inlen >= fill )
  {
    memcpy( S->buf + left, in, fill );

    for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
      blake2b_update( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, BLAKE2B_BLOCKBYTES );

    in += fill;
    inlen -= fill;
    left = 0;
  }

#if defined(_OPENMP)
  #pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE)
#else

  for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
#endif
  {
#if defined(_OPENMP)
    size_t      id__ = omp_get_thread_num();
#endif
    uint64_t inlen__ = inlen;
    const uint8_t *in__ = ( const uint8_t * )in;
    in__ += id__ * BLAKE2B_BLOCKBYTES;

    /* Consume this leaf's block from every complete stripe; never read
       past the end of the caller's input. */
    while( inlen__ >= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES )
    {
      blake2b_update( S->S[id__], in__, BLAKE2B_BLOCKBYTES );
      in__ += PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
      inlen__ -= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;
    }
  }

  /* Skip the stripes just consumed; what is left is less than one stripe. */
  in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES );
  inlen %= PARALLELISM_DEGREE * BLAKE2B_BLOCKBYTES;

  if( inlen > 0 )
    memcpy( S->buf + left, in, inlen );

  S->buflen = left + inlen;
  return 0;
}
/* Finish a BLAKE2bp computation: hand each leaf its share of the buffered
   tail (at most one block per leaf), finalise the leaves, feed their digests
   to the root in order and write the root digest to out.
   Returns the result of finalising the root. */
int blake2bp_final( blake2bp_state *S, uint8_t *out, const uint8_t outlen )
{
  uint8_t hash[PARALLELISM_DEGREE][BLAKE2B_OUTBYTES];

  for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
  {
    if( S->buflen > i * BLAKE2B_BLOCKBYTES )
    {
      size_t left = S->buflen - i * BLAKE2B_BLOCKBYTES;

      /* Leaf i owns only block i of the buffer. */
      if( left > BLAKE2B_BLOCKBYTES ) left = BLAKE2B_BLOCKBYTES;

      blake2b_update( S->S[i], S->buf + i * BLAKE2B_BLOCKBYTES, left );
    }

    blake2b_final( S->S[i], hash[i], BLAKE2B_OUTBYTES );
  }

  for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
    blake2b_update( S->R, hash[i], BLAKE2B_OUTBYTES );

  return blake2b_final( S->R, out, outlen );
}
int blake2bp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uint64_t inlen, uint8_t keylen )
blake2b_state S[PARALLELISM_DEGREE][1];
blake2b_state FS[1];
/* Verify parameters */
if ( NULL == in ) return -1;
if ( NULL == out ) return -1;
if ( NULL == key ) keylen = 0;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2bp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1;
S[PARALLELISM_DEGREE - 1]->last_node = 1; // mark last node
if( keylen > 0 )
uint8_t block[BLAKE2B_BLOCKBYTES];
memset( block, 0, BLAKE2B_BLOCKBYTES );
memcpy( block, key, keylen );
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2b_update( S[i], block, BLAKE2B_BLOCKBYTES );
secure_zero_memory( block, BLAKE2B_BLOCKBYTES ); /* Burn the key from stack */
#if defined(_OPENMP)
#pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE)
for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
#if defined(_OPENMP)
size_t id__ = omp_get_thread_num();
uint64_t inlen__ = inlen;
const uint8_t *in__ = ( const uint8_t * )in;
in__ += id__ * BLAKE2B_BLOCKBYTES;
blake2b_update( S[id__], in__, BLAKE2B_BLOCKBYTES );
if( inlen__ > id__ * BLAKE2B_BLOCKBYTES )
const size_t left = inlen__ - id__ * BLAKE2B_BLOCKBYTES;
const size_t len = left <= BLAKE2B_BLOCKBYTES ? left : BLAKE2B_BLOCKBYTES;
blake2b_update( S[id__], in__, len );
blake2b_final( S[id__], hash[id__], BLAKE2B_OUTBYTES );
if( blake2bp_init_root( FS, outlen, keylen ) < 0 )
return -1;
FS->last_node = 1; // Mark as last node
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2b_update( FS, hash[i], BLAKE2B_OUTBYTES );
blake2b_final( FS, out, outlen );
return 0;
#if defined(BLAKE2BP_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
/* Self-test: hash every prefix of the byte sequence 0,1,2,... under the key
   0,1,2,... through the streaming API and compare with the known-answer
   table.  Prints "ok" and returns 0 on success; prints "error" and returns
   -1 on the first mismatch. */
int main( int argc, char **argv )
{
  uint8_t key[BLAKE2B_KEYBYTES];
  uint8_t buf[KAT_LENGTH];

  ( void )argc;
  ( void )argv;

  for( size_t i = 0; i < BLAKE2B_KEYBYTES; ++i )
    key[i] = ( uint8_t )i;

  for( size_t i = 0; i < KAT_LENGTH; ++i )
    buf[i] = ( uint8_t )i;

  for( size_t i = 0; i < KAT_LENGTH; ++i )
  {
    uint8_t hash[BLAKE2B_OUTBYTES];
    //blake2bp( hash, buf, key, BLAKE2B_OUTBYTES, i, BLAKE2B_KEYBYTES );
    blake2bp_state S[1];
    blake2bp_init_key( S, BLAKE2B_OUTBYTES, key, BLAKE2B_KEYBYTES );
    blake2bp_update( S, buf, i );
    blake2bp_final( S, hash, BLAKE2B_OUTBYTES );

    if( 0 != memcmp( hash, blake2bp_keyed_kat[i], BLAKE2B_OUTBYTES ) )
    {
      puts( "error" );
      return -1;
    }
  }

  puts( "ok" );
  return 0;
}
#endif

sse/blake2s-load-sse2.h Normal file
View File

@ -0,0 +1,59 @@
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#pragma once
#ifndef __BLAKE2S_LOAD_SSE2_H__
#define __BLAKE2S_LOAD_SSE2_H__
/* Message-word gathers, plain SSE2 flavour.
   LOAD_MSG_<r>_<k>(buf) assigns to buf a vector built with _mm_set_epi32 from
   four of the scalar message words m0..m15, which must be in scope at the
   expansion site.  <r> is the round (0-9) and <k> the load slot (1-4).
   _mm_set_epi32 takes its arguments highest lane first, so the words appear
   here in reverse lane order. */
#define LOAD_MSG_0_1(buf) buf = _mm_set_epi32(m6,m4,m2,m0)
#define LOAD_MSG_0_2(buf) buf = _mm_set_epi32(m7,m5,m3,m1)
#define LOAD_MSG_0_3(buf) buf = _mm_set_epi32(m14,m12,m10,m8)
#define LOAD_MSG_0_4(buf) buf = _mm_set_epi32(m15,m13,m11,m9)
#define LOAD_MSG_1_1(buf) buf = _mm_set_epi32(m13,m9,m4,m14)
#define LOAD_MSG_1_2(buf) buf = _mm_set_epi32(m6,m15,m8,m10)
#define LOAD_MSG_1_3(buf) buf = _mm_set_epi32(m5,m11,m0,m1)
#define LOAD_MSG_1_4(buf) buf = _mm_set_epi32(m3,m7,m2,m12)
#define LOAD_MSG_2_1(buf) buf = _mm_set_epi32(m15,m5,m12,m11)
#define LOAD_MSG_2_2(buf) buf = _mm_set_epi32(m13,m2,m0,m8)
#define LOAD_MSG_2_3(buf) buf = _mm_set_epi32(m9,m7,m3,m10)
#define LOAD_MSG_2_4(buf) buf = _mm_set_epi32(m4,m1,m6,m14)
#define LOAD_MSG_3_1(buf) buf = _mm_set_epi32(m11,m13,m3,m7)
#define LOAD_MSG_3_2(buf) buf = _mm_set_epi32(m14,m12,m1,m9)
#define LOAD_MSG_3_3(buf) buf = _mm_set_epi32(m15,m4,m5,m2)
#define LOAD_MSG_3_4(buf) buf = _mm_set_epi32(m8,m0,m10,m6)
#define LOAD_MSG_4_1(buf) buf = _mm_set_epi32(m10,m2,m5,m9)
#define LOAD_MSG_4_2(buf) buf = _mm_set_epi32(m15,m4,m7,m0)
#define LOAD_MSG_4_3(buf) buf = _mm_set_epi32(m3,m6,m11,m14)
#define LOAD_MSG_4_4(buf) buf = _mm_set_epi32(m13,m8,m12,m1)
#define LOAD_MSG_5_1(buf) buf = _mm_set_epi32(m8,m0,m6,m2)
#define LOAD_MSG_5_2(buf) buf = _mm_set_epi32(m3,m11,m10,m12)
#define LOAD_MSG_5_3(buf) buf = _mm_set_epi32(m1,m15,m7,m4)
#define LOAD_MSG_5_4(buf) buf = _mm_set_epi32(m9,m14,m5,m13)
#define LOAD_MSG_6_1(buf) buf = _mm_set_epi32(m4,m14,m1,m12)
#define LOAD_MSG_6_2(buf) buf = _mm_set_epi32(m10,m13,m15,m5)
#define LOAD_MSG_6_3(buf) buf = _mm_set_epi32(m8,m9,m6,m0)
#define LOAD_MSG_6_4(buf) buf = _mm_set_epi32(m11,m2,m3,m7)
#define LOAD_MSG_7_1(buf) buf = _mm_set_epi32(m3,m12,m7,m13)
#define LOAD_MSG_7_2(buf) buf = _mm_set_epi32(m9,m1,m14,m11)
#define LOAD_MSG_7_3(buf) buf = _mm_set_epi32(m2,m8,m15,m5)
#define LOAD_MSG_7_4(buf) buf = _mm_set_epi32(m10,m6,m4,m0)
#define LOAD_MSG_8_1(buf) buf = _mm_set_epi32(m0,m11,m14,m6)
#define LOAD_MSG_8_2(buf) buf = _mm_set_epi32(m8,m3,m9,m15)
#define LOAD_MSG_8_3(buf) buf = _mm_set_epi32(m10,m1,m13,m12)
#define LOAD_MSG_8_4(buf) buf = _mm_set_epi32(m5,m4,m7,m2)
#define LOAD_MSG_9_1(buf) buf = _mm_set_epi32(m1,m7,m8,m10)
#define LOAD_MSG_9_2(buf) buf = _mm_set_epi32(m5,m6,m4,m2)
#define LOAD_MSG_9_3(buf) buf = _mm_set_epi32(m13,m3,m9,m15)
#define LOAD_MSG_9_4(buf) buf = _mm_set_epi32(m0,m12,m14,m11)

sse/blake2s-load-sse41.h Normal file
View File

@ -0,0 +1,229 @@
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#pragma once
#ifndef __BLAKE2S_LOAD_SSE41_H__
#define __BLAKE2S_LOAD_SSE41_H__
/* Round 0 loads.  Each LOAD_MSG_0_<k>(buf) builds buf with one float-domain
   shuffle of two of the message registers m0..m3 (which must be in scope):
   slots 1 and 3 take the even-numbered 32-bit lanes, slots 2 and 4 the
   odd-numbered ones.  Every macro already ends in a semicolon. */
#define LOAD_MSG_0_1(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(2,0,2,0)));
#define LOAD_MSG_0_2(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m0), TOF(m1), _MM_SHUFFLE(3,1,3,1)));
#define LOAD_MSG_0_3(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(2,0,2,0)));
#define LOAD_MSG_0_4(buf) \
buf = TOI(_mm_shuffle_ps(TOF(m2), TOF(m3), _MM_SHUFFLE(3,1,3,1)));
/* Round 1 loads.  Built from m0..m3 with blends, byte shifts and a final
   32-bit lane shuffle; t0, t1 and t2 are scratch registers that must be
   declared at the expansion site and are overwritten. */
#define LOAD_MSG_1_1(buf) \
t0 = _mm_blend_epi16(m1, m2, 0x0C); \
t1 = _mm_slli_si128(m3, 4); \
t2 = _mm_blend_epi16(t0, t1, 0xF0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,0,3));
#define LOAD_MSG_1_2(buf) \
t0 = _mm_shuffle_epi32(m2,_MM_SHUFFLE(0,0,2,0)); \
t1 = _mm_blend_epi16(m1,m3,0xC0); \
t2 = _mm_blend_epi16(t0, t1, 0xF0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));
#define LOAD_MSG_1_3(buf) \
t0 = _mm_slli_si128(m1, 4); \
t1 = _mm_blend_epi16(m2, t0, 0x30); \
t2 = _mm_blend_epi16(m0, t1, 0xF0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));
#define LOAD_MSG_1_4(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_slli_si128(m3, 4); \
t2 = _mm_blend_epi16(t0, t1, 0x0C); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,3,0,1));
/* Round 2 loads.  Built from m0..m3; t0, t1 and t2 are scratch registers
   that must be declared at the expansion site and are overwritten. */
#define LOAD_MSG_2_1(buf) \
t0 = _mm_unpackhi_epi32(m2,m3); \
t1 = _mm_blend_epi16(m3,m1,0x0C); \
t2 = _mm_blend_epi16(t0, t1, 0x0F); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2));
#define LOAD_MSG_2_2(buf) \
t0 = _mm_unpacklo_epi32(m2,m0); \
t1 = _mm_blend_epi16(t0, m0, 0xF0); \
t2 = _mm_slli_si128(m3, 8); \
buf = _mm_blend_epi16(t1, t2, 0xC0);
#define LOAD_MSG_2_3(buf) \
t0 = _mm_blend_epi16(m0, m2, 0x3C); \
t1 = _mm_srli_si128(m1, 12); \
t2 = _mm_blend_epi16(t0,t1,0x03); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,3,2));
#define LOAD_MSG_2_4(buf) \
t0 = _mm_slli_si128(m3, 4); \
t1 = _mm_blend_epi16(m0, m1, 0x33); \
t2 = _mm_blend_epi16(t1, t0, 0xC0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(0,1,2,3));
/* Round 3 loads.  Built from m0..m3; t0, t1 and (for slots 1 and 2) t2 are
   scratch registers that must be declared at the expansion site. */
#define LOAD_MSG_3_1(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_unpackhi_epi32(t0, m2); \
t2 = _mm_blend_epi16(t1, m3, 0x0C); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(3,1,0,2));
#define LOAD_MSG_3_2(buf) \
t0 = _mm_slli_si128(m2, 8); \
t1 = _mm_blend_epi16(m3,m0,0x0C); \
t2 = _mm_blend_epi16(t1, t0, 0xC0); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3));
#define LOAD_MSG_3_3(buf) \
t0 = _mm_blend_epi16(m0,m1,0x0F); \
t1 = _mm_blend_epi16(t0, m3, 0xC0); \
buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2));
#define LOAD_MSG_3_4(buf) \
t0 = _mm_unpacklo_epi32(m0,m2); \
t1 = _mm_unpackhi_epi32(m1,m2); \
buf = _mm_unpacklo_epi64(t1,t0);
/* Round 4 loads.  Built from m0..m3; t0, t1 and (for slots 1 and 4) t2 are
   scratch registers that must be declared at the expansion site. */
#define LOAD_MSG_4_1(buf) \
t0 = _mm_unpacklo_epi64(m1,m2); \
t1 = _mm_unpackhi_epi64(m0,m2); \
t2 = _mm_blend_epi16(t0,t1,0x33); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,0,1,3));
#define LOAD_MSG_4_2(buf) \
t0 = _mm_unpackhi_epi64(m1,m3); \
t1 = _mm_unpacklo_epi64(m0,m1); \
buf = _mm_blend_epi16(t0,t1,0x33);
#define LOAD_MSG_4_3(buf) \
t0 = _mm_unpackhi_epi64(m3,m1); \
t1 = _mm_unpackhi_epi64(m2,m0); \
buf = _mm_blend_epi16(t1,t0,0x33);
#define LOAD_MSG_4_4(buf) \
t0 = _mm_blend_epi16(m0,m2,0x03); \
t1 = _mm_slli_si128(t0, 8); \
t2 = _mm_blend_epi16(t1,m3,0x0F); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,0,3));
/* Round 5 loads.  Built from m0..m3; t0, t1 and (for slot 3) t2 are scratch
   registers that must be declared at the expansion site. */
#define LOAD_MSG_5_1(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_unpacklo_epi32(m0,m2); \
buf = _mm_unpacklo_epi64(t0,t1);
#define LOAD_MSG_5_2(buf) \
t0 = _mm_srli_si128(m2, 4); \
t1 = _mm_blend_epi16(m0,m3,0x03); \
buf = _mm_blend_epi16(t1,t0,0x3C);
#define LOAD_MSG_5_3(buf) \
t0 = _mm_blend_epi16(m1,m0,0x0C); \
t1 = _mm_srli_si128(m3, 4); \
t2 = _mm_blend_epi16(t0,t1,0x30); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,2,3,0));
#define LOAD_MSG_5_4(buf) \
t0 = _mm_unpacklo_epi64(m1,m2); \
t1= _mm_shuffle_epi32(m3, _MM_SHUFFLE(0,2,0,1)); \
buf = _mm_blend_epi16(t0,t1,0x33);
/* Round 6 loads.  Built from m0..m3; t0, t1 and (for slot 2) t2 are scratch
   registers that must be declared at the expansion site. */
#define LOAD_MSG_6_1(buf) \
t0 = _mm_slli_si128(m1, 12); \
t1 = _mm_blend_epi16(m0,m3,0x33); \
buf = _mm_blend_epi16(t1,t0,0xC0);
#define LOAD_MSG_6_2(buf) \
t0 = _mm_blend_epi16(m3,m2,0x30); \
t1 = _mm_srli_si128(m1, 4); \
t2 = _mm_blend_epi16(t0,t1,0x03); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(2,1,3,0));
#define LOAD_MSG_6_3(buf) \
t0 = _mm_unpacklo_epi64(m0,m2); \
t1 = _mm_srli_si128(m1, 4); \
buf = _mm_shuffle_epi32(_mm_blend_epi16(t0,t1,0x0C), _MM_SHUFFLE(2,3,1,0));
#define LOAD_MSG_6_4(buf) \
t0 = _mm_unpackhi_epi32(m1,m2); \
t1 = _mm_unpackhi_epi64(m0,t0); \
buf = _mm_shuffle_epi32(t1, _MM_SHUFFLE(3,0,1,2));
/* Round 7 loads.  Built from m0..m3; t0, t1 and (for slots 2 and 3) t2 are
   scratch registers that must be declared at the expansion site. */
#define LOAD_MSG_7_1(buf) \
t0 = _mm_unpackhi_epi32(m0,m1); \
t1 = _mm_blend_epi16(t0,m3,0x0F); \
buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(2,0,3,1));
#define LOAD_MSG_7_2(buf) \
t0 = _mm_blend_epi16(m2,m3,0x30); \
t1 = _mm_srli_si128(m0,4); \
t2 = _mm_blend_epi16(t0,t1,0x03); \
buf = _mm_shuffle_epi32(t2, _MM_SHUFFLE(1,0,2,3));
#define LOAD_MSG_7_3(buf) \
t0 = _mm_unpackhi_epi64(m0,m3); \
t1 = _mm_unpacklo_epi64(m1,m2); \
t2 = _mm_blend_epi16(t0,t1,0x3C); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,2,3,1));
#define LOAD_MSG_7_4(buf) \
t0 = _mm_unpacklo_epi32(m0,m1); \
t1 = _mm_unpackhi_epi32(m1,m2); \
buf = _mm_unpacklo_epi64(t0,t1);
/* Round 8 loads.  Built from m0..m3; t0, t1 and (for slot 1) t2 are scratch
   registers that must be declared at the expansion site. */
#define LOAD_MSG_8_1(buf) \
t0 = _mm_unpackhi_epi32(m1,m3); \
t1 = _mm_unpacklo_epi64(t0,m0); \
t2 = _mm_blend_epi16(t1,m2,0xC0); \
buf = _mm_shufflehi_epi16(t2,_MM_SHUFFLE(1,0,3,2));
#define LOAD_MSG_8_2(buf) \
t0 = _mm_unpackhi_epi32(m0,m3); \
t1 = _mm_blend_epi16(m2,t0,0xF0); \
buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(0,2,1,3));
#define LOAD_MSG_8_3(buf) \
t0 = _mm_blend_epi16(m2,m0,0x0C); \
t1 = _mm_slli_si128(t0,4); \
buf = _mm_blend_epi16(t1,m3,0x0F);
#define LOAD_MSG_8_4(buf) \
t0 = _mm_blend_epi16(m1,m0,0x30); \
buf = _mm_shuffle_epi32(t0,_MM_SHUFFLE(1,0,3,2));
/* Round 9 loads.  Built from m0..m3; t0, t1 and (except for slot 2) t2 are
   scratch registers that must be declared at the expansion site. */
#define LOAD_MSG_9_1(buf) \
t0 = _mm_blend_epi16(m0,m2,0x03); \
t1 = _mm_blend_epi16(m1,m2,0x30); \
t2 = _mm_blend_epi16(t1,t0,0x0F); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(1,3,0,2));
#define LOAD_MSG_9_2(buf) \
t0 = _mm_slli_si128(m0,4); \
t1 = _mm_blend_epi16(m1,t0,0xC0); \
buf = _mm_shuffle_epi32(t1,_MM_SHUFFLE(1,2,0,3));
#define LOAD_MSG_9_3(buf) \
t0 = _mm_unpackhi_epi32(m0,m3); \
t1 = _mm_unpacklo_epi32(m2,m3); \
t2 = _mm_unpackhi_epi64(t0,t1); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(3,0,2,1));
#define LOAD_MSG_9_4(buf) \
t0 = _mm_blend_epi16(m3,m2,0xC0); \
t1 = _mm_unpacklo_epi32(m0,m3); \
t2 = _mm_blend_epi16(t0,t1,0x0F); \
buf = _mm_shuffle_epi32(t2,_MM_SHUFFLE(0,1,2,3));

sse/blake2s-load-xop.h Normal file
View File

@ -0,0 +1,189 @@
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#pragma once
#ifndef __BLAKE2S_LOAD_XOP_H__
#define __BLAKE2S_LOAD_XOP_H__
/* TOB(x) turns a 32-bit word index x into four packed byte selectors:
   (x)*4 is replicated into every byte and 0x03020100 adds the offsets
   0, 1, 2, 3, giving the byte indices 4x, 4x+1, 4x+2, 4x+3. */
#define TOB(x) ((x)*4*0x01010101 + 0x03020100) // ..or not TOB
/* Basic VPPERM emulation, for testing purposes */
/*static __m128i _mm_perm_epi8(const __m128i src1, const __m128i src2, const __m128i sel)
{
   const __m128i sixteen = _mm_set1_epi8(16);
   const __m128i t0 = _mm_shuffle_epi8(src1, sel);
   const __m128i s1 = _mm_shuffle_epi8(src2, _mm_sub_epi8(sel, sixteen));
   const __m128i mask = _mm_or_si128(_mm_cmpeq_epi8(sel, sixteen),
                                     _mm_cmpgt_epi8(sel, sixteen)); // (>=16) = 0xff : 00
   return _mm_blendv_epi8(t0, s1, mask);
}*/
#define LOAD_MSG_0_1(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) );
#define LOAD_MSG_0_2(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(7),TOB(5),TOB(3),TOB(1)) );
#define LOAD_MSG_0_3(buf) \
buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(6),TOB(4),TOB(2),TOB(0)) );
#define LOAD_MSG_0_4(buf) \
buf = _mm_perm_epi8(m2, m3, _mm_set_epi32(TOB(7),TOB(5),TOB(3),TOB(1)) );
#define LOAD_MSG_1_1(buf) \
t0 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(0),TOB(5),TOB(0),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(6)) );
#define LOAD_MSG_1_2(buf) \
t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(2),TOB(0),TOB(4),TOB(6)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) );
#define LOAD_MSG_1_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(0),TOB(0),TOB(1)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) );
#define LOAD_MSG_1_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(7),TOB(2),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(4)) );
/* Round 2 message loads (presumably the word order of blake2s_sigma row 2 --
   verify). Two-step gathers: a scratch vector (t0 or t1) is built from two
   message vectors, then permuted with a third into buf.
   t0, t1 and m0..m3 must be in scope where the macro expands. */
#define LOAD_MSG_2_1(buf) \
t0 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(0),TOB(1),TOB(0),TOB(7)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(4),TOB(0)) );
#define LOAD_MSG_2_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(2),TOB(0),TOB(4)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_2_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(7),TOB(3),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(6)) );
#define LOAD_MSG_2_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(1),TOB(6),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(6)) );
/* Round 3 message loads (presumably the word order of blake2s_sigma row 3 --
   verify). LOAD_MSG_3_1 needs words from all four message vectors and so
   chains three permutes through t0; the others use two steps.
   t0, t1 and m0..m3 must be in scope where the macro expands. */
#define LOAD_MSG_3_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(3),TOB(7)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(5),TOB(1),TOB(0)) );
#define LOAD_MSG_3_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(1),TOB(5)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(6),TOB(4),TOB(1),TOB(0)) );
#define LOAD_MSG_3_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(4),TOB(5),TOB(2)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_3_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(6)) ); \
buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(4),TOB(2),TOB(6),TOB(0)) );
/* Round 4 message loads (presumably the word order of blake2s_sigma row 4 --
   verify). LOAD_MSG_4_3 chains three permutes through t0; the others use a
   scratch vector (t0 or t1) followed by one final permute into buf.
   t0, t1 and m0..m3 must be in scope where the macro expands. */
#define LOAD_MSG_4_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(2),TOB(5),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(6),TOB(2),TOB(1),TOB(5)) );
#define LOAD_MSG_4_2(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(4),TOB(7),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_4_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(6),TOB(0),TOB(0)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(6)) );
#define LOAD_MSG_4_4(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(4),TOB(0),TOB(1)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(4),TOB(0)) );
/* Round 5 message loads (presumably the word order of blake2s_sigma row 5 --
   verify). Two-step gathers: a scratch vector (t0 or t1) is built from two
   message vectors, then permuted with a third into buf.
   t0, t1 and m0..m3 must be in scope where the macro expands. */
#define LOAD_MSG_5_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(6),TOB(2)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(4),TOB(2),TOB(1),TOB(0)) );
#define LOAD_MSG_5_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(6),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(4)) );
#define LOAD_MSG_5_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(1),TOB(0),TOB(7),TOB(4)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) );
#define LOAD_MSG_5_4(buf) \
t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(5),TOB(0),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(6),TOB(1),TOB(5)) );
/* Round 6 message loads (presumably the word order of blake2s_sigma row 6 --
   verify). Two-step gathers: a scratch vector (t0 or t1) is built from two
   message vectors, then permuted with a third into buf.
   t0, t1 and m0..m3 must be in scope where the macro expands. */
#define LOAD_MSG_6_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(4),TOB(0),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(6),TOB(1),TOB(4)) );
#define LOAD_MSG_6_2(buf) \
t1 = _mm_perm_epi8(m1, m2, _mm_set_epi32(TOB(6),TOB(0),TOB(0),TOB(1)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(5),TOB(7),TOB(0)) );
#define LOAD_MSG_6_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(6),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(4),TOB(5),TOB(1),TOB(0)) );
#define LOAD_MSG_6_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(2),TOB(3),TOB(7)) ); \
buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(7),TOB(2),TOB(1),TOB(0)) );
/* Round 7 message loads (presumably the word order of blake2s_sigma row 7 --
   verify). LOAD_MSG_7_3 chains three permutes through t0; the others use a
   scratch vector (t0 or t1) followed by one final permute into buf.
   t0, t1 and m0..m3 must be in scope where the macro expands. */
#define LOAD_MSG_7_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(3),TOB(0),TOB(7),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(5)) );
#define LOAD_MSG_7_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(5),TOB(1),TOB(0),TOB(7)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) );
#define LOAD_MSG_7_3(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(2),TOB(0),TOB(0),TOB(5)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(4),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(7),TOB(0)) );
#define LOAD_MSG_7_4(buf) \
t1 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(6),TOB(4),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m2, _mm_set_epi32(TOB(6),TOB(2),TOB(1),TOB(0)) );
/* Round 8, first two message loads (presumably following blake2s_sigma
   row 8 -- verify). LOAD_MSG_8_1 chains three permutes through t0;
   LOAD_MSG_8_2 uses scratch t1 and one final permute into buf.
   t0, t1 and m0..m3 must be in scope where the macro expands. */
#define LOAD_MSG_8_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(6)) ); \
t0 = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(7),TOB(1),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(6),TOB(0)) );
#define LOAD_MSG_8_2(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(4),TOB(3),TOB(5),TOB(0)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(1),TOB(7)) );
/* Round 8, third message load: gathers from m0/m2 into scratch t0, then
   permutes t0 with m3 into buf. t0, m0, m2 and m3 must be in scope where the
   macro expands. The last line must NOT end in a backslash: a dangling
   continuation would splice the next #define into this macro body. */
#define LOAD_MSG_8_3(buf) \
t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(6),TOB(1),TOB(0),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(3),TOB(2),TOB(5),TOB(4)) );
/* Round 8, fourth message load: all four words come from the m0/m1 pair, so
   a single permute suffices and no scratch vector is used. */
#define LOAD_MSG_8_4(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(4),TOB(7),TOB(2)) );
/* Round 9 message loads (presumably the word order of blake2s_sigma row 9 --
   verify). LOAD_MSG_9_2 is a single permute of m0/m1; the others build a
   scratch vector (t0 or t1) and finish with one more permute into buf.
   t0, t1 and m0..m3 must be in scope where the macro expands. */
#define LOAD_MSG_9_1(buf) \
t0 = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(1),TOB(7),TOB(0),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m2, _mm_set_epi32(TOB(3),TOB(2),TOB(4),TOB(6)) );
#define LOAD_MSG_9_2(buf) \
buf = _mm_perm_epi8(m0, m1, _mm_set_epi32(TOB(5),TOB(6),TOB(4),TOB(2)) );
#define LOAD_MSG_9_3(buf) \
t0 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(3),TOB(5),TOB(0)) ); \
buf = _mm_perm_epi8(t0, m3, _mm_set_epi32(TOB(5),TOB(2),TOB(1),TOB(7)) );
#define LOAD_MSG_9_4(buf) \
t1 = _mm_perm_epi8(m0, m2, _mm_set_epi32(TOB(0),TOB(0),TOB(0),TOB(7)) ); \
buf = _mm_perm_epi8(t1, m3, _mm_set_epi32(TOB(3),TOB(4),TOB(6),TOB(0)) );

sse/blake2s-round.h Normal file
View File

@ -0,0 +1,91 @@
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#pragma once
#ifndef __BLAKE2S_ROUND_H__
#define __BLAKE2S_ROUND_H__
/* 128-bit loads and stores; the U variants are the unaligned forms. p may
   point to any object type. Loads cast to pointer-to-const so that const
   buffers (e.g. the input block) are not stripped of their qualifier. */
#define LOAD(p) _mm_load_si128( (const __m128i *)(p) )
#define STORE(p,r) _mm_store_si128((__m128i *)(p), (r))
#define LOADU(p) _mm_loadu_si128( (const __m128i *)(p) )
#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), (r))
/* Reinterpret an integer vector as a float vector and back. */
#define TOF(reg) _mm_castsi128_ps((reg))
#define TOI(reg) _mm_castps_si128((reg))
/* Branch hint: tells the compiler that x is expected to evaluate to 1. */
#define LIKELY(x) __builtin_expect((x),1)
/* Microarchitecture-specific macros */
/* _mm_roti_epi32(r, c): rotate every 32-bit lane of r. Callers pass a
   NEGATIVE count c to rotate right by -c bits.
   - With XOP nothing is defined here; the intrinsic of that name is assumed
     to come from the XOP headers.
   - With SSSE3, rotations by 8 and 16 are done as byte shuffles using the
     masks r8 and r16, which must be in scope where the macro expands.
   - Otherwise two shifts are combined; their bit ranges do not overlap, so
     XOR acts as OR. */
#ifndef HAVE_XOP
#ifdef HAVE_SSSE3
#define _mm_roti_epi32(r, c) ( \
  (8==-(c)) ? _mm_shuffle_epi8((r),r8) \
  : (16==-(c)) ? _mm_shuffle_epi8((r),r16) \
  : _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) )) )
#else
#define _mm_roti_epi32(r, c) _mm_xor_si128(_mm_srli_epi32( (r), -(c) ),_mm_slli_epi32( (r), 32-(-(c)) ))
#endif
#else
/* ... */
#endif
/* First half of the G mixing step, applied to all four lanes at once:
   row1 += buf + row2; row4 = (row4 ^ row1) rotated by -16;
   row3 += row4;       row2 = (row2 ^ row3) rotated by -12.
   Expands to plain statements, so use it only where statements are allowed. */
#define G1(row1,row2,row3,row4,buf) \
  row1 = _mm_add_epi32( row1, buf ); \
  row1 = _mm_add_epi32( row1, row2 ); \
  row4 = _mm_roti_epi32( _mm_xor_si128( row4, row1 ), -16 ); \
  row3 = _mm_add_epi32( row3, row4 ); \
  row2 = _mm_roti_epi32( _mm_xor_si128( row2, row3 ), -12 );
/* Second half of the G mixing step, applied to all four lanes at once:
   row1 += buf + row2; row4 = (row4 ^ row1) rotated by -8;
   row3 += row4;       row2 = (row2 ^ row3) rotated by -7.
   Expands to plain statements, so use it only where statements are allowed. */
#define G2(row1,row2,row3,row4,buf) \
  row1 = _mm_add_epi32( row1, buf ); \
  row1 = _mm_add_epi32( row1, row2 ); \
  row4 = _mm_roti_epi32( _mm_xor_si128( row4, row1 ), -8 ); \
  row3 = _mm_add_epi32( row3, row4 ); \
  row2 = _mm_roti_epi32( _mm_xor_si128( row2, row3 ), -7 );
/* Re-orders the lanes of row2, row3 and row4 with three independent shuffles
   before the second G1/G2 pair of a round; row1 is accepted only for
   symmetry and is left untouched. UNDIAGONALIZE applies the matching
   shuffles afterwards. */
#define DIAGONALIZE(row1,row2,row3,row4) \
  row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE(0,3,2,1) ); \
  row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(1,0,3,2) ); \
  row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(2,1,0,3) );
/* Counterpart of DIAGONALIZE: the shuffle constants of row2 and row4 are
   swapped relative to DIAGONALIZE, row3 uses the same constant, and row1 is
   again left untouched. */
#define UNDIAGONALIZE(row1,row2,row3,row4) \
  row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE(2,1,0,3) ); \
  row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(1,0,3,2) ); \
  row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(0,3,2,1) );
#if defined(HAVE_XOP)
#include "blake2s-load-xop.h"
#elif defined(HAVE_SSE41)
#include "blake2s-load-sse41.h"
#include "blake2s-load-sse2.h"
/* One full round r: load message vectors 1 and 2 and run G1/G2 on the rows
   as they are, re-order the lanes with DIAGONALIZE, run G1/G2 again with
   message vectors 3 and 4, then restore the lane order. r is pasted into the
   LOAD_MSG_<r>_<k> macro names, so it must be a literal round number.
   row1..row4 and buf1..buf4 must be in scope where the macro expands.
   The last line deliberately carries no trailing backslash, so whatever
   follows the macro can never be spliced into it. */
#define ROUND(r) \
  LOAD_MSG_ ##r ##_1(buf1); \
  G1(row1,row2,row3,row4,buf1); \
  LOAD_MSG_ ##r ##_2(buf2); \
  G2(row1,row2,row3,row4,buf2); \
  DIAGONALIZE(row1,row2,row3,row4); \
  LOAD_MSG_ ##r ##_3(buf3); \
  G1(row1,row2,row3,row4,buf3); \
  LOAD_MSG_ ##r ##_4(buf4); \
  G2(row1,row2,row3,row4,buf4); \
  UNDIAGONALIZE(row1,row2,row3,row4);

sse/blake2s.c Normal file
View File

@ -0,0 +1,416 @@
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2.h"
#include "blake2-impl.h"
#include "blake2-config.h"
#include <emmintrin.h>
#if defined(HAVE_SSSE3)
#include <tmmintrin.h>
#if defined(HAVE_SSE41)
#include <smmintrin.h>
#if defined(HAVE_AVX)
#include <immintrin.h>
#if defined(HAVE_XOP)
#include <x86intrin.h>
#include "blake2s-round.h"
/* Eight 32-bit initialization-vector words; 64-byte aligned via ALIGN. */
ALIGN( 64 ) static const uint32_t blake2s_IV[8] =
{
  0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
  0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
/* Message word schedule: row r lists, in order, the indices of the sixteen
   message words consumed by round r. */
static const uint8_t blake2s_sigma[10][16] =
{
  {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
  { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 } ,
  { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 } ,
  {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 } ,
  {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 } ,
  {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 } ,
  { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 } ,
  { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 } ,
  {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 } ,
  { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 , 0 } ,
};
/* Some helper functions, not necessarily useful */
/* Sets the last-node finalization flag f[1] to all ones. Always returns 0. */
static inline int blake2s_set_lastnode( blake2s_state *S )
{
  S->f[1] = ~0U;
  return 0;
}
/* Clears the last-node finalization flag f[1]. Always returns 0. */
static inline int blake2s_clear_lastnode( blake2s_state *S )
{
  S->f[1] = 0U;
  return 0;
}
/* Sets the last-block finalization flag f[0] to all ones; if the state is
   marked as a last node, the last-node flag is set as well.
   Always returns 0. */
static inline int blake2s_set_lastblock( blake2s_state *S )
{
  if( S->last_node ) blake2s_set_lastnode( S );

  S->f[0] = ~0U;
  return 0;
}
/* Clears the last-block finalization flag f[0]; if the state is marked as a
   last node, the last-node flag is cleared as well. Always returns 0. */
static inline int blake2s_clear_lastblock( blake2s_state *S )
{
  if( S->last_node ) blake2s_clear_lastnode( S );

  S->f[0] = 0U;
  return 0;
}
/* Adds inc to the 64-bit byte counter kept as two 32-bit halves
   (t[0] = low word, t[1] = high word). Always returns 0. */
static inline int blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
{
  const uint64_t counter = ( ( ( uint64_t )S->t[1] << 32 ) | S->t[0] ) + inc;
  S->t[0] = ( uint32_t )counter;
  S->t[1] = ( uint32_t )( counter >> 32 );
  return 0;
}
// Parameter-related functions
/* Stores digest_length in the parameter block. No range check is done here.
   Always returns 0. */
static inline int blake2s_param_set_digest_length( blake2s_param *P, const uint8_t digest_length )
{
  P->digest_length = digest_length;
  return 0;
}
/* Stores the tree fanout in the parameter block. Always returns 0. */
static inline int blake2s_param_set_fanout( blake2s_param *P, const uint8_t fanout )
{
  P->fanout = fanout;
  return 0;
}
/* Stores the maximal tree depth in the parameter block's depth field.
   Always returns 0. */
static inline int blake2s_param_set_max_depth( blake2s_param *P, const uint8_t depth )
{
  P->depth = depth;
  return 0;
}
/* Stores the leaf length in the parameter block. The value is assigned in
   host byte order. Always returns 0. */
static inline int blake2s_param_set_leaf_length( blake2s_param *P, const uint32_t leaf_length )
{
  P->leaf_length = leaf_length;
  return 0;
}
/* Stores the node offset in the parameter block via store48, i.e. only the
   low 48 bits of node_offset are kept. Always returns 0. */
static inline int blake2s_param_set_node_offset( blake2s_param *P, const uint64_t node_offset )
{
  store48( P->node_offset, node_offset );
  return 0;
}
/* Stores the node depth in the parameter block. Always returns 0. */
static inline int blake2s_param_set_node_depth( blake2s_param *P, const uint8_t node_depth )
{
  P->node_depth = node_depth;
  return 0;
}
/* Stores the inner hash length in the parameter block. Always returns 0. */
static inline int blake2s_param_set_inner_length( blake2s_param *P, const uint8_t inner_length )
{
  P->inner_length = inner_length;
  return 0;
}
/* Copies exactly BLAKE2S_SALTBYTES bytes of salt into the parameter block;
   salt must point to at least that many bytes. Always returns 0. */
static inline int blake2s_param_set_salt( blake2s_param *P, const uint8_t salt[BLAKE2S_SALTBYTES] )
{
  memcpy( P->salt, salt, BLAKE2S_SALTBYTES );
  return 0;
}
/* Copies exactly BLAKE2S_PERSONALBYTES bytes of personalization into the
   parameter block; personal must point to at least that many bytes.
   Always returns 0. */
static inline int blake2s_param_set_personal( blake2s_param *P, const uint8_t personal[BLAKE2S_PERSONALBYTES] )
{
  memcpy( P->personal, personal, BLAKE2S_PERSONALBYTES );
  return 0;
}
/* Zeroes the whole state and loads the plain IV into the chaining value h,
   without mixing in any parameter block. Always returns 0. */
static inline int blake2s_init0( blake2s_state *S )
{
  memset( S, 0, sizeof( blake2s_state ) );

  for( int i = 0; i < 8; ++i ) S->h[i] = blake2s_IV[i];

  return 0;
}
/* Initializes S from parameter block P: the state is zeroed, then the first
   BLAKE2S_OUTBYTES bytes of the chaining value h are set to the IV XORed
   byte by byte with the parameter block. The XOR runs over raw bytes, so the
   in-memory layout of blake2s_IV and *P is used as is.
   Always returns 0. */
int blake2s_init_param( blake2s_state *S, const blake2s_param *P )
{
  const uint8_t *v = ( const uint8_t * )( blake2s_IV );
  const uint8_t *p = ( const uint8_t * )( P );
  uint8_t *h = ( uint8_t * )( S->h );

  memset( S, 0, sizeof( blake2s_state ) );

  /* IV XOR ParamBlock */
  for( int i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];

  return 0;
}
/* Default parameter block initialization for sequential, unkeyed blake2s.
   outlen is the digest length in bytes and must be in 1..BLAKE2S_OUTBYTES.
   Returns -1 on an invalid outlen, otherwise the result of
   blake2s_init_param. */
int blake2s_init( blake2s_state *S, const uint8_t outlen )
{
  /* Move interval verification here? */
  if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;

  /* Sequential mode: fanout 1, depth 1, no key; every member not named here
     (leaf length, node offset/depth, inner length, salt, personalization)
     is zero-initialized. */
  const blake2s_param P =
  {
    .digest_length = outlen,
    .key_length    = 0,
    .fanout        = 1,
    .depth         = 1
  };
  return blake2s_init_param( S, &P );
}
/* Initializes S for sequential, keyed blake2s.
   outlen must be in 1..BLAKE2S_OUTBYTES; key must be non-NULL and keylen in
   1..BLAKE2S_KEYBYTES. The key is zero-padded to one full block and absorbed
   as the first block of input. Returns 0 on success, -1 on bad arguments. */
int blake2s_init_key( blake2s_state *S, const uint8_t outlen, const void *key, const uint8_t keylen )
{
  /* Move interval verification here? */
  if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;

  if ( ( !key ) || ( !keylen ) || keylen > BLAKE2S_KEYBYTES ) return -1;

  /* Sequential mode: fanout 1, depth 1; members not named are zero. */
  const blake2s_param P =
  {
    .digest_length = outlen,
    .key_length    = keylen,
    .fanout        = 1,
    .depth         = 1
  };

  if( blake2s_init_param( S, &P ) < 0 )
    return -1;

  {
    uint8_t block[BLAKE2S_BLOCKBYTES];
    memset( block, 0, BLAKE2S_BLOCKBYTES );
    memcpy( block, key, keylen );
    blake2s_update( S, block, BLAKE2S_BLOCKBYTES );
    secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
  }
  return 0;
}
/* Compresses one BLAKE2S_BLOCKBYTES-byte block into the chaining value S->h.
   The 4x4 working state lives in four vectors: row1/row2 start as h[0..3]
   and h[4..7], row3 is the first IV half, and row4 is the second IV half
   XORed with the 16 bytes starting at S->t[0] (assumes t[0..1] and f[0..1]
   are laid out contiguously and 16-byte aligned -- TODO confirm against the
   blake2s_state declaration). After ten rounds the result is folded back as
   h ^= (row1 ^ row3, row2 ^ row4). Always returns 0. */
static inline int blake2s_compress( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] )
{
  __m128i row1, row2, row3, row4;
  __m128i buf1, buf2, buf3, buf4;
#if defined(HAVE_SSE41)
  /* Scratch vectors used by the LOAD_MSG_* macros. */
  __m128i t0, t1;
#if !defined(HAVE_XOP)
  __m128i t2;
#endif
#endif
  __m128i ff0, ff1;
#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
  /* Byte-shuffle masks used by _mm_roti_epi32 for rotations by 8 and 16. */
  const __m128i r8 = _mm_set_epi8( 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1 );
  const __m128i r16 = _mm_set_epi8( 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2 );
#endif
#if defined(HAVE_SSE41)
  /* Message as four vectors of four words each (unaligned loads). */
  const __m128i m0 = LOADU( block + 00 );
  const __m128i m1 = LOADU( block + 16 );
  const __m128i m2 = LOADU( block + 32 );
  const __m128i m3 = LOADU( block + 48 );
#else
  /* Message as sixteen scalar words, read in host byte order. */
  const uint32_t m0 = ( ( uint32_t * )block )[ 0];
  const uint32_t m1 = ( ( uint32_t * )block )[ 1];
  const uint32_t m2 = ( ( uint32_t * )block )[ 2];
  const uint32_t m3 = ( ( uint32_t * )block )[ 3];
  const uint32_t m4 = ( ( uint32_t * )block )[ 4];
  const uint32_t m5 = ( ( uint32_t * )block )[ 5];
  const uint32_t m6 = ( ( uint32_t * )block )[ 6];
  const uint32_t m7 = ( ( uint32_t * )block )[ 7];
  const uint32_t m8 = ( ( uint32_t * )block )[ 8];
  const uint32_t m9 = ( ( uint32_t * )block )[ 9];
  const uint32_t m10 = ( ( uint32_t * )block )[10];
  const uint32_t m11 = ( ( uint32_t * )block )[11];
  const uint32_t m12 = ( ( uint32_t * )block )[12];
  const uint32_t m13 = ( ( uint32_t * )block )[13];
  const uint32_t m14 = ( ( uint32_t * )block )[14];
  const uint32_t m15 = ( ( uint32_t * )block )[15];
#endif
  row1 = ff0 = LOAD( &S->h[0] );
  row2 = ff1 = LOAD( &S->h[4] );
  row3 = _mm_setr_epi32( 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A );
  row4 = _mm_xor_si128( _mm_setr_epi32( 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 ), LOAD( &S->t[0] ) );
  ROUND( 0 );
  ROUND( 1 );
  ROUND( 2 );
  ROUND( 3 );
  ROUND( 4 );
  ROUND( 5 );
  ROUND( 6 );
  ROUND( 7 );
  ROUND( 8 );
  ROUND( 9 );
  STORE( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) );
  STORE( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) );
  return 0;
}
/* Absorbs inlen bytes from in (inlen is in bytes).
   Input is staged in S->buf, which holds up to two blocks. A block is
   compressed only when more input is known to follow, so the most recent
   data always stays buffered for blake2s_final. Always returns 0. */
int blake2s_update( blake2s_state *S, const uint8_t *in, uint64_t inlen )
{
  while( inlen > 0 )
  {
    size_t left = S->buflen;
    size_t fill = 2 * BLAKE2S_BLOCKBYTES - left;

    if( inlen > fill )
    {
      memcpy( S->buf + left, in, fill ); // Fill buffer
      S->buflen += fill;
      blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
      blake2s_compress( S, S->buf ); // Compress
      memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES ); // Shift buffer left
      // One block was consumed; without this the buffer stays "full",
      // fill becomes 0 and the loop never makes progress.
      S->buflen -= BLAKE2S_BLOCKBYTES;
      in += fill;
      inlen -= fill;
    }
    else // inlen <= fill
    {
      memcpy( S->buf + left, in, ( size_t )inlen );
      S->buflen += inlen; // Be lazy, do not compress
      in += inlen;
      inlen = 0;
    }
  }

  return 0;
}
/* Finishes the hash and writes the first outlen bytes of the digest to out.
   If more than one block is still buffered, the first one is compressed as
   an ordinary block; the remainder is zero-padded and compressed with the
   last-block flag set. outlen must not exceed BLAKE2S_OUTBYTES.
   Returns 0 on success, -1 if outlen is too large (out is left untouched). */
int blake2s_final( blake2s_state *S, uint8_t *out, uint8_t outlen )
{
  uint8_t buffer[BLAKE2S_OUTBYTES];

  /* buffer holds only BLAKE2S_OUTBYTES bytes; never copy more than that. */
  if( outlen > BLAKE2S_OUTBYTES ) return -1;

  if( S->buflen > BLAKE2S_BLOCKBYTES )
  {
    blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
    blake2s_compress( S, S->buf );
    /* Account for the consumed block BEFORE moving the tail, so that the
       copy covers only the leftover bytes and cannot overlap. */
    S->buflen -= BLAKE2S_BLOCKBYTES;
    memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen );
  }

  blake2s_increment_counter( S, ( uint32_t )S->buflen );
  blake2s_set_lastblock( S );
  memset( S->buf + S->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
  blake2s_compress( S, S->buf );

  for( int i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
    store32( buffer + sizeof( S->h[i] ) * i, S->h[i] );

  memcpy( out, buffer, outlen );
  return 0;
}
/* One-shot BLAKE2s: hashes inlen bytes at in into outlen bytes at out,
   keyed with keylen bytes at key when key is non-NULL and keylen > 0.
   inlen, at least, should be uint64_t. Others can be size_t.
   in may be NULL only for an empty message (inlen == 0).
   Returns 0 on success, -1 on invalid arguments. */
int blake2s( uint8_t *out, const void *in, const void *key, const uint8_t outlen, const uint64_t inlen, uint8_t keylen )
{
  blake2s_state S[1];

  /* Verify parameters */
  if ( NULL == in && inlen > 0 ) return -1;

  if ( NULL == out ) return -1;

  if ( NULL == key ) keylen = 0; /* Fail here instead if keylen != 0 and key == NULL? */

  /* Exactly one of the two initializers must run: calling blake2s_init after
     blake2s_init_key would reset the state and throw the key away. */
  if( keylen > 0 )
  {
    if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
  }
  else
  {
    if( blake2s_init( S, outlen ) < 0 ) return -1;
  }

  blake2s_update( S, ( const uint8_t * )in, inlen );
  blake2s_final( S, out, outlen );
  return 0;
}
#if defined(SUPERCOP)
/* SUPERCOP entry point: unkeyed BLAKE2s with the full BLAKE2S_OUTBYTES-byte
   digest. Returns whatever blake2s returns. */
int crypto_hash( unsigned char *out, const unsigned char *in, unsigned long long inlen )
{
  return blake2s( out, in, NULL, BLAKE2S_OUTBYTES, inlen, 0 );
}
#endif
#if defined(BLAKE2S_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
/* Self-test: hashes every prefix of the byte sequence 0,1,2,... (lengths
   0..KAT_LENGTH-1) under the key 0,1,2,... and compares each digest with
   blake2s_keyed_kat. Prints "ok" and returns 0 if all match; prints "error"
   and returns -1 on the first failure. */
int main( int argc, char **argv )
{
  uint8_t key[BLAKE2S_KEYBYTES];
  uint8_t buf[KAT_LENGTH];

  for( size_t i = 0; i < BLAKE2S_KEYBYTES; ++i )
    key[i] = ( uint8_t )i;

  for( size_t i = 0; i < KAT_LENGTH; ++i )
    buf[i] = ( uint8_t )i;

  for( size_t i = 0; i < KAT_LENGTH; ++i )
  {
    uint8_t hash[BLAKE2S_OUTBYTES];

    if( blake2s( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ) < 0 ||
        0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) )
    {
      puts( "error" );
      return -1;
    }
  }

  puts( "ok" );
  return 0;
}
#endif

sse/blake2sp.c Normal file
View File

@ -0,0 +1,291 @@
BLAKE2 reference source code package - optimized C implementations
Written in 2012 by Samuel Neves <sneves@dei.uc.pt>
To the extent possible under law, the author(s) have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with
this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#if defined(_OPENMP)
#include <omp.h>
#include "blake2.h"
#include "blake2-impl.h"
/* Initializes S as leaf number `offset` of the two-level blake2sp tree.
   Every member of the parameter block is written explicitly because P is an
   uninitialized local and is XORed byte-wise into the state. Returns the
   result of blake2s_init_param. */
static inline int blake2sp_init_leaf( blake2s_state *S, uint8_t outlen, uint8_t keylen, uint64_t offset )
{
  blake2s_param P[1];
  P->digest_length = outlen;
  P->key_length = keylen;
  P->fanout = PARALLELISM_DEGREE; /* one child per parallel lane */
  P->depth = 2;
  P->leaf_length = 0;
  store48( P->node_offset, offset );
  P->node_depth = 0;
  P->inner_length = outlen;
  memset( P->salt, 0, sizeof( P->salt ) );
  memset( P->personal, 0, sizeof( P->personal ) );
  return blake2s_init_param( S, P );
}
/* Initializes S as the root node (node depth 1, offset 0) of the two-level
   blake2sp tree. Every member of the parameter block is written explicitly
   because P is an uninitialized local and is XORed byte-wise into the state.
   Returns the result of blake2s_init_param. */
static inline int blake2sp_init_root( blake2s_state *S, uint8_t outlen, uint8_t keylen )
{
  blake2s_param P[1];
  P->digest_length = outlen;
  P->key_length = keylen;
  P->fanout = PARALLELISM_DEGREE; /* one child per parallel lane */
  P->depth = 2;
  P->leaf_length = 0;
  store48( P->node_offset, 0ULL );
  P->node_depth = 1;
  P->inner_length = outlen;
  memset( P->salt, 0, sizeof( P->salt ) );
  memset( P->personal, 0, sizeof( P->personal ) );
  return blake2s_init_param( S, P );
}
/* Prepares an unkeyed blake2sp state: empties the staging buffer, initializes
   the root and all PARALLELISM_DEGREE leaves, and flags the root and the
   final leaf as last nodes. outlen must be in 1..BLAKE2S_OUTBYTES.
   Returns 0 on success, -1 on failure. */
int blake2sp_init( blake2sp_state *S, const uint8_t outlen )
{
  if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;

  memset( S->buf, 0, sizeof( S->buf ) );
  S->buflen = 0;

  if( blake2sp_init_root( S->R, outlen, 0 ) < 0 )
    return -1;

  for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
    if( blake2sp_init_leaf( S->S[i], outlen, 0, i ) < 0 ) return -1;

  S->R->last_node = 1;
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
  return 0;
}
/* Prepares a keyed blake2sp state. Like blake2sp_init, but the key length is
   recorded in every node's parameter block and the key, zero-padded to one
   block, is absorbed by each leaf as its first block.
   outlen must be in 1..BLAKE2S_OUTBYTES; key must be non-NULL and keylen in
   1..BLAKE2S_KEYBYTES. Returns 0 on success, -1 on failure. */
int blake2sp_init_key( blake2sp_state *S, const uint8_t outlen, const void *key, const uint8_t keylen )
{
  if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;

  if( !key || !keylen || keylen > BLAKE2S_KEYBYTES ) return -1;

  memset( S->buf, 0, sizeof( S->buf ) );
  S->buflen = 0;

  if( blake2sp_init_root( S->R, outlen, keylen ) < 0 )
    return -1;

  for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
    if( blake2sp_init_leaf( S->S[i], outlen, keylen, i ) < 0 ) return -1;

  S->R->last_node = 1;
  S->S[PARALLELISM_DEGREE - 1]->last_node = 1;
  {
    uint8_t block[BLAKE2S_BLOCKBYTES];
    memset( block, 0, BLAKE2S_BLOCKBYTES );
    memcpy( block, key, keylen );

    for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
      blake2s_update( S->S[i], block, BLAKE2S_BLOCKBYTES );

    secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
  }
  return 0;
}
/* Absorbs inlen bytes from in into the blake2sp state.
   Input is consumed in stripes of PARALLELISM_DEGREE blocks; block i of each
   stripe goes to leaf i. Steps:
     1. if bytes are already buffered and the new input completes a stripe,
        top the buffer up and feed that stripe to the leaves;
     2. feed every further complete stripe directly from in (one OpenMP
        thread per leaf when available, otherwise a plain loop);
     3. keep the remaining bytes (less than one stripe) in S->buf.
   Always returns 0. */
int blake2sp_update( blake2sp_state *S, const uint8_t *in, uint64_t inlen )
{
  size_t left = S->buflen;
  size_t fill = sizeof( S->buf ) - left;

  if( left && inlen >= fill )
  {
    memcpy( S->buf + left, in, fill );

    for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
      blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES );

    in += fill;
    inlen -= fill;
    left = 0;
  }

#if defined(_OPENMP)
  #pragma omp parallel shared(S), num_threads(PARALLELISM_DEGREE)
#else
  for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
#endif
  {
#if defined(_OPENMP)
    size_t id__ = omp_get_thread_num();
#endif
    uint64_t inlen__ = inlen;
    const uint8_t *in__ = ( const uint8_t * )in;
    in__ += id__ * BLAKE2S_BLOCKBYTES;

    /* Leaf id__ takes its own block out of every complete stripe. */
    while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES )
    {
      blake2s_update( S->S[id__], in__, BLAKE2S_BLOCKBYTES );
      in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
      inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
    }
  }

  /* Skip the stripes consumed above; only the partial stripe is buffered. */
  in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES );
  inlen %= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;

  if( inlen > 0 )
    memcpy( S->buf + left, in, ( size_t )inlen );

  S->buflen = left + ( size_t )inlen;
  return 0;
}
/* Finishes a blake2sp computation: hands each leaf its share (at most one
   block) of the bytes still buffered, finalizes every leaf, hashes the leaf
   digests in order with the root node and writes outlen bytes of the root
   digest to out. Returns the result of finalizing the root. */
int blake2sp_final( blake2sp_state *S, uint8_t *out, const uint8_t outlen )
{
  uint8_t hash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];

  for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
  {
    if( S->buflen > i * BLAKE2S_BLOCKBYTES )
    {
      size_t left = S->buflen - i * BLAKE2S_BLOCKBYTES;

      /* Leaf i owns only block i of the buffer; later bytes belong to the
         following leaves. */
      if( left > BLAKE2S_BLOCKBYTES ) left = BLAKE2S_BLOCKBYTES;

      blake2s_update( S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, left );
    }

    blake2s_final( S->S[i], hash[i], BLAKE2S_OUTBYTES );
  }

  for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
    blake2s_update( S->R, hash[i], BLAKE2S_OUTBYTES );

  return blake2s_final( S->R, out, outlen );
}
int blake2sp( uint8_t *out, const void *in, const void *key, uint8_t outlen, uint64_t inlen, uint8_t keylen )
blake2s_state S[PARALLELISM_DEGREE][1];
blake2s_state FS[1];
/* Verify parameters */
if ( NULL == in ) return -1;
if ( NULL == out ) return -1;
if ( NULL == key ) keylen = 0;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
if( blake2sp_init_leaf( S[i], outlen, keylen, i ) < 0 ) return -1;
S[PARALLELISM_DEGREE - 1]->last_node = 1; // mark last node
if( keylen > 0 )
uint8_t block[BLAKE2S_BLOCKBYTES];
memset( block, 0, BLAKE2S_BLOCKBYTES );
memcpy( block, key, keylen );
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( S[i], block, BLAKE2S_BLOCKBYTES );
secure_zero_memory( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
#if defined(_OPENMP)
#pragma omp parallel shared(S,hash), num_threads(PARALLELISM_DEGREE)
for( size_t id__ = 0; id__ < PARALLELISM_DEGREE; ++id__ )
#if defined(_OPENMP)
size_t id__ = omp_get_thread_num();
uint64_t inlen__ = inlen;
const uint8_t *in__ = ( const uint8_t * )in;
in__ += id__ * BLAKE2S_BLOCKBYTES;
blake2s_update( S[id__], in__, BLAKE2S_BLOCKBYTES );
if( inlen__ > id__ * BLAKE2S_BLOCKBYTES )
const size_t left = inlen__ - id__ * BLAKE2S_BLOCKBYTES;
const size_t len = left <= BLAKE2S_BLOCKBYTES ? left : BLAKE2S_BLOCKBYTES;
blake2s_update( S[id__], in__, len );
blake2s_final( S[id__], hash[id__], BLAKE2S_OUTBYTES );
if( blake2sp_init_root( FS, outlen, keylen ) < 0 )
return -1;
FS->last_node = 1;
for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
blake2s_update( FS, hash[i], BLAKE2S_OUTBYTES );
blake2s_final( FS, out, outlen );
return 0;
#if defined(BLAKE2SP_SELFTEST)
#include <string.h>
#include "blake2-kat.h"
/* Self-test: hashes every prefix of the byte sequence 0,1,2,... (lengths
   0..KAT_LENGTH-1) under the key 0,1,2,... and compares each digest with
   blake2sp_keyed_kat. Prints "ok" and returns 0 if all match; prints "error"
   and returns -1 on the first failure. */
int main( int argc, char **argv )
{
  uint8_t key[BLAKE2S_KEYBYTES];
  uint8_t buf[KAT_LENGTH];

  for( size_t i = 0; i < BLAKE2S_KEYBYTES; ++i )
    key[i] = ( uint8_t )i;

  for( size_t i = 0; i < KAT_LENGTH; ++i )
    buf[i] = ( uint8_t )i;

  for( size_t i = 0; i < KAT_LENGTH; ++i )
  {
    uint8_t hash[BLAKE2S_OUTBYTES];

    /* A failed call leaves hash unset, so check it before comparing. */
    if( blake2sp( hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES ) < 0 ||
        0 != memcmp( hash, blake2sp_keyed_kat[i], BLAKE2S_OUTBYTES ) )
    {
      puts( "error" );
      return -1;
    }
  }

  puts( "ok" );
  return 0;
}
#endif

sse/makefile Normal file
View File

@ -0,0 +1,19 @@
# Builds the four self-test binaries; each one is compiled with the
# matching *_SELFTEST macro so that its main() is enabled.
CFLAGS=-std=c99 -Wall -pedantic -O3 -march=native

.PHONY: all clean

all: blake2s blake2b blake2sp blake2bp

blake2s: blake2s.c
	$(CC) blake2s.c -o $@ $(CFLAGS) -DBLAKE2S_SELFTEST

blake2b: blake2b.c
	$(CC) blake2b.c -o $@ $(CFLAGS) -DBLAKE2B_SELFTEST

blake2sp: blake2sp.c blake2s.c
	$(CC) blake2sp.c blake2s.c -o $@ $(CFLAGS) -DBLAKE2SP_SELFTEST

blake2bp: blake2bp.c blake2b.c
	$(CC) blake2bp.c blake2b.c -o $@ $(CFLAGS) -DBLAKE2BP_SELFTEST

# Remove objects and the built self-test binaries.
clean:
	rm -rf *.o blake2s blake2b blake2sp blake2bp