TERES/SOFTWARE/A64-TERES/u-boot_new/sprite/encrypt/twofish/twofish_new.c
Dimitar Gamishev 093685c7d8 u-boot
2017-10-13 14:02:55 +03:00

432 lines
12 KiB
C

/* This is an independent implementation of the encryption algorithm: */
/* */
/* Twofish by Bruce Schneier and colleagues */
/* */
/* which is a candidate algorithm in the Advanced Encryption Standard */
/* programme of the US National Institute of Standards and Technology. */
/* */
/* Copyright in this implementation is held by Dr B R Gladman but I */
/* hereby give permission for its free direct or derivative use subject */
/* to acknowledgment of its origin and compliance with any conditions */
/* that the originators of t he algorithm place on its exploitation. */
/* */
/* My thanks to Doug Whiting and Niels Ferguson for comments that led */
/* to improvements in this implementation. */
/* */
/* Dr Brian Gladman (gladman@seven77.demon.co.uk) 14th January 1999 */
/* Slightly modificatied for libzdt by Jeremy Tregunna. */
/* $Id: twofish.c,v 1.1.1.1 1999/10/15 22:49:23 nmav Exp $ */
/* Timing data for Twofish (twofish.c)
128 bit key:
Key Setup: 8414 cycles
Encrypt: 376 cycles = 68.1 mbits/sec
Decrypt: 374 cycles = 68.4 mbits/sec
Mean: 375 cycles = 68.3 mbits/sec
192 bit key:
Key Setup: 11628 cycles
Encrypt: 376 cycles = 68.1 mbits/sec
Decrypt: 374 cycles = 68.4 mbits/sec
Mean: 375 cycles = 68.3 mbits/sec
256 bit key:
Key Setup: 15457 cycles
Encrypt: 381 cycles = 67.2 mbits/sec
Decrypt: 374 cycles = 68.4 mbits/sec
Mean: 378 cycles = 67.8 mbits/sec
*/
#include "twofish_new.h"
/* u32 k_len;
* u32 l_key[40];
* u32 s_key[4];
*/
/* Extract byte from a 32 bit quantity (little endian notation) */
#define byte(x,n) ((unsigned char)((x) >> (n << 3)))
/* finite field arithmetic for GF(2**8) with the modular */
/* polynomial x^8 + x^6 + x^5 + x^3 + 1 (0x169) */
#define G_M 0x0169
unsigned char tab_5b[4] = { 0, G_M >> 2, G_M >> 1, (G_M >> 1) ^ (G_M >> 2) };
unsigned char tab_ef[4] = { 0, (G_M >> 1) ^ (G_M >> 2), G_M >> 1, G_M >> 2 };
#define ffm_01(x) (x)
#define ffm_5b(x) ((x) ^ ((x) >> 2) ^ tab_5b[(x) & 3])
#define ffm_ef(x) ((x) ^ ((x) >> 1) ^ ((x) >> 2) ^ tab_ef[(x) & 3])
unsigned char ror4[16] = { 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 };
unsigned char ashx[16] = { 0, 9, 2, 11, 4, 13, 6, 15, 8, 1, 10, 3, 12, 5, 14, 7 };
unsigned char qt0[2][16] = {
{8, 1, 7, 13, 6, 15, 3, 2, 0, 11, 5, 9, 14, 12, 10, 4},
{2, 8, 11, 13, 15, 7, 6, 14, 3, 1, 9, 4, 0, 10, 12, 5}
};
unsigned char qt1[2][16] = {
{14, 12, 11, 8, 1, 2, 3, 5, 15, 4, 10, 6, 7, 0, 9, 13},
{1, 14, 2, 11, 4, 12, 3, 7, 6, 13, 10, 5, 15, 9, 0, 8}
};
unsigned char qt2[2][16] = {
{11, 10, 5, 14, 6, 13, 9, 0, 12, 8, 15, 3, 2, 4, 7, 1},
{4, 12, 7, 5, 1, 6, 9, 10, 0, 14, 13, 8, 2, 11, 3, 15}
};
unsigned char qt3[2][16] = {
{13, 7, 15, 4, 1, 2, 6, 14, 9, 11, 3, 0, 8, 5, 12, 10},
{11, 9, 5, 1, 12, 3, 13, 14, 6, 4, 7, 15, 2, 0, 8, 10}
};
unsigned char
qp(const uint n,const unsigned char x)
{
unsigned char a0,a1,a2,a3,a4,b0,b1,b2,b3,b4;
a0 = x >> 4;
b0 = x & 15;
a1 = a0 ^ b0;
b1 = ror4[b0] ^ ashx[a0];
a2 = qt0[n][a1];
b2 = qt1[n][b1];
a3 = a2 ^ b2;
b3 = ror4[b2] ^ ashx[a2];
a4 = qt2[n][a3];
b4 = qt3[n][b3];
return (b4 << 4) | a4;
}
#define q(n,x) pkey->q_tab[n][x]
void
gen_qtab(TWI *pkey)
{
uint i;
for(i = 0; i < 256; ++i) {
q(0,i) = qp(0,(unsigned char)i);
q(1,i) = qp(1,(unsigned char)i);
}
}
void gen_mtab(TWI *pkey)
{
uint i,f01,f5b,fef;
for(i = 0; i < 256; ++i) {
f01 = q(1,i);
f5b = ffm_5b(f01);
fef = ffm_ef(f01);
pkey->m_tab[0][i] =
f01 + (f5b << 8) + (fef << 16) + (fef << 24);
pkey->m_tab[2][i] =
f5b + (fef << 8) + (f01 << 16) + (fef << 24);
f01 = q(0,i);
f5b = ffm_5b(f01);
fef = ffm_ef(f01);
pkey->m_tab[1][i] =
fef + (fef << 8) + (f5b << 16) + (f01 << 24);
pkey->m_tab[3][i] =
f5b + (f01 << 8) + (fef << 16) + (f5b << 24);
}
}
#define mds(n,x) pkey->m_tab[n][x]
uint h_fun(TWI *pkey,const uint x,const uint key[])
{
uint b0,b1,b2,b3;
b0 = byte(x, 0);
b1 = byte(x, 1);
b2 = byte(x, 2);
b3 = byte(x, 3);
switch (pkey->k_len) {
case 4:
b0 = q(1, b0) ^ byte(key[3], 0);
b1 = q(0, b1) ^ byte(key[3], 1);
b2 = q(0, b2) ^ byte(key[3], 2);
b3 = q(1, b3) ^ byte(key[3], 3);
case 3:
b0 = q(1, b0) ^ byte(key[2], 0);
b1 = q(1, b1) ^ byte(key[2], 1);
b2 = q(0, b2) ^ byte(key[2], 2);
b3 = q(0, b3) ^ byte(key[2], 3);
case 2:
b0 = q(0, q(0, b0) ^ byte(key[1], 0)) ^ byte(key[0], 0);
b1 = q(0, q(1, b1) ^ byte(key[1], 1)) ^ byte(key[0], 1);
b2 = q(1, q(0, b2) ^ byte(key[1], 2)) ^ byte(key[0], 2);
b3 = q(1, q(1, b3) ^ byte(key[1], 3)) ^ byte(key[0], 3);
}
return mds(0, b0) ^ mds(1, b1) ^ mds(2, b2) ^ mds(3, b3);
}
#define q20(x) q(0,q(0,x) ^ byte(key[1],0)) ^ byte(key[0],0)
#define q21(x) q(0,q(1,x) ^ byte(key[1],1)) ^ byte(key[0],1)
#define q22(x) q(1,q(0,x) ^ byte(key[1],2)) ^ byte(key[0],2)
#define q23(x) q(1,q(1,x) ^ byte(key[1],3)) ^ byte(key[0],3)
#define q30(x) q(0,q(0,q(1, x) ^ byte(key[2],0)) ^ byte(key[1],0)) ^ byte(key[0],0)
#define q31(x) q(0,q(1,q(1, x) ^ byte(key[2],1)) ^ byte(key[1],1)) ^ byte(key[0],1)
#define q32(x) q(1,q(0,q(0, x) ^ byte(key[2],2)) ^ byte(key[1],2)) ^ byte(key[0],2)
#define q33(x) q(1,q(1,q(0, x) ^ byte(key[2],3)) ^ byte(key[1],3)) ^ byte(key[0],3)
#define q40(x) q(0,q(0,q(1, q(1, x) ^ byte(key[3],0)) ^ byte(key[2],0)) ^ byte(key[1],0)) ^ byte(key[0],0)
#define q41(x) q(0,q(1,q(1, q(0, x) ^ byte(key[3],1)) ^ byte(key[2],1)) ^ byte(key[1],1)) ^ byte(key[0],1)
#define q42(x) q(1,q(0,q(0, q(0, x) ^ byte(key[3],2)) ^ byte(key[2],2)) ^ byte(key[1],2)) ^ byte(key[0],2)
#define q43(x) q(1,q(1,q(0, q(1, x) ^ byte(key[3],3)) ^ byte(key[2],3)) ^ byte(key[1],3)) ^ byte(key[0],3)
void gen_mk_tab(TWI *pkey, uint key[])
{
uint i;
unsigned char by;
switch (pkey->k_len) {
case 2:
for (i = 0; i < 256; ++i) {
by = (unsigned char) i;
pkey->mk_tab[0][i] = mds(0, q20(by));
pkey->mk_tab[1][i] = mds(1, q21(by));
pkey->mk_tab[2][i] = mds(2, q22(by));
pkey->mk_tab[3][i] = mds(3, q23(by));
}
break;
case 3:
for (i = 0; i < 256; ++i) {
by = (unsigned char) i;
pkey->mk_tab[0][i] = mds(0, q30(by));
pkey->mk_tab[1][i] = mds(1, q31(by));
pkey->mk_tab[2][i] = mds(2, q32(by));
pkey->mk_tab[3][i] = mds(3, q33(by));
}
break;
case 4:
for (i = 0; i < 256; ++i) {
by = (unsigned char) i;
pkey->mk_tab[0][i] = mds(0, q40(by));
pkey->mk_tab[1][i] = mds(1, q41(by));
pkey->mk_tab[2][i] = mds(2, q42(by));
pkey->mk_tab[3][i] = mds(3, q43(by));
}
}
}
/* The (12,8) Reed Soloman code has the generator polynomial
g(x) = x^4 + (a + 1/a) * x^3 + a * x^2 + (a + 1/a) * x + 1
where the coefficients are in the finite field GF(2^8) with a
modular polynomial a^8 + a^6 + a^3 + a^2 + 1. To generate the
remainder we have to start with a 12th order polynomial with our
eight input bytes as the coefficients of the 4th to 11th terms.
That is:
m[7] * x^11 + m[6] * x^10 ... + m[0] * x^4 + 0 * x^3 +... + 0
We then multiply the generator polynomial by m[7] * x^7 and subtract
it - xor in GF(2^8) - from the above to eliminate the x^7 term (the
artihmetic on the coefficients is done in GF(2^8). We then multiply
the generator polynomial by x^6 * coeff(x^10) and use this to remove
the x^10 term. We carry on in this way until the x^4 term is removed
so that we are left with:
r[3] * x^3 + r[2] * x^2 + r[1] 8 x^1 + r[0]
which give the resulting 4 bytes of the remainder. This is equivalent
to the matrix multiplication in the Twofish description but much faster
to implement.
*/
#define G_MOD 0x0000014d
uint mds_rem(uint p0,uint p1)
{
uint i,t,u;
for (i = 0; i < 8; ++i) {
t = p1 >> 24; /* get most significant coefficient */
p1 = (p1 << 8) | (p0 >> 24);
p0 <<= 8; /* shift others up */
/* multiply t by a (the primitive element - i.e. left shift) */
u = (t << 1);
if (t & 0x80)
/* subtract modular polynomial on overflow */
u ^= G_MOD;
p1 ^= t ^ (u << 16); /* remove t * (a * x^2 + 1) */
u ^= (t >> 1); /* form u = a * t + t / a = t * (a + 1 / a); */
if (t & 0x01)
/* add the modular polynomial on underflow */
u ^= G_MOD >> 1;
p1 ^= (u << 24) | (u << 8); /* remove t * (a + 1/a) * (x^3 + x) */
}
return p1;
}
/* initialise the key schedule from the user supplied key */
void twofish_new_set_key(TWI *pkey,const uint in_key[],const uint key_len)
{
uint i, a, b, me_key[4], mo_key[4];
pkey->qt_gen = 0;
if (!pkey->qt_gen) {
gen_qtab(pkey);
pkey->qt_gen = 1;
}
pkey->mt_gen = 0;
if (!pkey->mt_gen) {
gen_mtab(pkey);
pkey->mt_gen = 1;
}
pkey->k_len = (key_len * 8) / 64; /* 2, 3 or 4 */
for (i = 0; i < pkey->k_len; ++i) {
a = in_key[i + i];
me_key[i] = a;
b = in_key[i + i + 1];
mo_key[i] = b;
pkey->s_key[pkey->k_len - i - 1] = mds_rem(a, b);
}
for (i = 0; i < 40; i += 2) {
a = 0x01010101 * i;
b = a + 0x01010101;
a = h_fun(pkey, a, me_key);
b = rotl(h_fun(pkey, b, mo_key), 8);
pkey->l_key[i] = a + b;
pkey->l_key[i + 1] = rotl(a + 2 * b, 9);
}
gen_mk_tab(pkey, (uint *)pkey->s_key);
}
/* decrypt a block of text */
/*
void my_i_rnd(TWI * pkey, uint * blk , uint i)
{
uint t0 = 0;
uint t1 = 0;
uint t2 = 0;
uint i_sum = 0;
uint *key = (uint *)pkey->l_key;
uint *mk_tab0 = (uint *)&(pkey->mk_tab[0]);
uint *mk_tab1 = (uint *)&(pkey->mk_tab[1]);
uint *mk_tab2 = (uint *)&(pkey->mk_tab[2]);
uint *mk_tab3 = (uint *)&(pkey->mk_tab[3]);
unsigned char *buff = (unsigned char *)blk;
i_sum = i << 2;
t0 = mk_tab0[buff[0]] ^ mk_tab1[buff[1]] ^ mk_tab2[buff[2]] ^ mk_tab3[buff[3]];
t1 = mk_tab0[buff[7]] ^ mk_tab1[buff[4]] ^ mk_tab2[buff[5]] ^ mk_tab3[buff[6]];
t2 = t1 + t0;
blk[2] = rotl(blk[2], 1) ^ (t2 + key[i_sum + 10]);
blk[3] = rotr(blk[3] ^ (t2 + t1 + key[i_sum + 11]), 1);
t0 = mk_tab0[buff[8]] ^ mk_tab1[buff[9]] ^ mk_tab2[buff[10]] ^ mk_tab3[buff[11]];
t1 = mk_tab0[buff[15]] ^ mk_tab1[buff[12]] ^ mk_tab2[buff[13]] ^ mk_tab3[buff[14]];
t2 = t1 + t0;
blk[0] = rotl(blk[0], 1) ^ (t2 + key[i_sum + 8]);
blk[1] = rotr(blk[1] ^ (t2 + t1 + key[i_sum + 9]), 1);
}
*/
void twofish_new_decrypt(TWI * pkey, uint * in_blk, uint * out_blk)
{
uint blk[4], i;
uint *key = (uint *)pkey->l_key;
uint t0, t1, t2;
uint i_sum;
uint *mk_tab0 = (uint *)&(pkey->mk_tab[0]);
uint *mk_tab1 = (uint *)&(pkey->mk_tab[1]);
uint *mk_tab2 = (uint *)&(pkey->mk_tab[2]);
uint *mk_tab3 = (uint *)&(pkey->mk_tab[3]);
unsigned char *buff = (unsigned char *)blk;
blk[0] = in_blk[0] ^ key[4];
blk[1] = in_blk[1] ^ key[5];
blk[2] = in_blk[2] ^ key[6];
blk[3] = in_blk[3] ^ key[7];
for(i = 7; i <= 7; i--)
{
i_sum = i << 2;
t0 = mk_tab0[buff[0]] ^ mk_tab1[buff[1]] ^ mk_tab2[buff[2]] ^ mk_tab3[buff[3]];
t1 = mk_tab0[buff[7]] ^ mk_tab1[buff[4]] ^ mk_tab2[buff[5]] ^ mk_tab3[buff[6]];
t2 = t1 + t0;
//blk[2] = rotl(blk[2], 1) ^ (t2 + key[i_sum + 10]);
//blk[3] = rotr(blk[3] ^ (t2 + t1 + key[i_sum + 11]), 1);
blk[2] = ((blk[2] << 1) | (blk[2] >> (32 - 1))) ^ (t2 + key[i_sum + 10]);
blk[3] = (((blk[3] ^ (t2 + t1 + key[i_sum + 11])) >> 1) | ((blk[3] ^ (t2 + t1 + key[i_sum + 11])) << (32 - 1)));
t0 = mk_tab0[buff[8]] ^ mk_tab1[buff[9]] ^ mk_tab2[buff[10]] ^ mk_tab3[buff[11]];
t1 = mk_tab0[buff[15]] ^ mk_tab1[buff[12]] ^ mk_tab2[buff[13]] ^ mk_tab3[buff[14]];
t2 = t1 + t0;
//blk[0] = rotl(blk[0], 1) ^ (t2 + key[i_sum + 8]);
//blk[1] = rotr(blk[1] ^ (t2 + t1 + key[i_sum + 9]), 1);
blk[0] = ((blk[0] << 1) | (blk[0] >> (32 - 1))) ^ (t2 + key[i_sum + 8]);
blk[1] = (((blk[1] ^ (t2 + t1 + key[i_sum + 9])) >> 1) | ((blk[1] ^ (t2 + t1 + key[i_sum + 9])) << (32 - 1)));
}
out_blk[0] = blk[2] ^ pkey->l_key[0];
out_blk[1] = blk[3] ^ pkey->l_key[1];
out_blk[2] = blk[0] ^ pkey->l_key[2];
out_blk[3] = blk[1] ^ pkey->l_key[3];
return;
}