You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
trezor-firmware/crypto/aes/gf_mul_lo.h

774 lines
27 KiB

/*
---------------------------------------------------------------------------
Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation.
This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 18/02/2014
This file provides the low level primitives needed for Galois Field
operations in GF(2^128) for the four most likely field representations.
*/
#ifndef _GF_MUL_LO_H
#define _GF_MUL_LO_H
#if defined( USE_INLINING )
# if defined( _MSC_VER )
# define gf_decl __inline
# elif defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# define gf_decl static inline
# else
# define gf_decl static
# endif
#endif
#if 0 /* used for testing only: t1(UNIT_BITS), t2(UNIT_BITS) */
# define _t1(n) bswap ## n ## _block(x, x)
# define t1(n) _t1(n)
# define _t2(n) bswap ## n ## _block(x, x); bswap ## n ## _block(r, r)
# define t2(n) _t2(n)
#endif
#define gf_m(n,x) gf_mulx ## n ## x
#define gf_mulx1(x) gf_m(1,x)
#define gf_mulx4(x) gf_m(4,x)
#define gf_mulx8(x) gf_m(8,x)
#define MASK(x) ((x) * (UNIT_CAST(-1,UNIT_BITS) / 0xff))
#define DATA_256(q) {\
q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) }
/* Within the 16 bytes of the field element the top and bottom field bits
are within bytes as follows (bit numbers in bytes 0 from ls up) for
each of the four field representations supported (see gf128mul.txt):
GF_BIT 127 126 125 124 123 122 121 120 ..... 7 6 5 4 3 2 1 0
0x87 1 0 0 0 0 1 1 1
BL x[ 0] 7 6 5 4 3 2 1 0 x[15] 7 6 5 4 3 2 1 0
LL x[15] 7 6 5 4 3 2 1 0 x[ 0] 7 6 5 4 3 2 1 0
GF_BIT 120 121 122 123 124 125 126 127 ..... 0 1 2 3 4 5 6 7
0xc1 1 1 1 0 0 0 0 1
BB x[ 0] 7 6 5 4 3 2 1 0 x[15] 7 6 5 4 3 2 1 0
LB x[15] 7 6 5 4 3 2 1 0 x[ 0] 7 6 5 4 3 2 1 0
When the field element is multiplied by x^n, the high bits overflow
and are used to form an overflow byte. For the BL and LL modes this
byte has the lowest overflow bit in bit 0 whereas for the BB and LB
modes this bit is in biit 7. So we have for this byte:
bit (bit n = 2^n) 7 6 5 4 3 2 1 0
BL and LL x^7 x^6 x^5 x^4 x^3 x^2 x^1 x^0
BB and LB x^0 x^1 x^2 x^3 x^4 x^5 x^6 x^7
This byte then has to be multiplied by the low bits of the field
polynomial, which produces a value of 16 bits to be xored into the
left shifted field value. For the BL and LL modes bit 0 gives the
word value 0x0087, bit 1 gives 0x010e (0x87 left shifted 1), 0x021c
(0x87 left shifted 2), ... For the BB and LB modes, bit 7 gives the
value 0x00e1, bit 6 gives 0x8070, bit 5 gives 0x4038, ... Each bit
in the overflow byte is expanded in this way and is xored into the
overall result, so eaach of the 256 byte values will produce a
corresponding word value that is computed by the gf_uint16_xor(i)
macros below.
These word values have to be xored into the low 16 bits of the
field value. If the byte endianess of the mode matches that of
the architecture xoring the word value will be correct. But if
the mode has the opposite endianess, the word value has to be
xored in byte reversed order. This is done by the ord() macro.
*/
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN \
&& (defined( GF_MODE_LB ) || defined( GF_MODE_LL )) || \
PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN \
&& (defined( GF_MODE_BB ) || defined( GF_MODE_BL ))
# define ord(hi, lo) 0x##hi##lo
#else
# define ord(hi, lo) 0x##lo##hi
#endif
#if defined( GF_MODE_BL ) || defined( GF_MODE_LL )
/* field and numeric bit significance correspond */
#define gf_uint16_xor(i) ( \
(i & 0x01 ? ord(00,87) : 0) ^ (i & 0x02 ? ord(01,0e) : 0) ^ \
(i & 0x04 ? ord(02,1c) : 0) ^ (i & 0x08 ? ord(04,38) : 0) ^ \
(i & 0x10 ? ord(08,70) : 0) ^ (i & 0x20 ? ord(10,e0) : 0) ^ \
(i & 0x40 ? ord(21,c0) : 0) ^ (i & 0x80 ? ord(43,80) : 0) )
enum x_bit
{
X_0 = 0x01, X_1 = 0x02, X_2 = 0x04, X_3 = 0x08,
X_4 = 0x10, X_5 = 0x20, X_6 = 0x40, X_7 = 0x80
};
#elif defined( GF_MODE_BB ) || defined( GF_MODE_LB )
/* field and numeric bit significance are in reverse */
#define gf_uint16_xor(i) ( \
(i & 0x80 ? ord(00,e1) : 0) ^ (i & 0x40 ? ord(80,70) : 0) ^ \
(i & 0x20 ? ord(40,38) : 0) ^ (i & 0x10 ? ord(20,1c) : 0) ^ \
(i & 0x08 ? ord(10,0e) : 0) ^ (i & 0x04 ? ord(08,07) : 0) ^ \
(i & 0x02 ? ord(84,03) : 0) ^ (i & 0x01 ? ord(c2,01) : 0) )
enum x_bit
{
X_0 = 0x80, X_1 = 0x40, X_2 = 0x20, X_3 = 0x10,
X_4 = 0x08, X_5 = 0x04, X_6 = 0x02, X_7 = 0x01
};
#else
#error Galois Field representation has not been set
#endif
const uint16_t gf_tab[256] = DATA_256(gf_uint16_xor);
/* LL Mode Galois Field operations
x[0] x[1] x[2] x[3] x[4] x[5] x[6] x[7]
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
10000111 ........ ........ ........ ........ ........ ........ ........
07....00 15....08 23....16 31....24 39....32 47....40 55....48 63....56
x[8] x[9] x[10] x[11] x[12] x[13] x[14] x[15]
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
........ ........ ........ ........ ........ ........ ........ M.......
71....64 79....72 87....80 95....88 103...96 111..104 119..112 127..120
*/
#if UNIT_BITS == 64
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
#define f1_ll(n,r,x) r[n] = (x[n] << 1) | (n ? x[n-1] >> 63 : 0)
#define f4_ll(n,r,x) r[n] = (x[n] << 4) | (n ? x[n-1] >> 60 : 0)
#define f8_ll(n,r,x) r[n] = (x[n] << 8) | (n ? x[n-1] >> 56 : 0)
#else
#define f1_ll(n,r,x) r[n] = ((x[n] << 1) & ~MASK(0x01)) | (((x[n] >> 15) \
| (n ? x[n-1] << 49 : 0)) & MASK(0x01))
#define f4_ll(n,r,x) r[n] = ((x[n] << 4) & ~MASK(0x0f)) | (((x[n] >> 12) \
| (n ? x[n-1] << 52 : 0)) & MASK(0x0f))
#define f8_ll(n,r,x) r[n] = (x[n] >> 8) | (n ? x[n-1] << 56 : 0)
#endif
gf_decl void gf_mulx1_ll(gf_t r, const gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = gf_tab[(UNIT_PTR(x)[1] >> 63) & 0x01];
#else
_tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[1] >> 7) & 0x01])) << 48;
#endif
rep2_d2(f1_ll, UNIT_PTR(r), UNIT_PTR(x));
UNIT_PTR(r)[0] ^= _tt;
}
gf_decl void gf_mulx4_ll(gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = gf_tab[(UNIT_PTR(x)[1] >> 60) & 0x0f];
#else
_tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[1] >> 4) & 0x0f])) << 48;
#endif
rep2_d2(f4_ll, UNIT_PTR(x), UNIT_PTR(x));
UNIT_PTR(x)[0] ^= _tt;
}
gf_decl void gf_mulx8_ll(gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = gf_tab[UNIT_PTR(x)[1] >> 56];
#else
_tt = ((gf_unit_t)(gf_tab[UNIT_PTR(x)[1] & 0xff])) << 48;
#endif
rep2_d2(f8_ll, UNIT_PTR(x), UNIT_PTR(x));
UNIT_PTR(x)[0] ^= _tt;
}
#elif UNIT_BITS == 32
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
#define f1_ll(n,r,x) r[n] = (x[n] << 1) | (n ? x[n-1] >> 31 : 0)
#define f4_ll(n,r,x) r[n] = (x[n] << 4) | (n ? x[n-1] >> 28 : 0)
#define f8_ll(n,r,x) r[n] = (x[n] << 8) | (n ? x[n-1] >> 24 : 0)
#else
#define f1_ll(n,r,x) r[n] = ((x[n] << 1) & ~MASK(0x01)) | (((x[n] >> 15) \
| (n ? x[n-1] << 17 : 0)) & MASK(0x01))
#define f4_ll(n,r,x) r[n] = ((x[n] << 4) & ~MASK(0x0f)) | (((x[n] >> 12) \
| (n ? x[n-1] << 20 : 0)) & MASK(0x0f))
#define f8_ll(n,r,x) r[n] = (x[n] >> 8) | (n ? x[n-1] << 24 : 0)
#endif
gf_decl void gf_mulx1_ll(gf_t r, const gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = gf_tab[(UNIT_PTR(x)[3] >> 31) & 0x01];
#else
_tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[3] >> 7) & 0x01])) << 16;
#endif
rep2_d4(f1_ll, UNIT_PTR(r), UNIT_PTR(x));
UNIT_PTR(r)[0] ^= _tt;
}
gf_decl void gf_mulx4_ll(gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = gf_tab[(UNIT_PTR(x)[3] >> 28) & 0x0f];
#else
_tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[3] >> 4) & 0x0f])) << 16;
#endif
rep2_d4(f4_ll, UNIT_PTR(x), UNIT_PTR(x));
UNIT_PTR(x)[0] ^= _tt;
}
gf_decl void gf_mulx8_ll(gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = gf_tab[UNIT_PTR(x)[3] >> 24];
#else
_tt = ((gf_unit_t)(gf_tab[UNIT_PTR(x)[3] & 0xff])) << 16;
#endif
rep2_d4(f8_ll, UNIT_PTR(x), UNIT_PTR(x));
UNIT_PTR(x)[0] ^= _tt;
}
#else
#define f1_ll(n,r,x) r[n] = (x[n] << 1) | (n ? x[n-1] >> 7 : 0)
#define f4_ll(n,r,x) r[n] = (x[n] << 4) | (n ? x[n-1] >> 4 : 0)
gf_decl void gf_mulx1_ll(gf_t r, const gf_t x)
{ uint16_t _tt = 0;
_tt = gf_tab[(UNIT_PTR(x)[15] >> 7) & 0x01];
rep2_d16(f1_ll, UNIT_PTR(r), UNIT_PTR(x));
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
UNIT_PTR(r)[0] ^= _tt & 0xff;
#else
UNIT_PTR(r)[0] ^= _tt >> 8;
#endif
}
gf_decl void gf_mulx4_ll(gf_t x)
{ uint16_t _tt = 0;
_tt = gf_tab[(UNIT_PTR(x)[15] >> 4) & 0x0f];
rep2_d16(f4_ll, UNIT_PTR(x), UNIT_PTR(x));
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
UNIT_PTR(x)[1] ^= _tt >> 8;
UNIT_PTR(x)[0] ^= _tt & 0xff;
#else
UNIT_PTR(x)[1] ^= _tt & 0xff;
UNIT_PTR(x)[0] = _tt >> 8;
#endif
}
gf_decl void gf_mulx8_ll(gf_t x)
{ uint16_t _tt = 0;
_tt = gf_tab[UNIT_PTR(x)[15]];
memmove(UNIT_PTR(x) + 1, UNIT_PTR(x), 15);
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
UNIT_PTR(x)[1] ^= _tt >> 8;
UNIT_PTR(x)[0] = _tt & 0xff;
#else
UNIT_PTR(x)[1] ^= _tt & 0xff;
UNIT_PTR(x)[0] = _tt >> 8;
#endif
}
#endif
/* BL Mode Galois Field operations
x[0] x[1] x[2] x[3] x[4] x[5] x[6] x[7]
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
M....... ........ ........ ........ ........ ........ ........ ........
127..120 119..112 111..104 103...96 95....88 87....80 79....72 71....64
x[8] x[9] x[10] x[11] x[12] x[13] x[14] x[15]
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
........ ........ ........ ........ ........ ........ ........ 10000111
63....56 55....48 47....40 39....32 31....24 23....16 15....08 07....00
*/
#if UNIT_BITS == 64
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
#define f1_bl(n,r,x) r[n] = ((x[n] << 1) & ~MASK(0x01)) | (((x[n] >> 15) \
| (!n ? x[n+1] << 49 : 0)) & MASK(0x01))
#define f4_bl(n,r,x) r[n] = ((x[n] << 4) & ~MASK(0x0f)) | (((x[n] >> 12) \
| (!n ? x[n+1] << 52 : 0)) & MASK(0x0f))
#define f8_bl(n,r,x) r[n] = (x[n] >> 8) | (!n ? x[n+1] << 56 : 0)
#else
#define f1_bl(n,r,x) r[n] = (x[n] << 1) | (!n ? x[n+1] >> 63 : 0)
#define f4_bl(n,r,x) r[n] = (x[n] << 4) | (!n ? x[n+1] >> 60 : 0)
#define f8_bl(n,r,x) r[n] = (x[n] << 8) | (!n ? x[n+1] >> 56 : 0)
#endif
gf_decl void gf_mulx1_bl(gf_t r, const gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] >> 7) & 0x01])) << 48;
#else
_tt = gf_tab[(UNIT_PTR(x)[0] >> 63) & 0x01];
#endif
rep2_u2(f1_bl, UNIT_PTR(r), UNIT_PTR(x));
UNIT_PTR(r)[1] ^= _tt;
}
gf_decl void gf_mulx4_bl(gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] >> 4) & 0x0f])) << 48;
#else
_tt = gf_tab[(UNIT_PTR(x)[0] >> 60) & 0x0f];
#endif
rep2_u2(f4_bl, UNIT_PTR(x), UNIT_PTR(x));
UNIT_PTR(x)[1] ^= _tt;
}
gf_decl void gf_mulx8_bl(gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = ((gf_unit_t)(gf_tab[UNIT_PTR(x)[0] & 0xff])) << 48;
#else
_tt = gf_tab[(UNIT_PTR(x)[0] >> 56) & 0xff];
#endif
rep2_u2(f8_bl, UNIT_PTR(x), UNIT_PTR(x));
UNIT_PTR(x)[1] ^= _tt;
}
#elif UNIT_BITS == 32
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
#define f1_bl(n,r,x) r[n] = ((x[n] << 1) & ~MASK(0x01)) | (((x[n] >> 15) \
| (n < 3 ? x[n+1] << 17 : 0)) & MASK(0x01))
#define f4_bl(n,r,x) r[n] = ((x[n] << 4) & ~MASK(0x0f)) | (((x[n] >> 12) \
| (n < 3 ? x[n+1] << 20 : 0)) & MASK(0x0f))
#define f8_bl(n,r,x) r[n] = (x[n] >> 8) | (n < 3 ? x[n+1] << 24 : 0)
#else
#define f1_bl(n,r,x) r[n] = (x[n] << 1) | (n < 3 ? x[n+1] >> 31 : 0)
#define f4_bl(n,r,x) r[n] = (x[n] << 4) | (n < 3 ? x[n+1] >> 28 : 0)
#define f8_bl(n,r,x) r[n] = (x[n] << 8) | (n < 3 ? x[n+1] >> 24 : 0)
#endif
gf_decl void gf_mulx1_bl(gf_t r, const gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] >> 7) & 0x01])) << 16;
#else
_tt = gf_tab[(UNIT_PTR(x)[0] >> 31) & 0x01];
#endif
rep2_u4(f1_bl, UNIT_PTR(r), UNIT_PTR(x));
UNIT_PTR(r)[3] ^= _tt;
}
gf_decl void gf_mulx4_bl(gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] >> 4) & 0x0f])) << 16;
#else
_tt = gf_tab[(UNIT_PTR(x)[0] >> 28) & 0x0f];
#endif
rep2_u4(f4_bl, UNIT_PTR(x), UNIT_PTR(x));
UNIT_PTR(x)[3] ^= _tt;
}
gf_decl void gf_mulx8_bl(gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = ((gf_unit_t)(gf_tab[UNIT_PTR(x)[0] & 0xff])) << 16;
#else
_tt = gf_tab[(UNIT_PTR(x)[0] >> 24) & 0xff];
#endif
rep2_u4(f8_bl, UNIT_PTR(x), UNIT_PTR(x));
UNIT_PTR(x)[3] ^= _tt;
}
#else
#define f1_bl(n,r,x) r[n] = (x[n] << 1) | (n < 15 ? x[n+1] >> 7 : 0)
#define f4_bl(n,r,x) r[n] = (x[n] << 4) | (n < 15 ? x[n+1] >> 4 : 0)
gf_decl void gf_mulx1_bl(gf_t r, const gf_t x)
{ uint16_t _tt = 0;
_tt = gf_tab[(UNIT_PTR(x)[0] >> 7) & 0x01];
rep2_u16(f1_bl, UNIT_PTR(r), UNIT_PTR(x));
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
UNIT_PTR(r)[15] ^= _tt >> 8;
#else
UNIT_PTR(r)[15] ^= _tt & 0xff;
#endif
}
gf_decl void gf_mulx4_bl(gf_t x)
{ uint16_t _tt = 0;
_tt = gf_tab[(UNIT_PTR(x)[0] >> 4) & 0x0f];
rep2_u16(f4_bl, UNIT_PTR(x), UNIT_PTR(x));
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
UNIT_PTR(x)[14] ^= _tt & 0xff;
UNIT_PTR(x)[15] ^= _tt >> 8;
#else
UNIT_PTR(x)[14] ^= _tt >> 8;
UNIT_PTR(x)[15] = _tt & 0xff;
#endif
}
gf_decl void gf_mulx8_bl(gf_t x)
{ uint16_t _tt = 0;
_tt = gf_tab[UNIT_PTR(x)[0]];
memmove(UNIT_PTR(x), UNIT_PTR(x) + 1, 15);
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
UNIT_PTR(x)[14] ^= _tt & 0xff;
UNIT_PTR(x)[15] = _tt >> 8;
#else
UNIT_PTR(x)[14] ^= _tt >> 8;
UNIT_PTR(x)[15] = _tt & 0xff;
#endif
}
#endif
/* LB Mode Galois Field operations
x[0] x[1] x[2] x[3] x[4] x[5] x[6] x[7]
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
11100001 ........ ........ ........ ........ ........ ........ ........
00....07 08....15 16....23 24....31 32....39 40....47 48....55 56....63
x[8] x[9] x[10] x[11] x[12] x[13] x[14] x[15]
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
........ ........ ........ ........ ........ ........ ........ .......M
64....71 72....79 80....87 88....95 96...103 104..111 112..119 120..127
*/
#if UNIT_BITS == 64
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
#define f1_lb(n,r,x) r[n] = ((x[n] >> 1) & ~MASK(0x80)) | (((x[n] << 15) \
| (n ? x[n-1] >> 49 : 0)) & MASK(0x80))
#define f4_lb(n,r,x) r[n] = ((x[n] >> 4) & ~MASK(0xf0)) | (((x[n] << 12) \
| (n ? x[n-1] >> 52 : 0)) & MASK(0xf0))
#define f8_lb(n,r,x) r[n] = (x[n] << 8) | (n ? x[n-1] >> 56 : 0)
#else
#define f1_lb(n,r,x) r[n] = (x[n] >> 1) | (n ? x[n-1] << 63 : 0)
#define f4_lb(n,r,x) r[n] = (x[n] >> 4) | (n ? x[n-1] << 60 : 0)
#define f8_lb(n,r,x) x[n] = (x[n] >> 8) | (n ? x[n-1] << 56 : 0)
#endif
gf_decl void gf_mulx1_lb(gf_t r, const gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = gf_tab[(UNIT_PTR(x)[1] >> 49) & MASK(0x80)];
#else
_tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[1] << 7) & 0xff])) << 48;
#endif
rep2_d2(f1_lb, UNIT_PTR(r), UNIT_PTR(x));
UNIT_PTR(r)[0] ^= _tt;
}
gf_decl void gf_mulx4_lb(gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = gf_tab[(UNIT_PTR(x)[1] >> 52) & MASK(0xf0)];
#else
_tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[1] << 4) & 0xff])) << 48;
#endif
rep2_d2(f4_lb, UNIT_PTR(x), UNIT_PTR(x));
UNIT_PTR(x)[0] ^= _tt;
}
gf_decl void gf_mulx8_lb(gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = gf_tab[UNIT_PTR(x)[1] >> 56];
#else
_tt = ((gf_unit_t)(gf_tab[UNIT_PTR(x)[1] & 0xff])) << 48;
#endif
rep2_d2(f8_lb, UNIT_PTR(x), UNIT_PTR(x));
UNIT_PTR(x)[0] ^= _tt;
}
#elif UNIT_BITS == 32
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
#define f1_lb(n,r,x) r[n] = ((x[n] >> 1) & ~MASK(0x80)) | (((x[n] << 15) \
| (n ? x[n-1] >> 17 : 0)) & MASK(0x80))
#define f4_lb(n,r,x) r[n] = ((x[n] >> 4) & ~MASK(0xf0)) | (((x[n] << 12) \
| (n ? x[n-1] >> 20 : 0)) & MASK(0xf0))
#define f8_lb(n,r,x) r[n] = (x[n] << 8) | (n ? x[n-1] >> 24 : 0)
#else
#define f1_lb(n,r,x) r[n] = (x[n] >> 1) | (n ? x[n-1] << 31 : 0)
#define f4_lb(n,r,x) r[n] = (x[n] >> 4) | (n ? x[n-1] << 28 : 0)
#define f8_lb(n,r,x) r[n] = (x[n] >> 8) | (n ? x[n-1] << 24 : 0)
#endif
gf_decl void gf_mulx1_lb(gf_t r, const gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = gf_tab[(UNIT_PTR(x)[3] >> 17) & MASK(0x80)];
#else
_tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[3] << 7) & 0xff])) << 16;
#endif
rep2_d4(f1_lb, UNIT_PTR(r), UNIT_PTR(x));
UNIT_PTR(r)[0] ^= _tt;
}
gf_decl void gf_mulx4_lb(gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = gf_tab[(UNIT_PTR(x)[3] >> 20) & MASK(0xf0)];
#else
_tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[3] << 4) & 0xff])) << 16;
#endif
rep2_d4(f4_lb, UNIT_PTR(x), UNIT_PTR(x));
UNIT_PTR(x)[0] ^= _tt;
}
gf_decl void gf_mulx8_lb(gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = gf_tab[UNIT_PTR(x)[3] >> 24];
#else
_tt = ((gf_unit_t)(gf_tab[UNIT_PTR(x)[3] & 0xff])) << 16;
#endif
rep2_d4(f8_lb, UNIT_PTR(x), UNIT_PTR(x));
UNIT_PTR(x)[0] ^= _tt;
}
#else
#define f1_lb(n,r,x) r[n] = (x[n] >> 1) | (n ? x[n-1] << 7 : 0)
#define f4_lb(n,r,x) r[n] = (x[n] >> 4) | (n ? x[n-1] << 4 : 0)
gf_decl void gf_mulx1_lb(gf_t r, const gf_t x)
{ uint16_t _tt = 0;
_tt = gf_tab[(UNIT_PTR(x)[15] << 7) & 0x80];
rep2_d16(f1_lb, UNIT_PTR(r), UNIT_PTR(x));
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
UNIT_PTR(r)[0] ^= _tt;
#else
UNIT_PTR(r)[0] ^= _tt >> 8;
#endif
}
gf_decl void gf_mulx4_lb(gf_t x)
{ uint16_t _tt = 0;
_tt = gf_tab[(UNIT_PTR(x)[15] << 4) & 0xf0];
rep2_d16(f4_lb, UNIT_PTR(x), UNIT_PTR(x));
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
UNIT_PTR(x)[1] ^= _tt >> 8;
UNIT_PTR(x)[0] ^= _tt & 0xff;
#else
UNIT_PTR(x)[1] ^= _tt & 0xff;
UNIT_PTR(x)[0] ^= _tt >> 8;
#endif
}
gf_decl void gf_mulx8_lb(gf_t x)
{ uint16_t _tt = 0;
_tt = gf_tab[UNIT_PTR(x)[15]];
memmove(UNIT_PTR(x) + 1, UNIT_PTR(x), 15);
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
UNIT_PTR(x)[1] ^= _tt >> 8;
UNIT_PTR(x)[0] = _tt & 0xff;
#else
UNIT_PTR(x)[1] ^= _tt & 0xff;
UNIT_PTR(x)[0] = _tt >> 8;
#endif
}
#endif
/* BB Mode Galois Field operations
x[0] x[1] x[2] x[3] x[4] x[5] x[6] x[7]
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
.......M ........ ........ ........ ........ ........ ........ ........
120..127 112..119 104..111 96...103 88....95 80....87 72....79 64....71
x[8] x[9] x[10] x[11] x[12] x[13] x[14] x[15]
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
........ ........ ........ ........ ........ ........ ........ 11100001
56....63 48....55 40....47 32....39 24....31 16....23 08....15 00....07
*/
#if UNIT_BITS == 64
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
#define f1_bb(n,r,x) r[n] = (x[n] >> 1) | (!n ? x[n+1] << 63 : 0)
#define f4_bb(n,r,x) r[n] = (x[n] >> 4) | (!n ? x[n+1] << 60 : 0)
#define f8_bb(n,r,x) r[n] = (x[n] >> 8) | (!n ? x[n+1] << 56 : 0)
#else
#define f1_bb(n,r,x) r[n] = ((x[n] >> 1) & ~MASK(0x80)) | (((x[n] << 15) \
| (!n ? x[n+1] >> 49 : 0)) & MASK(0x80))
#define f4_bb(n,r,x) r[n] = ((x[n] >> 4) & ~MASK(0xf0)) | (((x[n] << 12) \
| (!n ? x[n+1] >> 52 : 0)) & MASK(0xf0))
#define f8_bb(n,r,x) r[n] = (x[n] << 8) | (!n ? x[n+1] >> 56 : 0)
#endif
gf_decl void gf_mulx1_bb(gf_t r, const gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = (( gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] << 7) & 0x80])) << 48;
#else
_tt = gf_tab[(UNIT_PTR(x)[0] >> 49) & 0x80];
#endif
rep2_u2(f1_bb, UNIT_PTR(r), UNIT_PTR(x));
UNIT_PTR(r)[1] ^= _tt;
}
gf_decl void gf_mulx4_bb(gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] << 4) & 0xf0])) << 48;
#else
_tt = gf_tab[(UNIT_PTR(x)[0] >> 52) & 0xf0];
#endif
rep2_u2(f4_bb, UNIT_PTR(x), UNIT_PTR(x));
UNIT_PTR(x)[1] ^= _tt;
}
gf_decl void gf_mulx8_bb(gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = ((gf_unit_t)(gf_tab[UNIT_PTR(x)[0] & 0xff])) << 48;
#else
_tt = gf_tab[(UNIT_PTR(x)[0] >> 56) & 0xff];
#endif
rep2_u2(f8_bb, UNIT_PTR(x), UNIT_PTR(x));
UNIT_PTR(x)[1] ^= _tt;
}
#elif UNIT_BITS == 32
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
#define f1_bb(n,r,x) r[n] = (x[n] >> 1) | (n < 3 ? x[n+1] << 31 : 0)
#define f4_bb(n,r,x) r[n] = (x[n] >> 4) | (n < 3 ? x[n+1] << 28 : 0)
#define f8_bb(n,r,x) r[n] = (x[n] >> 8) | (n < 3 ? x[n+1] << 24 : 0)
#else
#define f1_bb(n,r,x) r[n] = ((x[n] >> 1) & ~MASK(0x80)) | (((x[n] << 15) \
| (n < 3 ? x[n+1] >> 17 : 0)) & MASK(0x80))
#define f4_bb(n,r,x) r[n] = ((x[n] >> 4) & ~MASK(0xf0)) | (((x[n] << 12) \
| (n < 3 ? x[n+1] >> 20 : 0)) & MASK(0xf0))
#define f8_bb(n,r,x) r[n] = (x[n] << 8) | (n < 3 ? x[n+1] >> 24 : 0)
#endif
gf_decl void gf_mulx1_bb(gf_t r, const gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] << 7) & 0x80])) << 16;
#else
_tt = gf_tab[(UNIT_PTR(x)[0] >> 17) & 0x80];
#endif
rep2_u4(f1_bb, UNIT_PTR(r), UNIT_PTR(x));
UNIT_PTR(r)[3] ^= _tt;
}
gf_decl void gf_mulx4_bb(gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] << 4) & 0xf0])) << 16;
#else
_tt = gf_tab[(UNIT_PTR(x)[0] >> 20) & 0xf0];
#endif
rep2_u4(f4_bb, UNIT_PTR(x), UNIT_PTR(x));
UNIT_PTR(x)[3] ^= _tt;
}
gf_decl void gf_mulx8_bb(gf_t x)
{ gf_unit_t _tt = 0;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
_tt = ((gf_unit_t)(gf_tab[UNIT_PTR(x)[0] & 0xff])) << 16;
#else
_tt = gf_tab[(UNIT_PTR(x)[0] >> 24) & 0xff];
#endif
rep2_u4(f8_bb, UNIT_PTR(x), UNIT_PTR(x));
UNIT_PTR(x)[3] ^= _tt;
}
#else
#define f1_bb(n,r,x) r[n] = (x[n] >> 1) | (n < 15 ? x[n+1] << 7 : 0)
#define f4_bb(n,r,x) r[n] = (x[n] >> 4) | (n < 15 ? x[n+1] << 4 : 0)
gf_decl void gf_mulx1_bb(gf_t r, const gf_t x)
{ uint16_t _tt = 0;
_tt = gf_tab[(UNIT_PTR(x)[0] << 7) & 0x80];
rep2_u16(f1_bb, UNIT_PTR(r), UNIT_PTR(x));
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
UNIT_PTR(r)[15] ^= _tt >> 8;
#else
UNIT_PTR(r)[15] ^= _tt;
#endif
}
gf_decl void gf_mulx4_bb(gf_t x)
{ uint16_t _tt = 0;
_tt = gf_tab[(UNIT_PTR(x)[0] << 4) & 0xf0];
rep2_u16(f4_bb, UNIT_PTR(x), UNIT_PTR(x));
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
UNIT_PTR(x)[14] ^= _tt & 0xff;
UNIT_PTR(x)[15] ^= _tt >> 8;
#else
UNIT_PTR(x)[14] ^= _tt >> 8;
UNIT_PTR(x)[15] ^= _tt & 0xff;
#endif
}
gf_decl void gf_mulx8_bb(gf_t x)
{ uint16_t _tt = 0;
_tt = gf_tab[UNIT_PTR(x)[0]];
memmove(UNIT_PTR(x), UNIT_PTR(x) + 1, 15);
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
UNIT_PTR(x)[14] ^= _tt & 0xff;
UNIT_PTR(x)[15] = _tt >> 8;
#else
UNIT_PTR(x)[14] ^= _tt >> 8;
UNIT_PTR(x)[15] = _tt & 0xff;
#endif
}
#endif
#endif