feat(crypto): Add Brian Gladman's implementation of GCM.
This commit is contained in:
parent 89147ef493, commit 6e207215e3

Makefile (2 lines changed)
@@ -65,7 +65,7 @@ yaml_check: ## check yaml formatting
 	yamllint .
 
 editor_check: ## check editorconfig formatting
-	editorconfig-checker -exclude '.*\.(so|dat|toif|der)'
+	editorconfig-checker -exclude '.*\.(so|dat|toif|der)|^crypto/aes/'
 
 cstyle_check: ## run code style check on low-level C code
 	clang-format --version
crypto/aes/aesgcm.c (new file, 547 lines)

@@ -0,0 +1,547 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.

The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:

  source code distributions include the above copyright notice, this
  list of conditions and the following disclaimer;

  binary distributions include the above copyright notice, this list
  of conditions and the following disclaimer in their documentation.

This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 30/03/2011

My thanks to:

  Colin Sinclair for finding an error and suggesting a number of
  improvements to this code.

  John Viega and David McGrew for their support in the development
  of this code and to David for testing it on a big-endian system.

  Mark Rodenkirch and Jason Papadopoulos for their help in finding
  a bug in the fast buffer operations on big endian systems.
*/

#include "gcm.h"
#include "mode_hdr.h"

/*  This GCM implementation needs a Galois Field multiplier for GF(2^128),
    which operates on field elements using a polynomial field representation
    x^127 + x^126 + ... + x^2 + x + 1 using the bits in a bit sequence that
    will be numbered by the power of x that they represent. GCM uses the
    polynomial x^128 + x^7 + x^2 + x + 1 as its basis for representation.

    The obvious way of representing this in a computer system is to map GF
    'x' to the binary integer '2' - but this was way too obvious for any
    cryptographer to adopt!

    Here bytes are numbered in memory order and bits within bytes according
    to their integer numeric significance. The term 'little endian' is then
    used to describe mappings in which numeric (power of 2) or field (power
    of x) significance increases with increasing bit or byte numbers, with
    'big endian' being used to describe the inverse situation.

    GCM uses little endian byte ordering and big endian bit ordering, a
    representation that will be described as LB. Hence the low end of the
    field polynomial is in byte[0], which has the value 0xe1 rather than
    0x87 in the more obvious mappings.

    The related field multiplier can use this mapping, but if you want to
    use an alternative (e.g. hardware) multiplier that uses a different
    polynomial field representation, you can do so by changing the form
    used for the field elements when this alternative multiplier is used.

    If GF_REPRESENTATION is defined as one of:

        REVERSE_BITS                    // change to LL
        REVERSE_BYTES | REVERSE_BITS    // change to BL
        REVERSE_NONE                    // no change
        REVERSE_BYTES                   // change to BB

    then an appropriate change of representation will occur before and
    after calls to your revised field multiplier. To use this you need
    to add gf_convert.c to your application.
*/

#if defined(__cplusplus)
extern "C"
{
#endif

#if 1
#  undef GF_REPRESENTATION
#elif 0
#  define GF_REPRESENTATION REVERSE_BITS
#elif 0
#  define GF_REPRESENTATION REVERSE_BYTES | REVERSE_BITS
#elif 0
#  define GF_REPRESENTATION REVERSE_NONE
#elif 0
#  define GF_REPRESENTATION REVERSE_BITS
#endif

#define BLOCK_SIZE   GCM_BLOCK_SIZE     /* block length                 */
#define BLK_ADR_MASK (BLOCK_SIZE - 1)   /* mask for 'in block' address  */
#define CTR_POS      12

#define inc_ctr(x)  \
    {   int i = BLOCK_SIZE; while(i-- > CTR_POS && !++(UI8_PTR(x)[i])) ; }

ret_type gcm_init_and_key(              /* initialise mode and set key  */
    const unsigned char key[],          /* the key value                */
    unsigned long key_len,              /* and its length in bytes      */
    gcm_ctx ctx[1])                     /* the mode context             */
{
    memset(ctx->ghash_h, 0, sizeof(ctx->ghash_h));

    /* set the AES key */
    aes_encrypt_key(key, key_len, ctx->aes);

    /* compute E(0) (for the hash function) */
    aes_encrypt(UI8_PTR(ctx->ghash_h), UI8_PTR(ctx->ghash_h), ctx->aes);

#if defined( GF_REPRESENTATION )
    convert_representation(ctx->ghash_h, ctx->ghash_h, GF_REPRESENTATION);
#endif

#if defined( TABLES_64K )
    init_64k_table(ctx->ghash_h, ctx->gf_t64k);
#elif defined( TABLES_8K )
    init_8k_table(ctx->ghash_h, ctx->gf_t8k);
#elif defined( TABLES_4K )
    init_4k_table(ctx->ghash_h, ctx->gf_t4k);
#elif defined( TABLES_256 )
    init_256_table(ctx->ghash_h, ctx->gf_t256);
#endif
#if defined( GF_REPRESENTATION )
    convert_representation(ctx->ghash_h, ctx->ghash_h, GF_REPRESENTATION);
#endif
    return RETURN_GOOD;
}

void gf_mul_hh(gf_t a, gcm_ctx ctx[1])
{
#if defined( GF_REPRESENTATION ) || !defined( NO_TABLES )
    gf_t scr;
#endif
#if defined( GF_REPRESENTATION )
    convert_representation(a, a, GF_REPRESENTATION);
#endif

#if defined( TABLES_64K )
    gf_mul_64k(a, ctx->gf_t64k, scr);
#elif defined( TABLES_8K )
    gf_mul_8k(a, ctx->gf_t8k, scr);
#elif defined( TABLES_4K )
    gf_mul_4k(a, ctx->gf_t4k, scr);
#elif defined( TABLES_256 )
    gf_mul_256(a, ctx->gf_t256, scr);
#else
#  if defined( GF_REPRESENTATION )
    convert_representation(scr, ctx->ghash_h, GF_REPRESENTATION);
    gf_mul(a, scr);
#  else
    gf_mul(a, ctx->ghash_h);
#  endif
#endif

#if defined( GF_REPRESENTATION )
    convert_representation(a, a, GF_REPRESENTATION);
#endif
}

ret_type gcm_init_message(              /* initialise a new message     */
    const unsigned char iv[],           /* the initialisation vector    */
    unsigned long iv_len,               /* and its length in bytes      */
    gcm_ctx ctx[1])                     /* the mode context             */
{   uint32_t i, n_pos = 0;
    uint8_t *p;

    memset(ctx->ctr_val, 0, BLOCK_SIZE);
    if(iv_len == CTR_POS)
    {
        memcpy(ctx->ctr_val, iv, CTR_POS); UI8_PTR(ctx->ctr_val)[15] = 0x01;
    }
    else
    {   n_pos = iv_len;
        while(n_pos >= BLOCK_SIZE)
        {
            xor_block_aligned(ctx->ctr_val, ctx->ctr_val, iv);
            n_pos -= BLOCK_SIZE;
            iv += BLOCK_SIZE;
            gf_mul_hh((gf_t*)ctx->ctr_val, ctx);
        }

        if(n_pos)
        {
            p = UI8_PTR(ctx->ctr_val);
            while(n_pos-- > 0)
                *p++ ^= *iv++;
            gf_mul_hh((gf_t*)ctx->ctr_val, ctx);
        }
        n_pos = (iv_len << 3);
        for(i = BLOCK_SIZE - 1; n_pos; --i, n_pos >>= 8)
            UI8_PTR(ctx->ctr_val)[i] ^= (unsigned char)n_pos;
        gf_mul_hh((gf_t*)ctx->ctr_val, ctx);
    }

    ctx->y0_val = *UI32_PTR(UI8_PTR(ctx->ctr_val) + CTR_POS);
    memset(ctx->hdr_ghv, 0, BLOCK_SIZE);
    memset(ctx->txt_ghv, 0, BLOCK_SIZE);
    ctx->hdr_cnt = 0;
    ctx->txt_ccnt = ctx->txt_acnt = 0;
    return RETURN_GOOD;
}

ret_type gcm_auth_header(               /* authenticate the header      */
    const unsigned char hdr[],          /* the header buffer            */
    unsigned long hdr_len,              /* and its length in bytes      */
    gcm_ctx ctx[1])                     /* the mode context             */
{   uint32_t cnt = 0, b_pos = (uint32_t)ctx->hdr_cnt & BLK_ADR_MASK;

    if(!hdr_len)
        return RETURN_GOOD;

    if(ctx->hdr_cnt && b_pos == 0)
        gf_mul_hh((gf_t*)ctx->hdr_ghv, ctx);

    if(!((hdr - (UI8_PTR(ctx->hdr_ghv) + b_pos)) & BUF_ADRMASK))
    {
        while(cnt < hdr_len && (b_pos & BUF_ADRMASK))
            UI8_PTR(ctx->hdr_ghv)[b_pos++] ^= hdr[cnt++];

        while(cnt + BUF_INC <= hdr_len && b_pos <= BLOCK_SIZE - BUF_INC)
        {
            *UNIT_PTR(UI8_PTR(ctx->hdr_ghv) + b_pos) ^= *UNIT_PTR(hdr + cnt);
            cnt += BUF_INC; b_pos += BUF_INC;
        }

        while(cnt + BLOCK_SIZE <= hdr_len)
        {
            gf_mul_hh((gf_t*)ctx->hdr_ghv, ctx);
            xor_block_aligned(ctx->hdr_ghv, ctx->hdr_ghv, hdr + cnt);
            cnt += BLOCK_SIZE;
        }
    }
    else
    {
        while(cnt < hdr_len && b_pos < BLOCK_SIZE)
            UI8_PTR(ctx->hdr_ghv)[b_pos++] ^= hdr[cnt++];

        while(cnt + BLOCK_SIZE <= hdr_len)
        {
            gf_mul_hh((gf_t*)ctx->hdr_ghv, ctx);
            xor_block(ctx->hdr_ghv, ctx->hdr_ghv, hdr + cnt);
            cnt += BLOCK_SIZE;
        }
    }

    while(cnt < hdr_len)
    {
        if(b_pos == BLOCK_SIZE)
        {
            gf_mul_hh((gf_t*)ctx->hdr_ghv, ctx);
            b_pos = 0;
        }
        UI8_PTR(ctx->hdr_ghv)[b_pos++] ^= hdr[cnt++];
    }

    ctx->hdr_cnt += cnt;
    return RETURN_GOOD;
}

ret_type gcm_auth_data(                 /* authenticate ciphertext data */
    const unsigned char data[],         /* the data buffer              */
    unsigned long data_len,             /* and its length in bytes      */
    gcm_ctx ctx[1])                     /* the mode context             */
{   uint32_t cnt = 0, b_pos = (uint32_t)ctx->txt_acnt & BLK_ADR_MASK;

    if(!data_len)
        return RETURN_GOOD;

    if(ctx->txt_acnt && b_pos == 0)
        gf_mul_hh((gf_t*)ctx->txt_ghv, ctx);

    if(!((data - (UI8_PTR(ctx->txt_ghv) + b_pos)) & BUF_ADRMASK))
    {
        while(cnt < data_len && (b_pos & BUF_ADRMASK))
            UI8_PTR(ctx->txt_ghv)[b_pos++] ^= data[cnt++];

        while(cnt + BUF_INC <= data_len && b_pos <= BLOCK_SIZE - BUF_INC)
        {
            *UNIT_PTR(UI8_PTR(ctx->txt_ghv) + b_pos) ^= *UNIT_PTR(data + cnt);
            cnt += BUF_INC; b_pos += BUF_INC;
        }

        while(cnt + BLOCK_SIZE <= data_len)
        {
            gf_mul_hh((gf_t*)ctx->txt_ghv, ctx);
            xor_block_aligned(ctx->txt_ghv, ctx->txt_ghv, data + cnt);
            cnt += BLOCK_SIZE;
        }
    }
    else
    {
        while(cnt < data_len && b_pos < BLOCK_SIZE)
            UI8_PTR(ctx->txt_ghv)[b_pos++] ^= data[cnt++];

        while(cnt + BLOCK_SIZE <= data_len)
        {
            gf_mul_hh((gf_t*)ctx->txt_ghv, ctx);
            xor_block(ctx->txt_ghv, ctx->txt_ghv, data + cnt);
            cnt += BLOCK_SIZE;
        }
    }

    while(cnt < data_len)
    {
        if(b_pos == BLOCK_SIZE)
        {
            gf_mul_hh((gf_t*)ctx->txt_ghv, ctx);
            b_pos = 0;
        }
        UI8_PTR(ctx->txt_ghv)[b_pos++] ^= data[cnt++];
    }

    ctx->txt_acnt += cnt;
    return RETURN_GOOD;
}

ret_type gcm_crypt_data(                /* encrypt or decrypt data      */
    unsigned char data[],               /* the data buffer              */
    unsigned long data_len,             /* and its length in bytes      */
    gcm_ctx ctx[1])                     /* the mode context             */
{   uint32_t cnt = 0, b_pos = (uint32_t)ctx->txt_ccnt & BLK_ADR_MASK;

    if(!data_len)
        return RETURN_GOOD;

    if(!((data - (UI8_PTR(ctx->enc_ctr) + b_pos)) & BUF_ADRMASK))
    {
        if(b_pos)
        {
            while(cnt < data_len && (b_pos & BUF_ADRMASK))
                data[cnt++] ^= UI8_PTR(ctx->enc_ctr)[b_pos++];

            while(cnt + BUF_INC <= data_len && b_pos <= BLOCK_SIZE - BUF_INC)
            {
                *UNIT_PTR(data + cnt) ^= *UNIT_PTR(UI8_PTR(ctx->enc_ctr) + b_pos);
                cnt += BUF_INC; b_pos += BUF_INC;
            }
        }

        while(cnt + BLOCK_SIZE <= data_len)
        {
            inc_ctr(ctx->ctr_val);
            aes_encrypt(UI8_PTR(ctx->ctr_val), UI8_PTR(ctx->enc_ctr), ctx->aes);
            xor_block_aligned(data + cnt, data + cnt, ctx->enc_ctr);
            cnt += BLOCK_SIZE;
        }
    }
    else
    {
        if(b_pos)
            while(cnt < data_len && b_pos < BLOCK_SIZE)
                data[cnt++] ^= UI8_PTR(ctx->enc_ctr)[b_pos++];

        while(cnt + BLOCK_SIZE <= data_len)
        {
            inc_ctr(ctx->ctr_val);
            aes_encrypt(UI8_PTR(ctx->ctr_val), UI8_PTR(ctx->enc_ctr), ctx->aes);
            xor_block(data + cnt, data + cnt, ctx->enc_ctr);
            cnt += BLOCK_SIZE;
        }
    }

    while(cnt < data_len)
    {
        if(b_pos == BLOCK_SIZE || !b_pos)
        {
            inc_ctr(ctx->ctr_val);
            aes_encrypt(UI8_PTR(ctx->ctr_val), UI8_PTR(ctx->enc_ctr), ctx->aes);
            b_pos = 0;
        }
        data[cnt++] ^= UI8_PTR(ctx->enc_ctr)[b_pos++];
    }

    ctx->txt_ccnt += cnt;
    return RETURN_GOOD;
}

ret_type gcm_compute_tag(               /* compute authentication tag   */
    unsigned char tag[],                /* the buffer for the tag       */
    unsigned long tag_len,              /* and its length in bytes      */
    gcm_ctx ctx[1])                     /* the mode context             */
{   uint32_t i, ln;
    gf_t tbuf;

    if(ctx->txt_acnt != ctx->txt_ccnt && ctx->txt_ccnt > 0)
        return RETURN_ERROR;

    gf_mul_hh((gf_t*)ctx->hdr_ghv, ctx);
    gf_mul_hh((gf_t*)ctx->txt_ghv, ctx);

    if(ctx->hdr_cnt)
    {
        ln = (uint32_t)((ctx->txt_acnt + BLOCK_SIZE - 1) / BLOCK_SIZE);
        if(ln)
        {
#if 1       /* alternative versions of the exponentiation operation */
            memcpy(tbuf, ctx->ghash_h, BLOCK_SIZE);
# if defined( GF_REPRESENTATION )
            convert_representation(tbuf, tbuf, GF_REPRESENTATION);
            convert_representation(ctx->hdr_ghv, ctx->hdr_ghv, GF_REPRESENTATION);
# endif
            for( ; ; )
            {
                if(ln & 1)
                {
                    gf_mul((void*)ctx->hdr_ghv, tbuf);
                }
                if(!(ln >>= 1))
                    break;
                gf_mul(tbuf, tbuf);
            }
#else       /* this one seems slower on x86 and x86_64 :-( */
            i = ln | ln >> 1; i |= i >> 2; i |= i >> 4;
            i |= i >> 8; i |= i >> 16; i &= ~(i >> 1);
            memset(tbuf, 0, BLOCK_SIZE);
            UI8_PTR(tbuf)[0] = 0x80;
            while(i)
            {
# if defined( GF_REPRESENTATION )
                convert_representation(tbuf, tbuf, GF_REPRESENTATION);
# endif
                gf_mul(tbuf, tbuf);
# if defined( GF_REPRESENTATION )
                convert_representation(tbuf, tbuf, GF_REPRESENTATION);
# endif
                if(i & ln)
                    gf_mul_hh((gf_t*)tbuf, ctx);
                i >>= 1;
            }
# if defined( GF_REPRESENTATION )
            convert_representation(tbuf, tbuf, GF_REPRESENTATION);
            convert_representation(ctx->hdr_ghv, ctx->hdr_ghv, GF_REPRESENTATION);
# endif
            gf_mul((void*)ctx->hdr_ghv, tbuf);
#endif
#if defined( GF_REPRESENTATION )
            convert_representation(ctx->hdr_ghv, ctx->hdr_ghv, GF_REPRESENTATION);
#endif
        }
    }

    i = BLOCK_SIZE;
#ifdef BRG_UI64
    {   uint64_t tm = ((uint64_t)ctx->txt_acnt) << 3;
        while(i-- > 0)
        {
            UI8_PTR(ctx->hdr_ghv)[i] ^= UI8_PTR(ctx->txt_ghv)[i] ^ (unsigned char)tm;
            tm = (i == 8 ? (((uint64_t)ctx->hdr_cnt) << 3) : tm >> 8);
        }
    }
#else
    {   uint32_t tm = ctx->txt_acnt << 3;

        while(i-- > 0)
        {
            UI8_PTR(ctx->hdr_ghv)[i] ^= UI8_PTR(ctx->txt_ghv)[i] ^ (unsigned char)tm;
            if(i & 3)
                tm >>= 8;
            else if(i == 4)
                tm = ctx->txt_acnt >> 29;
            else if(i == 8)
                tm = ctx->hdr_cnt << 3;
            else
                tm = ctx->hdr_cnt >> 29;
        }
    }
#endif

    gf_mul_hh((gf_t*)ctx->hdr_ghv, ctx);

    memcpy(ctx->enc_ctr, ctx->ctr_val, BLOCK_SIZE);
    *UI32_PTR(UI8_PTR(ctx->enc_ctr) + CTR_POS) = ctx->y0_val;
    aes_encrypt(UI8_PTR(ctx->enc_ctr), UI8_PTR(ctx->enc_ctr), ctx->aes);
    for(i = 0; i < (unsigned int)tag_len; ++i)
        tag[i] = (unsigned char)(UI8_PTR(ctx->hdr_ghv)[i] ^ UI8_PTR(ctx->enc_ctr)[i]);

    return (ctx->txt_ccnt == ctx->txt_acnt ? RETURN_GOOD : RETURN_WARN);
}

ret_type gcm_end(                       /* clean up and end operation   */
    gcm_ctx ctx[1])                     /* the mode context             */
{
    memset(ctx, 0, sizeof(gcm_ctx));
    return RETURN_GOOD;
}

ret_type gcm_encrypt(                   /* encrypt & authenticate data  */
    unsigned char data[],               /* the data buffer              */
    unsigned long data_len,             /* and its length in bytes      */
    gcm_ctx ctx[1])                     /* the mode context             */
{

    gcm_crypt_data(data, data_len, ctx);
    gcm_auth_data(data, data_len, ctx);
    return RETURN_GOOD;
}

ret_type gcm_decrypt(                   /* authenticate & decrypt data  */
    unsigned char data[],               /* the data buffer              */
    unsigned long data_len,             /* and its length in bytes      */
    gcm_ctx ctx[1])                     /* the mode context             */
{
    gcm_auth_data(data, data_len, ctx);
    gcm_crypt_data(data, data_len, ctx);
    return RETURN_GOOD;
}

ret_type gcm_encrypt_message(           /* encrypt an entire message    */
    const unsigned char iv[],           /* the initialisation vector    */
    unsigned long iv_len,               /* and its length in bytes      */
    const unsigned char hdr[],          /* the header buffer            */
    unsigned long hdr_len,              /* and its length in bytes      */
    unsigned char msg[],                /* the message buffer           */
    unsigned long msg_len,              /* and its length in bytes      */
    unsigned char tag[],                /* the buffer for the tag       */
    unsigned long tag_len,              /* and its length in bytes      */
    gcm_ctx ctx[1])                     /* the mode context             */
{
    gcm_init_message(iv, iv_len, ctx);
    gcm_auth_header(hdr, hdr_len, ctx);
    gcm_encrypt(msg, msg_len, ctx);
    return gcm_compute_tag(tag, tag_len, ctx) ? RETURN_ERROR : RETURN_GOOD;
}

ret_type gcm_decrypt_message(           /* decrypt an entire message    */
    const unsigned char iv[],           /* the initialisation vector    */
    unsigned long iv_len,               /* and its length in bytes      */
    const unsigned char hdr[],          /* the header buffer            */
    unsigned long hdr_len,              /* and its length in bytes      */
    unsigned char msg[],                /* the message buffer           */
    unsigned long msg_len,              /* and its length in bytes      */
    const unsigned char tag[],          /* the buffer for the tag       */
    unsigned long tag_len,              /* and its length in bytes      */
    gcm_ctx ctx[1])                     /* the mode context             */
{   uint8_t local_tag[BLOCK_SIZE];
    ret_type rr;

    gcm_init_message(iv, iv_len, ctx);
    gcm_auth_header(hdr, hdr_len, ctx);
    gcm_decrypt(msg, msg_len, ctx);
    rr = gcm_compute_tag(local_tag, tag_len, ctx);
    return (rr != RETURN_GOOD || memcmp(tag, local_tag, tag_len)) ? RETURN_ERROR : RETURN_GOOD;
}

#if defined(__cplusplus)
}
#endif
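A minimal usage sketch (not part of the commit) of the incremental API defined above, assuming the header is reachable as "gcm.h" as in aesgcm.c; the zero key, 96-bit IV and demo AAD are illustrative only.

/* Incremental sealing: key once, then per message run init_message,
   auth_header, encrypt and compute_tag, in that order. */
#include "gcm.h"

int gcm_seal_example(unsigned char msg[], unsigned long msg_len,
                     unsigned char tag[16])
{
    static const unsigned char key[16] = {0};           /* demo key (assumption)   */
    static const unsigned char iv[12]  = {0};           /* 96-bit IV (assumption)  */
    static const unsigned char aad[4]  = {1, 2, 3, 4};  /* authenticated header    */
    gcm_ctx ctx[1];

    if(gcm_init_and_key(key, 16, ctx) != RETURN_GOOD)
        return -1;
    gcm_init_message(iv, 12, ctx);      /* start a new message                     */
    gcm_auth_header(aad, 4, ctx);       /* authenticate the AAD                    */
    gcm_encrypt(msg, msg_len, ctx);     /* encrypt in place, then GHASH ciphertext */
    if(gcm_compute_tag(tag, 16, ctx) != RETURN_GOOD)
        return -1;
    return gcm_end(ctx) == RETURN_GOOD ? 0 : -1;        /* wipe the context        */
}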
crypto/aes/aesgcm.h (new file, 233 lines)

@@ -0,0 +1,233 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.

The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:

  source code distributions include the above copyright notice, this
  list of conditions and the following disclaimer;

  binary distributions include the above copyright notice, this list
  of conditions and the following disclaimer in their documentation.

This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 11/01/2011

I am grateful for the work done by Mark Rodenkirch and Jason Papadopoulos
in helping to remove a bug in the operation of this code on big endian
systems when fast buffer operations are enabled.
---------------------------------------------------------------------------
*/

#ifndef _GCM_H
#define _GCM_H

#include "aes.h"
#include "gf128mul.h"

/* USER DEFINABLE OPTIONS (Further options need to be set in gf128mul.h) */

/*  UNIT_BITS sets the size of variables used to process 16 byte buffers
    when the buffer alignment allows this. When buffers are processed
    in bytes, 16 individual operations are involved. But if, say, such
    a buffer is divided into four 32-bit variables, it can then be processed
    in 4 operations, making the code typically much faster. In general
    it will pay to use the longest natively supported size, which will
    probably be 32 or 64 bits in 32 and 64 bit systems respectively.
*/

#if defined( UNIT_BITS )
#  undef UNIT_BITS
#endif

#if !defined( UNIT_BITS )
#  if PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN
#    if 0
#      define UNIT_BITS 8
#    elif 0
#      define UNIT_BITS 32
#    elif 1
#      define UNIT_BITS 64
#    endif
#  elif defined( _WIN64 )
#    define UNIT_BITS 64
#  else
#    define UNIT_BITS 32
#  endif
#endif

#if UNIT_BITS == 64 && !defined( NEED_UINT_64T )
#  define NEED_UINT_64T
#endif

/* END OF USER DEFINABLE OPTIONS */

/*  After encryption or decryption operations the return value of
    'compute tag' will be one of the values RETURN_GOOD, RETURN_WARN
    or RETURN_ERROR, the latter indicating an error. A return value
    RETURN_GOOD indicates that both encryption and authentication
    have taken place and resulted in the returned tag value. If
    the returned value is RETURN_WARN, the tag value is the result
    of authentication alone without encryption (CCM) or decryption
    (GCM and EAX).
*/
#ifndef RETURN_GOOD
#  define RETURN_WARN   1
#  define RETURN_GOOD   0
#  define RETURN_ERROR -1
#endif

#if defined(__cplusplus)
extern "C"
{
#endif

#ifndef RET_TYPE_DEFINED
typedef int ret_type;
#endif
UNIT_TYPEDEF(gcm_unit_t, UNIT_BITS);
BUFR_TYPEDEF(gcm_buf_t, UNIT_BITS, AES_BLOCK_SIZE);

#define GCM_BLOCK_SIZE AES_BLOCK_SIZE

/* The GCM-AES context */

typedef struct
{
#if defined( TABLES_64K )
    gf_t64k_a       gf_t64k;
#endif
#if defined( TABLES_8K )
    gf_t8k_a        gf_t8k;
#endif
#if defined( TABLES_4K )
    gf_t4k_a        gf_t4k;
#endif
#if defined( TABLES_256 )
    gf_t256_a       gf_t256;
#endif
    gcm_buf_t       ctr_val;            /* CTR counter value            */
    gcm_buf_t       enc_ctr;            /* encrypted CTR block          */
    gcm_buf_t       hdr_ghv;            /* ghash buffer (header)        */
    gcm_buf_t       txt_ghv;            /* ghash buffer (ciphertext)    */
    gf_t            ghash_h;            /* ghash H value                */
    aes_encrypt_ctx aes[1];             /* AES encryption context       */
    uint32_t        y0_val;             /* initial counter value        */
    uint32_t        hdr_cnt;            /* header bytes so far          */
    uint32_t        txt_ccnt;           /* text bytes so far (encrypt)  */
    uint32_t        txt_acnt;           /* text bytes so far (auth)     */
} gcm_ctx;

/* The following calls handle mode initialisation, keying and completion */

ret_type gcm_init_and_key(              /* initialise mode and set key  */
    const unsigned char key[],          /* the key value                */
    unsigned long key_len,              /* and its length in bytes      */
    gcm_ctx ctx[1]);                    /* the mode context             */

ret_type gcm_end(                       /* clean up and end operation   */
    gcm_ctx ctx[1]);                    /* the mode context             */

/* The following calls handle complete messages in memory as one operation */

ret_type gcm_encrypt_message(           /* encrypt an entire message    */
    const unsigned char iv[],           /* the initialisation vector    */
    unsigned long iv_len,               /* and its length in bytes      */
    const unsigned char hdr[],          /* the header buffer            */
    unsigned long hdr_len,              /* and its length in bytes      */
    unsigned char msg[],                /* the message buffer           */
    unsigned long msg_len,              /* and its length in bytes      */
    unsigned char tag[],                /* the buffer for the tag       */
    unsigned long tag_len,              /* and its length in bytes      */
    gcm_ctx ctx[1]);                    /* the mode context             */

                                        /* RETURN_GOOD is returned if the input tag    */
                                        /* matches that for the decrypted message      */
ret_type gcm_decrypt_message(           /* decrypt an entire message    */
    const unsigned char iv[],           /* the initialisation vector    */
    unsigned long iv_len,               /* and its length in bytes      */
    const unsigned char hdr[],          /* the header buffer            */
    unsigned long hdr_len,              /* and its length in bytes      */
    unsigned char msg[],                /* the message buffer           */
    unsigned long msg_len,              /* and its length in bytes      */
    const unsigned char tag[],          /* the buffer for the tag       */
    unsigned long tag_len,              /* and its length in bytes      */
    gcm_ctx ctx[1]);                    /* the mode context             */

/* The following calls handle messages in a sequence of operations followed */
/* by tag computation after the sequence has been completed. In these calls */
/* the user is responsible for verifying the computed tag on decryption     */

ret_type gcm_init_message(              /* initialise a new message     */
    const unsigned char iv[],           /* the initialisation vector    */
    unsigned long iv_len,               /* and its length in bytes      */
    gcm_ctx ctx[1]);                    /* the mode context             */

ret_type gcm_auth_header(               /* authenticate the header      */
    const unsigned char hdr[],          /* the header buffer            */
    unsigned long hdr_len,              /* and its length in bytes      */
    gcm_ctx ctx[1]);                    /* the mode context             */

ret_type gcm_encrypt(                   /* encrypt & authenticate data  */
    unsigned char data[],               /* the data buffer              */
    unsigned long data_len,             /* and its length in bytes      */
    gcm_ctx ctx[1]);                    /* the mode context             */

ret_type gcm_decrypt(                   /* authenticate & decrypt data  */
    unsigned char data[],               /* the data buffer              */
    unsigned long data_len,             /* and its length in bytes      */
    gcm_ctx ctx[1]);                    /* the mode context             */

ret_type gcm_compute_tag(               /* compute authentication tag   */
    unsigned char tag[],                /* the buffer for the tag       */
    unsigned long tag_len,              /* and its length in bytes      */
    gcm_ctx ctx[1]);                    /* the mode context             */

/*  The use of the following calls should be avoided if possible because
    their use requires a very good understanding of the way this encryption
    mode works and the way in which this code implements it in order to use
    them correctly.

    The gcm_auth_data routine is used to authenticate encrypted message data.
    In message encryption gcm_crypt_data must be called before gcm_auth_data
    is called since it is encrypted data that is authenticated. In message
    decryption authentication must occur before decryption, and data can be
    authenticated without being decrypted if necessary.

    If these calls are used it is up to the user to ensure that these routines
    are called in the correct order and that the correct data is passed to
    them.

    When gcm_compute_tag is called it is assumed that an error in use has
    occurred if both encryption (or decryption) and authentication have taken
    place but the total lengths of the message data respectively authenticated
    and encrypted are not the same. If authentication has taken place but
    there have been no corresponding encryption or decryption operations (none
    at all) only a warning is issued. This should be treated as an error if it
    occurs during encryption, but it is only signalled as a warning as it might
    be intentional when decryption operations are involved (this avoids having
    different compute tag functions for encryption and decryption). Decryption
    operations can be undertaken freely after authentication, but if the tag is
    computed after such operations an error will be signalled if the lengths
    of the data authenticated and decrypted don't match.
*/

ret_type gcm_auth_data(                 /* authenticate ciphertext data */
    const unsigned char data[],         /* the data buffer              */
    unsigned long data_len,             /* and its length in bytes      */
    gcm_ctx ctx[1]);                    /* the mode context             */

ret_type gcm_crypt_data(                /* encrypt or decrypt data      */
    unsigned char data[],               /* the data buffer              */
    unsigned long data_len,             /* and its length in bytes      */
    gcm_ctx ctx[1]);                    /* the mode context             */

#if defined(__cplusplus)
}
#endif

#endif
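A minimal sketch (not part of the commit) of the one-shot calls declared above: gcm_encrypt_message seals the buffer in place and writes the tag, and gcm_decrypt_message returns RETURN_GOOD only when the tag verifies. The key, nonce and tag lengths below are illustrative assumptions.

/* One-shot round trip: encrypt in place, then decrypt and verify the tag. */
#include "gcm.h"

int gcm_roundtrip_example(unsigned char msg[], unsigned long msg_len)
{
    static const unsigned char key[32] = {0};   /* demo AES-256 key (assumption) */
    static const unsigned char iv[12]  = {0};   /* 96-bit nonce (assumption)     */
    unsigned char tag[16];
    gcm_ctx ctx[1];
    ret_type rv;

    if(gcm_init_and_key(key, 32, ctx) != RETURN_GOOD)
        return -1;
    gcm_encrypt_message(iv, 12, NULL, 0, msg, msg_len, tag, 16, ctx);
    rv = gcm_decrypt_message(iv, 12, NULL, 0, msg, msg_len, tag, 16, ctx);
    gcm_end(ctx);                               /* wipe the context              */
    return rv == RETURN_GOOD ? 0 : -1;
}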
crypto/aes/brg_endian.h (new file, 29 lines)

@@ -0,0 +1,29 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved.

The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:

  source code distributions include the above copyright notice, this
  list of conditions and the following disclaimer;

  binary distributions include the above copyright notice, this list
  of conditions and the following disclaimer in their documentation.

This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 10/09/2018
*/

#ifndef _BRG_ENDIAN_H
#define _BRG_ENDIAN_H

#define IS_BIG_ENDIAN    4321   /* byte 0 is most significant (mc68k)   */
#define IS_LITTLE_ENDIAN 1234   /* byte 0 is least significant (i386)   */

#define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN

#endif
crypto/aes/gf128mul.c (new file, 471 lines)

@@ -0,0 +1,471 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.

The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:

  source code distributions include the above copyright notice, this
  list of conditions and the following disclaimer;

  binary distributions include the above copyright notice, this list
  of conditions and the following disclaimer in their documentation.

This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 20/12/2007

This file provides fast multiplication in GF(128) as required by several
cryptographic authentication modes (see gf128mul.h).
*/

/* Speed critical loops can be unrolled to gain speed but consume more memory */
#if 1
#  define UNROLL_LOOPS
#endif

/* The order of these includes matters */
#include "mode_hdr.h"
#include "gf128mul.h"
#include "gf_mul_lo.h"

#if defined( GF_MODE_LL )
#  define mode _ll
#elif defined( GF_MODE_BL )
#  define mode _bl
#elif defined( GF_MODE_LB )
#  define mode _lb
#elif defined( GF_MODE_BB )
#  define mode _bb
#else
#  error mode is not defined
#endif

#if defined( GF_MODE_LL ) || defined( GF_MODE_LB )
#  define GF_INDEX(i) (i)
#else
#  define GF_INDEX(i) (15 - (i))
#endif

/* A slow field multiplier */

void gf_mul(gf_t a, const gf_t b)
{   gf_t p[8];
    uint8_t *q, ch;
    int i;

    copy_block_aligned(p[0], a);
    for(i = 0; i < 7; ++i)
        gf_mulx1(mode)(p[i + 1], p[i]);

    q = (uint8_t*)(a == b ? p[0] : b);
    memset(a, 0, GF_BYTE_LEN);
    for(i = 15 ; ; )
    {
        ch = q[GF_INDEX(i)];
        if(ch & X_0)
            xor_block_aligned(a, a, p[0]);
        if(ch & X_1)
            xor_block_aligned(a, a, p[1]);
        if(ch & X_2)
            xor_block_aligned(a, a, p[2]);
        if(ch & X_3)
            xor_block_aligned(a, a, p[3]);
        if(ch & X_4)
            xor_block_aligned(a, a, p[4]);
        if(ch & X_5)
            xor_block_aligned(a, a, p[5]);
        if(ch & X_6)
            xor_block_aligned(a, a, p[6]);
        if(ch & X_7)
            xor_block_aligned(a, a, p[7]);
        if(!i--)
            break;
        gf_mulx8(mode)(a);
    }
}

#if defined( TABLES_64K )

/*  This version uses 64k bytes of table space on the stack.
    An input variable field value in a[] has to be multiplied
    by a key value in g[] that changes far less frequently.

    To do this a[] is split up into 16 smaller field values,
    each one byte in length. For the 256 values of each of
    these smaller values, we can precompute the result of
    multiplying g by this field value. We can then combine
    these values to provide the full multiply. So for each
    of 16 bytes we have a table of 256 field values each of
    16 bytes - 64k bytes in total.
*/

void init_64k_table(const gf_t g, gf_t64k_t t)
{   int i = 0, j, k;

    /*  depending on the representation we have to process bits
        within bytes high to low (0xe1 style) or low to high
        (0x87 style). We start by producing the powers x, x^2
        .. x^7 and put them in t[0][1], t[0][2] .. t[128] or in
        t[128], t[64] .. t[1] depending on the bit order in use.
    */

    /* clear the element for the zero field element */
    memset(t[0][0], 0, GF_BYTE_LEN);

#if defined( GF_MODE_LL ) || defined( GF_MODE_BL )

    /* g -> t[0][1], generate t[0][2] ... */
    memcpy(t[0][1], g, GF_BYTE_LEN);
    for(j = 1; j <= 64; j <<= 1)
        gf_mulx1(mode)(t[0][j + j], t[0][j]);
#else

    /* g -> t[0][128], generate t[0][64] ... */
    memcpy(t[0][128], g, GF_BYTE_LEN);
    for(j = 64; j >= 1; j >>= 1)
        gf_mulx1(mode)(t[0][j], t[0][j + j]);
#endif

    for( ; ; )
    {
        /*  if { n } stands for the field value represented by
            the integer n, we can express higher multiplies in
            the table as follows:

                1. g * { 3} = g * {2} ^ g * {1}

                2. g * { 5} = g * {4} ^ g * {1}
                   g * { 6} = g * {4} ^ g * {2}
                   g * { 7} = g * {4} ^ g * {3}

                3. g * { 9} = g * {8} ^ g * {1}
                   g * {10} = g * {8} ^ g * {2}
                ....

            and so on. This is what the following loops do.
        */
        for(j = 2; j < 256; j += j)
            for(k = 1; k < j; ++k)
                xor_block_aligned(t[i][j + k], t[i][j], t[i][k]);

        if(++i == GF_BYTE_LEN)  /* all 16 byte positions done */
            return;

        /*  We now move to the next byte up and set up its eight
            starting values by multiplying the values in the
            lower table by x^8
        */
        memset(t[i][0], 0, GF_BYTE_LEN);
        for(j = 128; j > 0; j >>= 1)
        {
            memcpy(t[i][j], t[i - 1][j], GF_BYTE_LEN);
            gf_mulx8(mode)(t[i][j]);
        }
    }
}

#define xor_64k(i,ap,t,r) xor_block_aligned(r, r, t[i][ap[GF_INDEX(i)]])

#if defined( UNROLL_LOOPS )

void gf_mul_64k(gf_t a, const gf_t64k_t t, gf_t r)
{   uint8_t *ap = (uint8_t*)a;
    memset(r, 0, GF_BYTE_LEN);
    xor_64k(15, ap, t, r); xor_64k(14, ap, t, r);
    xor_64k(13, ap, t, r); xor_64k(12, ap, t, r);
    xor_64k(11, ap, t, r); xor_64k(10, ap, t, r);
    xor_64k( 9, ap, t, r); xor_64k( 8, ap, t, r);
    xor_64k( 7, ap, t, r); xor_64k( 6, ap, t, r);
    xor_64k( 5, ap, t, r); xor_64k( 4, ap, t, r);
    xor_64k( 3, ap, t, r); xor_64k( 2, ap, t, r);
    xor_64k( 1, ap, t, r); xor_64k( 0, ap, t, r);
    copy_block_aligned(a, r);
}

#else

void gf_mul_64k(gf_t a, const gf_t64k_t t, gf_t r)
{   int i;
    uint8_t *ap = (uint8_t*)a;
    memset(r, 0, GF_BYTE_LEN);
    for(i = 15; i >= 0; --i)
    {
        xor_64k(i, ap, t, r);
    }
    copy_block_aligned(a, r);
}

#endif

#endif

#if defined( TABLES_8K )

/*  This version uses 8k bytes of table space on the stack.
    An input field value in a[] has to be multiplied by a
    key value in g[]. To do this a[] is split up into 32
    smaller field values each 4-bits in length. For the
    16 values of each of these smaller field values we can
    precompute the result of multiplying g[] by the field
    value in question. So for each of 32 nibbles we have a
    table of 16 field values, each of 16 bytes - 8k bytes
    in total.
*/
void init_8k_table(const gf_t g, gf_t8k_t t)
{   int i = 0, j, k;

    /*  do the low 4-bit nibble first - t[0][16] - and note
        that the unit multiplier sits at 0x01 - t[0][1] in
        the table. Then multiplies by x go at 2, 4, 8
    */
    /* set the table elements for a zero multiplier */
    memset(t[0][0], 0, GF_BYTE_LEN);
    memset(t[1][0], 0, GF_BYTE_LEN);

#if defined( GF_MODE_LL ) || defined( GF_MODE_BL )

    /* t[0][1] = g, compute t[0][2], t[0][4], t[0][8] */
    memcpy(t[0][1], g, GF_BYTE_LEN);
    for(j = 1; j <= 4; j <<= 1)
        gf_mulx1(mode)(t[0][j + j], t[0][j]);
    /* t[1][1] = t[0][1] * x^4 = t[0][8] * x */
    gf_mulx1(mode)(t[1][1], t[0][8]);
    for(j = 1; j <= 4; j <<= 1)
        gf_mulx1(mode)(t[1][j + j], t[1][j]);
#else

    /* g -> t[0][8], compute t[0][4], t[0][2], t[0][1] */
    memcpy(t[1][8], g, GF_BYTE_LEN);
    for(j = 4; j >= 1; j >>= 1)
        gf_mulx1(mode)(t[1][j], t[1][j + j]);
    /* t[1][1] = t[0][1] * x^4 = t[0][8] * x */
    gf_mulx1(mode)(t[0][8], t[1][1]);
    for(j = 4; j >= 1; j >>= 1)
        gf_mulx1(mode)(t[0][j], t[0][j + j]);
#endif

    for( ; ; )
    {
        for(j = 2; j < 16; j += j)
            for(k = 1; k < j; ++k)
                xor_block_aligned(t[i][j + k], t[i][j], t[i][k]);

        if(++i == 2 * GF_BYTE_LEN)
            return;

        if(i > 1)
        {
            memset(t[i][0], 0, GF_BYTE_LEN);
            for(j = 8; j > 0; j >>= 1)
            {
                memcpy(t[i][j], t[i - 2][j], GF_BYTE_LEN);
                gf_mulx8(mode)(t[i][j]);
            }
        }

    }
}

#define xor_8k(i,ap,t,r)   \
    xor_block_aligned(r, r, t[i + i][ap[GF_INDEX(i)] & 15]); \
    xor_block_aligned(r, r, t[i + i + 1][ap[GF_INDEX(i)] >> 4])

#if defined( UNROLL_LOOPS )

void gf_mul_8k(gf_t a, const gf_t8k_t t, gf_t r)
{   uint8_t *ap = (uint8_t*)a;
    memset(r, 0, GF_BYTE_LEN);
    xor_8k(15, ap, t, r); xor_8k(14, ap, t, r);
    xor_8k(13, ap, t, r); xor_8k(12, ap, t, r);
    xor_8k(11, ap, t, r); xor_8k(10, ap, t, r);
    xor_8k( 9, ap, t, r); xor_8k( 8, ap, t, r);
    xor_8k( 7, ap, t, r); xor_8k( 6, ap, t, r);
    xor_8k( 5, ap, t, r); xor_8k( 4, ap, t, r);
    xor_8k( 3, ap, t, r); xor_8k( 2, ap, t, r);
    xor_8k( 1, ap, t, r); xor_8k( 0, ap, t, r);
    copy_block_aligned(a, r);
}

#else

void gf_mul_8k(gf_t a, const gf_t8k_t t, gf_t r)
{   int i;
    uint8_t *ap = (uint8_t*)a;
    memset(r, 0, GF_BYTE_LEN);
    for(i = 15; i >= 0; --i)
    {
        xor_8k(i, ap, t, r);
    }
    memcpy(a, r, GF_BYTE_LEN);
}

#endif

#endif

#if defined( TABLES_4K )

/*  This version uses 4k bytes of table space on the stack.
    A 16 byte buffer has to be multiplied by a 16 byte key
    value in GF(128). If we consider a GF(128) value in a
    single byte, we can construct a table of the 256 16
    byte values that result from multiplying g by the 256
    values of this byte. This requires 4096 bytes.

    If we take the highest byte in the buffer and use this
    table to multiply it by g, we then have to multiply it
    by x^120 to get the final value. For the next highest
    byte the result has to be multiplied by x^112 and so on.

    But we can do this by accumulating the result in an
    accumulator starting with the result for the top byte.
    We repeatedly multiply the accumulator value by x^8 and
    then add in (i.e. xor) the 16 bytes of the next lower
    byte in the buffer, stopping when we reach the lowest
    byte. This requires a 4096 byte table.
*/

void init_4k_table(const gf_t g, gf_t4k_t t)
{   int j, k;

    memset(t[0], 0, GF_BYTE_LEN);

#if defined( GF_MODE_LL ) || defined( GF_MODE_BL )

    memcpy(t[1], g, GF_BYTE_LEN);
    for(j = 1; j <= 64; j <<= 1)
        gf_mulx1(mode)(t[j + j], t[j]);
#else

    memcpy(t[128], g, GF_BYTE_LEN);
    for(j = 64; j >= 1; j >>= 1)
        gf_mulx1(mode)(t[j], t[j + j]);
#endif

    for(j = 2; j < 256; j += j)
        for(k = 1; k < j; ++k)
            xor_block_aligned(t[j + k], t[j], t[k]);
}

#define xor_4k(i,ap,t,r) gf_mulx8(mode)(r); xor_block_aligned(r, r, t[ap[GF_INDEX(i)]])

#if defined( UNROLL_LOOPS )

void gf_mul_4k(gf_t a, const gf_t4k_t t, gf_t r)
{   uint8_t *ap = (uint8_t*)a;
    memset(r, 0, GF_BYTE_LEN);
    xor_4k(15, ap, t, r); xor_4k(14, ap, t, r);
    xor_4k(13, ap, t, r); xor_4k(12, ap, t, r);
    xor_4k(11, ap, t, r); xor_4k(10, ap, t, r);
    xor_4k( 9, ap, t, r); xor_4k( 8, ap, t, r);
    xor_4k( 7, ap, t, r); xor_4k( 6, ap, t, r);
    xor_4k( 5, ap, t, r); xor_4k( 4, ap, t, r);
    xor_4k( 3, ap, t, r); xor_4k( 2, ap, t, r);
    xor_4k( 1, ap, t, r); xor_4k( 0, ap, t, r);
    copy_block_aligned(a, r);
}

#else

void gf_mul_4k(gf_t a, const gf_t4k_t t, gf_t r)
{   int i = 15;
    uint8_t *ap = (uint8_t*)a;
    memset(r, 0, GF_BYTE_LEN);
    for(i = 15; i >= 0; --i)
    {
        xor_4k(i, ap, t, r);
    }
    copy_block_aligned(a, r);
}

#endif

#endif

#if defined( TABLES_256 )

/*  This version uses 256 bytes of table space on the stack.
    A 16 byte buffer has to be multiplied by a 16 byte key
    value in GF(128). If we consider a GF(128) value in a
    single 4-bit nibble, we can construct a table of the 16
    16 byte values that result from the 16 values of this
    byte. This requires 256 bytes. If we take the highest
    4-bit nibble in the buffer and use this table to get the
    result, we then have to multiply by x^124 to get the
    final value. For the next highest byte the result has to
    be multiplied by x^120 and so on. But we can do this by
    accumulating the result in an accumulator starting with
    the result for the top nibble. We repeatedly multiply
    the accumulator value by x^4 and then add in (i.e. xor)
    the 16 bytes of the next lower nibble in the buffer,
    stopping when we reach the lowest nibble. This uses a
    256 byte table.
*/

void init_256_table(const gf_t g, gf_t256_t t)
{   int j, k;

    memset(t[0], 0, GF_BYTE_LEN);

#if defined( GF_MODE_LL ) || defined( GF_MODE_BL )

    memcpy(t[1], g, GF_BYTE_LEN);
    for(j = 1; j <= 4; j <<= 1)
        gf_mulx1(mode)(t[j + j], t[j]);
#else

    memcpy(t[8], g, GF_BYTE_LEN);
    for(j = 4; j >= 1; j >>= 1)
        gf_mulx1(mode)(t[j], t[j + j]);
#endif

    for(j = 2; j < 16; j += j)
        for(k = 1; k < j; ++k)
            xor_block_aligned(t[j + k], t[j], t[k]);
}

#define x_lo(i,ap,t,r) gf_mulx4(mode)(r); xor_block_aligned(r, r, t[ap[GF_INDEX(i)] & 0x0f])
#define x_hi(i,ap,t,r) gf_mulx4(mode)(r); xor_block_aligned(r, r, t[ap[GF_INDEX(i)] >> 4])

#if defined( GF_MODE_LL ) || defined( GF_MODE_BL )
#define xor_256(a,b,c,d)  x_hi(a,b,c,d); x_lo(a,b,c,d)
#else
#define xor_256(a,b,c,d)  x_lo(a,b,c,d); x_hi(a,b,c,d)
#endif

#if defined( UNROLL_LOOPS )

void gf_mul_256(gf_t a, const gf_t256_t t, gf_t r)
{   uint8_t *ap = (uint8_t*)a;
    memset(r, 0, GF_BYTE_LEN);
    xor_256(15, ap, t, r); xor_256(14, ap, t, r);
    xor_256(13, ap, t, r); xor_256(12, ap, t, r);
    xor_256(11, ap, t, r); xor_256(10, ap, t, r);
    xor_256( 9, ap, t, r); xor_256( 8, ap, t, r);
    xor_256( 7, ap, t, r); xor_256( 6, ap, t, r);
    xor_256( 5, ap, t, r); xor_256( 4, ap, t, r);
    xor_256( 3, ap, t, r); xor_256( 2, ap, t, r);
    xor_256( 1, ap, t, r); xor_256( 0, ap, t, r);
    copy_block_aligned(a, r);
}

#else

void gf_mul_256(gf_t a, const gf_t256_t t, gf_t r)
{   int i;
    uint8_t *ap = (uint8_t*)a;
    memset(r, 0, GF_BYTE_LEN);
    for(i = 15; i >= 0; --i)
    {
        xor_256(i, ap, t, r);
    }
    copy_block_aligned(a, r);
}

#endif

#endif
crypto/aes/gf128mul.h (new file, 215 lines)

@@ -0,0 +1,215 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.

The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:

  source code distributions include the above copyright notice, this
  list of conditions and the following disclaimer;

  binary distributions include the above copyright notice, this list
  of conditions and the following disclaimer in their documentation.

This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 11/01/2011

I am grateful for the work done by Mark Rodenkirch and Jason Papadopoulos
in helping to remove a bug in the operation of this code on big endian
systems when fast buffer operations are enabled.
---------------------------------------------------------------------------

An implementation of field multiplication in the Galois Field GF(2^128)

A polynomial representation is used for the field with the coefficients
held in bit sequences in which the bit numbers are the powers of x that
a bit represents. The field polynomial used is (x^128+x^7+x^2+x+1).

The obvious way of representing field elements in a computer system is
to map 'x' in the field to the binary integer '2'. But this was way too
obvious for cryptographers!

Here bytes are numbered in their memory order and bits within bytes are
numbered according to their integer numeric significance (that is, as is
now normal, with bit 0 representing unity). The term 'little endian'
will then be used to describe mappings where numeric (power of 2) or field
(power of x) significance increases with increasing bit or byte numbers,
with 'big endian' being used to describe the inverse situation.

The GF bit sequence can then be mapped onto 8-bit bytes in computer
memory in one of four simple ways:

  A mapping in which x maps to the integer 2 in little endian
  form for both bytes and bits within bytes:

    LL: bit for x^n ==> bit for 2^(n % 8) in byte[n / 8]

  A mapping in which x maps to the integer 2 in big endian form
  for both bytes and bits within bytes:

    BL: bit for x^n ==> bit for 2^(n % 8) in byte[15 - n / 8]

  A little endian mapping for bytes but with the bits within
  bytes in reverse order (big endian bytes):

    LB: bit for x^n ==> bit for 2^(7 - n % 8) in byte[n / 8]

  A big endian mapping for bytes but with the bits within
  bytes in reverse order (big endian bytes):

    BB: bit for x^n ==> bit for 2^(7 - n % 8) in byte[15 - n / 8]

128-bit field elements are represented by 16 byte buffers, but for
processing efficiency reasons it is often desirable to process arrays
of bytes using longer types such as, for example, unsigned long values.
The type used for representing these buffers will be called a 'gf_unit'
and the buffer itself will be referred to as a 'gf_t' type.

The field multiplier is based on the assumption that one of the two
field elements involved in multiplication will change only relatively
infrequently, making it worthwhile to precompute tables to speed up
multiplication by this value.
*/

#ifndef _GF128MUL_H
#define _GF128MUL_H

#include <stdlib.h>
#include <string.h>

#include "brg_endian.h"

/* USER DEFINABLE OPTIONS */
/*  UNIT_BITS sets the size of variables used to process 16 byte buffers
    when the buffer alignment allows this. When buffers are processed
    in bytes, 16 individual operations are involved. But if, say, such
    a buffer is divided into four 32-bit variables, it can then be processed
    in 4 operations, making the code typically much faster. In general
    it will pay to use the longest natively supported size, which will
    probably be 32 or 64 bits in 32 and 64 bit systems respectively.
*/

#if defined( UNIT_BITS )
#  undef UNIT_BITS
#endif

#if !defined( UNIT_BITS )
#  if PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN
#    if 0
#      define UNIT_BITS 8
#    elif 0
#      define UNIT_BITS 32
#    elif 1
#      define UNIT_BITS 64
#    endif
#  elif defined( _WIN64 )
#    define UNIT_BITS 64
#  else
#    define UNIT_BITS 32
#  endif
#endif

#if UNIT_BITS == 64 && !defined( NEED_UINT_64T )
#  define NEED_UINT_64T
#endif

#include "brg_types.h"

/* Choose the Galois Field representation to use (see above) */
#if 0
#  define GF_MODE_LL
#elif 0
#  define GF_MODE_BL
#elif 1
#  define GF_MODE_LB    /* the representation used by GCM */
#elif 0
#  define GF_MODE_BB
#else
#  error mode is not defined
#endif

/*  Table sizes for GF(128) Multiply. Normally larger tables give
    higher speed but cache loading might change this. Normally only
    one table size (or none at all) will be specified here.
*/
#if 0
#  define TABLES_64K
#endif
#if 0
#  define TABLES_8K
#endif
#if 1
#  define TABLES_4K
#endif
#if 0
#  define TABLES_256
#endif

/* END OF USER DEFINABLE OPTIONS */

#if !(defined( TABLES_64K ) || defined( TABLES_8K ) \
        || defined( TABLES_4K ) || defined( TABLES_256 ))
#  define NO_TABLES
#endif

#if defined(__cplusplus)
extern "C"
{
#endif

#define GF_BYTE_LEN 16
#define GF_UNIT_LEN (GF_BYTE_LEN / (UNIT_BITS >> 3))

UNIT_TYPEDEF(gf_unit_t, UNIT_BITS);
BUFR_TYPEDEF(gf_t, UNIT_BITS, GF_BYTE_LEN);

/*  Code for conversion between the four different Galois Field representations
    is optionally available using gf_convert.c
*/

typedef enum { REVERSE_NONE = 0, REVERSE_BITS = 1, REVERSE_BYTES = 2 } transform;

void convert_representation(gf_t dest, const gf_t source, transform rev);

void gf_mul(gf_t a, const gf_t b);      /* slow field multiply */

/* types and calls for the 64k table driven field multiplier */

typedef gf_t gf_t64k_a[16][256];
typedef gf_t (*gf_t64k_t)[256];

void init_64k_table(const gf_t g, gf_t64k_t t);
void gf_mul_64k(gf_t a, const gf_t64k_t t, void *r);

/* types and calls for the 8k table driven field multiplier */

typedef gf_t gf_t8k_a[32][16];
typedef gf_t (*gf_t8k_t)[16];

void init_8k_table(const gf_t g, gf_t8k_t t);
void gf_mul_8k(gf_t a, const gf_t8k_t t, gf_t r);

/* types and calls for the 4k table driven field multiplier */

typedef gf_t gf_t4k_a[256];
typedef gf_t (*gf_t4k_t);

void init_4k_table(const gf_t g, gf_t4k_t t);
void gf_mul_4k(gf_t a, const gf_t4k_t t, gf_t r);

/* types and calls for the 256 byte table driven field multiplier */

typedef gf_t gf_t256_a[16];
typedef gf_t (*gf_t256_t);

void init_256_table(const gf_t g, gf_t256_t t);
void gf_mul_256(gf_t a, const gf_t256_t t, gf_t r);

#if defined(__cplusplus)
}
#endif

#endif
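A minimal sketch (not part of the commit) of driving the table-driven multiplier declared above directly, assuming the default TABLES_4K configuration and the array-style gf_t buffers from mode_hdr.h; the function name is hypothetical. The table is built once from the fixed operand h, after which each call multiplies the 16-byte block a by h in place, with r used as scratch space.

/* Multiply a by h in GF(2^128) using the 4 KiB table (default build). */
#include "gf128mul.h"

void ghash_mul_example(gf_t a, const gf_t h)
{
    gf_t4k_a tab;               /* 4 KiB table of the 256 byte-multiples of h */
    gf_t r;                     /* scratch block used by gf_mul_4k            */

    init_4k_table(h, tab);      /* precompute once per key value h            */
    gf_mul_4k(a, tab, r);       /* a <- a * h, in the LB representation       */
}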
773
crypto/aes/gf_mul_lo.h
Normal file
773
crypto/aes/gf_mul_lo.h
Normal file
@ -0,0 +1,773 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.

The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:

source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;

binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation.

This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 18/02/2014

This file provides the low level primitives needed for Galois Field
operations in GF(2^128) for the four most likely field representations.
*/

#ifndef _GF_MUL_LO_H
#define _GF_MUL_LO_H

#if defined( USE_INLINING )
#  if defined( _MSC_VER )
#    define gf_decl __inline
#  elif defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
#    define gf_decl static inline
#  else
#    define gf_decl static
#  endif
#endif

#if 0   /* used for testing only: t1(UNIT_BITS), t2(UNIT_BITS) */
#  define _t1(n) bswap ## n ## _block(x, x)
#  define t1(n)  _t1(n)
#  define _t2(n) bswap ## n ## _block(x, x); bswap ## n ## _block(r, r)
#  define t2(n)  _t2(n)
#endif

#define gf_m(n,x)   gf_mulx ## n ## x
#define gf_mulx1(x) gf_m(1,x)
#define gf_mulx4(x) gf_m(4,x)
#define gf_mulx8(x) gf_m(8,x)

#define MASK(x) ((x) * (UNIT_CAST(-1,UNIT_BITS) / 0xff))

#define DATA_256(q) {\
    q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
    q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
    q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
    q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
    q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
    q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
    q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
    q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
    q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
    q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
    q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
    q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
    q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
    q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
    q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
    q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
    q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
    q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
    q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
    q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
    q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
    q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
    q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
    q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
    q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
    q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
    q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
    q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
    q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
    q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
    q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
    q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) }

/* Within the 16 bytes of the field element the top and bottom field bits
   are within bytes as follows (bit numbers in bytes 0 from ls up) for
   each of the four field representations supported (see gf128mul.txt):

   GF_BIT  127 126 125 124 123 122 121 120  .....    7   6   5   4   3   2   1   0
   0x87                                               1   0   0   0   0   1   1   1
   BL      x[ 0]  7   6   5   4   3   2   1   0     x[15]  7   6   5   4   3   2   1   0
   LL      x[15]  7   6   5   4   3   2   1   0     x[ 0]  7   6   5   4   3   2   1   0

   GF_BIT  120 121 122 123 124 125 126 127  .....    0   1   2   3   4   5   6   7
   0xe1                                               1   1   1   0   0   0   0   1
   BB      x[ 0]  7   6   5   4   3   2   1   0     x[15]  7   6   5   4   3   2   1   0
   LB      x[15]  7   6   5   4   3   2   1   0     x[ 0]  7   6   5   4   3   2   1   0

   When the field element is multiplied by x^n, the high bits overflow
   and are used to form an overflow byte. For the BL and LL modes this
   byte has the lowest overflow bit in bit 0 whereas for the BB and LB
   modes this bit is in bit 7. So we have for this byte:

   bit (bit n = 2^n)   7    6    5    4    3    2    1    0
   BL and LL          x^7  x^6  x^5  x^4  x^3  x^2  x^1  x^0
   BB and LB          x^0  x^1  x^2  x^3  x^4  x^5  x^6  x^7

   This byte then has to be multiplied by the low bits of the field
   polynomial, which produces a value of 16 bits to be xored into the
   left shifted field value. For the BL and LL modes bit 0 gives the
   word value 0x0087, bit 1 gives 0x010e (0x87 left shifted 1), bit 2
   gives 0x021c (0x87 left shifted 2), ... For the BB and LB modes,
   bit 7 gives the value 0x00e1, bit 6 gives 0x8070, bit 5 gives
   0x4038, ... Each bit in the overflow byte is expanded in this way
   and is xored into the overall result, so each of the 256 byte
   values will produce a corresponding word value that is computed by
   the gf_uint16_xor(i) macros below.

   These word values have to be xored into the low 16 bits of the
   field value. If the byte endianness of the mode matches that of
   the architecture, xoring the word value will be correct. But if
   the mode has the opposite endianness, the word value has to be
   xored in byte reversed order. This is done by the ord() macro.
*/

#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN \
        && (defined( GF_MODE_LB ) || defined( GF_MODE_LL )) || \
    PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN \
        && (defined( GF_MODE_BB ) || defined( GF_MODE_BL ))
#  define ord(hi, lo)   0x##hi##lo
#else
#  define ord(hi, lo)   0x##lo##hi
#endif

#if defined( GF_MODE_BL ) || defined( GF_MODE_LL )

/* field and numeric bit significance correspond */

#define gf_uint16_xor(i) ( \
    (i & 0x01 ? ord(00,87) : 0) ^ (i & 0x02 ? ord(01,0e) : 0) ^ \
    (i & 0x04 ? ord(02,1c) : 0) ^ (i & 0x08 ? ord(04,38) : 0) ^ \
    (i & 0x10 ? ord(08,70) : 0) ^ (i & 0x20 ? ord(10,e0) : 0) ^ \
    (i & 0x40 ? ord(21,c0) : 0) ^ (i & 0x80 ? ord(43,80) : 0) )

enum x_bit
{
    X_0 = 0x01, X_1 = 0x02, X_2 = 0x04, X_3 = 0x08,
    X_4 = 0x10, X_5 = 0x20, X_6 = 0x40, X_7 = 0x80
};

#elif defined( GF_MODE_BB ) || defined( GF_MODE_LB )

/* field and numeric bit significance are in reverse */

#define gf_uint16_xor(i) ( \
    (i & 0x80 ? ord(00,e1) : 0) ^ (i & 0x40 ? ord(80,70) : 0) ^ \
    (i & 0x20 ? ord(40,38) : 0) ^ (i & 0x10 ? ord(20,1c) : 0) ^ \
    (i & 0x08 ? ord(10,0e) : 0) ^ (i & 0x04 ? ord(08,07) : 0) ^ \
    (i & 0x02 ? ord(84,03) : 0) ^ (i & 0x01 ? ord(c2,01) : 0) )

enum x_bit
{
    X_0 = 0x80, X_1 = 0x40, X_2 = 0x20, X_3 = 0x10,
    X_4 = 0x08, X_5 = 0x04, X_6 = 0x02, X_7 = 0x01
};

#else
#error Galois Field representation has not been set
#endif

const uint16_t gf_tab[256] = DATA_256(gf_uint16_xor);

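/* Added note (not in the original source): for the BL and LL representations
   each gf_uint16_xor(i) table entry is simply the xor of 0x87 shifted left
   by the positions of the bits set in the overflow byte i (before any ord()
   byte swapping). A minimal sketch of that computation, using the
   hypothetical helper name ll_correction:
*/
#if 0   /* illustration only, not compiled */
#include <stdint.h>

static uint16_t ll_correction(uint8_t overflow)
{
    uint16_t w = 0;
    int k;
    for(k = 0; k < 8; ++k)              /* each overflow bit k contributes  */
        if(overflow & (1 << k))         /* the field polynomial low byte    */
            w ^= (uint16_t)(0x87 << k); /* 0x87 shifted up by k positions   */
    return w;   /* e.g. ll_correction(0x01) == 0x0087, ll_correction(0x02) == 0x010e */
}
#endif
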
/* LL Mode Galois Field operations

      x[0]     x[1]     x[2]     x[3]     x[4]     x[5]     x[6]     x[7]
    ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls
    10000111 ........ ........ ........ ........ ........ ........ ........
    07....00 15....08 23....16 31....24 39....32 47....40 55....48 63....56
      x[8]     x[9]     x[10]    x[11]    x[12]    x[13]    x[14]    x[15]
    ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls
    ........ ........ ........ ........ ........ ........ ........ M.......
    71....64 79....72 87....80 95....88 103...96 111..104 119..112 127..120
*/

#if UNIT_BITS == 64

#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
#define f1_ll(n,r,x) r[n] = (x[n] << 1) | (n ? x[n-1] >> 63 : 0)
#define f4_ll(n,r,x) r[n] = (x[n] << 4) | (n ? x[n-1] >> 60 : 0)
#define f8_ll(n,r,x) r[n] = (x[n] << 8) | (n ? x[n-1] >> 56 : 0)
#else
#define f1_ll(n,r,x) r[n] = ((x[n] << 1) & ~MASK(0x01)) | (((x[n] >> 15) \
                             | (n ? x[n-1] << 49 : 0)) & MASK(0x01))
#define f4_ll(n,r,x) r[n] = ((x[n] << 4) & ~MASK(0x0f)) | (((x[n] >> 12) \
                             | (n ? x[n-1] << 52 : 0)) & MASK(0x0f))
#define f8_ll(n,r,x) r[n] = (x[n] >> 8) | (n ? x[n-1] << 56 : 0)
#endif

gf_decl void gf_mulx1_ll(gf_t r, const gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = gf_tab[(UNIT_PTR(x)[1] >> 63) & 0x01];
#else
    _tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[1] >> 7) & 0x01])) << 48;
#endif
    rep2_d2(f1_ll, UNIT_PTR(r), UNIT_PTR(x));
    UNIT_PTR(r)[0] ^= _tt;
}

gf_decl void gf_mulx4_ll(gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = gf_tab[(UNIT_PTR(x)[1] >> 60) & 0x0f];
#else
    _tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[1] >> 4) & 0x0f])) << 48;
#endif
    rep2_d2(f4_ll, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[0] ^= _tt;
}

gf_decl void gf_mulx8_ll(gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = gf_tab[UNIT_PTR(x)[1] >> 56];
#else
    _tt = ((gf_unit_t)(gf_tab[UNIT_PTR(x)[1] & 0xff])) << 48;
#endif
    rep2_d2(f8_ll, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[0] ^= _tt;
}

#elif UNIT_BITS == 32

#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
#define f1_ll(n,r,x) r[n] = (x[n] << 1) | (n ? x[n-1] >> 31 : 0)
#define f4_ll(n,r,x) r[n] = (x[n] << 4) | (n ? x[n-1] >> 28 : 0)
#define f8_ll(n,r,x) r[n] = (x[n] << 8) | (n ? x[n-1] >> 24 : 0)
#else
#define f1_ll(n,r,x) r[n] = ((x[n] << 1) & ~MASK(0x01)) | (((x[n] >> 15) \
                             | (n ? x[n-1] << 17 : 0)) & MASK(0x01))
#define f4_ll(n,r,x) r[n] = ((x[n] << 4) & ~MASK(0x0f)) | (((x[n] >> 12) \
                             | (n ? x[n-1] << 20 : 0)) & MASK(0x0f))
#define f8_ll(n,r,x) r[n] = (x[n] >> 8) | (n ? x[n-1] << 24 : 0)
#endif

gf_decl void gf_mulx1_ll(gf_t r, const gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = gf_tab[(UNIT_PTR(x)[3] >> 31) & 0x01];
#else
    _tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[3] >> 7) & 0x01])) << 16;
#endif
    rep2_d4(f1_ll, UNIT_PTR(r), UNIT_PTR(x));
    UNIT_PTR(r)[0] ^= _tt;
}

gf_decl void gf_mulx4_ll(gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = gf_tab[(UNIT_PTR(x)[3] >> 28) & 0x0f];
#else
    _tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[3] >> 4) & 0x0f])) << 16;
#endif
    rep2_d4(f4_ll, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[0] ^= _tt;
}

gf_decl void gf_mulx8_ll(gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = gf_tab[UNIT_PTR(x)[3] >> 24];
#else
    _tt = ((gf_unit_t)(gf_tab[UNIT_PTR(x)[3] & 0xff])) << 16;
#endif
    rep2_d4(f8_ll, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[0] ^= _tt;
}

#else

#define f1_ll(n,r,x) r[n] = (x[n] << 1) | (n ? x[n-1] >> 7 : 0)
#define f4_ll(n,r,x) r[n] = (x[n] << 4) | (n ? x[n-1] >> 4 : 0)

gf_decl void gf_mulx1_ll(gf_t r, const gf_t x)
{   uint16_t _tt;
    _tt = gf_tab[(UNIT_PTR(x)[15] >> 7) & 0x01];
    rep2_d16(f1_ll, UNIT_PTR(r), UNIT_PTR(x));
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    UNIT_PTR(r)[0] ^= _tt & 0xff;
#else
    UNIT_PTR(r)[0] ^= _tt >> 8;
#endif
}

gf_decl void gf_mulx4_ll(gf_t x)
{   uint16_t _tt;
    _tt = gf_tab[(UNIT_PTR(x)[15] >> 4) & 0x0f];
    rep2_d16(f4_ll, UNIT_PTR(x), UNIT_PTR(x));
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    UNIT_PTR(x)[1] ^= _tt >> 8;
    UNIT_PTR(x)[0] ^= _tt & 0xff;
#else
    UNIT_PTR(x)[1] ^= _tt & 0xff;
    UNIT_PTR(x)[0] ^= _tt >> 8;
#endif
}

gf_decl void gf_mulx8_ll(gf_t x)
{   uint16_t _tt;
    _tt = gf_tab[UNIT_PTR(x)[15]];
    memmove(UNIT_PTR(x) + 1, UNIT_PTR(x), 15);
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    UNIT_PTR(x)[1] ^= _tt >> 8;
    UNIT_PTR(x)[0] = _tt & 0xff;
#else
    UNIT_PTR(x)[1] ^= _tt & 0xff;
    UNIT_PTR(x)[0] = _tt >> 8;
#endif
}

#endif

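/* Added note (not in the original source): a plain byte-wise multiply by x
   for the LL layout shown above, useful as a reference when checking the
   unit sized variants. The helper name mulx1_ll_bytewise is hypothetical.
*/
#if 0   /* illustration only, not compiled */
#include <stdint.h>

/* In the LL layout byte k holds field bits 8k+7 .. 8k, so the value shifts
   up one bit (working from byte 15 down so each carry is read before it is
   overwritten) and an overflow of field bit 127 is reduced by xoring 0x87
   into byte 0.                                                             */
static void mulx1_ll_bytewise(uint8_t x[16])
{
    uint8_t overflow = x[15] >> 7;
    int n;
    for(n = 15; n > 0; --n)
        x[n] = (uint8_t)((x[n] << 1) | (x[n - 1] >> 7));
    x[0] = (uint8_t)(x[0] << 1);
    if(overflow)
        x[0] ^= 0x87;
}
#endif
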
/* BL Mode Galois Field operations

      x[0]     x[1]     x[2]     x[3]     x[4]     x[5]     x[6]     x[7]
    ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls
    M....... ........ ........ ........ ........ ........ ........ ........
    127..120 119..112 111..104 103...96 95....88 87....80 79....72 71....64
      x[8]     x[9]     x[10]    x[11]    x[12]    x[13]    x[14]    x[15]
    ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls
    ........ ........ ........ ........ ........ ........ ........ 10000111
    63....56 55....48 47....40 39....32 31....24 23....16 15....08 07....00
*/

#if UNIT_BITS == 64

#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
#define f1_bl(n,r,x) r[n] = ((x[n] << 1) & ~MASK(0x01)) | (((x[n] >> 15) \
                             | (!n ? x[n+1] << 49 : 0)) & MASK(0x01))
#define f4_bl(n,r,x) r[n] = ((x[n] << 4) & ~MASK(0x0f)) | (((x[n] >> 12) \
                             | (!n ? x[n+1] << 52 : 0)) & MASK(0x0f))
#define f8_bl(n,r,x) r[n] = (x[n] >> 8) | (!n ? x[n+1] << 56 : 0)
#else
#define f1_bl(n,r,x) r[n] = (x[n] << 1) | (!n ? x[n+1] >> 63 : 0)
#define f4_bl(n,r,x) r[n] = (x[n] << 4) | (!n ? x[n+1] >> 60 : 0)
#define f8_bl(n,r,x) r[n] = (x[n] << 8) | (!n ? x[n+1] >> 56 : 0)
#endif

gf_decl void gf_mulx1_bl(gf_t r, const gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] >> 7) & 0x01])) << 48;
#else
    _tt = gf_tab[(UNIT_PTR(x)[0] >> 63) & 0x01];
#endif
    rep2_u2(f1_bl, UNIT_PTR(r), UNIT_PTR(x));
    UNIT_PTR(r)[1] ^= _tt;
}

gf_decl void gf_mulx4_bl(gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] >> 4) & 0x0f])) << 48;
#else
    _tt = gf_tab[(UNIT_PTR(x)[0] >> 60) & 0x0f];
#endif
    rep2_u2(f4_bl, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[1] ^= _tt;
}

gf_decl void gf_mulx8_bl(gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = ((gf_unit_t)(gf_tab[UNIT_PTR(x)[0] & 0xff])) << 48;
#else
    _tt = gf_tab[(UNIT_PTR(x)[0] >> 56) & 0xff];
#endif
    rep2_u2(f8_bl, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[1] ^= _tt;
}

#elif UNIT_BITS == 32

#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
#define f1_bl(n,r,x) r[n] = ((x[n] << 1) & ~MASK(0x01)) | (((x[n] >> 15) \
                             | (n < 3 ? x[n+1] << 17 : 0)) & MASK(0x01))
#define f4_bl(n,r,x) r[n] = ((x[n] << 4) & ~MASK(0x0f)) | (((x[n] >> 12) \
                             | (n < 3 ? x[n+1] << 20 : 0)) & MASK(0x0f))
#define f8_bl(n,r,x) r[n] = (x[n] >> 8) | (n < 3 ? x[n+1] << 24 : 0)
#else
#define f1_bl(n,r,x) r[n] = (x[n] << 1) | (n < 3 ? x[n+1] >> 31 : 0)
#define f4_bl(n,r,x) r[n] = (x[n] << 4) | (n < 3 ? x[n+1] >> 28 : 0)
#define f8_bl(n,r,x) r[n] = (x[n] << 8) | (n < 3 ? x[n+1] >> 24 : 0)
#endif

gf_decl void gf_mulx1_bl(gf_t r, const gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] >> 7) & 0x01])) << 16;
#else
    _tt = gf_tab[(UNIT_PTR(x)[0] >> 31) & 0x01];
#endif
    rep2_u4(f1_bl, UNIT_PTR(r), UNIT_PTR(x));
    UNIT_PTR(r)[3] ^= _tt;
}

gf_decl void gf_mulx4_bl(gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] >> 4) & 0x0f])) << 16;
#else
    _tt = gf_tab[(UNIT_PTR(x)[0] >> 28) & 0x0f];
#endif
    rep2_u4(f4_bl, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[3] ^= _tt;
}

gf_decl void gf_mulx8_bl(gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = ((gf_unit_t)(gf_tab[UNIT_PTR(x)[0] & 0xff])) << 16;
#else
    _tt = gf_tab[(UNIT_PTR(x)[0] >> 24) & 0xff];
#endif
    rep2_u4(f8_bl, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[3] ^= _tt;
}

#else

#define f1_bl(n,r,x) r[n] = (x[n] << 1) | (n < 15 ? x[n+1] >> 7 : 0)
#define f4_bl(n,r,x) r[n] = (x[n] << 4) | (n < 15 ? x[n+1] >> 4 : 0)

gf_decl void gf_mulx1_bl(gf_t r, const gf_t x)
{   uint16_t _tt;
    _tt = gf_tab[(UNIT_PTR(x)[0] >> 7) & 0x01];
    rep2_u16(f1_bl, UNIT_PTR(r), UNIT_PTR(x));
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    UNIT_PTR(r)[15] ^= _tt >> 8;
#else
    UNIT_PTR(r)[15] ^= _tt & 0xff;
#endif
}

gf_decl void gf_mulx4_bl(gf_t x)
{   uint16_t _tt;
    _tt = gf_tab[(UNIT_PTR(x)[0] >> 4) & 0x0f];
    rep2_u16(f4_bl, UNIT_PTR(x), UNIT_PTR(x));
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    UNIT_PTR(x)[14] ^= _tt & 0xff;
    UNIT_PTR(x)[15] ^= _tt >> 8;
#else
    UNIT_PTR(x)[14] ^= _tt >> 8;
    UNIT_PTR(x)[15] ^= _tt & 0xff;
#endif
}

gf_decl void gf_mulx8_bl(gf_t x)
{   uint16_t _tt;
    _tt = gf_tab[UNIT_PTR(x)[0]];
    memmove(UNIT_PTR(x), UNIT_PTR(x) + 1, 15);
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    UNIT_PTR(x)[14] ^= _tt & 0xff;
    UNIT_PTR(x)[15] = _tt >> 8;
#else
    UNIT_PTR(x)[14] ^= _tt >> 8;
    UNIT_PTR(x)[15] = _tt & 0xff;
#endif
}

#endif

/* LB Mode Galois Field operations

      x[0]     x[1]     x[2]     x[3]     x[4]     x[5]     x[6]     x[7]
    ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls
    11100001 ........ ........ ........ ........ ........ ........ ........
    00....07 08....15 16....23 24....31 32....39 40....47 48....55 56....63
      x[8]     x[9]     x[10]    x[11]    x[12]    x[13]    x[14]    x[15]
    ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls
    ........ ........ ........ ........ ........ ........ ........ .......M
    64....71 72....79 80....87 88....95 96...103 104..111 112..119 120..127
*/

#if UNIT_BITS == 64

#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
#define f1_lb(n,r,x) r[n] = ((x[n] >> 1) & ~MASK(0x80)) | (((x[n] << 15) \
                             | (n ? x[n-1] >> 49 : 0)) & MASK(0x80))
#define f4_lb(n,r,x) r[n] = ((x[n] >> 4) & ~MASK(0xf0)) | (((x[n] << 12) \
                             | (n ? x[n-1] >> 52 : 0)) & MASK(0xf0))
#define f8_lb(n,r,x) r[n] = (x[n] << 8) | (n ? x[n-1] >> 56 : 0)
#else
#define f1_lb(n,r,x) r[n] = (x[n] >> 1) | (n ? x[n-1] << 63 : 0)
#define f4_lb(n,r,x) r[n] = (x[n] >> 4) | (n ? x[n-1] << 60 : 0)
#define f8_lb(n,r,x) x[n] = (x[n] >> 8) | (n ? x[n-1] << 56 : 0)
#endif

gf_decl void gf_mulx1_lb(gf_t r, const gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = gf_tab[(UNIT_PTR(x)[1] >> 49) & MASK(0x80)];
#else
    _tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[1] << 7) & 0xff])) << 48;
#endif
    rep2_d2(f1_lb, UNIT_PTR(r), UNIT_PTR(x));
    UNIT_PTR(r)[0] ^= _tt;
}

gf_decl void gf_mulx4_lb(gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = gf_tab[(UNIT_PTR(x)[1] >> 52) & MASK(0xf0)];
#else
    _tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[1] << 4) & 0xff])) << 48;
#endif
    rep2_d2(f4_lb, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[0] ^= _tt;
}

gf_decl void gf_mulx8_lb(gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = gf_tab[UNIT_PTR(x)[1] >> 56];
#else
    _tt = ((gf_unit_t)(gf_tab[UNIT_PTR(x)[1] & 0xff])) << 48;
#endif
    rep2_d2(f8_lb, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[0] ^= _tt;
}

#elif UNIT_BITS == 32

#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
#define f1_lb(n,r,x) r[n] = ((x[n] >> 1) & ~MASK(0x80)) | (((x[n] << 15) \
                             | (n ? x[n-1] >> 17 : 0)) & MASK(0x80))
#define f4_lb(n,r,x) r[n] = ((x[n] >> 4) & ~MASK(0xf0)) | (((x[n] << 12) \
                             | (n ? x[n-1] >> 20 : 0)) & MASK(0xf0))
#define f8_lb(n,r,x) r[n] = (x[n] << 8) | (n ? x[n-1] >> 24 : 0)
#else
#define f1_lb(n,r,x) r[n] = (x[n] >> 1) | (n ? x[n-1] << 31 : 0)
#define f4_lb(n,r,x) r[n] = (x[n] >> 4) | (n ? x[n-1] << 28 : 0)
#define f8_lb(n,r,x) r[n] = (x[n] >> 8) | (n ? x[n-1] << 24 : 0)
#endif

gf_decl void gf_mulx1_lb(gf_t r, const gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = gf_tab[(UNIT_PTR(x)[3] >> 17) & MASK(0x80)];
#else
    _tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[3] << 7) & 0xff])) << 16;
#endif
    rep2_d4(f1_lb, UNIT_PTR(r), UNIT_PTR(x));
    UNIT_PTR(r)[0] ^= _tt;
}

gf_decl void gf_mulx4_lb(gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = gf_tab[(UNIT_PTR(x)[3] >> 20) & MASK(0xf0)];
#else
    _tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[3] << 4) & 0xff])) << 16;
#endif
    rep2_d4(f4_lb, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[0] ^= _tt;
}

gf_decl void gf_mulx8_lb(gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = gf_tab[UNIT_PTR(x)[3] >> 24];
#else
    _tt = ((gf_unit_t)(gf_tab[UNIT_PTR(x)[3] & 0xff])) << 16;
#endif
    rep2_d4(f8_lb, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[0] ^= _tt;
}

#else

#define f1_lb(n,r,x) r[n] = (x[n] >> 1) | (n ? x[n-1] << 7 : 0)
#define f4_lb(n,r,x) r[n] = (x[n] >> 4) | (n ? x[n-1] << 4 : 0)

gf_decl void gf_mulx1_lb(gf_t r, const gf_t x)
{   uint16_t _tt;
    _tt = gf_tab[(UNIT_PTR(x)[15] << 7) & 0x80];
    rep2_d16(f1_lb, UNIT_PTR(r), UNIT_PTR(x));
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    UNIT_PTR(r)[0] ^= _tt;
#else
    UNIT_PTR(r)[0] ^= _tt >> 8;
#endif
}

gf_decl void gf_mulx4_lb(gf_t x)
{   uint16_t _tt;
    _tt = gf_tab[(UNIT_PTR(x)[15] << 4) & 0xf0];
    rep2_d16(f4_lb, UNIT_PTR(x), UNIT_PTR(x));
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    UNIT_PTR(x)[1] ^= _tt >> 8;
    UNIT_PTR(x)[0] ^= _tt & 0xff;
#else
    UNIT_PTR(x)[1] ^= _tt & 0xff;
    UNIT_PTR(x)[0] ^= _tt >> 8;
#endif
}

gf_decl void gf_mulx8_lb(gf_t x)
{   uint16_t _tt;
    _tt = gf_tab[UNIT_PTR(x)[15]];
    memmove(UNIT_PTR(x) + 1, UNIT_PTR(x), 15);
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    UNIT_PTR(x)[1] ^= _tt >> 8;
    UNIT_PTR(x)[0] = _tt & 0xff;
#else
    UNIT_PTR(x)[1] ^= _tt & 0xff;
    UNIT_PTR(x)[0] = _tt >> 8;
#endif
}

#endif

/* BB Mode Galois Field operations

      x[0]     x[1]     x[2]     x[3]     x[4]     x[5]     x[6]     x[7]
    ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls
    .......M ........ ........ ........ ........ ........ ........ ........
    120..127 112..119 104..111 96...103 88....95 80....87 72....79 64....71
      x[8]     x[9]     x[10]    x[11]    x[12]    x[13]    x[14]    x[15]
    ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls ms    ls
    ........ ........ ........ ........ ........ ........ ........ 11100001
    56....63 48....55 40....47 32....39 24....31 16....23 08....15 00....07
*/

#if UNIT_BITS == 64

#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
#define f1_bb(n,r,x) r[n] = (x[n] >> 1) | (!n ? x[n+1] << 63 : 0)
#define f4_bb(n,r,x) r[n] = (x[n] >> 4) | (!n ? x[n+1] << 60 : 0)
#define f8_bb(n,r,x) r[n] = (x[n] >> 8) | (!n ? x[n+1] << 56 : 0)
#else
#define f1_bb(n,r,x) r[n] = ((x[n] >> 1) & ~MASK(0x80)) | (((x[n] << 15) \
                             | (!n ? x[n+1] >> 49 : 0)) & MASK(0x80))
#define f4_bb(n,r,x) r[n] = ((x[n] >> 4) & ~MASK(0xf0)) | (((x[n] << 12) \
                             | (!n ? x[n+1] >> 52 : 0)) & MASK(0xf0))
#define f8_bb(n,r,x) r[n] = (x[n] << 8) | (!n ? x[n+1] >> 56 : 0)
#endif

gf_decl void gf_mulx1_bb(gf_t r, const gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] << 7) & 0x80])) << 48;
#else
    _tt = gf_tab[(UNIT_PTR(x)[0] >> 49) & 0x80];
#endif
    rep2_u2(f1_bb, UNIT_PTR(r), UNIT_PTR(x));
    UNIT_PTR(r)[1] ^= _tt;
}

gf_decl void gf_mulx4_bb(gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] << 4) & 0xf0])) << 48;
#else
    _tt = gf_tab[(UNIT_PTR(x)[0] >> 52) & 0xf0];
#endif
    rep2_u2(f4_bb, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[1] ^= _tt;
}

gf_decl void gf_mulx8_bb(gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = ((gf_unit_t)(gf_tab[UNIT_PTR(x)[0] & 0xff])) << 48;
#else
    _tt = gf_tab[(UNIT_PTR(x)[0] >> 56) & 0xff];
#endif
    rep2_u2(f8_bb, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[1] ^= _tt;
}

#elif UNIT_BITS == 32

#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
#define f1_bb(n,r,x) r[n] = (x[n] >> 1) | (n < 3 ? x[n+1] << 31 : 0)
#define f4_bb(n,r,x) r[n] = (x[n] >> 4) | (n < 3 ? x[n+1] << 28 : 0)
#define f8_bb(n,r,x) r[n] = (x[n] >> 8) | (n < 3 ? x[n+1] << 24 : 0)
#else
#define f1_bb(n,r,x) r[n] = ((x[n] >> 1) & ~MASK(0x80)) | (((x[n] << 15) \
                             | (n < 3 ? x[n+1] >> 17 : 0)) & MASK(0x80))
#define f4_bb(n,r,x) r[n] = ((x[n] >> 4) & ~MASK(0xf0)) | (((x[n] << 12) \
                             | (n < 3 ? x[n+1] >> 20 : 0)) & MASK(0xf0))
#define f8_bb(n,r,x) r[n] = (x[n] << 8) | (n < 3 ? x[n+1] >> 24 : 0)
#endif

gf_decl void gf_mulx1_bb(gf_t r, const gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] << 7) & 0x80])) << 16;
#else
    _tt = gf_tab[(UNIT_PTR(x)[0] >> 17) & 0x80];
#endif
    rep2_u4(f1_bb, UNIT_PTR(r), UNIT_PTR(x));
    UNIT_PTR(r)[3] ^= _tt;
}

gf_decl void gf_mulx4_bb(gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] << 4) & 0xf0])) << 16;
#else
    _tt = gf_tab[(UNIT_PTR(x)[0] >> 20) & 0xf0];
#endif
    rep2_u4(f4_bb, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[3] ^= _tt;
}

gf_decl void gf_mulx8_bb(gf_t x)
{   gf_unit_t _tt;
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    _tt = ((gf_unit_t)(gf_tab[UNIT_PTR(x)[0] & 0xff])) << 16;
#else
    _tt = gf_tab[(UNIT_PTR(x)[0] >> 24) & 0xff];
#endif
    rep2_u4(f8_bb, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[3] ^= _tt;
}

#else

#define f1_bb(n,r,x) r[n] = (x[n] >> 1) | (n < 15 ? x[n+1] << 7 : 0)
#define f4_bb(n,r,x) r[n] = (x[n] >> 4) | (n < 15 ? x[n+1] << 4 : 0)

gf_decl void gf_mulx1_bb(gf_t r, const gf_t x)
{   uint16_t _tt;
    _tt = gf_tab[(UNIT_PTR(x)[0] << 7) & 0x80];
    rep2_u16(f1_bb, UNIT_PTR(r), UNIT_PTR(x));
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    UNIT_PTR(r)[15] ^= _tt >> 8;
#else
    UNIT_PTR(r)[15] ^= _tt;
#endif
}

gf_decl void gf_mulx4_bb(gf_t x)
{   uint16_t _tt;
    _tt = gf_tab[(UNIT_PTR(x)[0] << 4) & 0xf0];
    rep2_u16(f4_bb, UNIT_PTR(x), UNIT_PTR(x));
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    UNIT_PTR(x)[14] ^= _tt & 0xff;
    UNIT_PTR(x)[15] ^= _tt >> 8;
#else
    UNIT_PTR(x)[14] ^= _tt >> 8;
    UNIT_PTR(x)[15] ^= _tt & 0xff;
#endif
}

gf_decl void gf_mulx8_bb(gf_t x)
{   uint16_t _tt;
    _tt = gf_tab[UNIT_PTR(x)[0]];
    memmove(UNIT_PTR(x), UNIT_PTR(x) + 1, 15);
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    UNIT_PTR(x)[14] ^= _tt & 0xff;
    UNIT_PTR(x)[15] = _tt >> 8;
#else
    UNIT_PTR(x)[14] ^= _tt >> 8;
    UNIT_PTR(x)[15] = _tt & 0xff;
#endif
}

#endif

#endif
329
crypto/aes/mode_hdr.h
Normal file
@ -0,0 +1,329 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2014, Brian Gladman, Worcester, UK. All rights reserved.

The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:

source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;

binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation.

This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 18/02/2014

This header file is an INTERNAL file which supports mode implementation
*/

#ifndef _MODE_HDR_H
#define _MODE_HDR_H

#include <string.h>
#include <limits.h>

#include "brg_endian.h"

/* This define sets the units in which buffers are processed. This code
   can provide significant speed gains if buffers can be processed in
   32 or 64 bit chunks rather than in bytes. This define sets the units
   in which buffers will be accessed if possible.
*/
#if !defined( UNIT_BITS )
#  if PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN
#    if 0
#      define UNIT_BITS 32
#    elif 1
#      define UNIT_BITS 64
#    endif
#  elif defined( _WIN64 )
#    define UNIT_BITS 64
#  else
#    define UNIT_BITS 32
#  endif
#endif

#if UNIT_BITS == 64 && !defined( NEED_UINT_64T )
#  define NEED_UINT_64T
#endif

#include "brg_types.h"

/* Use of inlines is preferred but code blocks can also be expanded inline
   using 'defines'. But the latter approach will typically generate a LOT
   of code and is not recommended.
*/
#if 1 && !defined( USE_INLINING )
#  define USE_INLINING
#endif

#if defined( _MSC_VER )
#  if _MSC_VER >= 1400
#    include <stdlib.h>
#    include <intrin.h>
#    pragma intrinsic(memset)
#    pragma intrinsic(memcpy)
#    define rotl32        _rotl
#    define rotr32        _rotr
#    define rotl64        _rotl64
#    define rotr64        _rotr64
#    define bswap_16(x)   _byteswap_ushort(x)
#    define bswap_32(x)   _byteswap_ulong(x)
#    define bswap_64(x)   _byteswap_uint64(x)
#  else
#    define rotl32 _lrotl
#    define rotr32 _lrotr
#  endif
#endif

#if defined( USE_INLINING )
#  if defined( _MSC_VER )
#    define mh_decl __inline
#  elif defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
#    define mh_decl static inline
#  else
#    define mh_decl static
#  endif
#endif

#if defined(__cplusplus)
extern "C" {
#endif

#define UI8_PTR(x)  UPTR_CAST(x, 8)
#define UI16_PTR(x) UPTR_CAST(x, 16)
#define UI32_PTR(x) UPTR_CAST(x, 32)
#define UI64_PTR(x) UPTR_CAST(x, 64)
#define UNIT_PTR(x) UPTR_CAST(x, UNIT_BITS)

#define UI8_VAL(x)  UNIT_CAST(x, 8)
#define UI16_VAL(x) UNIT_CAST(x, 16)
#define UI32_VAL(x) UNIT_CAST(x, 32)
#define UI64_VAL(x) UNIT_CAST(x, 64)
#define UNIT_VAL(x) UNIT_CAST(x, UNIT_BITS)

#define BUF_INC     (UNIT_BITS >> 3)
#define BUF_ADRMASK ((UNIT_BITS >> 3) - 1)

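/* Added note (not in the original source): BUF_INC is the number of bytes
   moved per unit access and BUF_ADRMASK is the matching alignment mask;
   with UNIT_BITS == 64 they evaluate to 8 and 7, so a pointer p is unit
   aligned exactly when ((uintptr_t)p & BUF_ADRMASK) == 0. A caller would
   typically use such a test before choosing one of the *_aligned block
   operations defined later in this header.                                */
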
#define rep2_u2(f,r,x)   f( 0,r,x); f( 1,r,x)
#define rep2_u4(f,r,x)   f( 0,r,x); f( 1,r,x); f( 2,r,x); f( 3,r,x)
#define rep2_u16(f,r,x)  f( 0,r,x); f( 1,r,x); f( 2,r,x); f( 3,r,x); \
                         f( 4,r,x); f( 5,r,x); f( 6,r,x); f( 7,r,x); \
                         f( 8,r,x); f( 9,r,x); f(10,r,x); f(11,r,x); \
                         f(12,r,x); f(13,r,x); f(14,r,x); f(15,r,x)

#define rep2_d2(f,r,x)   f( 1,r,x); f( 0,r,x)
#define rep2_d4(f,r,x)   f( 3,r,x); f( 2,r,x); f( 1,r,x); f( 0,r,x)
#define rep2_d16(f,r,x)  f(15,r,x); f(14,r,x); f(13,r,x); f(12,r,x); \
                         f(11,r,x); f(10,r,x); f( 9,r,x); f( 8,r,x); \
                         f( 7,r,x); f( 6,r,x); f( 5,r,x); f( 4,r,x); \
                         f( 3,r,x); f( 2,r,x); f( 1,r,x); f( 0,r,x)

#define rep3_u2(f,r,x,y,c)  f( 0,r,x,y,c); f( 1,r,x,y,c)
#define rep3_u4(f,r,x,y,c)  f( 0,r,x,y,c); f( 1,r,x,y,c); f( 2,r,x,y,c); f( 3,r,x,y,c)
#define rep3_u16(f,r,x,y,c) f( 0,r,x,y,c); f( 1,r,x,y,c); f( 2,r,x,y,c); f( 3,r,x,y,c); \
                            f( 4,r,x,y,c); f( 5,r,x,y,c); f( 6,r,x,y,c); f( 7,r,x,y,c); \
                            f( 8,r,x,y,c); f( 9,r,x,y,c); f(10,r,x,y,c); f(11,r,x,y,c); \
                            f(12,r,x,y,c); f(13,r,x,y,c); f(14,r,x,y,c); f(15,r,x,y,c)

#define rep3_d2(f,r,x,y,c)  f( 1,r,x,y,c); f( 0,r,x,y,c)
#define rep3_d4(f,r,x,y,c)  f( 3,r,x,y,c); f( 2,r,x,y,c); f( 1,r,x,y,c); f( 0,r,x,y,c)
#define rep3_d16(f,r,x,y,c) f(15,r,x,y,c); f(14,r,x,y,c); f(13,r,x,y,c); f(12,r,x,y,c); \
                            f(11,r,x,y,c); f(10,r,x,y,c); f( 9,r,x,y,c); f( 8,r,x,y,c); \
                            f( 7,r,x,y,c); f( 6,r,x,y,c); f( 5,r,x,y,c); f( 4,r,x,y,c); \
                            f( 3,r,x,y,c); f( 2,r,x,y,c); f( 1,r,x,y,c); f( 0,r,x,y,c)

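/* Added note (not in the original source): these repN_* macros just unroll
   a per-unit statement macro f over the indices of a 16 byte block, e.g.
   rep2_u4(f,r,x) expands to f(0,r,x); f(1,r,x); f(2,r,x); f(3,r,x). The _d
   variants emit the same statements in descending index order, which is
   what lets the in-place shift helpers in gf_mul_lo.h (f1_ll and friends)
   read each carry unit before it is overwritten when r and x alias.        */
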
/* function pointers might be used for fast XOR operations */

typedef void (*xor_function)(void* r, const void* p, const void* q);

/* left and right rotates on 32 and 64 bit variables */

#if !defined( rotl32 )  /* NOTE: 0 <= n <= 32 ASSUMED */
mh_decl uint32_t rotl32(uint32_t x, int n)
{
    return (((x) << n) | ((x) >> (32 - n)));
}
#endif

#if !defined( rotr32 )  /* NOTE: 0 <= n <= 32 ASSUMED */
mh_decl uint32_t rotr32(uint32_t x, int n)
{
    return (((x) >> n) | ((x) << (32 - n)));
}
#endif

#if ( UNIT_BITS == 64 ) && !defined( rotl64 )  /* NOTE: 0 <= n <= 64 ASSUMED */
mh_decl uint64_t rotl64(uint64_t x, int n)
{
    return (((x) << n) | ((x) >> (64 - n)));
}
#endif

#if ( UNIT_BITS == 64 ) && !defined( rotr64 )  /* NOTE: 0 <= n <= 64 ASSUMED */
mh_decl uint64_t rotr64(uint64_t x, int n)
{
    return (((x) >> n) | ((x) << (64 - n)));
}
#endif

/* byte order inversions for 16, 32 and 64 bit variables */

#if !defined(bswap_16)
mh_decl uint16_t bswap_16(uint16_t x)
{
    return (uint16_t)((x >> 8) | (x << 8));
}
#endif

#if !defined(bswap_32)
mh_decl uint32_t bswap_32(uint32_t x)
{
    return ((rotr32((x), 24) & 0x00ff00ff) | (rotr32((x), 8) & 0xff00ff00));
}
#endif

#if ( UNIT_BITS == 64 ) && !defined(bswap_64)
mh_decl uint64_t bswap_64(uint64_t x)
{
    return bswap_32((uint32_t)(x >> 32)) | ((uint64_t)bswap_32((uint32_t)x) << 32);
}
#endif

/* support for fast aligned buffer move, xor and byte swap operations -
   source and destination buffers for move and xor operations must not
   overlap, those for byte order reversal must either not overlap or
   must be identical
*/
#define f_copy(n,p,q)    p[n] = q[n]
#define f_xor(n,r,p,q,c) r[n] = c(p[n] ^ q[n])

mh_decl void copy_block(void* p, const void* q)
{
    memcpy(p, q, 16);
}

mh_decl void copy_block_aligned(void *p, const void *q)
{
#if UNIT_BITS == 8
    memcpy(p, q, 16);
#elif UNIT_BITS == 32
    rep2_u4(f_copy,UNIT_PTR(p),UNIT_PTR(q));
#else
    rep2_u2(f_copy,UNIT_PTR(p),UNIT_PTR(q));
#endif
}

mh_decl void xor_block(void *r, const void* p, const void* q)
{
    rep3_u16(f_xor, UI8_PTR(r), UI8_PTR(p), UI8_PTR(q), UI8_VAL);
}

mh_decl void xor_block_aligned(void *r, const void *p, const void *q)
{
#if UNIT_BITS == 8
    rep3_u16(f_xor, UNIT_PTR(r), UNIT_PTR(p), UNIT_PTR(q), UNIT_VAL);
#elif UNIT_BITS == 32
    rep3_u4(f_xor, UNIT_PTR(r), UNIT_PTR(p), UNIT_PTR(q), UNIT_VAL);
#else
    rep3_u2(f_xor, UNIT_PTR(r), UNIT_PTR(p), UNIT_PTR(q), UNIT_VAL);
#endif
}

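/* Added note (not in the original source): a minimal sketch of how the
   byte-wise and aligned xor variants above might be selected at run time,
   assuming <stdint.h> for uintptr_t; the wrapper name xor_16_bytes is
   hypothetical.
*/
#if 0   /* illustration only, not compiled */
#include <stdint.h>

static void xor_16_bytes(void *r, const void *p, const void *q)
{
    if(!(((uintptr_t)r | (uintptr_t)p | (uintptr_t)q) & BUF_ADRMASK))
        xor_block_aligned(r, p, q);   /* all three pointers unit aligned */
    else
        xor_block(r, p, q);           /* byte-wise fallback always works */
}
#endif
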
/* byte swap within 32-bit words in a 16 byte block; don't move 32-bit words */
mh_decl void bswap32_block(void *d, const void* s)
{
#if UNIT_BITS == 8
    uint8_t t;
    t = UNIT_PTR(s)[ 0]; UNIT_PTR(d)[ 0] = UNIT_PTR(s)[ 3]; UNIT_PTR(d)[ 3] = t;
    t = UNIT_PTR(s)[ 1]; UNIT_PTR(d)[ 1] = UNIT_PTR(s)[ 2]; UNIT_PTR(d)[ 2] = t;
    t = UNIT_PTR(s)[ 4]; UNIT_PTR(d)[ 4] = UNIT_PTR(s)[ 7]; UNIT_PTR(d)[ 7] = t;
    t = UNIT_PTR(s)[ 5]; UNIT_PTR(d)[ 5] = UNIT_PTR(s)[ 6]; UNIT_PTR(d)[ 6] = t;
    t = UNIT_PTR(s)[ 8]; UNIT_PTR(d)[ 8] = UNIT_PTR(s)[11]; UNIT_PTR(d)[11] = t;
    t = UNIT_PTR(s)[ 9]; UNIT_PTR(d)[ 9] = UNIT_PTR(s)[10]; UNIT_PTR(d)[10] = t;
    t = UNIT_PTR(s)[12]; UNIT_PTR(d)[12] = UNIT_PTR(s)[15]; UNIT_PTR(d)[15] = t;
    t = UNIT_PTR(s)[13]; UNIT_PTR(d)[13] = UNIT_PTR(s)[14]; UNIT_PTR(d)[14] = t;
#elif UNIT_BITS == 32
    UNIT_PTR(d)[0] = bswap_32(UNIT_PTR(s)[0]); UNIT_PTR(d)[1] = bswap_32(UNIT_PTR(s)[1]);
    UNIT_PTR(d)[2] = bswap_32(UNIT_PTR(s)[2]); UNIT_PTR(d)[3] = bswap_32(UNIT_PTR(s)[3]);
#else
    UI32_PTR(d)[0] = bswap_32(UI32_PTR(s)[0]); UI32_PTR(d)[1] = bswap_32(UI32_PTR(s)[1]);
    UI32_PTR(d)[2] = bswap_32(UI32_PTR(s)[2]); UI32_PTR(d)[3] = bswap_32(UI32_PTR(s)[3]);
#endif
}

/* byte swap within 64-bit words in a 16 byte block; don't move 64-bit words */
mh_decl void bswap64_block(void *d, const void* s)
{
#if UNIT_BITS == 8
    uint8_t t;
    t = UNIT_PTR(s)[ 0]; UNIT_PTR(d)[ 0] = UNIT_PTR(s)[ 7]; UNIT_PTR(d)[ 7] = t;
    t = UNIT_PTR(s)[ 1]; UNIT_PTR(d)[ 1] = UNIT_PTR(s)[ 6]; UNIT_PTR(d)[ 6] = t;
    t = UNIT_PTR(s)[ 2]; UNIT_PTR(d)[ 2] = UNIT_PTR(s)[ 5]; UNIT_PTR(d)[ 5] = t;
    t = UNIT_PTR(s)[ 3]; UNIT_PTR(d)[ 3] = UNIT_PTR(s)[ 4]; UNIT_PTR(d)[ 4] = t;
    t = UNIT_PTR(s)[ 8]; UNIT_PTR(d)[ 8] = UNIT_PTR(s)[15]; UNIT_PTR(d)[15] = t;
    t = UNIT_PTR(s)[ 9]; UNIT_PTR(d)[ 9] = UNIT_PTR(s)[14]; UNIT_PTR(d)[14] = t;
    t = UNIT_PTR(s)[10]; UNIT_PTR(d)[10] = UNIT_PTR(s)[13]; UNIT_PTR(d)[13] = t;
    t = UNIT_PTR(s)[11]; UNIT_PTR(d)[11] = UNIT_PTR(s)[12]; UNIT_PTR(d)[12] = t;
#elif UNIT_BITS == 32
    uint32_t t;
    t = bswap_32(UNIT_PTR(s)[0]); UNIT_PTR(d)[0] = bswap_32(UNIT_PTR(s)[1]); UNIT_PTR(d)[1] = t;
    t = bswap_32(UNIT_PTR(s)[2]); UNIT_PTR(d)[2] = bswap_32(UNIT_PTR(s)[3]); UNIT_PTR(d)[3] = t;
#else
    UNIT_PTR(d)[0] = bswap_64(UNIT_PTR(s)[0]); UNIT_PTR(d)[1] = bswap_64(UNIT_PTR(s)[1]);
#endif
}

mh_decl void bswap128_block(void *d, const void* s)
{
#if UNIT_BITS == 8
    uint8_t t;
    t = UNIT_PTR(s)[0]; UNIT_PTR(d)[0] = UNIT_PTR(s)[15]; UNIT_PTR(d)[15] = t;
    t = UNIT_PTR(s)[1]; UNIT_PTR(d)[1] = UNIT_PTR(s)[14]; UNIT_PTR(d)[14] = t;
    t = UNIT_PTR(s)[2]; UNIT_PTR(d)[2] = UNIT_PTR(s)[13]; UNIT_PTR(d)[13] = t;
    t = UNIT_PTR(s)[3]; UNIT_PTR(d)[3] = UNIT_PTR(s)[12]; UNIT_PTR(d)[12] = t;
    t = UNIT_PTR(s)[4]; UNIT_PTR(d)[4] = UNIT_PTR(s)[11]; UNIT_PTR(d)[11] = t;
    t = UNIT_PTR(s)[5]; UNIT_PTR(d)[5] = UNIT_PTR(s)[10]; UNIT_PTR(d)[10] = t;
    t = UNIT_PTR(s)[6]; UNIT_PTR(d)[6] = UNIT_PTR(s)[ 9]; UNIT_PTR(d)[ 9] = t;
    t = UNIT_PTR(s)[7]; UNIT_PTR(d)[7] = UNIT_PTR(s)[ 8]; UNIT_PTR(d)[ 8] = t;
#elif UNIT_BITS == 32
    uint32_t t;
    t = bswap_32(UNIT_PTR(s)[0]); UNIT_PTR(d)[0] = bswap_32(UNIT_PTR(s)[3]); UNIT_PTR(d)[3] = t;
    t = bswap_32(UNIT_PTR(s)[1]); UNIT_PTR(d)[1] = bswap_32(UNIT_PTR(s)[2]); UNIT_PTR(d)[2] = t;
#else
    uint64_t t;
    t = bswap_64(UNIT_PTR(s)[0]); UNIT_PTR(d)[0] = bswap_64(UNIT_PTR(s)[1]); UNIT_PTR(d)[1] = t;
#endif
}

/* platform byte order to big or little endian order for 16, 32 and 64 bit variables */

#if PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN

#  define uint16_t_to_le(x) (x) = bswap_16((x))
#  define uint32_t_to_le(x) (x) = bswap_32((x))
#  define uint64_t_to_le(x) (x) = bswap_64((x))
#  define uint16_t_to_be(x)
#  define uint32_t_to_be(x)
#  define uint64_t_to_be(x)

#else

#  define uint16_t_to_le(x)
#  define uint32_t_to_le(x)
#  define uint64_t_to_le(x)
#  define uint16_t_to_be(x) (x) = bswap_16((x))
#  define uint32_t_to_be(x) (x) = bswap_32((x))
#  define uint64_t_to_be(x) (x) = bswap_64((x))

#endif

#if defined(__cplusplus)
}
#endif

#endif
@ -1,5 +1,5 @@
^\./core/embed/bootloader/protob/
^\./crypto/aes/aes\(\|crypt\|key\|_modes\|opt\|tab\|tst\)\.
^\./crypto/aes/
^\./crypto/chacha20poly1305/
^\./crypto/ed25519-donna/
^\./crypto/gui/