feat(crypto): Add Brian Gladman's implementation of GCM.

pull/3675/head
Andrew Kozlik 6 months ago committed by Andrew Kozlik
parent 89147ef493
commit 6e207215e3

@ -65,7 +65,7 @@ yaml_check: ## check yaml formatting
yamllint .
editor_check: ## check editorconfig formatting
editorconfig-checker -exclude '.*\.(so|dat|toif|der)'
editorconfig-checker -exclude '.*\.(so|dat|toif|der)|^crypto/aes/'
cstyle_check: ## run code style check on low-level C code
clang-format --version

@ -0,0 +1,547 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation.
This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 30/03/2011
My thanks to:
Colin Sinclair for finding an error and suggesting a number of
improvements to this code.
John Viega and David McGrew for their support in the development
of this code and to David for testing it on a big-endian system.
Mark Rodenkirch and Jason Papadopoulos for their help in finding
a bug in the fast buffer operations on big endian systems.
*/
#include <stdlib.h>
#include "gcm.h"
#include "mode_hdr.h"
/* This GCM implementation needs a Galois Field multiplier for GF(2^128),
which operates on field elements using a polynomial field representation
x^127 + x^126 + ... + x^2 + x + 1 using the bits in a bit sequence that
will be numbered by the power of x that they represent. GCM uses the
polynomial x^128 + x^7 + x^2 + x + 1 as its basis for representation.
The obvious way of representing this in a computer system is to map GF
'x' to the binary integer '2' - but this was way too obvious for any
cryptographer to adopt!
Here bytes are numbered in memory order and bits within bytes according
to their integer numeric significance. The term 'little endian' is then
used to describe mappings in which numeric (power of 2) or field (power
of x) significance increase with increasing bit or byte numbers with
'big endian' being used to describe the inverse situation.
GCM uses little endian byte ordering and big endian bit ordering, a
representation that will be described as LB. Hence the low end of the
field polynomial is in byte[0], which has the value 0xe1 rather than
0x87 in the more obvious mappings.
The related field multiplier can use this mapping but if you want to
use an alternative (e.g hardware) multiplier that uses a different
polynomial field representation, you can do so by changing the form
used for the field elements when this alternative multiplier is used.
If GF_REPRESENTATION is defined as one of:
REVERSE_BITS // change to LL
REVERSE_BYTES | REVERSE_BITS // change to BL
REVERSE_NONE // no change
REVERSE_BYTES // change to BB
then an appropriate change of representation will occur before and
after calls to your revised field multiplier. To use this you need
to add gf_convert.c to your application.
*/
#if defined(__cplusplus)
extern "C"
{
#endif
/* Select the field element representation expected by the GF(2^128)
   multiplier. The first branch is the active one, so GF_REPRESENTATION is
   left undefined and no change of representation is made around calls to
   the multiplier. To use an alternative multiplier, move the '1' to the
   branch that gives the representation it needs.
*/
#if 1
# undef GF_REPRESENTATION
#elif 0
# define GF_REPRESENTATION REVERSE_BITS /* change to LL */
#elif 0
# define GF_REPRESENTATION (REVERSE_BYTES | REVERSE_BITS) /* change to BL */
#elif 0
# define GF_REPRESENTATION REVERSE_NONE /* no change */
#elif 0
# define GF_REPRESENTATION REVERSE_BYTES /* change to BB */
#endif
#define BLOCK_SIZE GCM_BLOCK_SIZE /* block length */
#define BLK_ADR_MASK (BLOCK_SIZE - 1) /* mask for 'in block' address */
#define CTR_POS 12 /* offset of the counter field in a block */
/* Increment the big endian counter held in bytes CTR_POS .. BLOCK_SIZE - 1
   of the block x, wrapping to zero on overflow; bytes below CTR_POS are
   never touched. The body is wrapped in do { } while(0) so that the macro
   behaves as a single statement (e.g. in an if / else without braces).
*/
#define inc_ctr(x) \
    do { int ctr_i_ = BLOCK_SIZE; \
         while(ctr_i_-- > CTR_POS && !++(UI8_PTR(x)[ctr_i_])) ; \
    } while(0)
/* Set the AES key for the context and derive the hash key H = E(K, 0^128),
   together with any multiplication tables selected at compile time.

   key      the AES key
   key_len  its length, passed unchanged to aes_encrypt_key
   ctx      the mode context to initialise

   Returns RETURN_GOOD on success, or RETURN_ERROR if aes_encrypt_key does
   not report success (in Gladman's AES code this happens for an unsupported
   key length). In the error case the context must not be used.
*/
ret_type gcm_init_and_key( /* initialise mode and set key */
        const unsigned char key[], /* the key value */
        unsigned long key_len, /* and its length in bytes */
        gcm_ctx ctx[1]) /* the mode context */
{
    memset(ctx->ghash_h, 0, sizeof(ctx->ghash_h));

    /* set the AES key, refusing to continue with an unusable key schedule */
    if(aes_encrypt_key(key, key_len, ctx->aes) != EXIT_SUCCESS)
        return RETURN_ERROR;

    /* compute E(0) (for the hash function) */
    aes_encrypt(UI8_PTR(ctx->ghash_h), UI8_PTR(ctx->ghash_h), ctx->aes);

    /* the tables are built in the multiplier's own representation, after
       which H is converted back to the form used by the rest of this file */
#if defined( GF_REPRESENTATION )
    convert_representation(ctx->ghash_h, ctx->ghash_h, GF_REPRESENTATION);
#endif
#if defined( TABLES_64K )
    init_64k_table(ctx->ghash_h, ctx->gf_t64k);
#elif defined( TABLES_8K )
    init_8k_table(ctx->ghash_h, ctx->gf_t8k);
#elif defined( TABLES_4K )
    init_4k_table(ctx->ghash_h, ctx->gf_t4k);
#elif defined( TABLES_256 )
    init_256_table(ctx->ghash_h, ctx->gf_t256);
#endif
#if defined( GF_REPRESENTATION )
    convert_representation(ctx->ghash_h, ctx->ghash_h, GF_REPRESENTATION);
#endif
    return RETURN_GOOD;
}
/* Multiply the field element 'a' by the hash key H of the context, leaving
   the product in 'a'. The multiplier is chosen at compile time: one of the
   table driven versions (TABLES_64K, TABLES_8K, TABLES_4K or TABLES_256),
   which use the tables set up in gcm_init_and_key, or otherwise the bit
   serial gf_mul applied to ctx->ghash_h. If GF_REPRESENTATION is defined,
   'a' is converted to the multiplier's representation before the multiply
   and converted back afterwards.
*/
void gf_mul_hh(gf_t a, gcm_ctx ctx[1])
{
#if defined( GF_REPRESENTATION ) || !defined( NO_TABLES )
gf_t scr; /* scratch block: table multiplier workspace or converted H */
#endif
#if defined( GF_REPRESENTATION )
convert_representation(a, a, GF_REPRESENTATION);
#endif
#if defined( TABLES_64K )
gf_mul_64k(a, ctx->gf_t64k, scr);
#elif defined( TABLES_8K )
gf_mul_8k(a, ctx->gf_t8k, scr);
#elif defined( TABLES_4K )
gf_mul_4k(a, ctx->gf_t4k, scr);
#elif defined( TABLES_256 )
gf_mul_256(a, ctx->gf_t256, scr);
#else
# if defined( GF_REPRESENTATION )
/* no tables: H itself has to be put into the multiplier's representation */
convert_representation(scr, ctx->ghash_h, GF_REPRESENTATION);
gf_mul(a, scr);
# else
gf_mul(a, ctx->ghash_h);
# endif
#endif
#if defined( GF_REPRESENTATION )
convert_representation(a, a, GF_REPRESENTATION);
#endif
}
/* Start a new message: derive the initial counter block from the IV and
   reset the hash buffers and byte counts of the context.

   iv      the initialisation vector
   iv_len  its length in bytes
   ctx     the mode context (gcm_init_and_key must have been called)

   An IV of exactly CTR_POS (12) bytes is used directly, with the counter
   field set to 1. An IV of any other length is hashed with H, block by
   block, followed by a block that holds its length in bits. Note that the
   length is handled in 32 bit variables here.

   Always returns RETURN_GOOD.
*/
ret_type gcm_init_message( /* initialise a new message */
        const unsigned char iv[], /* the initialisation vector */
        unsigned long iv_len, /* and its length in bytes */
        gcm_ctx ctx[1]) /* the mode context */
{   uint32_t i, n_pos = 0;
    uint8_t *p;

    memset(ctx->ctr_val, 0, BLOCK_SIZE);
    if(iv_len == CTR_POS)
    {
        memcpy(ctx->ctr_val, iv, CTR_POS);
        UI8_PTR(ctx->ctr_val)[15] = 0x01;
    }
    else
    {   n_pos = iv_len;
        while(n_pos >= BLOCK_SIZE)
        {
            /* nothing is known about the alignment of the caller's IV, so
               use xor_block, the variant this file applies to data whose
               alignment has not been checked, not xor_block_aligned */
            xor_block(ctx->ctr_val, ctx->ctr_val, iv);
            n_pos -= BLOCK_SIZE;
            iv += BLOCK_SIZE;
            gf_mul_hh((gf_t*)ctx->ctr_val, ctx);
        }

        if(n_pos)
        {   /* final partial block (implicitly zero padded) */
            p = UI8_PTR(ctx->ctr_val);
            while(n_pos-- > 0)
                *p++ ^= *iv++;
            gf_mul_hh((gf_t*)ctx->ctr_val, ctx);
        }

        /* xor in the IV length in bits, big endian, at the end of the block */
        n_pos = (iv_len << 3);
        for(i = BLOCK_SIZE - 1; n_pos; --i, n_pos >>= 8)
            UI8_PTR(ctx->ctr_val)[i] ^= (unsigned char)n_pos;
        gf_mul_hh((gf_t*)ctx->ctr_val, ctx);
    }

    /* remember the initial counter field; gcm_compute_tag restores it to
       form the block that is encrypted to mask the tag */
    ctx->y0_val = *UI32_PTR(UI8_PTR(ctx->ctr_val) + CTR_POS);
    memset(ctx->hdr_ghv, 0, BLOCK_SIZE);
    memset(ctx->txt_ghv, 0, BLOCK_SIZE);
    ctx->hdr_cnt = 0;
    ctx->txt_ccnt = ctx->txt_acnt = 0;
    return RETURN_GOOD;
}
/* Add header (additional authenticated data) bytes to the header hash.
   The function may be called repeatedly; ctx->hdr_cnt holds the number of
   header bytes absorbed so far. Bytes are xored into ctx->hdr_ghv, and the
   buffer is multiplied by H (gf_mul_hh) each time a further block is
   started. The multiply for the last block absorbed is left pending and
   is carried out by the next call or by gcm_compute_tag.
   Always returns RETURN_GOOD.
*/
ret_type gcm_auth_header( /* authenticate the header */
const unsigned char hdr[], /* the header buffer */
unsigned long hdr_len, /* and its length in bytes */
gcm_ctx ctx[1]) /* the mode context */
{ uint32_t cnt = 0, b_pos = (uint32_t)ctx->hdr_cnt & BLK_ADR_MASK;
if(!hdr_len)
return RETURN_GOOD;
/* earlier input ended exactly on a block boundary, so the multiply for
   that complete block is still outstanding */
if(ctx->hdr_cnt && b_pos == 0)
gf_mul_hh((gf_t*)ctx->hdr_ghv, ctx);
/* fast path: taken when the input pointer and the current buffer position
   differ by a multiple of the unit size (tested with BUF_ADRMASK) */
if(!((hdr - (UI8_PTR(ctx->hdr_ghv) + b_pos)) & BUF_ADRMASK))
{
/* single bytes until the buffer position is unit aligned */
while(cnt < hdr_len && (b_pos & BUF_ADRMASK))
UI8_PTR(ctx->hdr_ghv)[b_pos++] ^= hdr[cnt++];
/* then BUF_INC bytes at a time up to the end of the current block */
while(cnt + BUF_INC <= hdr_len && b_pos <= BLOCK_SIZE - BUF_INC)
{
*UNIT_PTR(UI8_PTR(ctx->hdr_ghv) + b_pos) ^= *UNIT_PTR(hdr + cnt);
cnt += BUF_INC; b_pos += BUF_INC;
}
/* then whole blocks: finish the full block in the buffer and xor in
   the next one */
while(cnt + BLOCK_SIZE <= hdr_len)
{
gf_mul_hh((gf_t*)ctx->hdr_ghv, ctx);
xor_block_aligned(ctx->hdr_ghv, ctx->hdr_ghv, hdr + cnt);
cnt += BLOCK_SIZE;
}
}
else
{
/* slow path: fill the current block with single bytes, then process
   whole blocks with the xor that makes no alignment assumption */
while(cnt < hdr_len && b_pos < BLOCK_SIZE)
UI8_PTR(ctx->hdr_ghv)[b_pos++] ^= hdr[cnt++];
while(cnt + BLOCK_SIZE <= hdr_len)
{
gf_mul_hh((gf_t*)ctx->hdr_ghv, ctx);
xor_block(ctx->hdr_ghv, ctx->hdr_ghv, hdr + cnt);
cnt += BLOCK_SIZE;
}
}
/* remaining bytes (fewer than a block), starting a new block if the
   buffer is full */
while(cnt < hdr_len)
{
if(b_pos == BLOCK_SIZE)
{
gf_mul_hh((gf_t*)ctx->hdr_ghv, ctx);
b_pos = 0;
}
UI8_PTR(ctx->hdr_ghv)[b_pos++] ^= hdr[cnt++];
}
ctx->hdr_cnt += cnt;
return RETURN_GOOD;
}
/* Add ciphertext bytes to the text hash. The function may be called
   repeatedly; ctx->txt_acnt holds the number of bytes authenticated so
   far. Bytes are xored into ctx->txt_ghv, and the buffer is multiplied by
   H (gf_mul_hh) each time a further block is started. The multiply for
   the last block absorbed is left pending and is carried out by the next
   call or by gcm_compute_tag.
   Always returns RETURN_GOOD.
*/
ret_type gcm_auth_data( /* authenticate ciphertext data */
const unsigned char data[], /* the data buffer */
unsigned long data_len, /* and its length in bytes */
gcm_ctx ctx[1]) /* the mode context */
{ uint32_t cnt = 0, b_pos = (uint32_t)ctx->txt_acnt & BLK_ADR_MASK;
if(!data_len)
return RETURN_GOOD;
/* earlier input ended exactly on a block boundary, so the multiply for
   that complete block is still outstanding */
if(ctx->txt_acnt && b_pos == 0)
gf_mul_hh((gf_t*)ctx->txt_ghv, ctx);
/* fast path: taken when the input pointer and the current buffer position
   differ by a multiple of the unit size (tested with BUF_ADRMASK) */
if(!((data - (UI8_PTR(ctx->txt_ghv) + b_pos)) & BUF_ADRMASK))
{
/* single bytes until the buffer position is unit aligned */
while(cnt < data_len && (b_pos & BUF_ADRMASK))
UI8_PTR(ctx->txt_ghv)[b_pos++] ^= data[cnt++];
/* then BUF_INC bytes at a time up to the end of the current block */
while(cnt + BUF_INC <= data_len && b_pos <= BLOCK_SIZE - BUF_INC)
{
*UNIT_PTR(UI8_PTR(ctx->txt_ghv) + b_pos) ^= *UNIT_PTR(data + cnt);
cnt += BUF_INC; b_pos += BUF_INC;
}
/* then whole blocks: finish the full block in the buffer and xor in
   the next one */
while(cnt + BLOCK_SIZE <= data_len)
{
gf_mul_hh((gf_t*)ctx->txt_ghv, ctx);
xor_block_aligned(ctx->txt_ghv, ctx->txt_ghv, data + cnt);
cnt += BLOCK_SIZE;
}
}
else
{
/* slow path: fill the current block with single bytes, then process
   whole blocks with the xor that makes no alignment assumption */
while(cnt < data_len && b_pos < BLOCK_SIZE)
UI8_PTR(ctx->txt_ghv)[b_pos++] ^= data[cnt++];
while(cnt + BLOCK_SIZE <= data_len)
{
gf_mul_hh((gf_t*)ctx->txt_ghv, ctx);
xor_block(ctx->txt_ghv, ctx->txt_ghv, data + cnt);
cnt += BLOCK_SIZE;
}
}
/* remaining bytes (fewer than a block), starting a new block if the
   buffer is full */
while(cnt < data_len)
{
if(b_pos == BLOCK_SIZE)
{
gf_mul_hh((gf_t*)ctx->txt_ghv, ctx);
b_pos = 0;
}
UI8_PTR(ctx->txt_ghv)[b_pos++] ^= data[cnt++];
}
ctx->txt_acnt += cnt;
return RETURN_GOOD;
}
/* Encrypt or decrypt data in place by xoring it with the counter mode key
   stream (the operation is the same in both directions). The function may
   be called repeatedly; ctx->txt_ccnt holds the number of bytes processed
   so far and ctx->enc_ctr the current key stream block, so a call can
   continue part way through a block. The counter is incremented before
   each key stream block is produced, so the initial counter value itself
   is never used for data (gcm_compute_tag uses it for the tag).
   Always returns RETURN_GOOD.
*/
ret_type gcm_crypt_data( /* encrypt or decrypt data */
unsigned char data[], /* the data buffer */
unsigned long data_len, /* and its length in bytes */
gcm_ctx ctx[1]) /* the mode context */
{ uint32_t cnt = 0, b_pos = (uint32_t)ctx->txt_ccnt & BLK_ADR_MASK;
if(!data_len)
return RETURN_GOOD;
/* fast path: taken when the data pointer and the current key stream
   position differ by a multiple of the unit size (tested with BUF_ADRMASK) */
if(!((data - (UI8_PTR(ctx->enc_ctr) + b_pos)) & BUF_ADRMASK))
{
/* use up what is left of a partly consumed key stream block */
if(b_pos)
{
while(cnt < data_len && (b_pos & BUF_ADRMASK))
data[cnt++] ^= UI8_PTR(ctx->enc_ctr)[b_pos++];
while(cnt + BUF_INC <= data_len && b_pos <= BLOCK_SIZE - BUF_INC)
{
*UNIT_PTR(data + cnt) ^= *UNIT_PTR(UI8_PTR(ctx->enc_ctr) + b_pos);
cnt += BUF_INC; b_pos += BUF_INC;
}
}
/* whole blocks: next counter value, encrypt it, xor into the data */
while(cnt + BLOCK_SIZE <= data_len)
{
inc_ctr(ctx->ctr_val);
aes_encrypt(UI8_PTR(ctx->ctr_val), UI8_PTR(ctx->enc_ctr), ctx->aes);
xor_block_aligned(data + cnt, data + cnt, ctx->enc_ctr);
cnt += BLOCK_SIZE;
}
}
else
{
/* slow path: the same steps using single bytes and the xor that makes
   no alignment assumption */
if(b_pos)
while(cnt < data_len && b_pos < BLOCK_SIZE)
data[cnt++] ^= UI8_PTR(ctx->enc_ctr)[b_pos++];
while(cnt + BLOCK_SIZE <= data_len)
{
inc_ctr(ctx->ctr_val);
aes_encrypt(UI8_PTR(ctx->ctr_val), UI8_PTR(ctx->enc_ctr), ctx->aes);
xor_block(data + cnt, data + cnt, ctx->enc_ctr);
cnt += BLOCK_SIZE;
}
}
/* remaining bytes (fewer than a block) */
while(cnt < data_len)
{
/* a new key stream block is needed when the current one is used up or
   when none has been started at this block boundary (b_pos == 0) */
if(b_pos == BLOCK_SIZE || !b_pos)
{
inc_ctr(ctx->ctr_val);
aes_encrypt(UI8_PTR(ctx->ctr_val), UI8_PTR(ctx->enc_ctr), ctx->aes);
b_pos = 0;
}
data[cnt++] ^= UI8_PTR(ctx->enc_ctr)[b_pos++];
}
ctx->txt_ccnt += cnt;
return RETURN_GOOD;
}
/* Complete the hash computation and output the authentication tag.

   tag      receives the first tag_len bytes of the tag
   tag_len  the number of tag bytes wanted, at most BLOCK_SIZE
   ctx      the mode context

   The header hash and the ciphertext hash are accumulated separately, so
   the header hash is first multiplied by H^ln, where ln is the number of
   ciphertext blocks, and the two are then combined with the block that
   holds the header and ciphertext lengths in bits. The result is masked
   with the encryption of the initial counter block.

   This function updates ctx->hdr_ghv and ctx->txt_ghv, so it can only be
   called once for a message.

   Returns RETURN_ERROR (with no tag written) if tag_len exceeds BLOCK_SIZE
   or if data has been encrypted / decrypted but the byte counts for
   authentication and encryption differ. Returns RETURN_WARN if data was
   authenticated but none was encrypted / decrypted, and RETURN_GOOD
   otherwise.
*/
ret_type gcm_compute_tag( /* compute authentication tag */
        unsigned char tag[], /* the buffer for the tag */
        unsigned long tag_len, /* and its length in bytes */
        gcm_ctx ctx[1]) /* the mode context */
{   uint32_t i, ln;
    gf_t tbuf;

    /* the tag is cut from a single block; a longer request would read
       beyond the hash and counter buffers */
    if(tag_len > BLOCK_SIZE)
        return RETURN_ERROR;

    if(ctx->txt_acnt != ctx->txt_ccnt && ctx->txt_ccnt > 0)
        return RETURN_ERROR;

    /* perform the multiplies left pending for the last blocks absorbed */
    gf_mul_hh((gf_t*)ctx->hdr_ghv, ctx);
    gf_mul_hh((gf_t*)ctx->txt_ghv, ctx);

    if(ctx->hdr_cnt)
    {
        ln = (uint32_t)((ctx->txt_acnt + BLOCK_SIZE - 1) / BLOCK_SIZE);
        if(ln)
        {
#if 1 /* alternative versions of the exponentiation operation */
            /* hdr_ghv *= H^ln by square and multiply, low bit of ln first */
            memcpy(tbuf, ctx->ghash_h, BLOCK_SIZE);
# if defined( GF_REPRESENTATION )
            convert_representation(tbuf, tbuf, GF_REPRESENTATION);
            convert_representation(ctx->hdr_ghv, ctx->hdr_ghv, GF_REPRESENTATION);
# endif
            for( ; ; )
            {
                if(ln & 1)
                {
                    gf_mul((void*)ctx->hdr_ghv, tbuf);
                }
                if(!(ln >>= 1))
                    break;
                gf_mul(tbuf, tbuf);
            }
#else /* this one seems slower on x86 and x86_64 :-( */
            i = ln | ln >> 1; i |= i >> 2; i |= i >> 4;
            i |= i >> 8; i |= i >> 16; i &= ~(i >> 1);
            memset(tbuf, 0, BLOCK_SIZE);
            UI8_PTR(tbuf)[0] = 0x80;
            while(i)
            {
# if defined( GF_REPRESENTATION )
                convert_representation(tbuf, tbuf, GF_REPRESENTATION);
# endif
                gf_mul(tbuf, tbuf);
# if defined( GF_REPRESENTATION )
                convert_representation(tbuf, tbuf, GF_REPRESENTATION);
# endif
                if(i & ln)
                    gf_mul_hh((gf_t*)tbuf, ctx);
                i >>= 1;
            }
# if defined( GF_REPRESENTATION )
            convert_representation(tbuf, tbuf, GF_REPRESENTATION);
            convert_representation(ctx->hdr_ghv, ctx->hdr_ghv, GF_REPRESENTATION);
# endif
            gf_mul((void*)ctx->hdr_ghv, tbuf);
#endif
#if defined( GF_REPRESENTATION )
            convert_representation(ctx->hdr_ghv, ctx->hdr_ghv, GF_REPRESENTATION);
# endif
        }
    }

    /* Combine the two hashes with the length block, working from byte 15
       down to byte 0. The block is big endian: bytes 0..7 hold the header
       length in bits and bytes 8..15 the ciphertext length in bits. */
    i = BLOCK_SIZE;
#ifdef BRG_UI64
    {   uint64_t tm = ((uint64_t)ctx->txt_acnt) << 3;
        while(i-- > 0)
        {
            UI8_PTR(ctx->hdr_ghv)[i] ^= UI8_PTR(ctx->txt_ghv)[i] ^ (unsigned char)tm;
            tm = (i == 8 ? (((uint64_t)ctx->hdr_cnt) << 3) : tm >> 8);
        }
    }
#else
    {   uint32_t tm = ctx->txt_acnt << 3;
        while(i-- > 0)
        {
            UI8_PTR(ctx->hdr_ghv)[i] ^= UI8_PTR(ctx->txt_ghv)[i] ^ (unsigned char)tm;
            /* after the last byte of each 32 bit word, load the word that
               belongs in the four bytes below it */
            if(i & 3)
                tm >>= 8;
            else if(i == 12)
                tm = ctx->txt_acnt >> 29;   /* bytes 8..11: text bits, high */
            else if(i == 8)
                tm = ctx->hdr_cnt << 3;     /* bytes 4..7: header bits, low */
            else
                tm = ctx->hdr_cnt >> 29;    /* bytes 0..3: header bits, high
                                               (the reload at i == 0 is unused) */
        }
    }
#endif
    gf_mul_hh((gf_t*)ctx->hdr_ghv, ctx);

    /* rebuild the initial counter block, encrypt it and mask the hash */
    memcpy(ctx->enc_ctr, ctx->ctr_val, BLOCK_SIZE);
    *UI32_PTR(UI8_PTR(ctx->enc_ctr) + CTR_POS) = ctx->y0_val;
    aes_encrypt(UI8_PTR(ctx->enc_ctr), UI8_PTR(ctx->enc_ctr), ctx->aes);
    for(i = 0; i < (unsigned int)tag_len; ++i)
        tag[i] = (unsigned char)(UI8_PTR(ctx->hdr_ghv)[i] ^ UI8_PTR(ctx->enc_ctr)[i]);

    return (ctx->txt_ccnt == ctx->txt_acnt ? RETURN_GOOD : RETURN_WARN);
}
/* Erase the mode context, which holds the AES key schedule and the hash
   key, once it is no longer needed. Always returns RETURN_GOOD.

   The bytes are cleared through a volatile pointer: a plain memset of an
   object that is not read again may be removed by an optimising compiler,
   which would leave the key material in memory.
*/
ret_type gcm_end( /* clean up and end operation */
        gcm_ctx ctx[1]) /* the mode context */
{
    volatile unsigned char *p = (volatile unsigned char*)ctx;
    size_t n = sizeof(gcm_ctx);

    while(n-- > 0)
        *p++ = 0;
    return RETURN_GOOD;
}
/* Encrypt a run of message bytes in place and add the resulting
   ciphertext to the authentication hash. May be called repeatedly for
   successive parts of a message. Always returns RETURN_GOOD.
*/
ret_type gcm_encrypt( /* encrypt & authenticate data */
        unsigned char data[], /* the data buffer */
        unsigned long data_len, /* and its length in bytes */
        gcm_ctx ctx[1]) /* the mode context */
{
    /* it is the ciphertext that is authenticated, so encrypt first */
    (void)gcm_crypt_data(data, data_len, ctx);
    (void)gcm_auth_data(data, data_len, ctx);

    return RETURN_GOOD;
}
/* Add a run of ciphertext bytes to the authentication hash and then
   decrypt them in place. May be called repeatedly for successive parts
   of a message. Always returns RETURN_GOOD.
*/
ret_type gcm_decrypt( /* authenticate & decrypt data */
        unsigned char data[], /* the data buffer */
        unsigned long data_len, /* and its length in bytes */
        gcm_ctx ctx[1]) /* the mode context */
{
    /* the hash covers the ciphertext, so it must be taken before the
       buffer is overwritten with plaintext */
    (void)gcm_auth_data(data, data_len, ctx);
    (void)gcm_crypt_data(data, data_len, ctx);

    return RETURN_GOOD;
}
/* Encrypt and authenticate a complete message held in memory: set up the
   IV, hash the header, encrypt the message in place and write the tag.
   Returns RETURN_GOOD if gcm_compute_tag returns zero and RETURN_ERROR
   for any other result.
*/
ret_type gcm_encrypt_message( /* encrypt an entire message */
        const unsigned char iv[], /* the initialisation vector */
        unsigned long iv_len, /* and its length in bytes */
        const unsigned char hdr[], /* the header buffer */
        unsigned long hdr_len, /* and its length in bytes */
        unsigned char msg[], /* the message buffer */
        unsigned long msg_len, /* and its length in bytes */
        unsigned char tag[], /* the buffer for the tag */
        unsigned long tag_len, /* and its length in bytes */
        gcm_ctx ctx[1]) /* the mode context */
{
    ret_type tag_status;

    (void)gcm_init_message(iv, iv_len, ctx);
    (void)gcm_auth_header(hdr, hdr_len, ctx);
    (void)gcm_encrypt(msg, msg_len, ctx);

    tag_status = gcm_compute_tag(tag, tag_len, ctx);
    if(tag_status != 0)
        return RETURN_ERROR;
    return RETURN_GOOD;
}
/* Authenticate and decrypt a complete message held in memory and check
   its tag.

   Returns RETURN_GOOD only if the tag computed over the header and the
   ciphertext matches the first tag_len bytes of 'tag'. RETURN_ERROR is
   returned otherwise, and also, before anything is processed, if tag_len
   is larger than BLOCK_SIZE (the computed tag is held in a BLOCK_SIZE
   byte buffer).

   msg is decrypted in place before the tag is checked, so the caller must
   discard its contents when RETURN_ERROR is returned.

   The tags are compared without an early exit so that the time taken does
   not depend on the position of the first byte that differs.
   NOTE(review): a tag_len of zero is accepted and matches any message, as
   it did before; callers should use a sensible tag length.
*/
ret_type gcm_decrypt_message( /* decrypt an entire message */
        const unsigned char iv[], /* the initialisation vector */
        unsigned long iv_len, /* and its length in bytes */
        const unsigned char hdr[], /* the header buffer */
        unsigned long hdr_len, /* and its length in bytes */
        unsigned char msg[], /* the message buffer */
        unsigned long msg_len, /* and its length in bytes */
        const unsigned char tag[], /* the buffer for the tag */
        unsigned long tag_len, /* and its length in bytes */
        gcm_ctx ctx[1]) /* the mode context */
{   uint8_t local_tag[BLOCK_SIZE] = { 0 };
    unsigned char diff = 0;
    unsigned long i;
    ret_type rr;

    if(tag_len > BLOCK_SIZE)
        return RETURN_ERROR;

    gcm_init_message(iv, iv_len, ctx);
    gcm_auth_header(hdr, hdr_len, ctx);
    gcm_decrypt(msg, msg_len, ctx);
    rr = gcm_compute_tag(local_tag, tag_len, ctx);

    /* accumulate all byte differences; no data dependent branches */
    for(i = 0; i < tag_len; ++i)
        diff |= (unsigned char)(tag[i] ^ local_tag[i]);

    return (rr != RETURN_GOOD || diff != 0) ? RETURN_ERROR : RETURN_GOOD;
}
#if defined(__cplusplus)
}
#endif

@ -0,0 +1,233 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation.
This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 11/01/2011
I am grateful for the work done by Mark Rodenkirch and Jason Papadopoulos
in helping to remove a bug in the operation of this code on big endian
systems when fast buffer operations are enabled.
---------------------------------------------------------------------------
*/
#ifndef _GCM_H
#define _GCM_H
#include "aes.h"
#include "gf128mul.h"
/* USER DEFINABLE OPTIONS (Further options need to be set in gf128mul.h) */
/* UNIT_BITS sets the size of variables used to process 16 byte buffers
when the buffer alignment allows this. When buffers are processed
in bytes, 16 individual operations are involved. But if, say, such
a buffer is divided into 4 32 bit variables, it can then be processed
in 4 operations, making the code typically much faster. In general
it will pay to use the longest natively supported size, which will
probably be 32 or 64 bits in 32 and 64 bit systems respectively.
*/
/* Any UNIT_BITS value supplied from outside this header is discarded
here, so the selection below always applies: to change the unit size,
edit the selection itself.
*/
#if defined( UNIT_BITS )
# undef UNIT_BITS
#endif
#if !defined( UNIT_BITS )
# if PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN
# if 0
# define UNIT_BITS 8
# elif 0
# define UNIT_BITS 32
# elif 1
# define UNIT_BITS 64
# endif
# elif defined( _WIN64 )
# define UNIT_BITS 64
# else
# define UNIT_BITS 32
# endif
#endif
/* 64 bit units need a 64 bit integer type; NEED_UINT_64T is presumably
acted on by one of the included headers - confirm there.
*/
#if UNIT_BITS == 64 && !defined( NEED_UINT_64T )
# define NEED_UINT_64T
#endif
/* END OF USER DEFINABLE OPTIONS */
/* After encryption or decryption operations the return value of
'compute tag' will be one of the values RETURN_GOOD, RETURN_WARN
or RETURN_ERROR, the latter indicating an error. A return value
RETURN_GOOD indicates that both encryption and authentication
have taken place and resulted in the returned tag value. If
the returned value is RETURN_WARN, the tag value is the result
of authentication alone without encryption (CCM) or decryption
(GCM and EAX).
*/
/* These values are only defined here if RETURN_GOOD has not already been
defined elsewhere.
*/
#ifndef RETURN_GOOD
# define RETURN_WARN 1
# define RETURN_GOOD 0
# define RETURN_ERROR -1
#endif
#if defined(__cplusplus)
extern "C"
{
#endif
#ifndef RET_TYPE_DEFINED
typedef int ret_type;
#endif
/* gcm_unit_t and gcm_buf_t are declared through macros defined outside
this header; presumably a UNIT_BITS wide integer and an AES_BLOCK_SIZE
byte buffer made of such units - confirm against the macro definitions.
*/
UNIT_TYPEDEF(gcm_unit_t, UNIT_BITS);
BUFR_TYPEDEF(gcm_buf_t, UNIT_BITS, AES_BLOCK_SIZE);
#define GCM_BLOCK_SIZE AES_BLOCK_SIZE
/* The GCM-AES context */
/* Holds everything needed to process one message at a time under one key:
the AES key schedule, the hash key and any multiplication table chosen
at compile time, and the running state of the current message. The byte
counts are 32 bit values.
*/
typedef struct
{
/* multiplication table for the hash key (at most one of the TABLES_xxx
options is used by gcm.c) */
#if defined( TABLES_64K )
gf_t64k_a gf_t64k;
#endif
#if defined( TABLES_8K )
gf_t8k_a gf_t8k;
#endif
#if defined( TABLES_4K )
gf_t4k_a gf_t4k;
#endif
#if defined( TABLES_256 )
gf_t256_a gf_t256;
#endif
gcm_buf_t ctr_val; /* CTR counter value */
gcm_buf_t enc_ctr; /* encrypted CTR block */
gcm_buf_t hdr_ghv; /* ghash buffer (header) */
gcm_buf_t txt_ghv; /* ghash buffer (ciphertext) */
gf_t ghash_h; /* ghash H value */
aes_encrypt_ctx aes[1]; /* AES encryption context */
uint32_t y0_val; /* initial counter value */
uint32_t hdr_cnt; /* header bytes so far */
uint32_t txt_ccnt; /* text bytes so far (encrypt) */
uint32_t txt_acnt; /* text bytes so far (auth) */
} gcm_ctx;
/* The following calls handle mode initialisation, keying and completion */
ret_type gcm_init_and_key( /* initialise mode and set key */
const unsigned char key[], /* the key value */
unsigned long key_len, /* and its length in bytes */
gcm_ctx ctx[1]); /* the mode context */
ret_type gcm_end( /* clean up and end operation */
gcm_ctx ctx[1]); /* the mode context */
/* The following calls handle complete messages in memory as one operation */
ret_type gcm_encrypt_message( /* encrypt an entire message */
const unsigned char iv[], /* the initialisation vector */
unsigned long iv_len, /* and its length in bytes */
const unsigned char hdr[], /* the header buffer */
unsigned long hdr_len, /* and its length in bytes */
unsigned char msg[], /* the message buffer */
unsigned long msg_len, /* and its length in bytes */
unsigned char tag[], /* the buffer for the tag */
unsigned long tag_len, /* and its length in bytes */
gcm_ctx ctx[1]); /* the mode context */
/* RETURN_GOOD is returned if the input tag */
/* matches that for the decrypted message */
ret_type gcm_decrypt_message( /* decrypt an entire message */
const unsigned char iv[], /* the initialisation vector */
unsigned long iv_len, /* and its length in bytes */
const unsigned char hdr[], /* the header buffer */
unsigned long hdr_len, /* and its length in bytes */
unsigned char msg[], /* the message buffer */
unsigned long msg_len, /* and its length in bytes */
const unsigned char tag[], /* the buffer for the tag */
unsigned long tag_len, /* and its length in bytes */
gcm_ctx ctx[1]); /* the mode context */
/* The following calls handle messages in a sequence of operations followed */
/* by tag computation after the sequence has been completed. In these calls */
/* the user is responsible for verifying the computed tag on decryption */
ret_type gcm_init_message( /* initialise a new message */
const unsigned char iv[], /* the initialisation vector */
unsigned long iv_len, /* and its length in bytes */
gcm_ctx ctx[1]); /* the mode context */
ret_type gcm_auth_header( /* authenticate the header */
const unsigned char hdr[], /* the header buffer */
unsigned long hdr_len, /* and its length in bytes */
gcm_ctx ctx[1]); /* the mode context */
ret_type gcm_encrypt( /* encrypt & authenticate data */
unsigned char data[], /* the data buffer */
unsigned long data_len, /* and its length in bytes */
gcm_ctx ctx[1]); /* the mode context */
ret_type gcm_decrypt( /* authenticate & decrypt data */
unsigned char data[], /* the data buffer */
unsigned long data_len, /* and its length in bytes */
gcm_ctx ctx[1]); /* the mode context */
ret_type gcm_compute_tag( /* compute authentication tag */
unsigned char tag[], /* the buffer for the tag */
unsigned long tag_len, /* and its length in bytes */
gcm_ctx ctx[1]); /* the mode context */
/* The use of the following calls should be avoided if possible because
their use requires a very good understanding of the way this encryption
mode works and the way in which this code implements it in order to use
them correctly.
The gcm_auth_data routine is used to authenticate encrypted message data.
In message encryption gcm_crypt_data must be called before gcm_auth_data
is called since it is encrypted data that is authenticated. In message
decryption authentication must occur before decryption and data can be
authenticated without being decrypted if necessary.
If these calls are used it is up to the user to ensure that these routines
are called in the correct order and that the correct data is passed to
them.
When gcm_compute_tag is called it is assumed that an error in use has
occurred if both encryption (or decryption) and authentication have taken
place but the total lengths of the message data respectively authenticated
and encrypted are not the same. If authentication has taken place but
there has been no corresponding encryption or decryption operations (none
at all) only a warning is issued. This should be treated as an error if it
occurs during encryption but it is only signalled as a warning as it might
be intentional when decryption operations are involved (this avoids having
different compute tag functions for encryption and decryption). Decryption
operations can be undertaken freely after authentication but if the tag is
computed after such operations an error will be signalled if the lengths
of the data authenticated and decrypted don't match.
*/
ret_type gcm_auth_data( /* authenticate ciphertext data */
const unsigned char data[], /* the data buffer */
unsigned long data_len, /* and its length in bytes */
gcm_ctx ctx[1]); /* the mode context */
ret_type gcm_crypt_data( /* encrypt or decrypt data */
unsigned char data[], /* the data buffer */
unsigned long data_len, /* and its length in bytes */
gcm_ctx ctx[1]); /* the mode context */
#if defined(__cplusplus)
}
#endif
#endif

@ -0,0 +1,29 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation.
This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 10/09/2018
*/
#ifndef _BRG_ENDIAN_H
#define _BRG_ENDIAN_H
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
/* The byte order is fixed as little endian here; nothing in this header
   detects it, so this line has to be changed for a big endian target. */
#define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif

@ -0,0 +1,471 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation.
This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 20/12/2007
This file provides fast multiplication in GF(2^128) as required by several
cryptographic authentication modes (see gf128mul.h).
*/
/* Speed critical loops can be unrolled to gain speed but consume more memory */
#if 1
# define UNROLL_LOOPS
#endif
/* The order of these includes matters */
#include "mode_hdr.h"
#include "gf128mul.h"
#include "gf_mul_lo.h"
/* 'mode' is the suffix handed to the gf_mulx1() and gf_mulx8() macros
   below; presumably it selects the low level routines for the field
   representation in use - confirm in gf_mul_lo.h. Exactly one of the
   GF_MODE_xx symbols has to be defined.
*/
#if defined( GF_MODE_LL )
# define mode _ll
#elif defined( GF_MODE_BL )
# define mode _bl
#elif defined( GF_MODE_LB )
# define mode _lb
#elif defined( GF_MODE_BB )
# define mode _bb
#else
# error mode is not defined
#endif
/* GF_INDEX(i) turns byte number i of a field element into its index in
   memory: unchanged for the LL and LB modes, reversed (15 - i) for the
   other two.
*/
#if defined( GF_MODE_LL) || defined( GF_MODE_LB )
# define GF_INDEX(i) (i)
#else
# define GF_INDEX(i) (15 - (i))
#endif
/* A slow field multiplier: a = a * b, computed without tables.
   The eight multiples a, a*x, ... a*x^7 are formed first. The bytes of b
   are then taken from byte number 15 down to byte number 0; for each one
   the multiples selected by its bits are added (xored) into the result,
   which is multiplied by x^8 between bytes. a and b may be the same
   element (squaring).
*/
void gf_mul(gf_t a, const gf_t b)
{
    /* the bits of a byte that stand for x^0 .. x^7 */
    const uint8_t x_bit[8] = { X_0, X_1, X_2, X_3, X_4, X_5, X_6, X_7 };
    gf_t multiple[8];
    const uint8_t *mult;
    int byte_no, k;

    copy_block_aligned(multiple[0], a);
    for(k = 1; k < 8; ++k)
        gf_mulx1(mode)(multiple[k], multiple[k - 1]);

    /* a is cleared below, so when squaring read the saved copy of it */
    mult = (const uint8_t*)(a == b ? multiple[0] : b);
    memset(a, 0, GF_BYTE_LEN);

    for(byte_no = 15; byte_no >= 0; --byte_no)
    {
        const uint8_t ch = mult[GF_INDEX(byte_no)];

        for(k = 0; k < 8; ++k)
            if(ch & x_bit[k])
                xor_block_aligned(a, a, multiple[k]);

        /* no shift is needed after the last byte */
        if(byte_no > 0)
            gf_mulx8(mode)(a);
    }
}
#if defined( TABLES_64K )
/* This version uses 64k bytes of table space on the stack.
An input variable field value in a[] has to be multiplied
by a key value in g[] that changes far less frequently.
To do this a[] is split up into 16 smaller field values,
each one byte in length. For the 256 values of each of
these smaller values, we can precompute the result of
multiplying g by this field value. We can then combine
these values to provide the full multiply. So for each
of 16 bytes we have a table of 256 field values each of
16 bytes - 64k bytes in total.
*/
/* Build the 64k byte table for the key value g: t[i][n] is set to the
   product of g and the field value that has byte value n in byte
   position i (and zero in all other bytes), for i = 0 .. 15 and
   n = 0 .. 255.
*/
void init_64k_table(const gf_t g, gf_t64k_t t)
{ int i = 0, j, k;
/*
depending on the representation we have to process bits
within bytes high to low (0xe1 style) or low to high
(0x87 style). We start with g and its multiples by the
powers x, x^2 .. x^7 and put them in t[0][1], t[0][2] ..
t[0][128] or in t[0][128], t[0][64] .. t[0][1] depending
on the bit order in use.
*/
/* clear the element for the zero field element */
memset(t[0][0], 0, GF_BYTE_LEN);
#if defined( GF_MODE_LL ) || defined( GF_MODE_BL )
/* g -> t[0][1], generate t[0][2] ... */
memcpy(t[0][1], g, GF_BYTE_LEN);
for(j = 1; j <= 64; j <<= 1)
gf_mulx1(mode)(t[0][j + j], t[0][j]);
#else
/* g -> t[0][128], generate t[0][64] ... */
memcpy(t[0][128], g, GF_BYTE_LEN);
for(j = 64; j >= 1; j >>= 1)
gf_mulx1(mode)(t[0][j], t[0][j + j]);
#endif
for( ; ; )
{
/* if { n } stands for the field value represented by
the integer n, we can express higher multiplies in
the table as follows:
1. g * { 3} = g * {2} ^ g * {1}
2. g * { 5} = g * {4} ^ g * {1}
g * { 6} = g * {4} ^ g * {2}
g * { 7} = g * {4} ^ g * {3}
3. g * { 9} = g * {8} ^ g * {1}
g * {10} = g * {8} ^ g * {2}
....
and so on. This is what the following loops do.
*/
/* j runs over the single bit entries 2, 4 .. 128 and k over all
the entries below j, which are already complete */
for(j = 2; j < 256; j += j)
for(k = 1; k < j; ++k)
xor_block_aligned(t[i][j + k], t[i][j], t[i][k]);
if(++i == GF_BYTE_LEN) /* all 16 byte positions done */
return;
/* We now move to the next byte up and set up its eight
starting values by multiplying the values in the
lower table by x^8
*/
memset(t[i][0], 0, GF_BYTE_LEN);
for(j = 128; j > 0; j >>= 1)
{
memcpy(t[i][j], t[i - 1][j], GF_BYTE_LEN);
gf_mulx8(mode)(t[i][j]);
}
}
}
/* add (xor) into r the table entry selected by byte number i of the
   field element whose bytes are at ap */
#define xor_64k(i,ap,t,r) xor_block_aligned(r, r, t[i][ap[GF_INDEX(i)]])
/* Multiply a by the key value for which the table t was built, using r
   as the accumulator: r is cleared, one table entry is added for each of
   the 16 bytes of a (byte number 15 first) and the result is then copied
   back into a. Two versions follow, unrolled and looped.
*/
#if defined( UNROLL_LOOPS )
void gf_mul_64k(gf_t a, const gf_t64k_t t, gf_t r)
{
    const uint8_t *bytes = (const uint8_t*)a;

    memset(r, 0, GF_BYTE_LEN);
    xor_64k(15, bytes, t, r);
    xor_64k(14, bytes, t, r);
    xor_64k(13, bytes, t, r);
    xor_64k(12, bytes, t, r);
    xor_64k(11, bytes, t, r);
    xor_64k(10, bytes, t, r);
    xor_64k( 9, bytes, t, r);
    xor_64k( 8, bytes, t, r);
    xor_64k( 7, bytes, t, r);
    xor_64k( 6, bytes, t, r);
    xor_64k( 5, bytes, t, r);
    xor_64k( 4, bytes, t, r);
    xor_64k( 3, bytes, t, r);
    xor_64k( 2, bytes, t, r);
    xor_64k( 1, bytes, t, r);
    xor_64k( 0, bytes, t, r);
    copy_block_aligned(a, r);
}
#else
void gf_mul_64k(gf_t a, const gf_t64k_t t, gf_t r)
{
    const uint8_t *bytes = (const uint8_t*)a;
    int i = 15;

    memset(r, 0, GF_BYTE_LEN);
    do
    {
        xor_64k(i, bytes, t, r);
    }
    while(i-- > 0);
    copy_block_aligned(a, r);
}
#endif
#endif
#if defined( TABLES_8K )
/* This version uses 8k bytes of table space on the stack.
An input field value in a[] has to be multiplied by a
key value in g[]. To do this a[] is split up into 32
smaller field values each 4-bits in length. For the
16 values of each of these smaller field values we can
precompute the result of multiplying g[] by the field
value in question. So for each of 32 nibbles we have a
table of 16 field values, each of 16 bytes - 8k bytes
in total.
*/
/* Build the 8k byte table for the key value g. There are two sub-tables
   of 16 entries for each of the 16 byte positions: t[2 * i] is indexed by
   the low four bits (value & 15) and t[2 * i + 1] by the high four bits
   (value >> 4) of byte number i, as used by the xor_8k macro.
*/
void init_8k_table(const gf_t g, gf_t8k_t t)
{ int i = 0, j, k;
/* do the low 4-bit nibble first - t[0][16] - and note
that the unit multiplier sits at 0x01 - t[0][1] in
the table. Then multiplies by x go at 2, 4, 8
*/
/* set the table elements for a zero multiplier */
memset(t[0][0], 0, GF_BYTE_LEN);
memset(t[1][0], 0, GF_BYTE_LEN);
#if defined( GF_MODE_LL ) || defined( GF_MODE_BL )
/* t[0][1] = g, compute t[0][2], t[0][4], t[0][8] */
memcpy(t[0][1], g, GF_BYTE_LEN);
for(j = 1; j <= 4; j <<= 1)
gf_mulx1(mode)(t[0][j + j], t[0][j]);
/* t[1][1] = t[0][1] * x^4 = t[0][8] * x */
gf_mulx1(mode)(t[1][1], t[0][8]);
for(j = 1; j <= 4; j <<= 1)
gf_mulx1(mode)(t[1][j + j], t[1][j]);
#else
/* g -> t[1][8], compute t[1][4], t[1][2], t[1][1] */
memcpy(t[1][8], g, GF_BYTE_LEN);
for(j = 4; j >= 1; j >>= 1)
gf_mulx1(mode)(t[1][j], t[1][j + j]);
/* t[0][8] = t[1][1] * x, compute t[0][4], t[0][2], t[0][1] */
gf_mulx1(mode)(t[0][8], t[1][1]);
for(j = 4; j >= 1; j >>= 1)
gf_mulx1(mode)(t[0][j], t[0][j + j]);
#endif
for( ; ; )
{
/* fill in the entries with more than one bit set by adding (xoring)
entries that are already complete */
for(j = 2; j < 16; j += j)
for(k = 1; k < j; ++k)
xor_block_aligned(t[i][j + k], t[i][j], t[i][k]);
if(++i == 2 * GF_BYTE_LEN)
return;
/* sub-tables 0 and 1 were seeded above; each later one gets its
single bit entries from the sub-table two places below (the same
nibble of the previous byte) multiplied by x^8 */
if(i > 1)
{
memset(t[i][0], 0, GF_BYTE_LEN);
for(j = 8; j > 0; j >>= 1)
{
memcpy(t[i][j], t[i - 2][j], GF_BYTE_LEN);
gf_mulx8(mode)(t[i][j]);
}
}
}
}
/* Add to r the two table entries selected by byte GF_INDEX(i) of ap:
   row 2*i is indexed by the low nibble, row 2*i + 1 by the high nibble.
   The two statements are wrapped in do { } while(0) so that the macro
   behaves as a single statement, e.g. as the body of an unbraced
   if or for. */
#define xor_8k(i,ap,t,r) \
    do { \
        xor_block_aligned(r, r, t[i + i][ap[GF_INDEX(i)] & 15]); \
        xor_block_aligned(r, r, t[i + i + 1][ap[GF_INDEX(i)] >> 4]); \
    } while(0)
#if defined( UNROLL_LOOPS )
/* Unrolled 8k-table multiply: a = a * (value the table t was built for).
   r is a caller supplied work buffer: it is cleared, collects the table
   entries for positions 15 down to 0 and is then copied back into a. */
void gf_mul_8k(gf_t a, const gf_t8k_t t, gf_t r)
{
    uint8_t *bytes = (uint8_t*)a;

    memset(r, 0, GF_BYTE_LEN);
    xor_8k(15, bytes, t, r);
    xor_8k(14, bytes, t, r);
    xor_8k(13, bytes, t, r);
    xor_8k(12, bytes, t, r);
    xor_8k(11, bytes, t, r);
    xor_8k(10, bytes, t, r);
    xor_8k( 9, bytes, t, r);
    xor_8k( 8, bytes, t, r);
    xor_8k( 7, bytes, t, r);
    xor_8k( 6, bytes, t, r);
    xor_8k( 5, bytes, t, r);
    xor_8k( 4, bytes, t, r);
    xor_8k( 3, bytes, t, r);
    xor_8k( 2, bytes, t, r);
    xor_8k( 1, bytes, t, r);
    xor_8k( 0, bytes, t, r);
    copy_block_aligned(a, r);
}
#else
/* Loop form of the 8k-table multiply: a = a * (value t was built for).
   r is a caller supplied work buffer: it is cleared, collects the table
   entries for positions 15 down to 0 and is then copied back into a. */
void gf_mul_8k(gf_t a, const gf_t8k_t t, gf_t r)
{
    uint8_t *bytes = (uint8_t*)a;
    int pos = GF_BYTE_LEN - 1;

    memset(r, 0, GF_BYTE_LEN);
    while(pos >= 0)
    {
        xor_8k(pos, bytes, t, r);
        --pos;
    }
    memcpy(a, r, GF_BYTE_LEN);
}
#endif
#endif
#if defined( TABLES_4K )
/* This version uses 4k bytes of table space on the stack.
A 16 byte buffer has to be multiplied by a 16 byte key
value in GF(128). If we consider a GF(128) value in a
single byte, we can construct a table of the 256 16
byte values that result from multiplying g by the 256
values of this byte. This requires 4096 bytes.
If we take the highest byte in the buffer and use this
table to multiply it by g, we then have to multiply it
by x^120 to get the final value. For the next highest
byte the result has to be multiplied by x^112 and so on.
But we can do this by accumulating the result in an
accumulator starting with the result for the top byte.
We repeatedly multiply the accumulator value by x^8 and
then add in (i.e. xor) the 16 bytes of the next lower
byte in the buffer, stopping when we reach the lowest
byte. This requires a 4096 byte table.
*/
/* Build the 4k table t: t[b] is the product of g with the field value
   held in the single byte b.
     g - the fixed field element (key value) to multiply by
     t - the 256 entry table to fill
*/
void init_4k_table(const gf_t g, gf_t4k_t t)
{
    int top, rest;

    /* a zero byte contributes nothing */
    memset(t[0], 0, GF_BYTE_LEN);

#if defined( GF_MODE_LL ) || defined( GF_MODE_BL )
    /* unit multiplier at index 0x01; doubling the index multiplies by x */
    memcpy(t[1], g, GF_BYTE_LEN);
    for(top = 2; top <= 128; top += top)
        gf_mulx1(mode)(t[top], t[top >> 1]);
#else
    /* bit reversed modes: unit multiplier at index 0x80; halving the
       index multiplies by x */
    memcpy(t[128], g, GF_BYTE_LEN);
    for(top = 64; top > 0; top >>= 1)
        gf_mulx1(mode)(t[top], t[top << 1]);
#endif

    /* every other entry is the xor of the entry for its top bit and
       the entry for its remaining bits */
    for(top = 2; top < 256; top <<= 1)
    {
        for(rest = 1; rest < top; rest++)
        {
            xor_block_aligned(t[top + rest], t[top], t[rest]);
        }
    }
}
/* One accumulation step of the 4k multiply: multiply the accumulator r
   by x^8, then add the table entry selected by byte GF_INDEX(i) of ap.
   Wrapped in do { } while(0) so that the two statements act as a single
   statement, e.g. as the body of an unbraced if or for. */
#define xor_4k(i,ap,t,r) \
    do { \
        gf_mulx8(mode)(r); \
        xor_block_aligned(r, r, t[ap[GF_INDEX(i)]]); \
    } while(0)
#if defined( UNROLL_LOOPS )
/* Unrolled 4k-table multiply: a = a * (value the table t was built for).
   r is a caller supplied work buffer used as the accumulator: it is
   cleared, updated by xor_4k for positions 15 down to 0 and finally
   copied back into a. */
void gf_mul_4k(gf_t a, const gf_t4k_t t, gf_t r)
{
    uint8_t *bytes = (uint8_t*)a;

    memset(r, 0, GF_BYTE_LEN);
    xor_4k(15, bytes, t, r);
    xor_4k(14, bytes, t, r);
    xor_4k(13, bytes, t, r);
    xor_4k(12, bytes, t, r);
    xor_4k(11, bytes, t, r);
    xor_4k(10, bytes, t, r);
    xor_4k( 9, bytes, t, r);
    xor_4k( 8, bytes, t, r);
    xor_4k( 7, bytes, t, r);
    xor_4k( 6, bytes, t, r);
    xor_4k( 5, bytes, t, r);
    xor_4k( 4, bytes, t, r);
    xor_4k( 3, bytes, t, r);
    xor_4k( 2, bytes, t, r);
    xor_4k( 1, bytes, t, r);
    xor_4k( 0, bytes, t, r);
    copy_block_aligned(a, r);
}
#else
/* Loop form of the 4k-table multiply: a = a * (value t was built for).
   r is a caller supplied work buffer used as the accumulator: it is
   cleared, updated by xor_4k for positions 15 down to 0 and finally
   copied back into a. */
void gf_mul_4k(gf_t a, const gf_t4k_t t, gf_t r)
{
    uint8_t *bytes = (uint8_t*)a;
    int pos = GF_BYTE_LEN - 1;

    memset(r, 0, GF_BYTE_LEN);
    while(pos >= 0)
    {
        xor_4k(pos, bytes, t, r);
        --pos;
    }
    copy_block_aligned(a, r);
}
#endif
#endif
#if defined( TABLES_256 )
/* This version uses 256 bytes of table space on the stack.
A 16 byte buffer has to be multiplied by a 16 byte key
value in GF(128). If we consider a GF(128) value in a
single 4-bit nibble, we can construct a table of the 16
16 byte values that result from the 16 values of this
byte. This requires 256 bytes. If we take the highest
4-bit nibble in the buffer and use this table to get the
result, we then have to multiply by x^124 to get the
final value. For the next highest byte the result has to
be multiplied by x^120 and so on. But we can do this by
accumulating the result in an accumulator starting with
the result for the top nibble. We repeatedly multiply
the accumulator value by x^4 and then add in (i.e. xor)
the 16 bytes of the next lower nibble in the buffer,
stopping when we reach the lowest nibble. This uses a
256 byte table.
*/
/* Build the 256 byte table t: t[v] is the product of g with the field
   value held in the single 4-bit nibble v.
     g - the fixed field element (key value) to multiply by
     t - the 16 entry table to fill
*/
void init_256_table(const gf_t g, gf_t256_t t)
{
    int top, rest;

    /* a zero nibble contributes nothing */
    memset(t[0], 0, GF_BYTE_LEN);

#if defined( GF_MODE_LL ) || defined( GF_MODE_BL )
    /* unit multiplier at index 1; doubling the index multiplies by x */
    memcpy(t[1], g, GF_BYTE_LEN);
    for(top = 2; top <= 8; top += top)
        gf_mulx1(mode)(t[top], t[top >> 1]);
#else
    /* bit reversed modes: unit multiplier at index 8; halving the index
       multiplies by x */
    memcpy(t[8], g, GF_BYTE_LEN);
    for(top = 4; top > 0; top >>= 1)
        gf_mulx1(mode)(t[top], t[top << 1]);
#endif

    /* every other entry is the xor of the entry for its top bit and
       the entry for its remaining bits */
    for(top = 2; top < 16; top <<= 1)
    {
        for(rest = 1; rest < top; rest++)
        {
            xor_block_aligned(t[top + rest], t[top], t[rest]);
        }
    }
}
/* One nibble step of the 256 byte table multiply: multiply the
   accumulator r by x^4, then add the table entry selected by the low
   (x_lo) or high (x_hi) nibble of byte GF_INDEX(i) of ap.
   xor_256 processes both nibbles of one byte; the nibble order depends
   on the bit order of the field representation.
   All three macros are wrapped in do { } while(0) so that they act as a
   single statement, e.g. as the body of an unbraced if or for. */
#define x_lo(i,ap,t,r) \
    do { \
        gf_mulx4(mode)(r); \
        xor_block_aligned(r, r, t[ap[GF_INDEX(i)] & 0x0f]); \
    } while(0)
#define x_hi(i,ap,t,r) \
    do { \
        gf_mulx4(mode)(r); \
        xor_block_aligned(r, r, t[ap[GF_INDEX(i)] >> 4]); \
    } while(0)
#if defined( GF_MODE_LL ) || defined( GF_MODE_BL )
#define xor_256(a,b,c,d) do { x_hi(a,b,c,d); x_lo(a,b,c,d); } while(0)
#else
#define xor_256(a,b,c,d) do { x_lo(a,b,c,d); x_hi(a,b,c,d); } while(0)
#endif
#if defined( UNROLL_LOOPS )
/* Unrolled 256 byte table multiply: a = a * (value t was built for).
   r is a caller supplied work buffer used as the accumulator: it is
   cleared, updated by xor_256 for positions 15 down to 0 and finally
   copied back into a. */
void gf_mul_256(gf_t a, const gf_t256_t t, gf_t r)
{
    uint8_t *bytes = (uint8_t*)a;

    memset(r, 0, GF_BYTE_LEN);
    xor_256(15, bytes, t, r);
    xor_256(14, bytes, t, r);
    xor_256(13, bytes, t, r);
    xor_256(12, bytes, t, r);
    xor_256(11, bytes, t, r);
    xor_256(10, bytes, t, r);
    xor_256( 9, bytes, t, r);
    xor_256( 8, bytes, t, r);
    xor_256( 7, bytes, t, r);
    xor_256( 6, bytes, t, r);
    xor_256( 5, bytes, t, r);
    xor_256( 4, bytes, t, r);
    xor_256( 3, bytes, t, r);
    xor_256( 2, bytes, t, r);
    xor_256( 1, bytes, t, r);
    xor_256( 0, bytes, t, r);
    copy_block_aligned(a, r);
}
#else
/* Loop form of the 256 byte table multiply: a = a * (value t was built
   for). r is a caller supplied work buffer used as the accumulator: it
   is cleared, updated by xor_256 for positions 15 down to 0 and finally
   copied back into a. */
void gf_mul_256(gf_t a, const gf_t256_t t, gf_t r)
{
    uint8_t *bytes = (uint8_t*)a;
    int pos = GF_BYTE_LEN - 1;

    memset(r, 0, GF_BYTE_LEN);
    while(pos >= 0)
    {
        xor_256(pos, bytes, t, r);
        --pos;
    }
    copy_block_aligned(a, r);
}
#endif
#endif

@ -0,0 +1,215 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation.
This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 11/01/2011
I am grateful for the work done by Mark Rodenkirch and Jason Papadopoulos
in helping to remove a bug in the operation of this code on big endian
systems when fast buffer operations are enabled.
---------------------------------------------------------------------------
An implementation of field multiplication in the Galois Field GF(2^128)
A polynomial representation is used for the field with the coefficients
held in bit sequences in which the bit numbers are the powers of x that
a bit represents. The field polynomial used is (x^128+x^7+x^2+x+1).
The obvious way of representing field elements in a computer system is
to map 'x' in the field to the binary integer '2'. But this was way too
obvious for cryptographers!
Here bytes are numbered in their memory order and bits within bytes are
numbered according to their integer numeric significance (that is as is
now normal with bit 0 representing unity). The term 'little endian'
will then be used to describe mappings where numeric (power of 2) or field
(power of x) significance increases with increasing bit or byte numbers
with 'big endian' being used to describe the inverse situation.
The GF bit sequence can then be mapped onto 8-bit bytes in computer
memory in one of four simple ways:
A mapping in which x maps to the integer 2 in little endian
form for both bytes and bits within bytes:
LL: bit for x^n ==> bit for 2^(n % 8) in byte[n / 8]
A mapping in which x maps to the integer 2 in big endian form
for both bytes and bits within bytes:
BL: bit for x^n ==> bit for 2^(n % 8) in byte[15 - n / 8]
A little endian mapping for bytes but with the bits within
bytes in reverse order (big endian bits):
LB: bit for x^n ==> bit for 2^(7 - n % 8) in byte[n / 8]
A big endian mapping for bytes but with the bits within
bytes in reverse order (big endian bits):
BB: bit for x^n ==> bit for 2^(7 - n % 8) in byte[15 - n / 8]
128-bit field elements are represented by 16 byte buffers but for
processing efficiency reasons it is often desirable to process arrays
of bytes using longer types such as, for example, unsigned long values.
The type used for representing these buffers will be called a 'gf_unit'
and the buffer itself will be referred to as a 'gf_t' type.
The field multiplier is based on the assumption that one of the two
field elements involved in multiplication will change only relatively
infrequently, making it worthwhile to precompute tables to speed up
multiplication by this value.
*/
#ifndef _GF128MUL_H
#define _GF128MUL_H
#include <stdlib.h>
#include <string.h>
#include "brg_endian.h"
/* USER DEFINABLE OPTIONS */
/* UNIT_BITS sets the size of variables used to process 16 byte buffers
when the buffer alignment allows this. When buffers are processed
in bytes, 16 individual operations are involved. But if, say, such
a buffer is divided into 4 32 bit variables, it can then be processed
in 4 operations, making the code typically much faster. In general
it will pay to use the longest natively supported size, which will
probably be 32 or 64 bits in 32 and 64 bit systems respectively.
*/
#if defined( UNIT_BITS )
# undef UNIT_BITS
#endif
#if !defined( UNIT_BITS )
# if PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN
# if 0
# define UNIT_BITS 8
# elif 0
# define UNIT_BITS 32
# elif 1
# define UNIT_BITS 64
# endif
# elif defined( _WIN64 )
# define UNIT_BITS 64
# else
# define UNIT_BITS 32
# endif
#endif
#if UNIT_BITS == 64 && !defined( NEED_UINT_64T )
# define NEED_UINT_64T
#endif
#include "brg_types.h"
/* Choose the Galois Field representation to use (see above) */
#if 0
# define GF_MODE_LL
#elif 0
# define GF_MODE_BL
#elif 1
# define GF_MODE_LB /* the representation used by GCM */
#elif 0
# define GF_MODE_BB
#else
# error mode is not defined
#endif
/* Table sizes for GF(128) Multiply. Normally larger tables give
higher speed but cache loading might change this. Normally only
one table size (or none at all) will be specified here
*/
#if 0
# define TABLES_64K
#endif
#if 0
# define TABLES_8K
#endif
#if 1
# define TABLES_4K
#endif
#if 0
# define TABLES_256
#endif
/* END OF USER DEFINABLE OPTIONS */
#if !(defined( TABLES_64K ) || defined( TABLES_8K ) \
|| defined( TABLES_4K ) || defined( TABLES_256 ))
# define NO_TABLES
#endif
#if defined(__cplusplus)
extern "C"
{
#endif
#define GF_BYTE_LEN 16
#define GF_UNIT_LEN (GF_BYTE_LEN / (UNIT_BITS >> 3))
UNIT_TYPEDEF(gf_unit_t, UNIT_BITS);
BUFR_TYPEDEF(gf_t, UNIT_BITS, GF_BYTE_LEN);
/* Code for conversion between the four different galois field representations
is optionally available using gf_convert.c
*/
/* Selects the reversal applied by convert_representation.
   NOTE(review): REVERSE_BITS and REVERSE_BYTES are distinct bit values
   (1 and 2), so they can presumably be or'ed together - confirm against
   gf_convert.c */
typedef enum { REVERSE_NONE = 0, REVERSE_BITS = 1, REVERSE_BYTES = 2 } transform;
/* write to dest the field element in source converted as selected by rev */
void convert_representation(gf_t dest, const gf_t source, transform rev);
/* table free multiply of a by b; the non-const a presumably receives the
   product - confirm against the definition */
void gf_mul(gf_t a, const gf_t b); /* slow field multiply */
/* types and calls for 64k table driven field multiplier */
/* gf_t64k_a is the storage type for the table (16 rows of 256 field
   elements); gf_t64k_t is a pointer to one such row, which is the form
   in which the table is passed to the functions below */
typedef gf_t gf_t64k_a[16][256];
typedef gf_t (*gf_t64k_t)[256];
/* fill the table t for multiplication by the field element g */
void init_64k_table(const gf_t g, gf_t64k_t t);
/* a = a * (value t was built for); r is a caller supplied work buffer
   that also holds the product on return. The type of r matches the
   definition in gf128mul.c (gf_t r) and the 8k, 4k and 256 variants;
   declaring it as void * conflicts with that definition. */
void gf_mul_64k(gf_t a, const gf_t64k_t t, gf_t r);
/* types and calls for 8k table driven field multiplier */
/* gf_t8k_a is the storage type for the table (32 rows of 16 field
   elements); gf_t8k_t is a pointer to one such row, which is the form
   in which the table is passed to the functions below */
typedef gf_t gf_t8k_a[32][16];
typedef gf_t (*gf_t8k_t)[16];
/* fill the table t for multiplication by the field element g */
void init_8k_table(const gf_t g, gf_t8k_t t);
/* a = a * (value t was built for); r is a caller supplied work buffer
   that also holds the product on return */
void gf_mul_8k(gf_t a, const gf_t8k_t t, gf_t r);
/* types and calls for 4k table driven field multiplier */
/* gf_t4k_a is the storage type for the table (256 field elements);
   gf_t4k_t is a pointer to its first element, which is the form in
   which the table is passed to the functions below */
typedef gf_t gf_t4k_a[256];
typedef gf_t (*gf_t4k_t);
/* fill the table t for multiplication by the field element g */
void init_4k_table(const gf_t g, gf_t4k_t t);
/* a = a * (value t was built for); r is a caller supplied work buffer
   that also holds the product on return */
void gf_mul_4k(gf_t a, const gf_t4k_t t, gf_t r);
/* types and calls for 256 byte table driven field multiplier */
/* gf_t256_a is the storage type for the table (16 field elements);
   gf_t256_t is a pointer to its first element, which is the form in
   which the table is passed to the functions below */
typedef gf_t gf_t256_a[16];
typedef gf_t (*gf_t256_t);
/* fill the table t for multiplication by the field element g */
void init_256_table(const gf_t g, gf_t256_t t);
/* a = a * (value t was built for); r is a caller supplied work buffer
   that also holds the product on return */
void gf_mul_256(gf_t a, const gf_t256_t t, gf_t r);
#if defined(__cplusplus)
}
#endif
#endif

@ -0,0 +1,773 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation.
This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 18/02/2014
This file provides the low level primitives needed for Galois Field
operations in GF(2^128) for the four most likely field representations.
*/
#ifndef _GF_MUL_LO_H
#define _GF_MUL_LO_H
#if defined( USE_INLINING )
# if defined( _MSC_VER )
# define gf_decl __inline
# elif defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# define gf_decl static inline
# else
# define gf_decl static
# endif
#endif
#if 0 /* used for testing only: t1(UNIT_BITS), t2(UNIT_BITS) */
# define _t1(n) bswap ## n ## _block(x, x)
# define t1(n) _t1(n)
# define _t2(n) bswap ## n ## _block(x, x); bswap ## n ## _block(r, r)
# define t2(n) _t2(n)
#endif
/* gf_m(n,x) pastes the name gf_mulx<n><x>. gf_mulx1/4/8 go through it so
   that their argument is macro expanded before pasting; callers write
   gf_mulx1(mode)(...), where mode is presumably defined elsewhere as the
   suffix (_ll, _bl, _lb or _bb) of the selected representation. */
#define gf_m(n,x) gf_mulx ## n ## x
#define gf_mulx1(x) gf_m(1,x)
#define gf_mulx4(x) gf_m(4,x)
#define gf_mulx8(x) gf_m(8,x)
/* MASK(x) replicates the byte value x into every byte of a unit (the all
   ones unit divided by 0xff is 0x01 repeated in each byte) */
#define MASK(x) ((x) * (UNIT_CAST(-1,UNIT_BITS) / 0xff))
/* DATA_256(q) expands to a brace enclosed initialiser list holding
   q(0x00), q(0x01), ... q(0xff), i.e. q applied to every byte value in
   ascending order */
#define DATA_256(q) {\
q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) }
/* Within the 16 bytes of the field element the top and bottom field bits
are within bytes as follows (bit numbers in bytes 0 from ls up) for
each of the four field representations supported (see gf128mul.txt):
GF_BIT 127 126 125 124 123 122 121 120 ..... 7 6 5 4 3 2 1 0
0x87 1 0 0 0 0 1 1 1
BL x[ 0] 7 6 5 4 3 2 1 0 x[15] 7 6 5 4 3 2 1 0
LL x[15] 7 6 5 4 3 2 1 0 x[ 0] 7 6 5 4 3 2 1 0
GF_BIT 120 121 122 123 124 125 126 127 ..... 0 1 2 3 4 5 6 7
0xc1 1 1 1 0 0 0 0 1
BB x[ 0] 7 6 5 4 3 2 1 0 x[15] 7 6 5 4 3 2 1 0
LB x[15] 7 6 5 4 3 2 1 0 x[ 0] 7 6 5 4 3 2 1 0
When the field element is multiplied by x^n, the high bits overflow
and are used to form an overflow byte. For the BL and LL modes this
byte has the lowest overflow bit in bit 0 whereas for the BB and LB
modes this bit is in bit 7. So we have for this byte:
bit (bit n = 2^n) 7 6 5 4 3 2 1 0
BL and LL x^7 x^6 x^5 x^4 x^3 x^2 x^1 x^0
BB and LB x^0 x^1 x^2 x^3 x^4 x^5 x^6 x^7
This byte then has to be multiplied by the low bits of the field
polynomial, which produces a value of 16 bits to be xored into the
left shifted field value. For the BL and LL modes bit 0 gives the
word value 0x0087, bit 1 gives 0x010e (0x87 left shifted 1), 0x021c
(0x87 left shifted 2), ... For the BB and LB modes, bit 7 gives the
value 0x00e1, bit 6 gives 0x8070, bit 5 gives 0x4038, ... Each bit
in the overflow byte is expanded in this way and is xored into the
overall result, so each of the 256 byte values will produce a
corresponding word value that is computed by the gf_uint16_xor(i)
macros below.
These word values have to be xored into the low 16 bits of the
field value. If the byte endianess of the mode matches that of
the architecture xoring the word value will be correct. But if
the mode has the opposite endianess, the word value has to be
xored in byte reversed order. This is done by the ord() macro.
*/
/* ord(hi, lo) pastes two hex byte values into one 16-bit constant.
   When the byte order of the field mode (LB/LL little, BB/BL big)
   matches that of the platform the constant is 0x<hi><lo>, otherwise
   the two bytes are swapped to give 0x<lo><hi>. */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN \
&& (defined( GF_MODE_LB ) || defined( GF_MODE_LL )) || \
PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN \
&& (defined( GF_MODE_BB ) || defined( GF_MODE_BL ))
# define ord(hi, lo) 0x##hi##lo
#else
# define ord(hi, lo) 0x##lo##hi
#endif
#if defined( GF_MODE_BL ) || defined( GF_MODE_LL )
/* field and numeric bit significance correspond */
/* gf_uint16_xor(i): the 16-bit reduction word for overflow byte i, formed
   by xoring one constant per set bit of i (0x87 shifted left by the bit
   number, byte ordered by ord()) */
#define gf_uint16_xor(i) ( \
(i & 0x01 ? ord(00,87) : 0) ^ (i & 0x02 ? ord(01,0e) : 0) ^ \
(i & 0x04 ? ord(02,1c) : 0) ^ (i & 0x08 ? ord(04,38) : 0) ^ \
(i & 0x10 ? ord(08,70) : 0) ^ (i & 0x20 ? ord(10,e0) : 0) ^ \
(i & 0x40 ? ord(21,c0) : 0) ^ (i & 0x80 ? ord(43,80) : 0) )
/* X_n is the mask, within a byte, of the bit that represents x^n */
enum x_bit
{
X_0 = 0x01, X_1 = 0x02, X_2 = 0x04, X_3 = 0x08,
X_4 = 0x10, X_5 = 0x20, X_6 = 0x40, X_7 = 0x80
};
#elif defined( GF_MODE_BB ) || defined( GF_MODE_LB )
/* field and numeric bit significance are in reverse */
/* gf_uint16_xor(i): the 16-bit reduction word for overflow byte i, formed
   by xoring one constant per set bit of i; bit 7 of i is the lowest
   overflow bit in these modes */
#define gf_uint16_xor(i) ( \
(i & 0x80 ? ord(00,e1) : 0) ^ (i & 0x40 ? ord(80,70) : 0) ^ \
(i & 0x20 ? ord(40,38) : 0) ^ (i & 0x10 ? ord(20,1c) : 0) ^ \
(i & 0x08 ? ord(10,0e) : 0) ^ (i & 0x04 ? ord(08,07) : 0) ^ \
(i & 0x02 ? ord(84,03) : 0) ^ (i & 0x01 ? ord(c2,01) : 0) )
/* X_n is the mask, within a byte, of the bit that represents x^n */
enum x_bit
{
X_0 = 0x80, X_1 = 0x40, X_2 = 0x20, X_3 = 0x10,
X_4 = 0x08, X_5 = 0x04, X_6 = 0x02, X_7 = 0x01
};
#else
#error Galois Field representation has not been set
#endif
/* gf_tab[b] is gf_uint16_xor(b) for every byte value b.
   NOTE(review): this is a definition with external linkage placed in a
   header, so the header can only be included from one translation unit
   of a program - confirm that this holds for the build. */
const uint16_t gf_tab[256] = DATA_256(gf_uint16_xor);
/* LL Mode Galois Field operations
x[0] x[1] x[2] x[3] x[4] x[5] x[6] x[7]
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
10000111 ........ ........ ........ ........ ........ ........ ........
07....00 15....08 23....16 31....24 39....32 47....40 55....48 63....56
x[8] x[9] x[10] x[11] x[12] x[13] x[14] x[15]
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
........ ........ ........ ........ ........ ........ ........ M.......
71....64 79....72 87....80 95....88 103...96 111..104 119..112 127..120
*/
#if UNIT_BITS == 64
/* fN_ll(n,r,x): compute unit n of r from unit n of x moved by N bit
   positions, with the bits carried in from unit n - 1 (nothing is
   carried into unit 0). The macros are applied to every unit through
   the rep2_* helpers (defined elsewhere). */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
/* little endian platform: a plain left shift of each 64-bit unit */
#define f1_ll(n,r,x) r[n] = (x[n] << 1) | (n ? x[n-1] >> 63 : 0)
#define f4_ll(n,r,x) r[n] = (x[n] << 4) | (n ? x[n-1] >> 60 : 0)
#define f8_ll(n,r,x) r[n] = (x[n] << 8) | (n ? x[n-1] >> 56 : 0)
#else
/* big endian platform: for 1 and 4 bit moves the bits that cross a byte
   boundary are fetched separately (x[n] >> 15 or >> 12 plus the carry)
   and merged under MASK(); an 8 bit move is a whole byte shift */
#define f1_ll(n,r,x) r[n] = ((x[n] << 1) & ~MASK(0x01)) | (((x[n] >> 15) \
| (n ? x[n-1] << 49 : 0)) & MASK(0x01))
#define f4_ll(n,r,x) r[n] = ((x[n] << 4) & ~MASK(0x0f)) | (((x[n] >> 12) \
| (n ? x[n-1] << 52 : 0)) & MASK(0x0f))
#define f8_ll(n,r,x) r[n] = (x[n] >> 8) | (n ? x[n-1] << 56 : 0)
#endif
/* r = the field element x multiplied by x^1 (LL mode, 64-bit units) */
gf_decl void gf_mulx1_ll(gf_t r, const gf_t x)
{
    /* unit holding the highest field bits, read before r is written */
    const gf_unit_t top = UNIT_PTR(x)[1];
    /* reduction word selected by the field bit that is shifted out */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = gf_tab[(top >> 63) & 0x01];
#else
    const gf_unit_t fold = (gf_unit_t)gf_tab[(top >> 7) & 0x01] << 48;
#endif

    rep2_d2(f1_ll, UNIT_PTR(r), UNIT_PTR(x));
    UNIT_PTR(r)[0] ^= fold;
}
/* multiply the field element x by x^4 in place (LL mode, 64-bit units) */
gf_decl void gf_mulx4_ll(gf_t x)
{
    /* unit holding the highest field bits, read before the shift */
    const gf_unit_t top = UNIT_PTR(x)[1];
    /* reduction word selected by the four field bits that are shifted out */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = gf_tab[(top >> 60) & 0x0f];
#else
    const gf_unit_t fold = (gf_unit_t)gf_tab[(top >> 4) & 0x0f] << 48;
#endif

    rep2_d2(f4_ll, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[0] ^= fold;
}
/* multiply the field element x by x^8 in place (LL mode, 64-bit units) */
gf_decl void gf_mulx8_ll(gf_t x)
{
    /* unit holding the highest field bits, read before the shift */
    const gf_unit_t top = UNIT_PTR(x)[1];
    /* reduction word selected by the byte that is shifted out */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = gf_tab[top >> 56];
#else
    const gf_unit_t fold = (gf_unit_t)gf_tab[top & 0xff] << 48;
#endif

    rep2_d2(f8_ll, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[0] ^= fold;
}
#elif UNIT_BITS == 32
/* fN_ll(n,r,x) for 32-bit units: compute unit n of r from unit n of x
   moved by N bit positions, with the bits carried in from unit n - 1
   (nothing is carried into unit 0) */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
/* little endian platform: a plain left shift of each unit */
#define f1_ll(n,r,x) r[n] = (x[n] << 1) | (n ? x[n-1] >> 31 : 0)
#define f4_ll(n,r,x) r[n] = (x[n] << 4) | (n ? x[n-1] >> 28 : 0)
#define f8_ll(n,r,x) r[n] = (x[n] << 8) | (n ? x[n-1] >> 24 : 0)
#else
/* big endian platform: bits that cross a byte boundary are fetched
   separately and merged under MASK(); an 8 bit move is a byte shift */
#define f1_ll(n,r,x) r[n] = ((x[n] << 1) & ~MASK(0x01)) | (((x[n] >> 15) \
| (n ? x[n-1] << 17 : 0)) & MASK(0x01))
#define f4_ll(n,r,x) r[n] = ((x[n] << 4) & ~MASK(0x0f)) | (((x[n] >> 12) \
| (n ? x[n-1] << 20 : 0)) & MASK(0x0f))
#define f8_ll(n,r,x) r[n] = (x[n] >> 8) | (n ? x[n-1] << 24 : 0)
#endif
/* r = the field element x multiplied by x^1 (LL mode, 32-bit units) */
gf_decl void gf_mulx1_ll(gf_t r, const gf_t x)
{
    /* unit holding the highest field bits, read before r is written */
    const gf_unit_t top = UNIT_PTR(x)[3];
    /* reduction word selected by the field bit that is shifted out */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = gf_tab[(top >> 31) & 0x01];
#else
    const gf_unit_t fold = (gf_unit_t)gf_tab[(top >> 7) & 0x01] << 16;
#endif

    rep2_d4(f1_ll, UNIT_PTR(r), UNIT_PTR(x));
    UNIT_PTR(r)[0] ^= fold;
}
/* multiply the field element x by x^4 in place (LL mode, 32-bit units) */
gf_decl void gf_mulx4_ll(gf_t x)
{
    /* unit holding the highest field bits, read before the shift */
    const gf_unit_t top = UNIT_PTR(x)[3];
    /* reduction word selected by the four field bits that are shifted out */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = gf_tab[(top >> 28) & 0x0f];
#else
    const gf_unit_t fold = (gf_unit_t)gf_tab[(top >> 4) & 0x0f] << 16;
#endif

    rep2_d4(f4_ll, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[0] ^= fold;
}
/* multiply the field element x by x^8 in place (LL mode, 32-bit units) */
gf_decl void gf_mulx8_ll(gf_t x)
{
    /* unit holding the highest field bits, read before the shift */
    const gf_unit_t top = UNIT_PTR(x)[3];
    /* reduction word selected by the byte that is shifted out */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = gf_tab[top >> 24];
#else
    const gf_unit_t fold = (gf_unit_t)gf_tab[top & 0xff] << 16;
#endif

    rep2_d4(f8_ll, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[0] ^= fold;
}
#else
/* fN_ll(n,r,x) for byte units: byte n of r is byte n of x shifted left by
   N bits with the top N bits of byte n - 1 carried in (nothing is carried
   into byte 0) */
#define f1_ll(n,r,x) r[n] = (x[n] << 1) | (n ? x[n-1] >> 7 : 0)
#define f4_ll(n,r,x) r[n] = (x[n] << 4) | (n ? x[n-1] >> 4 : 0)
/* r = the field element x multiplied by x^1 (LL mode, byte units) */
gf_decl void gf_mulx1_ll(gf_t r, const gf_t x)
{
    /* reduction word selected by the top bit of x[15], fetched before r
       is written */
    const uint16_t fold = gf_tab[(UNIT_PTR(x)[15] >> 7) & 0x01];
    /* only one byte of the word is non-zero for a one bit overflow; which
       half holds it depends on the byte order used by ord() */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const uint16_t low = fold & 0xff;
#else
    const uint16_t low = fold >> 8;
#endif

    rep2_d16(f1_ll, UNIT_PTR(r), UNIT_PTR(x));
    UNIT_PTR(r)[0] ^= low;
}
/* multiply the field element x by x^4 in place (LL mode, byte units) */
gf_decl void gf_mulx4_ll(gf_t x)
{
    /* reduction word selected by the four field bits shifted out of
       x[15]; it has to be fetched before the shift overwrites x[15] */
    const uint16_t fold = gf_tab[(UNIT_PTR(x)[15] >> 4) & 0x0f];

    rep2_d16(f4_ll, UNIT_PTR(x), UNIT_PTR(x));

    /* After a 4 bit shift x[0] and x[1] both still hold field data (x[0]
       is the old x[0] << 4), so both bytes of the reduction word must be
       xored in on either byte order; assigning x[0] would discard the
       shifted low nibble. Only the byte order of the word differs. */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    UNIT_PTR(x)[1] ^= fold >> 8;
    UNIT_PTR(x)[0] ^= fold & 0xff;
#else
    UNIT_PTR(x)[1] ^= fold & 0xff;
    UNIT_PTR(x)[0] ^= fold >> 8;
#endif
}
/* multiply the field element x by x^8 in place (LL mode, byte units) */
gf_decl void gf_mulx8_ll(gf_t x)
{
    /* reduction word selected by the byte that is shifted out */
    const uint16_t fold = gf_tab[UNIT_PTR(x)[15]];
    /* split the word into the bytes destined for x[0] and x[1] */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const uint16_t first = fold & 0xff, second = fold >> 8;
#else
    const uint16_t first = fold >> 8, second = fold & 0xff;
#endif

    /* move every byte up by one position */
    memmove(UNIT_PTR(x) + 1, UNIT_PTR(x), 15);
    UNIT_PTR(x)[1] ^= second;
    /* x[0] holds a stale copy after the move, so it is assigned */
    UNIT_PTR(x)[0] = first;
}
#endif
/* BL Mode Galois Field operations
x[0] x[1] x[2] x[3] x[4] x[5] x[6] x[7]
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
M....... ........ ........ ........ ........ ........ ........ ........
127..120 119..112 111..104 103...96 95....88 87....80 79....72 71....64
x[8] x[9] x[10] x[11] x[12] x[13] x[14] x[15]
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
........ ........ ........ ........ ........ ........ ........ 10000111
63....56 55....48 47....40 39....32 31....24 23....16 15....08 07....00
*/
#if UNIT_BITS == 64
/* fN_bl(n,r,x) for 64-bit units: compute unit n of r from unit n of x
   moved by N bit positions, with the bits carried in from unit n + 1
   (nothing is carried into the last unit, n == 1) */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
/* little endian platform: bits that cross a byte boundary are fetched
   separately and merged under MASK(); an 8 bit move is a byte shift */
#define f1_bl(n,r,x) r[n] = ((x[n] << 1) & ~MASK(0x01)) | (((x[n] >> 15) \
| (!n ? x[n+1] << 49 : 0)) & MASK(0x01))
#define f4_bl(n,r,x) r[n] = ((x[n] << 4) & ~MASK(0x0f)) | (((x[n] >> 12) \
| (!n ? x[n+1] << 52 : 0)) & MASK(0x0f))
#define f8_bl(n,r,x) r[n] = (x[n] >> 8) | (!n ? x[n+1] << 56 : 0)
#else
/* big endian platform: a plain left shift of each unit */
#define f1_bl(n,r,x) r[n] = (x[n] << 1) | (!n ? x[n+1] >> 63 : 0)
#define f4_bl(n,r,x) r[n] = (x[n] << 4) | (!n ? x[n+1] >> 60 : 0)
#define f8_bl(n,r,x) r[n] = (x[n] << 8) | (!n ? x[n+1] >> 56 : 0)
#endif
/* r = the field element x multiplied by x^1 (BL mode, 64-bit units) */
gf_decl void gf_mulx1_bl(gf_t r, const gf_t x)
{
    /* unit holding the highest field bits, read before r is written */
    const gf_unit_t top = UNIT_PTR(x)[0];
    /* reduction word selected by the field bit that is shifted out */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = (gf_unit_t)gf_tab[(top >> 7) & 0x01] << 48;
#else
    const gf_unit_t fold = gf_tab[(top >> 63) & 0x01];
#endif

    rep2_u2(f1_bl, UNIT_PTR(r), UNIT_PTR(x));
    UNIT_PTR(r)[1] ^= fold;
}
/* multiply the field element x by x^4 in place (BL mode, 64-bit units) */
gf_decl void gf_mulx4_bl(gf_t x)
{
    /* unit holding the highest field bits, read before the shift */
    const gf_unit_t top = UNIT_PTR(x)[0];
    /* reduction word selected by the four field bits that are shifted out */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = (gf_unit_t)gf_tab[(top >> 4) & 0x0f] << 48;
#else
    const gf_unit_t fold = gf_tab[(top >> 60) & 0x0f];
#endif

    rep2_u2(f4_bl, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[1] ^= fold;
}
/* multiply the field element x by x^8 in place (BL mode, 64-bit units) */
gf_decl void gf_mulx8_bl(gf_t x)
{
    /* unit holding the highest field bits, read before the shift */
    const gf_unit_t top = UNIT_PTR(x)[0];
    /* reduction word selected by the byte that is shifted out */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = (gf_unit_t)gf_tab[top & 0xff] << 48;
#else
    const gf_unit_t fold = gf_tab[(top >> 56) & 0xff];
#endif

    rep2_u2(f8_bl, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[1] ^= fold;
}
#elif UNIT_BITS == 32
/* fN_bl(n,r,x) for 32-bit units: compute unit n of r from unit n of x
   moved by N bit positions, with the bits carried in from unit n + 1
   (nothing is carried into the last unit, n == 3) */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
/* little endian platform: bits that cross a byte boundary are fetched
   separately and merged under MASK(); an 8 bit move is a byte shift */
#define f1_bl(n,r,x) r[n] = ((x[n] << 1) & ~MASK(0x01)) | (((x[n] >> 15) \
| (n < 3 ? x[n+1] << 17 : 0)) & MASK(0x01))
#define f4_bl(n,r,x) r[n] = ((x[n] << 4) & ~MASK(0x0f)) | (((x[n] >> 12) \
| (n < 3 ? x[n+1] << 20 : 0)) & MASK(0x0f))
#define f8_bl(n,r,x) r[n] = (x[n] >> 8) | (n < 3 ? x[n+1] << 24 : 0)
#else
/* big endian platform: a plain left shift of each unit */
#define f1_bl(n,r,x) r[n] = (x[n] << 1) | (n < 3 ? x[n+1] >> 31 : 0)
#define f4_bl(n,r,x) r[n] = (x[n] << 4) | (n < 3 ? x[n+1] >> 28 : 0)
#define f8_bl(n,r,x) r[n] = (x[n] << 8) | (n < 3 ? x[n+1] >> 24 : 0)
#endif
/* r = the field element x multiplied by x^1 (BL mode, 32-bit units) */
gf_decl void gf_mulx1_bl(gf_t r, const gf_t x)
{
    /* unit holding the highest field bits, read before r is written */
    const gf_unit_t top = UNIT_PTR(x)[0];
    /* reduction word selected by the field bit that is shifted out */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = (gf_unit_t)gf_tab[(top >> 7) & 0x01] << 16;
#else
    const gf_unit_t fold = gf_tab[(top >> 31) & 0x01];
#endif

    rep2_u4(f1_bl, UNIT_PTR(r), UNIT_PTR(x));
    UNIT_PTR(r)[3] ^= fold;
}
/* multiply the field element x by x^4 in place (BL mode, 32-bit units) */
gf_decl void gf_mulx4_bl(gf_t x)
{
    /* unit holding the highest field bits, read before the shift */
    const gf_unit_t top = UNIT_PTR(x)[0];
    /* reduction word selected by the four field bits that are shifted out */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = (gf_unit_t)gf_tab[(top >> 4) & 0x0f] << 16;
#else
    const gf_unit_t fold = gf_tab[(top >> 28) & 0x0f];
#endif

    rep2_u4(f4_bl, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[3] ^= fold;
}
/* multiply the field element x by x^8 in place (BL mode, 32-bit units) */
gf_decl void gf_mulx8_bl(gf_t x)
{
    /* unit holding the highest field bits, read before the shift */
    const gf_unit_t top = UNIT_PTR(x)[0];
    /* reduction word selected by the byte that is shifted out */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = (gf_unit_t)gf_tab[top & 0xff] << 16;
#else
    const gf_unit_t fold = gf_tab[(top >> 24) & 0xff];
#endif

    rep2_u4(f8_bl, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[3] ^= fold;
}
#else
/* fN_bl(n,r,x) for byte units: byte n of r is byte n of x shifted left by
   N bits with the top N bits of byte n + 1 carried in (nothing is carried
   into byte 15) */
#define f1_bl(n,r,x) r[n] = (x[n] << 1) | (n < 15 ? x[n+1] >> 7 : 0)
#define f4_bl(n,r,x) r[n] = (x[n] << 4) | (n < 15 ? x[n+1] >> 4 : 0)
/* r = the field element x multiplied by x^1 (BL mode, byte units) */
gf_decl void gf_mulx1_bl(gf_t r, const gf_t x)
{
    /* reduction word selected by the top bit of x[0], fetched before r
       is written */
    const uint16_t fold = gf_tab[(UNIT_PTR(x)[0] >> 7) & 0x01];
    /* only one byte of the word is non-zero for a one bit overflow; which
       half holds it depends on the byte order used by ord() */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const uint16_t low = fold >> 8;
#else
    const uint16_t low = fold & 0xff;
#endif

    rep2_u16(f1_bl, UNIT_PTR(r), UNIT_PTR(x));
    UNIT_PTR(r)[15] ^= low;
}
/* multiply the field element x by x^4 in place (BL mode, byte units) */
gf_decl void gf_mulx4_bl(gf_t x)
{
    /* reduction word selected by the four field bits shifted out of
       x[0]; it has to be fetched before the shift overwrites x[0] */
    const uint16_t fold = gf_tab[(UNIT_PTR(x)[0] >> 4) & 0x0f];

    rep2_u16(f4_bl, UNIT_PTR(x), UNIT_PTR(x));

    /* After a 4 bit shift x[14] and x[15] both still hold field data
       (x[15] is the old x[15] << 4), so both bytes of the reduction word
       must be xored in on either byte order; assigning x[15] would
       discard the shifted low nibble. Only the byte order of the word
       differs. */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    UNIT_PTR(x)[14] ^= fold & 0xff;
    UNIT_PTR(x)[15] ^= fold >> 8;
#else
    UNIT_PTR(x)[14] ^= fold >> 8;
    UNIT_PTR(x)[15] ^= fold & 0xff;
#endif
}
/* multiply the field element x by x^8 in place (BL mode, byte units) */
gf_decl void gf_mulx8_bl(gf_t x)
{
    /* reduction word selected by the byte that is shifted out */
    const uint16_t fold = gf_tab[UNIT_PTR(x)[0]];
    /* split the word into the bytes destined for x[14] and x[15] */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const uint16_t upper = fold & 0xff, last = fold >> 8;
#else
    const uint16_t upper = fold >> 8, last = fold & 0xff;
#endif

    /* move every byte down by one position */
    memmove(UNIT_PTR(x), UNIT_PTR(x) + 1, 15);
    UNIT_PTR(x)[14] ^= upper;
    /* x[15] holds a stale copy after the move, so it is assigned */
    UNIT_PTR(x)[15] = last;
}
#endif
/* LB Mode Galois Field operations
x[0] x[1] x[2] x[3] x[4] x[5] x[6] x[7]
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
11100001 ........ ........ ........ ........ ........ ........ ........
00....07 08....15 16....23 24....31 32....39 40....47 48....55 56....63
x[8] x[9] x[10] x[11] x[12] x[13] x[14] x[15]
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
........ ........ ........ ........ ........ ........ ........ .......M
64....71 72....79 80....87 88....95 96...103 104..111 112..119 120..127
*/
#if UNIT_BITS == 64
/* fN_lb(n,r,x) for 64-bit units: compute unit n of r from unit n of x
   moved by N bit positions, with the bits carried in from unit n - 1
   (nothing is carried into unit 0) */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
/* little endian platform: bits that cross a byte boundary are fetched
   separately and merged under MASK(); an 8 bit move is a byte shift */
#define f1_lb(n,r,x) r[n] = ((x[n] >> 1) & ~MASK(0x80)) | (((x[n] << 15) \
    | (n ? x[n-1] >> 49 : 0)) & MASK(0x80))
#define f4_lb(n,r,x) r[n] = ((x[n] >> 4) & ~MASK(0xf0)) | (((x[n] << 12) \
    | (n ? x[n-1] >> 52 : 0)) & MASK(0xf0))
#define f8_lb(n,r,x) r[n] = (x[n] << 8) | (n ? x[n-1] >> 56 : 0)
#else
/* big endian platform: a plain right shift of each unit */
#define f1_lb(n,r,x) r[n] = (x[n] >> 1) | (n ? x[n-1] << 63 : 0)
#define f4_lb(n,r,x) r[n] = (x[n] >> 4) | (n ? x[n-1] << 60 : 0)
/* the result goes to r like every other fN macro (it was written to x,
   which only worked because the macro is invoked with r and x equal) */
#define f8_lb(n,r,x) r[n] = (x[n] >> 8) | (n ? x[n-1] << 56 : 0)
#endif
/* r = the field element x multiplied by x^1 (LB mode, 64-bit units) */
gf_decl void gf_mulx1_lb(gf_t r, const gf_t x)
{
    /* unit holding the highest field bits, read before r is written */
    const gf_unit_t top = UNIT_PTR(x)[1];
    /* reduction word selected by the field bit that is shifted out; the
       bit is placed at 0x80 to index the bit reversed table */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = gf_tab[(top >> 49) & MASK(0x80)];
#else
    const gf_unit_t fold = (gf_unit_t)gf_tab[(top << 7) & 0xff] << 48;
#endif

    rep2_d2(f1_lb, UNIT_PTR(r), UNIT_PTR(x));
    UNIT_PTR(r)[0] ^= fold;
}
/* multiply the field element x by x^4 in place (LB mode, 64-bit units) */
gf_decl void gf_mulx4_lb(gf_t x)
{
    /* unit holding the highest field bits, read before the shift */
    const gf_unit_t top = UNIT_PTR(x)[1];
    /* reduction word selected by the four field bits that are shifted
       out; they are placed at 0xf0 to index the bit reversed table */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = gf_tab[(top >> 52) & MASK(0xf0)];
#else
    const gf_unit_t fold = (gf_unit_t)gf_tab[(top << 4) & 0xff] << 48;
#endif

    rep2_d2(f4_lb, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[0] ^= fold;
}
/* multiply the field element x by x^8 in place (LB mode, 64-bit units) */
gf_decl void gf_mulx8_lb(gf_t x)
{
    /* unit holding the highest field bits, read before the shift */
    const gf_unit_t top = UNIT_PTR(x)[1];
    /* reduction word selected by the byte that is shifted out */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = gf_tab[top >> 56];
#else
    const gf_unit_t fold = (gf_unit_t)gf_tab[top & 0xff] << 48;
#endif

    rep2_d2(f8_lb, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[0] ^= fold;
}
#elif UNIT_BITS == 32
/* fN_lb(n,r,x) for 32-bit units: compute unit n of r from unit n of x
   moved by N bit positions, with the bits carried in from unit n - 1
   (nothing is carried into unit 0) */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
/* little endian platform: bits that cross a byte boundary are fetched
   separately and merged under MASK(); an 8 bit move is a byte shift */
#define f1_lb(n,r,x) r[n] = ((x[n] >> 1) & ~MASK(0x80)) | (((x[n] << 15) \
| (n ? x[n-1] >> 17 : 0)) & MASK(0x80))
#define f4_lb(n,r,x) r[n] = ((x[n] >> 4) & ~MASK(0xf0)) | (((x[n] << 12) \
| (n ? x[n-1] >> 20 : 0)) & MASK(0xf0))
#define f8_lb(n,r,x) r[n] = (x[n] << 8) | (n ? x[n-1] >> 24 : 0)
#else
/* big endian platform: a plain right shift of each unit */
#define f1_lb(n,r,x) r[n] = (x[n] >> 1) | (n ? x[n-1] << 31 : 0)
#define f4_lb(n,r,x) r[n] = (x[n] >> 4) | (n ? x[n-1] << 28 : 0)
#define f8_lb(n,r,x) r[n] = (x[n] >> 8) | (n ? x[n-1] << 24 : 0)
#endif
/* r = the field element x multiplied by x^1 (LB mode, 32-bit units) */
gf_decl void gf_mulx1_lb(gf_t r, const gf_t x)
{
    /* unit holding the highest field bits, read before r is written */
    const gf_unit_t top = UNIT_PTR(x)[3];
    /* reduction word selected by the field bit that is shifted out; the
       bit is placed at 0x80 to index the bit reversed table */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = gf_tab[(top >> 17) & MASK(0x80)];
#else
    const gf_unit_t fold = (gf_unit_t)gf_tab[(top << 7) & 0xff] << 16;
#endif

    rep2_d4(f1_lb, UNIT_PTR(r), UNIT_PTR(x));
    UNIT_PTR(r)[0] ^= fold;
}
/* multiply the field element x by x^4 in place (LB mode, 32-bit units) */
gf_decl void gf_mulx4_lb(gf_t x)
{
    /* unit holding the highest field bits, read before the shift */
    const gf_unit_t top = UNIT_PTR(x)[3];
    /* reduction word selected by the four field bits that are shifted
       out; they are placed at 0xf0 to index the bit reversed table */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = gf_tab[(top >> 20) & MASK(0xf0)];
#else
    const gf_unit_t fold = (gf_unit_t)gf_tab[(top << 4) & 0xff] << 16;
#endif

    rep2_d4(f4_lb, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[0] ^= fold;
}
/* multiply the field element x by x^8 in place (LB mode, 32-bit units) */
gf_decl void gf_mulx8_lb(gf_t x)
{
    /* unit holding the highest field bits, read before the shift */
    const gf_unit_t top = UNIT_PTR(x)[3];
    /* reduction word selected by the byte that is shifted out */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = gf_tab[top >> 24];
#else
    const gf_unit_t fold = (gf_unit_t)gf_tab[top & 0xff] << 16;
#endif

    rep2_d4(f8_lb, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[0] ^= fold;
}
#else
/* fN_lb(n,r,x) for byte units: byte n of r is byte n of x shifted right
   by N bits with the low N bits of byte n - 1 carried in at the top
   (nothing is carried into byte 0) */
#define f1_lb(n,r,x) r[n] = (x[n] >> 1) | (n ? x[n-1] << 7 : 0)
#define f4_lb(n,r,x) r[n] = (x[n] >> 4) | (n ? x[n-1] << 4 : 0)
/* r = the field element x multiplied by x^1 (LB mode, byte units) */
gf_decl void gf_mulx1_lb(gf_t r, const gf_t x)
{
    /* reduction word selected by the low bit of x[15] (moved to 0x80 to
       index the bit reversed table), fetched before r is written */
    const uint16_t fold = gf_tab[(UNIT_PTR(x)[15] << 7) & 0x80];

    rep2_d16(f1_lb, UNIT_PTR(r), UNIT_PTR(x));

    /* fold the word into r[0]; on a little endian platform the word is
       xored unmasked and the store to the byte unit keeps its low byte */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    UNIT_PTR(r)[0] ^= fold;
#else
    UNIT_PTR(r)[0] ^= fold >> 8;
#endif
}
/* multiply the field element x by x^4 in place (LB mode, byte units) */
gf_decl void gf_mulx4_lb(gf_t x)
{
    /* reduction word selected by the low nibble of x[15] (moved to 0xf0
       to index the bit reversed table), fetched before the shift */
    const uint16_t fold = gf_tab[(UNIT_PTR(x)[15] << 4) & 0xf0];
    /* split the word into the bytes destined for x[0] and x[1] */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const uint16_t first = fold & 0xff, second = fold >> 8;
#else
    const uint16_t first = fold >> 8, second = fold & 0xff;
#endif

    rep2_d16(f4_lb, UNIT_PTR(x), UNIT_PTR(x));
    /* both bytes still hold shifted field data, so both are xored */
    UNIT_PTR(x)[1] ^= second;
    UNIT_PTR(x)[0] ^= first;
}
/* Eight-bit shift step of the LB-mode field code for 8-bit units, done in
   place: bytes 0..14 move up to 1..15 and the old x[15] selects a 16-bit
   gf_tab entry that supplies the new x[0] and is XORed into x[1] */
gf_decl void gf_mulx8_lb(gf_t x)
{
    /* x[15] is about to be overwritten by the move, so look it up first */
    const uint16_t fold = gf_tab[UNIT_PTR(x)[15]];

    memmove(UNIT_PTR(x) + 1, UNIT_PTR(x), 15);

    /* x[0] is assigned, not XORed: the move leaves a stale value there */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    UNIT_PTR(x)[0] = fold & 0xff;
    UNIT_PTR(x)[1] ^= fold >> 8;
#else
    UNIT_PTR(x)[0] = fold >> 8;
    UNIT_PTR(x)[1] ^= fold & 0xff;
#endif
}
#endif
/* BB Mode Galois Field operations
x[0] x[1] x[2] x[3] x[4] x[5] x[6] x[7]
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
.......M ........ ........ ........ ........ ........ ........ ........
120..127 112..119 104..111 96...103 88....95 80....87 72....79 64....71
x[8] x[9] x[10] x[11] x[12] x[13] x[14] x[15]
ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls
........ ........ ........ ........ ........ ........ ........ 11100001
56....63 48....55 40....47 32....39 24....31 16....23 08....15 00....07
*/
#if UNIT_BITS == 64
/* Per-unit shift steps for BB mode with 64-bit units.  Each fK_bb(n,r,x)
   computes r[n] from x[n] and, for unit 0 only, bits carried in from the
   following unit x[1] (unit 1 is the last unit and has no successor). */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
/* plain right shift by 1, 4 or 8 bits with high bits filled from x[n+1] */
#define f1_bb(n,r,x) r[n] = (x[n] >> 1) | (!n ? x[n+1] << 63 : 0)
#define f4_bb(n,r,x) r[n] = (x[n] >> 4) | (!n ? x[n+1] << 60 : 0)
#define f8_bb(n,r,x) r[n] = (x[n] >> 8) | (!n ? x[n+1] << 56 : 0)
#else
/* other byte order: MASK() (defined elsewhere) picks the bit positions
   taken from the second, cross-byte term instead of the in-unit shift */
#define f1_bb(n,r,x) r[n] = ((x[n] >> 1) & ~MASK(0x80)) | (((x[n] << 15) \
| (!n ? x[n+1] >> 49 : 0)) & MASK(0x80))
#define f4_bb(n,r,x) r[n] = ((x[n] >> 4) & ~MASK(0xf0)) | (((x[n] << 12) \
| (!n ? x[n+1] >> 52 : 0)) & MASK(0xf0))
#define f8_bb(n,r,x) r[n] = (x[n] << 8) | (!n ? x[n+1] >> 56 : 0)
#endif
/* One-bit shift step of the BB-mode field code for 64-bit units: r receives
   x shifted by f1_bb and the gf_tab entry selected from the first unit of x
   is then XORed into r[1] (presumably the modular reduction term) */
gf_decl void gf_mulx1_bb(gf_t r, const gf_t x)
{
    /* the table entry is fetched before the shift writes anything to r */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = (( gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] << 7) & 0x80])) << 48;
#else
    const gf_unit_t fold = gf_tab[(UNIT_PTR(x)[0] >> 49) & 0x80];
#endif

    /* units are handled in ascending order: index 0, then index 1 */
    rep2_u2(f1_bb, UNIT_PTR(r), UNIT_PTR(x));
    UNIT_PTR(r)[1] ^= fold;
}
/* Four-bit shift step of the BB-mode field code for 64-bit units, done in
   place: x is shifted by f4_bb and the gf_tab entry selected from its first
   unit is XORed into x[1] */
gf_decl void gf_mulx4_bb(gf_t x)
{
    /* read the table entry first: the shift below overwrites x */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] << 4) & 0xf0])) << 48;
#else
    const gf_unit_t fold = gf_tab[(UNIT_PTR(x)[0] >> 52) & 0xf0];
#endif

    rep2_u2(f4_bb, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[1] ^= fold;
}
/* Eight-bit shift step of the BB-mode field code for 64-bit units, done in
   place: x is shifted by f8_bb and the gf_tab entry selected by one byte of
   its first unit is XORed into x[1] */
gf_decl void gf_mulx8_bb(gf_t x)
{
    /* read the table entry first: the shift below overwrites x */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = ((gf_unit_t)(gf_tab[UNIT_PTR(x)[0] & 0xff])) << 48;
#else
    const gf_unit_t fold = gf_tab[(UNIT_PTR(x)[0] >> 56) & 0xff];
#endif

    rep2_u2(f8_bb, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[1] ^= fold;
}
#elif UNIT_BITS == 32
/* Per-unit shift steps for BB mode with 32-bit units.  Each fK_bb(n,r,x)
   computes r[n] from x[n] and, for n < 3, bits carried in from the
   following unit x[n+1] (unit 3 is the last unit and has no successor). */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
/* plain right shift by 1, 4 or 8 bits with high bits filled from x[n+1] */
#define f1_bb(n,r,x) r[n] = (x[n] >> 1) | (n < 3 ? x[n+1] << 31 : 0)
#define f4_bb(n,r,x) r[n] = (x[n] >> 4) | (n < 3 ? x[n+1] << 28 : 0)
#define f8_bb(n,r,x) r[n] = (x[n] >> 8) | (n < 3 ? x[n+1] << 24 : 0)
#else
/* other byte order: MASK() (defined elsewhere) picks the bit positions
   taken from the second, cross-byte term instead of the in-unit shift */
#define f1_bb(n,r,x) r[n] = ((x[n] >> 1) & ~MASK(0x80)) | (((x[n] << 15) \
| (n < 3 ? x[n+1] >> 17 : 0)) & MASK(0x80))
#define f4_bb(n,r,x) r[n] = ((x[n] >> 4) & ~MASK(0xf0)) | (((x[n] << 12) \
| (n < 3 ? x[n+1] >> 20 : 0)) & MASK(0xf0))
#define f8_bb(n,r,x) r[n] = (x[n] << 8) | (n < 3 ? x[n+1] >> 24 : 0)
#endif
/* One-bit shift step of the BB-mode field code for 32-bit units: r receives
   x shifted by f1_bb and the gf_tab entry selected from the first unit of x
   is then XORed into r[3] (presumably the modular reduction term) */
gf_decl void gf_mulx1_bb(gf_t r, const gf_t x)
{
    /* the table entry is fetched before the shift writes anything to r */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] << 7) & 0x80])) << 16;
#else
    const gf_unit_t fold = gf_tab[(UNIT_PTR(x)[0] >> 17) & 0x80];
#endif

    /* units are handled in ascending order, index 0 up to index 3 */
    rep2_u4(f1_bb, UNIT_PTR(r), UNIT_PTR(x));
    UNIT_PTR(r)[3] ^= fold;
}
/* Four-bit shift step of the BB-mode field code for 32-bit units, done in
   place: x is shifted by f4_bb and the gf_tab entry selected from its first
   unit is XORed into x[3] */
gf_decl void gf_mulx4_bb(gf_t x)
{
    /* read the table entry first: the shift below overwrites x */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = ((gf_unit_t)(gf_tab[(UNIT_PTR(x)[0] << 4) & 0xf0])) << 16;
#else
    const gf_unit_t fold = gf_tab[(UNIT_PTR(x)[0] >> 20) & 0xf0];
#endif

    rep2_u4(f4_bb, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[3] ^= fold;
}
/* Eight-bit shift step of the BB-mode field code for 32-bit units, done in
   place: x is shifted by f8_bb and the gf_tab entry selected by one byte of
   its first unit is XORed into x[3] */
gf_decl void gf_mulx8_bb(gf_t x)
{
    /* read the table entry first: the shift below overwrites x */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    const gf_unit_t fold = ((gf_unit_t)(gf_tab[UNIT_PTR(x)[0] & 0xff])) << 16;
#else
    const gf_unit_t fold = gf_tab[(UNIT_PTR(x)[0] >> 24) & 0xff];
#endif

    rep2_u4(f8_bb, UNIT_PTR(x), UNIT_PTR(x));
    UNIT_PTR(x)[3] ^= fold;
}
#else
/* Per-byte shift steps for BB mode with 8-bit units: r[n] is x[n] shifted
   right by 1 or 4 bits with the vacated high bits taken from x[n+1]
   (nothing is carried into the last byte, index 15) */
#define f1_bb(n,r,x) r[n] = (x[n] >> 1) | (n < 15 ? x[n+1] << 7 : 0)
#define f4_bb(n,r,x) r[n] = (x[n] >> 4) | (n < 15 ? x[n+1] << 4 : 0)
/* One-bit shift step of the BB-mode field code for 8-bit units: r receives
   x shifted by f1_bb; the low bit of x[0], which the shift drops, selects
   a 16-bit gf_tab entry of which one byte is XORed into r[15] */
gf_decl void gf_mulx1_bb(gf_t r, const gf_t x)
{
    const uint16_t fold = gf_tab[(UNIT_PTR(x)[0] << 7) & 0x80];

    /* bytes are handled in ascending order, index 0 up to index 15 */
    rep2_u16(f1_bb, UNIT_PTR(r), UNIT_PTR(x));

    /* which half of the table entry is used depends on the byte order */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    UNIT_PTR(r)[15] ^= fold >> 8;
#else
    UNIT_PTR(r)[15] ^= fold;
#endif
}
/* Four-bit shift step of the BB-mode field code for 8-bit units, done in
   place: the low nibble of x[0], which the shift drops, selects a 16-bit
   gf_tab entry whose two bytes are XORed into x[14] and x[15] */
gf_decl void gf_mulx4_bb(gf_t x)
{
    /* read the table entry first: the shift below overwrites x */
    const uint16_t fold = gf_tab[(UNIT_PTR(x)[0] << 4) & 0xf0];

    rep2_u16(f4_bb, UNIT_PTR(x), UNIT_PTR(x));

    /* the byte order decides which half of the entry lands in which byte */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    UNIT_PTR(x)[15] ^= fold >> 8;
    UNIT_PTR(x)[14] ^= fold & 0xff;
#else
    UNIT_PTR(x)[15] ^= fold & 0xff;
    UNIT_PTR(x)[14] ^= fold >> 8;
#endif
}
/* Eight-bit shift step of the BB-mode field code for 8-bit units, done in
   place: bytes 1..15 move down to 0..14 and the old x[0] selects a 16-bit
   gf_tab entry that supplies the new x[15] and is XORed into x[14] */
gf_decl void gf_mulx8_bb(gf_t x)
{
    /* x[0] is about to be overwritten by the move, so look it up first */
    const uint16_t fold = gf_tab[UNIT_PTR(x)[0]];

    memmove(UNIT_PTR(x), UNIT_PTR(x) + 1, 15);

    /* x[15] is assigned, not XORed: the move leaves a stale value there */
#if PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN
    UNIT_PTR(x)[15] = fold >> 8;
    UNIT_PTR(x)[14] ^= fold & 0xff;
#else
    UNIT_PTR(x)[15] = fold & 0xff;
    UNIT_PTR(x)[14] ^= fold >> 8;
#endif
}
#endif
#endif

@ -0,0 +1,329 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2014, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation.
This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 18/02/2014
This header file is an INTERNAL file which supports mode implementation
*/
#ifndef _MODE_HDR_H
#define _MODE_HDR_H
#include <string.h>
#include <limits.h>
#include "brg_endian.h"
/* UNIT_BITS sets the width, in bits, of the units in which buffers are
   processed.  Working in 32 or 64 bit chunks rather than in bytes can
   give significant speed gains.  A value supplied by the build (for
   example on the compiler command line) takes precedence; otherwise the
   default chosen here is:
     - big-endian platforms: 64 (the '#if 0 / #elif 1' pair is a manual
       switch - change it to select 32 instead)
     - 64-bit Windows (_WIN64): 64
     - everything else: 32
*/
#if !defined( UNIT_BITS )
# if PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN
# if 0
# define UNIT_BITS 32
# elif 1
# define UNIT_BITS 64
# endif
# elif defined( _WIN64 )
# define UNIT_BITS 64
# else
# define UNIT_BITS 32
# endif
#endif
/* 64-bit units need a 64-bit integer type: request it by defining
   NEED_UINT_64T ahead of the include of brg_types.h that follows
   (presumably that header only provides uint64_t when asked - confirm) */
#if UNIT_BITS == 64 && !defined( NEED_UINT_64T )
# define NEED_UINT_64T
#endif
#include "brg_types.h"
/* Use of inlines is preferred but code blocks can also be expanded inline
   using 'defines'. But the latter approach will typically generate a LOT
   of code and is not recommended.

   USE_INLINING is switched on here unless the build has already defined
   it; change the leading '1' to '0' to leave the decision to the build.
*/
#if 1 && !defined( USE_INLINING )
# define USE_INLINING
#endif
/* Map the rotate and byte order helpers onto Microsoft compiler intrinsics.
   Visual C++ 2005 (_MSC_VER 1400) and later provide 32 and 64 bit rotates
   and byte swaps; older versions only get the 32-bit rotates and fall back
   to the portable definitions further down for everything else.
*/
#if defined( _MSC_VER )
#  if _MSC_VER >= 1400
#    include <stdlib.h>
#    include <intrin.h>
#    pragma intrinsic(memset)
#    pragma intrinsic(memcpy)
#    define rotl32        _rotl
#    define rotr32        _rotr
#    define rotl64        _rotl64
     /* right rotate must use _rotr64; _rotl64 would rotate the wrong way */
#    define rotr64        _rotr64
#    define bswap_16(x)   _byteswap_ushort(x)
#    define bswap_32(x)   _byteswap_ulong(x)
#    define bswap_64(x)   _byteswap_uint64(x)
#  else
#    define rotl32        _lrotl
#    define rotr32        _lrotr
#  endif
#endif
/* mh_decl is the storage class / inline specifier placed in front of every
   helper function defined in this header.  It must always be defined, so
   when inlining has been switched off the helpers are still given internal
   linkage with a plain 'static' rather than leaving mh_decl undefined.
*/
#if defined( USE_INLINING )
#  if defined( _MSC_VER )
#    define mh_decl __inline
#  elif defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
#    define mh_decl static inline
#  else
#    define mh_decl static
#  endif
#else
#  define mh_decl static
#endif
#if defined(__cplusplus)
extern "C" {
#endif
/* Shorthand casts built on UPTR_CAST and UNIT_CAST, which are not defined
   in this header (presumably they come from brg_types.h - confirm).
   xx_PTR(x) views x as a pointer to units of the given width and xx_VAL(x)
   converts the value x to a unit of that width; the UNIT_ forms use the
   configured UNIT_BITS width. */
#define UI8_PTR(x) UPTR_CAST(x, 8)
#define UI16_PTR(x) UPTR_CAST(x, 16)
#define UI32_PTR(x) UPTR_CAST(x, 32)
#define UI64_PTR(x) UPTR_CAST(x, 64)
#define UNIT_PTR(x) UPTR_CAST(x, UNIT_BITS)
#define UI8_VAL(x) UNIT_CAST(x, 8)
#define UI16_VAL(x) UNIT_CAST(x, 16)
#define UI32_VAL(x) UNIT_CAST(x, 32)
#define UI64_VAL(x) UNIT_CAST(x, 64)
#define UNIT_VAL(x) UNIT_CAST(x, UNIT_BITS)
/* number of bytes in one unit */
#define BUF_INC (UNIT_BITS >> 3)
/* mask for the low address bits within one unit (bytes per unit minus 1) */
#define BUF_ADRMASK ((UNIT_BITS >> 3) - 1)
/* Unrolled repetition helpers.  repA_{u|d}N(f, ...) expands to N separate
   invocations of the macro f with a literal index as first argument:
     rep2_*  call f(n,r,x)      - one destination, one source
     rep3_*  call f(n,r,x,y,c)  - one destination, two sources and a cast
     _uN     indices ascend  from 0 to N-1
     _dN     indices descend from N-1 to 0
   The expansion is a sequence of statements without enclosing braces and
   without a trailing semicolon, so use it only as a complete statement
   inside a braced block (never as the unbraced body of an if or loop). */
#define rep2_u2(f,r,x) f( 0,r,x); f( 1,r,x)
#define rep2_u4(f,r,x) f( 0,r,x); f( 1,r,x); f( 2,r,x); f( 3,r,x)
#define rep2_u16(f,r,x) f( 0,r,x); f( 1,r,x); f( 2,r,x); f( 3,r,x); \
f( 4,r,x); f( 5,r,x); f( 6,r,x); f( 7,r,x); \
f( 8,r,x); f( 9,r,x); f(10,r,x); f(11,r,x); \
f(12,r,x); f(13,r,x); f(14,r,x); f(15,r,x)
#define rep2_d2(f,r,x) f( 1,r,x); f( 0,r,x)
#define rep2_d4(f,r,x) f( 3,r,x); f( 2,r,x); f( 1,r,x); f( 0,r,x)
#define rep2_d16(f,r,x) f(15,r,x); f(14,r,x); f(13,r,x); f(12,r,x); \
f(11,r,x); f(10,r,x); f( 9,r,x); f( 8,r,x); \
f( 7,r,x); f( 6,r,x); f( 5,r,x); f( 4,r,x); \
f( 3,r,x); f( 2,r,x); f( 1,r,x); f( 0,r,x)
#define rep3_u2(f,r,x,y,c) f( 0,r,x,y,c); f( 1,r,x,y,c)
#define rep3_u4(f,r,x,y,c) f( 0,r,x,y,c); f( 1,r,x,y,c); f( 2,r,x,y,c); f( 3,r,x,y,c)
#define rep3_u16(f,r,x,y,c) f( 0,r,x,y,c); f( 1,r,x,y,c); f( 2,r,x,y,c); f( 3,r,x,y,c); \
f( 4,r,x,y,c); f( 5,r,x,y,c); f( 6,r,x,y,c); f( 7,r,x,y,c); \
f( 8,r,x,y,c); f( 9,r,x,y,c); f(10,r,x,y,c); f(11,r,x,y,c); \
f(12,r,x,y,c); f(13,r,x,y,c); f(14,r,x,y,c); f(15,r,x,y,c)
#define rep3_d2(f,r,x,y,c) f( 1,r,x,y,c); f( 0,r,x,y,c)
#define rep3_d4(f,r,x,y,c) f( 3,r,x,y,c); f( 2,r,x,y,c); f( 1,r,x,y,c); f( 0,r,x,y,c)
#define rep3_d16(f,r,x,y,c) f(15,r,x,y,c); f(14,r,x,y,c); f(13,r,x,y,c); f(12,r,x,y,c); \
f(11,r,x,y,c); f(10,r,x,y,c); f( 9,r,x,y,c); f( 8,r,x,y,c); \
f( 7,r,x,y,c); f( 6,r,x,y,c); f( 5,r,x,y,c); f( 4,r,x,y,c); \
f( 3,r,x,y,c); f( 2,r,x,y,c); f( 1,r,x,y,c); f( 0,r,x,y,c)
/* function pointers might be used for fast XOR operations: xor_function
   has the same signature as xor_block and xor_block_aligned below, taking
   a destination r and two sources p and q */
typedef void (*xor_function)(void* r, const void* p, const void* q);
/* left and right rotates on 32 and 64 bit variables */
#if !defined( rotl32 )  /* NOTE: 0 <= n <= 32 is supported */
/* Rotate the 32-bit value x left by n bits.  Both shift counts are reduced
   modulo 32 so that n == 0 and n == 32 return x unchanged; an unmasked
   shift by the full 32-bit width is undefined behaviour in C */
mh_decl uint32_t rotl32(uint32_t x, int n)
{
    const unsigned int s = (unsigned int)n & 31u;

    return ((x << s) | (x >> ((32u - s) & 31u)));
}
#endif
#if !defined( rotr32 )  /* NOTE: 0 <= n <= 32 is supported */
/* Rotate the 32-bit value x right by n bits.  Both shift counts are reduced
   modulo 32 so that n == 0 and n == 32 return x unchanged; an unmasked
   shift by the full 32-bit width is undefined behaviour in C */
mh_decl uint32_t rotr32(uint32_t x, int n)
{
    const unsigned int s = (unsigned int)n & 31u;

    return ((x >> s) | (x << ((32u - s) & 31u)));
}
#endif
#if ( UNIT_BITS == 64 ) && !defined( rotl64 )   /* NOTE: 0 <= n <= 64 is supported */
/* Rotate the 64-bit value x left by n bits.  Both shift counts are reduced
   modulo 64 so that n == 0 and n == 64 return x unchanged; an unmasked
   shift by the full 64-bit width is undefined behaviour in C */
mh_decl uint64_t rotl64(uint64_t x, int n)
{
    const unsigned int s = (unsigned int)n & 63u;

    return ((x << s) | (x >> ((64u - s) & 63u)));
}
#endif
#if ( UNIT_BITS == 64 ) && !defined( rotr64 )   /* NOTE: 0 <= n <= 64 is supported */
/* Rotate the 64-bit value x right by n bits.  Both shift counts are reduced
   modulo 64 so that n == 0 and n == 64 return x unchanged; an unmasked
   shift by the full 64-bit width is undefined behaviour in C */
mh_decl uint64_t rotr64(uint64_t x, int n)
{
    const unsigned int s = (unsigned int)n & 63u;

    return ((x >> s) | (x << ((64u - s) & 63u)));
}
#endif
/* byte order inversions for 16, 32 and 64 bit variables */
#if !defined(bswap_16)
/* exchange the two bytes of a 16-bit value */
mh_decl uint16_t bswap_16(uint16_t x)
{
    const unsigned int hi_to_lo = x >> 8;
    const unsigned int lo_to_hi = x << 8;

    /* the conversion to uint16_t discards anything above bit 15 */
    return (uint16_t)(lo_to_hi | hi_to_lo);
}
#endif
#if !defined(bswap_32)
/* reverse the order of the four bytes of a 32-bit value */
mh_decl uint32_t bswap_32(uint32_t x)
{
    /* a rotate by 24 places bytes 0 and 2 in their mirrored positions and a
       rotate by 8 does the same for bytes 1 and 3 */
    const uint32_t even_bytes = rotr32(x, 24) & 0x00ff00ff;
    const uint32_t odd_bytes  = rotr32(x,  8) & 0xff00ff00;

    return even_bytes | odd_bytes;
}
#endif
#if ( UNIT_BITS == 64 ) && !defined(bswap_64)
/* reverse the order of the eight bytes of a 64-bit value by byte swapping
   each 32-bit half and exchanging the two halves */
mh_decl uint64_t bswap_64(uint64_t x)
{
    const uint32_t upper = (uint32_t)(x >> 32);
    const uint32_t lower = (uint32_t)x;

    return ((uint64_t)bswap_32(lower) << 32) | bswap_32(upper);
}
#endif
/* support for fast aligned buffer move, xor and byte swap operations -
source and destination buffers for move and xor operations must not
overlap, those for byte order reversal must either not overlap or
must be identical
*/
/* element operations for use with the rep2_ and rep3_ repetition macros:
   f_copy copies unit n of q to p; f_xor stores the XOR of unit n of p and
   q in r after passing the result through the cast macro c */
#define f_copy(n,p,q) p[n] = q[n]
#define f_xor(n,r,p,q,c) r[n] = c(p[n] ^ q[n])
/* copy the 16 byte block at q to p */
mh_decl void copy_block(void* p, const void* q)
{
    enum { block_bytes = 16 };

    memcpy(p, q, block_bytes);
}
mh_decl void copy_block_aligned(void *p, const void *q)
{
#if UNIT_BITS == 8
memcpy(p, q, 16);
#elif UNIT_BITS == 32
rep2_u4(f_copy,UNIT_PTR(p),UNIT_PTR(q));
#else
rep2_u2(f_copy,UNIT_PTR(p),UNIT_PTR(q));
#endif
}
mh_decl void xor_block(void *r, const void* p, const void* q)
{
rep3_u16(f_xor, UI8_PTR(r), UI8_PTR(p), UI8_PTR(q), UI8_VAL);
}
mh_decl void xor_block_aligned(void *r, const void *p, const void *q)
{
#if UNIT_BITS == 8
rep3_u16(f_xor, UNIT_PTR(r), UNIT_PTR(p), UNIT_PTR(q), UNIT_VAL);
#elif UNIT_BITS == 32
rep3_u4(f_xor, UNIT_PTR(r), UNIT_PTR(p), UNIT_PTR(q), UNIT_VAL);
#else
rep3_u2(f_xor, UNIT_PTR(r), UNIT_PTR(p), UNIT_PTR(q), UNIT_VAL);
#endif
}
/* byte swap within 32-bit words in a 16 byte block; don't move 32-bit words

   d receives the result and s is the source; the two must either be the
   same buffer or not overlap at all
*/
mh_decl void bswap32_block(void *d, const void* s)
{
#if UNIT_BITS == 8
    int w;
    uint8_t t;

    /* w is the offset of each 32-bit word; within a word byte 0 is
       exchanged with byte 3 and byte 1 with byte 2, going through t so
       that the in-place case (d == s) works */
    for(w = 0; w < 16; w += 4)
    {
        t = UNIT_PTR(s)[w    ]; UNIT_PTR(d)[w    ] = UNIT_PTR(s)[w + 3]; UNIT_PTR(d)[w + 3] = t;
        t = UNIT_PTR(s)[w + 1]; UNIT_PTR(d)[w + 1] = UNIT_PTR(s)[w + 2]; UNIT_PTR(d)[w + 2] = t;
    }
#elif UNIT_BITS == 32
    UNIT_PTR(d)[0] = bswap_32(UNIT_PTR(s)[0]); UNIT_PTR(d)[1] = bswap_32(UNIT_PTR(s)[1]);
    UNIT_PTR(d)[2] = bswap_32(UNIT_PTR(s)[2]); UNIT_PTR(d)[3] = bswap_32(UNIT_PTR(s)[3]);
#else
    /* 64-bit units: still swap within each of the four 32-bit words */
    UI32_PTR(d)[0] = bswap_32(UI32_PTR(s)[0]); UI32_PTR(d)[1] = bswap_32(UI32_PTR(s)[1]);
    UI32_PTR(d)[2] = bswap_32(UI32_PTR(s)[2]); UI32_PTR(d)[3] = bswap_32(UI32_PTR(s)[3]);
#endif
}
/* byte swap within 64-bit words in a 16 byte block; don't move 64-bit words

   d receives the result and s is the source; the two must either be the
   same buffer or not overlap at all
*/
mh_decl void bswap64_block(void *d, const void* s)
{
#if UNIT_BITS == 8
    int w, i;
    uint8_t t;

    /* w is the offset of each 64-bit word; byte i of a word is exchanged
       with byte 7 - i, going through t so that the in-place case (d == s)
       works */
    for(w = 0; w < 16; w += 8)
        for(i = 0; i < 4; ++i)
        {
            t = UNIT_PTR(s)[w + i];
            UNIT_PTR(d)[w + i] = UNIT_PTR(s)[w + 7 - i];
            UNIT_PTR(d)[w + 7 - i] = t;
        }
#elif UNIT_BITS == 32
    uint32_t t;

    /* byte swap both 32-bit halves of each 64-bit word and exchange them */
    t = bswap_32(UNIT_PTR(s)[0]); UNIT_PTR(d)[0] = bswap_32(UNIT_PTR(s)[1]); UNIT_PTR(d)[1] = t;
    t = bswap_32(UNIT_PTR(s)[2]); UNIT_PTR(d)[2] = bswap_32(UNIT_PTR(s)[3]); UNIT_PTR(d)[3] = t;
#else
    UNIT_PTR(d)[0] = bswap_64(UNIT_PTR(s)[0]); UNIT_PTR(d)[1] = bswap_64(UNIT_PTR(s)[1]);
#endif
}
/* reverse the order of all 16 bytes of a block; d receives the result and
   s is the source, which may be the same buffer as d */
mh_decl void bswap128_block(void *d, const void* s)
{
#if UNIT_BITS == 8
    int i;
    uint8_t t;

    /* exchange byte i with byte 15 - i for the first eight bytes */
    for(i = 0; i < 8; ++i)
    {
        t = UNIT_PTR(s)[i];
        UNIT_PTR(d)[i] = UNIT_PTR(s)[15 - i];
        UNIT_PTR(d)[15 - i] = t;
    }
#elif UNIT_BITS == 32
    uint32_t t;

    /* outer pair of words, then inner pair, each byte swapped and exchanged */
    t = bswap_32(UNIT_PTR(s)[0]);
    UNIT_PTR(d)[0] = bswap_32(UNIT_PTR(s)[3]);
    UNIT_PTR(d)[3] = t;

    t = bswap_32(UNIT_PTR(s)[1]);
    UNIT_PTR(d)[1] = bswap_32(UNIT_PTR(s)[2]);
    UNIT_PTR(d)[2] = t;
#else
    uint64_t t;

    /* byte swap both 64-bit words and exchange them */
    t = bswap_64(UNIT_PTR(s)[0]);
    UNIT_PTR(d)[0] = bswap_64(UNIT_PTR(s)[1]);
    UNIT_PTR(d)[1] = t;
#endif
}
/* platform byte order to big or little endian order for 16, 32 and 64 bit variables

   Each macro converts its argument in place: where the platform order
   differs from the requested order it expands to an assignment that byte
   swaps x (so x must be a modifiable lvalue and is evaluated twice);
   where the orders already agree it expands to nothing at all.
*/
#if PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN
# define uint16_t_to_le(x) (x) = bswap_16((x))
# define uint32_t_to_le(x) (x) = bswap_32((x))
# define uint64_t_to_le(x) (x) = bswap_64((x))
# define uint16_t_to_be(x)
# define uint32_t_to_be(x)
# define uint64_t_to_be(x)
#else
# define uint16_t_to_le(x)
# define uint32_t_to_le(x)
# define uint64_t_to_le(x)
# define uint16_t_to_be(x) (x) = bswap_16((x))
# define uint32_t_to_be(x) (x) = bswap_32((x))
# define uint64_t_to_be(x) (x) = bswap_64((x))
#endif
#if defined(__cplusplus)
}
#endif
#endif

@ -1,5 +1,5 @@
^\./core/embed/bootloader/protob/
^\./crypto/aes/aes\(\|crypt\|key\|_modes\|opt\|tab\|tst\)\.
^\./crypto/aes/
^\./crypto/chacha20poly1305/
^\./crypto/ed25519-donna/
^\./crypto/gui/

Loading…
Cancel
Save