/* --------------------------------------------------------------------------- Copyright (c) 1998-2010, Brian Gladman, Worcester, UK. All rights reserved. The redistribution and use of this software (with or without changes) is allowed without the payment of fees or royalties provided that: source code distributions include the above copyright notice, this list of conditions and the following disclaimer; binary distributions include the above copyright notice, this list of conditions and the following disclaimer in their documentation. This software is provided 'as is' with no explicit or implied warranties in respect of its operation, including, but not limited to, correctness and fitness for purpose. --------------------------------------------------------------------------- Issue Date: 11/01/2011 I am grateful for the work done by Mark Rodenkirch and Jason Papadopoulos in helping to remove a bug in the operation of this code on big endian systems when fast buffer operations are enabled. --------------------------------------------------------------------------- An implementation of field multiplication in the Galois Field GF(2^128) A polynomial representation is used for the field with the coefficients held in bit sequences in which the bit numbers are the powers of x that a bit represents. The field polynomial used is (x^128+x^7+x^2+x+1). The obvious way of representing field elements in a computer system is to map 'x' in the field to the binary integer '2'. But this was way too obvious for cryptographers! Here bytes are numbered in their memory order and bits within bytes are numbered according to their integer numeric significance (that is as is now normal with bit 0 representing unity). The term 'little endian' will then used to describe mappings where numeric (power of 2) or field (power of x) significance increases with increasing bit or byte numbers with 'big endian' being used to describe the inverse situation. The GF bit sequence can then be mapped onto 8-bit bytes in computer memory in one of four simple ways: A mapping in which x maps to the integer 2 in little endian form for both bytes and bits within bytes: LL: bit for x^n ==> bit for 2^(n % 8) in byte[n / 8] A mapping in which x maps to the integer 2 in big endian form for both bytes and bits within bytes: BL: bit for x^n ==> bit for 2^(n % 8) in byte[15 - n / 8] A little endian mapping for bytes but with the bits within bytes in reverse order (big endian bytes): LB: bit for x^n ==> bit for 2^(7 - n % 8) in byte[n / 8] A big endian mapping for bytes but with the bits within bytes in reverse order (big endian bytes): BB: bit for x^n ==> bit for 2^(7 - n % 8) in byte[15 - n / 8] 128-bit field elements are represented by 16 byte buffers but for processing efficiency reasons it is often desirable to process arrays of bytes using longer types such as, for example, unsigned long values. The type used for representing these buffers will be called a 'gf_unit' and the buffer itself will be referred to as a 'gf_t' type. THe field multiplier is based on the assumption that one of the two field elements involved in multiplication will change only relatively infrequently, making it worthwhile to precompute tables to speed up multiplication by this value. */ #ifndef _GF128MUL_H #define _GF128MUL_H #include <stdlib.h> #include <string.h> #include "brg_endian.h" /* USER DEFINABLE OPTIONS */ /* UNIT_BITS sets the size of variables used to process 16 byte buffers when the buffer alignment allows this. When buffers are processed in bytes, 16 individual operations are invoolved. But if, say, such a buffer is divided into 4 32 bit variables, it can then be processed in 4 operations, making the code typically much faster. In general it will pay to use the longest natively supported size, which will probably be 32 or 64 bits in 32 and 64 bit systems respectively. */ #if defined( UNIT_BITS ) # undef UNIT_BITS #endif #if !defined( UNIT_BITS ) # if PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN # if 0 # define UNIT_BITS 8 # elif 0 # define UNIT_BITS 32 # elif 1 # define UNIT_BITS 64 # endif # elif defined( _WIN64 ) # define UNIT_BITS 64 # else # define UNIT_BITS 32 # endif #endif #if UNIT_BITS == 64 && !defined( NEED_UINT_64T ) # define NEED_UINT_64T #endif #include "mode_hdr.h" /* Choose the Galois Field representation to use (see above) */ #if 0 # define GF_MODE_LL #elif 0 # define GF_MODE_BL #elif 1 # define GF_MODE_LB /* the representation used by GCM */ #elif 0 # define GF_MODE_BB #else # error mode is not defined #endif /* Table sizes for GF(128) Multiply. Normally larger tables give higher speed but cache loading might change this. Normally only one table size (or none at all) will be specified here */ #if 0 # define TABLES_64K #endif #if 0 # define TABLES_8K #endif #if 0 # define TABLES_4K #endif #if 0 # define TABLES_256 #endif /* END OF USER DEFINABLE OPTIONS */ #if !(defined( TABLES_64K ) || defined( TABLES_8K ) \ || defined( TABLES_4K ) || defined( TABLES_256 )) # define NO_TABLES #endif #if defined(__cplusplus) extern "C" { #endif #define GF_BYTE_LEN 16 #define GF_UNIT_LEN (GF_BYTE_LEN / (UNIT_BITS >> 3)) UNIT_TYPEDEF(gf_unit_t, UNIT_BITS); BUFR_TYPEDEF(gf_t, UNIT_BITS, GF_BYTE_LEN); /* Code for conversion between the four different galois field representations is optionally available using gf_convert.c */ typedef enum { REVERSE_NONE = 0, REVERSE_BITS = 1, REVERSE_BYTES = 2 } transform; void convert_representation(gf_t dest, const gf_t source, transform rev); void gf_mul(gf_t a, const gf_t b); /* slow field multiply */ /* types and calls for 64k table driven field multiplier */ typedef gf_t gf_t64k_a[16][256]; typedef gf_t (*gf_t64k_t)[256]; void init_64k_table(const gf_t g, gf_t64k_t t); void gf_mul_64k(gf_t a, const gf_t64k_t t, void *r); /* types and calls for 8k table driven field multiplier */ typedef gf_t gf_t8k_a[32][16]; typedef gf_t (*gf_t8k_t)[16]; void init_8k_table(const gf_t g, gf_t8k_t t); void gf_mul_8k(gf_t a, const gf_t8k_t t, gf_t r); /* types and calls for 8k table driven field multiplier */ typedef gf_t gf_t4k_a[256]; typedef gf_t (*gf_t4k_t); void init_4k_table(const gf_t g, gf_t4k_t t); void gf_mul_4k(gf_t a, const gf_t4k_t t, gf_t r); /* types and calls for 8k table driven field multiplier */ typedef gf_t gf_t256_a[16]; typedef gf_t (*gf_t256_t); void init_256_table(const gf_t g, gf_t256_t t); void gf_mul_256(gf_t a, const gf_t256_t t, gf_t r); #if defined(__cplusplus) } #endif #endif