From 57213e6c6cbd617a928c66f47b7f9427e6d0007f Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Thu, 8 Apr 2021 13:19:27 +0200 Subject: [PATCH] Add AES_GCM_GHASH_GLOBAL() to allow using encrypted data directly from GPU memory to avoid reserving registers --- OpenCL/inc_cipher_aes-gcm.cl | 97 +++++++++++++++++++++++++++++++++++- OpenCL/inc_cipher_aes-gcm.h | 6 ++- OpenCL/m25500-optimized.cl | 3 +- OpenCL/m25500-pure.cl | 9 ++-- src/modules/module_25500.c | 5 +- 5 files changed, 110 insertions(+), 10 deletions(-) diff --git a/OpenCL/inc_cipher_aes-gcm.cl b/OpenCL/inc_cipher_aes-gcm.cl index 97d7b0f28..9f6b4905a 100644 --- a/OpenCL/inc_cipher_aes-gcm.cl +++ b/OpenCL/inc_cipher_aes-gcm.cl @@ -115,6 +115,65 @@ DECLSPEC void AES_GCM_ghash (const u32 *subkey, const u32 *in, u32 in_len, u32 * } } +DECLSPEC void AES_GCM_ghash_global (const u32 *subkey, GLOBAL_AS const u32 *in, u32 in_len, u32 *out) +{ /* Folds the words read through the GLOBAL_AS pointer in into the four-word accumulator out: each block is XORed into out with AES_GCM_xor_block, then AES_GCM_gf_mult is called with out and subkey and its result is copied back into out. */ + u32 m = in_len / 16; /* count of complete 16-byte blocks */ + + GLOBAL_AS const u32 *xpos = in; + + u32 tmp[4] = { 0 }; /* receives each AES_GCM_gf_mult result before it is copied to out */ + + for (u32 i = 0; i < m; i++) + { + u32 t2[4]; /* local copy of the current four-word block, handed to AES_GCM_xor_block below */ + + t2[0] = xpos[0]; + t2[1] = xpos[1]; + t2[2] = xpos[2]; + t2[3] = xpos[3]; + + AES_GCM_xor_block (out, t2); + + xpos += 4; /* advance to the next four-word block */ + + AES_GCM_gf_mult (out, subkey, tmp); + + out[0] = tmp[0]; + out[1] = tmp[1]; + out[2] = tmp[2]; + out[3] = tmp[3]; + } + + if (in + (in_len/4) > xpos) /* words left over after the complete blocks; in_len/4 rounds down */ + { + u32 last = in + (in_len/4) - xpos; /* number of leftover words */ + + for (u32 i = 0; i < last; i++) + { + tmp[i] = xpos[i]; + } + + for (u32 i = last; i < 4; i++) + { + tmp[i] = 0; /* zero-fill the rest of the partial block */ + } + + AES_GCM_xor_block (out, tmp); + + AES_GCM_gf_mult (out, subkey, tmp); + + tmp[0] = hc_swap32_S (tmp[0]); + tmp[1] = hc_swap32_S (tmp[1]); + tmp[2] = hc_swap32_S (tmp[2]); + tmp[3] = hc_swap32_S (tmp[3]); /* NOTE(review): only this tail path passes the product through hc_swap32_S; the full-block loop stores tmp as-is - confirm this is intended */ + + out[0] = tmp[0]; + out[1] = tmp[1]; + out[2] = tmp[2]; + out[3] = tmp[3]; + } +} + DECLSPEC void AES_GCM_Init (const u32 *ukey, u32 key_len, u32 *key, u32 *subkey, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) { if (key_len == 128) @@ -205,7 +264,7 
@@ DECLSPEC void AES_GCM_gctr (const u32 *key, const u32 *iv, const u32 *in, u32 in } } -DECLSPEC void AES_GCM_GCTR (u32 *key, u32 *J0, u32 *in, u32 in_len, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) +DECLSPEC void AES_GCM_GCTR (u32 *key, u32 *J0, const u32 *in, u32 in_len, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) { u32 J0_incr[4]; @@ -217,7 +276,7 @@ DECLSPEC void AES_GCM_GCTR (u32 *key, u32 *J0, u32 *in, u32 in_len, u32 *out, SH AES_GCM_gctr (key, J0_incr, in, in_len, out, s_te0, s_te1, s_te2, s_te3, s_te4); } -DECLSPEC void AES_GCM_GHASH (const u32 *subkey, const u32 *aad_buf, u32 aad_len, u32 *enc_buf, u32 enc_len, u32 *out) +DECLSPEC void AES_GCM_GHASH (const u32 *subkey, const u32 *aad_buf, u32 aad_len, const u32 *enc_buf, u32 enc_len, u32 *out) { out[0] = 0; out[1] = 0; @@ -250,3 +309,37 @@ DECLSPEC void AES_GCM_GHASH (const u32 *subkey, const u32 *aad_buf, u32 aad_len, AES_GCM_ghash (subkey, len_buf, 16, out); } + +DECLSPEC void AES_GCM_GHASH_GLOBAL (const u32 *subkey, const u32 *aad_buf, u32 aad_len, GLOBAL_AS const u32 *enc_buf, u32 enc_len, u32 *out) +{ /* Zeroes out, folds aad_buf in with AES_GCM_ghash, folds the GLOBAL_AS enc_buf in with AES_GCM_ghash_global, byte-swaps out, then folds in a four-word length block with AES_GCM_ghash. */ + out[0] = 0; + out[1] = 0; + out[2] = 0; + out[3] = 0; /* start from an all-zero accumulator */ + + AES_GCM_ghash (subkey, aad_buf, aad_len, out); + + // untested swap + /* + out[0] = hc_swap32_S (out[0]); + out[1] = hc_swap32_S (out[1]); + out[2] = hc_swap32_S (out[2]); + out[3] = hc_swap32_S (out[3]); + */ + + AES_GCM_ghash_global (subkey, enc_buf, enc_len, out); /* enc_buf is passed on as a GLOBAL_AS pointer; this function makes no local copy of it */ + + out[0] = hc_swap32_S (out[0]); + out[1] = hc_swap32_S (out[1]); + out[2] = hc_swap32_S (out[2]); + out[3] = hc_swap32_S (out[3]); /* every word of out goes through hc_swap32_S before the length block is folded in */ + + u32 len_buf[4]; + + len_buf[0] = aad_len * 8; + len_buf[1] = 0; + len_buf[2] = 0; + len_buf[3] = enc_len * 8; /* both lengths are multiplied by 8, i.e. bit counts if aad_len and enc_len are byte counts. NOTE(review): GCM defines the final GHASH block as two 64-bit lengths (NIST SP 800-38D) - confirm that word 0, rather than word 1, is the intended slot for the AAD bit count */ + + AES_GCM_ghash (subkey, len_buf, 16, out); +} diff --git a/OpenCL/inc_cipher_aes-gcm.h b/OpenCL/inc_cipher_aes-gcm.h index ba44729e4..ae6bd7fb2 100644 --- a/OpenCL/inc_cipher_aes-gcm.h +++ 
b/OpenCL/inc_cipher_aes-gcm.h @@ -10,10 +10,12 @@ DECLSPEC void AES_GCM_inc32 (u32 *block); DECLSPEC void AES_GCM_xor_block (u32 *dst, const u32 *src); DECLSPEC void AES_GCM_gf_mult (const u32 *x, const u32 *y, u32 *z); DECLSPEC void AES_GCM_ghash (const u32 *subkey, const u32 *in, u32 in_len, u32 *out); +DECLSPEC void AES_GCM_ghash_global (const u32 *subkey, GLOBAL_AS const u32 *in, u32 in_len, u32 *out); /* same parameter list as AES_GCM_ghash, except that in is GLOBAL_AS-qualified */ DECLSPEC void AES_GCM_Init (const u32 *ukey, u32 key_len, u32 *key, u32 *subkey, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4); DECLSPEC void AES_GCM_Prepare_J0 (const u32 *iv, u32 iv_len, const u32 *subkey, u32 *J0); DECLSPEC void AES_GCM_gctr (const u32 *key, const u32 *iv, const u32 *in, u32 in_len, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4); -DECLSPEC void AES_GCM_GCTR (u32 *key, u32 *J0, u32 *in, u32 in_len, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4); -DECLSPEC void AES_GCM_GHASH (const u32 *subkey, const u32 *aad_buf, u32 aad_len, u32 *enc_buf, u32 enc_len, u32 *out); +DECLSPEC void AES_GCM_GCTR (u32 *key, u32 *J0, const u32 *in, u32 in_len, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4); +DECLSPEC void AES_GCM_GHASH (const u32 *subkey, const u32 *aad_buf, u32 aad_len, const u32 *enc_buf, u32 enc_len, u32 *out); +DECLSPEC void AES_GCM_GHASH_GLOBAL (const u32 *subkey, const u32 *aad_buf, u32 aad_len, GLOBAL_AS const u32 *enc_buf, u32 enc_len, u32 *out); /* same parameter list as AES_GCM_GHASH, except that enc_buf is GLOBAL_AS-qualified */ #endif // _INC_CIPHER_AES_GCM_H diff --git a/OpenCL/m25500-optimized.cl b/OpenCL/m25500-optimized.cl index fde776d58..dfc292a26 100644 --- a/OpenCL/m25500-optimized.cl +++ b/OpenCL/m25500-optimized.cl @@ -4,7 +4,6 @@ */ #define NEW_SIMD_CODE -#define AES_GCM_ALT1 #ifdef KERNEL_STATIC #include "inc_vendor.h" @@ -35,7 +34,7 @@ typedef struct 
pbkdf2_sha256_aes_gcm u32 salt_buf[64]; u32 iv_buf[4]; u32 iv_len; - u32 ct_buf[14]; + u32 ct_buf[16]; u32 ct_len; } pbkdf2_sha256_aes_gcm_t; diff --git a/OpenCL/m25500-pure.cl b/OpenCL/m25500-pure.cl index 3bb9c3af6..56dddb096 100644 --- a/OpenCL/m25500-pure.cl +++ b/OpenCL/m25500-pure.cl @@ -4,7 +4,6 @@ */ #define NEW_SIMD_CODE -#define AES_GCM_ALT1 #ifdef KERNEL_STATIC #include "inc_vendor.h" @@ -35,7 +34,7 @@ typedef struct pbkdf2_sha256_aes_gcm u32 salt_buf[64]; u32 iv_buf[4]; u32 iv_len; - u32 ct_buf[14]; + u32 ct_buf[16]; u32 ct_len; } pbkdf2_sha256_aes_gcm_t; @@ -350,6 +349,7 @@ KERNEL_FQ void m25500_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh // ct + /* u32 enc[14] = { 0 }; enc[ 0] = esalt_bufs[DIGESTS_OFFSET].ct_buf[ 0]; @@ -368,6 +368,7 @@ KERNEL_FQ void m25500_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh enc[13] = esalt_bufs[DIGESTS_OFFSET].ct_buf[13]; u32 enc_len = esalt_bufs[DIGESTS_OFFSET].ct_len; + */ /* // decrypt buffer is not usefull here, skip @@ -383,7 +384,9 @@ KERNEL_FQ void m25500_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh u32 aad_buf[4] = { 0 }; u32 aad_len = 0; - AES_GCM_GHASH (subKey, aad_buf, aad_len, enc, enc_len, S); + //AES_GCM_GHASH (subKey, aad_buf, aad_len, enc, enc_len, S); + + AES_GCM_GHASH_GLOBAL (subKey, aad_buf, aad_len, esalt_bufs[DIGESTS_OFFSET].ct_buf, esalt_bufs[DIGESTS_OFFSET].ct_len, S); AES_GCM_GCTR (key, J0, S, S_len, T, s_te0, s_te1, s_te2, s_te3, s_te4); diff --git a/src/modules/module_25500.c b/src/modules/module_25500.c index de4fb8e92..5201ef5ed 100644 --- a/src/modules/module_25500.c +++ b/src/modules/module_25500.c @@ -59,7 +59,7 @@ typedef struct pbkdf2_sha256_aes_gcm u32 salt_buf[64]; u32 iv_buf[4]; u32 iv_len; - u32 ct_buf[14]; + u32 ct_buf[16]; u32 ct_len; } pbkdf2_sha256_aes_gcm_t; @@ -214,6 +214,9 @@ int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSE stellar->ct_buf[i] = byte_swap_32 (stellar->ct_buf[i]); } + stellar->ct_buf[14] = 
0; + stellar->ct_buf[15] = 0; + stellar->ct_len = tmp_len - 16; // tag