From 9a4a8d942eea0d4db4d959272a0a2715c3c9f0d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20G=C3=BCtzkow?= Date: Sat, 3 Apr 2021 19:07:06 +0200 Subject: [PATCH 1/4] Plugin for KNX IP Secure's device authentication code --- OpenCL/m25900-pure.cl | 401 +++++++++++++++++++++++++++++++++++ src/modules/module_25900.c | 326 ++++++++++++++++++++++++++++ tools/test_modules/m25900.pm | 144 +++++++++++++ 3 files changed, 871 insertions(+) create mode 100644 OpenCL/m25900-pure.cl create mode 100644 src/modules/module_25900.c create mode 100644 tools/test_modules/m25900.pm diff --git a/OpenCL/m25900-pure.cl b/OpenCL/m25900-pure.cl new file mode 100644 index 000000000..6ab59ad2e --- /dev/null +++ b/OpenCL/m25900-pure.cl @@ -0,0 +1,401 @@ +/** + * Author......: See docs/credits.txt and Robert Guetzkow + * License.....: MIT + */ + +// The code is mostly reused from m10900-pure.cl and m19800-pure.cl + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct blocks +{ + u32 b1[4]; + u32 b2[4]; + u32 b3[4]; + +} blocks_t; + +typedef struct pbkdf2_sha256_tmp +{ + u32 ipad[8]; + u32 opad[8]; + + u32 dgst[32]; + u32 out[32]; + +} pbkdf2_sha256_tmp_t; + +DECLSPEC void hmac_sha256_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest) +{ + digest[0] = ipad[0]; + digest[1] = ipad[1]; + digest[2] = ipad[2]; + digest[3] = ipad[3]; + digest[4] = ipad[4]; + digest[5] = ipad[5]; + digest[6] = ipad[6]; + digest[7] = ipad[7]; + + sha256_transform_vector(w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; + w1[3] = digest[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + digest[0] = opad[0]; + digest[1] = opad[1]; + digest[2] = opad[2]; + digest[3] = opad[3]; + digest[4] = opad[4]; + digest[5] = opad[5]; + digest[6] = opad[6]; + digest[7] = opad[7]; + + sha256_transform_vector(w0, w1, w2, w3, digest); +} + +DECLSPEC void aes128_encrypt_cbc (const u32 *aes_ks, u32 *aes_iv, const u32 *in, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) +{ + u32 in_s[4]; + + in_s[0] = in[0]; + in_s[1] = in[1]; + in_s[2] = in[2]; + in_s[3] = in[3]; + + in_s[0] ^= aes_iv[0]; + in_s[1] ^= aes_iv[1]; + in_s[2] ^= aes_iv[2]; + in_s[3] ^= aes_iv[3]; + + aes128_encrypt (aes_ks, in_s, out, s_te0, s_te1, s_te2, s_te3, s_te4); + + aes_iv[0] = out[0]; + aes_iv[1] = out[1]; + aes_iv[2] = out[2]; + aes_iv[3] = out[3]; +} + +KERNEL_FQ void m25900_init(KERN_ATTR_TMPS(pbkdf2_sha256_tmp_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id(0); + + if (gid >= gid_max) return; + + sha256_hmac_ctx_t sha256_hmac_ctx; + + sha256_hmac_init_global_swap(&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len); + + tmps[gid].ipad[0] = sha256_hmac_ctx.ipad.h[0]; + tmps[gid].ipad[1] = sha256_hmac_ctx.ipad.h[1]; + tmps[gid].ipad[2] = sha256_hmac_ctx.ipad.h[2]; + tmps[gid].ipad[3] = sha256_hmac_ctx.ipad.h[3]; + tmps[gid].ipad[4] = sha256_hmac_ctx.ipad.h[4]; + tmps[gid].ipad[5] = sha256_hmac_ctx.ipad.h[5]; + tmps[gid].ipad[6] = sha256_hmac_ctx.ipad.h[6]; + tmps[gid].ipad[7] = sha256_hmac_ctx.ipad.h[7]; + + tmps[gid].opad[0] = sha256_hmac_ctx.opad.h[0]; + tmps[gid].opad[1] = sha256_hmac_ctx.opad.h[1]; + tmps[gid].opad[2] = sha256_hmac_ctx.opad.h[2]; + tmps[gid].opad[3] = sha256_hmac_ctx.opad.h[3]; + tmps[gid].opad[4] = sha256_hmac_ctx.opad.h[4]; + tmps[gid].opad[5] = sha256_hmac_ctx.opad.h[5]; + tmps[gid].opad[6] = sha256_hmac_ctx.opad.h[6]; + tmps[gid].opad[7] = sha256_hmac_ctx.opad.h[7]; + + sha256_hmac_update_global_swap(&sha256_hmac_ctx, salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); + + for (u32 i = 0, j = 1; i < 8; i += 8, j += 1) + { + sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx; + + u32 w0[4]; + u32 w1[4]; + u32 w2[4]; + u32 w3[4]; + + w0[0] = j; + w0[1] = 0; + w0[2] = 0; + w0[3] = 0; + w1[0] = 0; + w1[1] = 0; + w1[2] = 0; + w1[3] = 0; + w2[0] = 0; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 0; + + sha256_hmac_update_64(&sha256_hmac_ctx2, w0, w1, w2, w3, 4); + + sha256_hmac_final(&sha256_hmac_ctx2); + + tmps[gid].dgst[i + 0] = sha256_hmac_ctx2.opad.h[0]; + tmps[gid].dgst[i + 1] = sha256_hmac_ctx2.opad.h[1]; + tmps[gid].dgst[i + 2] = sha256_hmac_ctx2.opad.h[2]; + tmps[gid].dgst[i + 3] = sha256_hmac_ctx2.opad.h[3]; + tmps[gid].dgst[i + 4] = sha256_hmac_ctx2.opad.h[4]; + tmps[gid].dgst[i + 5] = sha256_hmac_ctx2.opad.h[5]; + tmps[gid].dgst[i + 6] = sha256_hmac_ctx2.opad.h[6]; + tmps[gid].dgst[i + 7] = sha256_hmac_ctx2.opad.h[7]; + + tmps[gid].out[i + 0] = tmps[gid].dgst[i + 0]; + tmps[gid].out[i + 1] = tmps[gid].dgst[i + 1]; + tmps[gid].out[i + 2] = tmps[gid].dgst[i + 2]; + tmps[gid].out[i + 3] = tmps[gid].dgst[i + 3]; + tmps[gid].out[i + 4] = tmps[gid].dgst[i + 4]; + tmps[gid].out[i + 5] = tmps[gid].dgst[i + 5]; + tmps[gid].out[i + 6] = tmps[gid].dgst[i + 6]; + tmps[gid].out[i + 7] = tmps[gid].dgst[i + 7]; + } +} + +KERNEL_FQ void m25900_loop(KERN_ATTR_TMPS(pbkdf2_sha256_tmp_t)) +{ + const u64 gid = get_global_id(0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x ipad[8]; + u32x opad[8]; + + ipad[0] = packv(tmps, ipad, gid, 0); + ipad[1] = packv(tmps, ipad, gid, 1); + ipad[2] = packv(tmps, ipad, gid, 2); + ipad[3] = packv(tmps, ipad, gid, 3); + ipad[4] = packv(tmps, ipad, gid, 4); + ipad[5] = packv(tmps, ipad, gid, 5); + ipad[6] = packv(tmps, ipad, gid, 6); + ipad[7] = packv(tmps, ipad, gid, 7); + + opad[0] = packv(tmps, opad, gid, 0); + opad[1] = packv(tmps, opad, gid, 1); + opad[2] = packv(tmps, opad, gid, 2); + opad[3] = packv(tmps, opad, gid, 3); + opad[4] = packv(tmps, opad, gid, 4); + opad[5] = packv(tmps, opad, gid, 5); + opad[6] = packv(tmps, opad, gid, 6); + opad[7] = packv(tmps, opad, gid, 7); + + for (u32 i = 0; i < 8; i += 8) + { + u32x dgst[8]; + u32x out[8]; + + dgst[0] = packv(tmps, dgst, gid, i + 0); + dgst[1] = packv(tmps, dgst, gid, i + 1); + dgst[2] = packv(tmps, dgst, gid, i + 2); + dgst[3] = packv(tmps, dgst, gid, i + 3); + dgst[4] = packv(tmps, dgst, gid, i + 4); + dgst[5] = packv(tmps, dgst, gid, i + 5); + dgst[6] = packv(tmps, dgst, gid, i + 6); + dgst[7] = packv(tmps, dgst, gid, i + 7); + + out[0] = packv(tmps, out, gid, i + 0); + out[1] = packv(tmps, out, gid, i + 1); + out[2] = packv(tmps, out, gid, i + 2); + out[3] = packv(tmps, out, gid, i + 3); + out[4] = packv(tmps, out, gid, i + 4); + out[5] = packv(tmps, out, gid, i + 5); + out[6] = packv(tmps, out, gid, i + 6); + out[7] = packv(tmps, out, gid, i + 7); + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = dgst[0]; + w0[1] = dgst[1]; + w0[2] = dgst[2]; + w0[3] = dgst[3]; + w1[0] = dgst[4]; + w1[1] = dgst[5]; + w1[2] = dgst[6]; + w1[3] = dgst[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = (64 + 32) * 8; + + hmac_sha256_run_V(w0, w1, w2, w3, ipad, opad, dgst); + + out[0] ^= dgst[0]; + out[1] ^= dgst[1]; + out[2] ^= dgst[2]; + out[3] ^= dgst[3]; + out[4] ^= dgst[4]; + out[5] ^= dgst[5]; + out[6] ^= dgst[6]; + out[7] ^= dgst[7]; + } + + unpackv(tmps, dgst, gid, i + 0, dgst[0]); + unpackv(tmps, dgst, gid, i + 1, dgst[1]); + unpackv(tmps, dgst, gid, i + 2, dgst[2]); + unpackv(tmps, dgst, gid, i + 3, dgst[3]); + unpackv(tmps, dgst, gid, i + 4, dgst[4]); + unpackv(tmps, dgst, gid, i + 5, dgst[5]); + unpackv(tmps, dgst, gid, i + 6, dgst[6]); + unpackv(tmps, dgst, gid, i + 7, dgst[7]); + + unpackv(tmps, out, gid, i + 0, out[0]); + unpackv(tmps, out, gid, i + 1, out[1]); + unpackv(tmps, out, gid, i + 2, out[2]); + unpackv(tmps, out, gid, i + 3, out[3]); + unpackv(tmps, out, gid, i + 4, out[4]); + unpackv(tmps, out, gid, i + 5, out[5]); + unpackv(tmps, out, gid, i + 6, out[6]); + unpackv(tmps, out, gid, i + 7, out[7]); + } +} + +KERNEL_FQ void m25900_comp(KERN_ATTR_TMPS_ESALT(pbkdf2_sha256_tmp_t, blocks_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id(0); + const u64 lid = get_local_id(0); + const u64 lsz = get_local_size(0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS(); + + #else + + CONSTANT_AS u32a* s_td0 = td0; + CONSTANT_AS u32a* s_td1 = td1; + CONSTANT_AS u32a* s_td2 = td2; + CONSTANT_AS u32a* s_td3 = td3; + CONSTANT_AS u32a* s_td4 = td4; + + CONSTANT_AS u32a* s_te0 = te0; + CONSTANT_AS u32a* s_te1 = te1; + CONSTANT_AS u32a* s_te2 = te2; + CONSTANT_AS u32a* s_te3 = te3; + CONSTANT_AS u32a* s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + u32 key[4]; + + key[0] = tmps[gid].out[DGST_R0]; + key[1] = tmps[gid].out[DGST_R1]; + key[2] = tmps[gid].out[DGST_R2]; + key[3] = tmps[gid].out[DGST_R3]; + + u32 aes_ks[44]; + + AES128_set_encrypt_key (aes_ks, key, s_te0, s_te1, s_te2, s_te3); + + u32 b0[4] = { 0 }; + + u32 aes_cbc_iv[4] = { 0 }; + + u32 yn[4]; + + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, b0, yn, s_te0, s_te1, s_te2, s_te3, s_te4); + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, esalt_bufs[DIGESTS_OFFSET].b1, yn, s_te0, s_te1, s_te2, s_te3, s_te4); + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, esalt_bufs[DIGESTS_OFFSET].b2, yn, s_te0, s_te1, s_te2, s_te3, s_te4); + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, esalt_bufs[DIGESTS_OFFSET].b3, yn, s_te0, s_te1, s_te2, s_te3, s_te4); + + u32 nonce[4]; + + nonce[0] = 0; + nonce[1] = 0; + nonce[2] = 0; + nonce[3] = 0x00ff0000; // already swapped + + u32 s0[4]; + + aes128_encrypt(aes_ks, nonce, s0, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32 r0 = yn[0] ^ s0[0]; + const u32 r1 = yn[1] ^ s0[1]; + const u32 r2 = yn[2] ^ s0[2]; + const u32 r3 = yn[3] ^ s0[3]; + +#define il_pos 0 + +#ifdef KERNEL_STATIC +#include COMPARE_M +#endif +} diff --git a/src/modules/module_25900.c b/src/modules/module_25900.c new file mode 100644 index 000000000..9150a614e --- /dev/null +++ b/src/modules/module_25900.c @@ -0,0 +1,326 @@ +/** + * Author......: Robert Guetzkow + * License.....: MIT + */ + +#include "common.h" +#include "types.h" +#include "modules.h" +#include "bitops.h" +#include "convert.h" +#include "shared.h" + +static const u32 ATTACK_EXEC = ATTACK_EXEC_OUTSIDE_KERNEL; +static const u32 DGST_POS0 = 0; +static const u32 DGST_POS1 = 1; +static const u32 DGST_POS2 = 2; +static const u32 DGST_POS3 = 3; +static const u32 DGST_SIZE = DGST_SIZE_4_4; +static const u32 HASH_CATEGORY = HASH_CATEGORY_NETWORK_PROTOCOL; +static const char *HASH_NAME = "KNX IP Secure - Device Authentication Code"; +static const u64 KERN_TYPE = 25900; +static const u32 OPTI_TYPE = OPTI_TYPE_SLOW_HASH_SIMD_LOOP; +static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE; +static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; +static const char *ST_PASS = "hashcat"; +static const char *ST_HASH = "$knx-ip-secure-device-authentication-code$*3033*fa7c0d787a9467c209f0a6e7cf16069ed704f3959dce19e45d7935c0a91bce41*f927640d9bbe9a4b0b74dd3289ad41ec"; + +u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC; } +u32 module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0; } +u32 module_dgst_pos1 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1; } +u32 module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2; } +u32 module_dgst_pos3 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3; } +u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE; } +u32 module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY; } +const char *module_hash_name (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME; } +u64 module_kern_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE; } +u32 module_opti_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE; } +u64 module_opts_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE; } +u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE; } +const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH; } +const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; } + +/* Details of the protocol design can be found in ISO 22510:2019 and the application notes published by the KNX Association. */ + +typedef struct blocks +{ + u32 b1[4]; + u32 b2[4]; + u32 b3[4]; + +} blocks_t; + +typedef struct pbkdf2_sha256_tmp +{ + u32 ipad[8]; + u32 opad[8]; + + u32 dgst[32]; + u32 out[32]; + +} pbkdf2_sha256_tmp_t; + +static const char *SIGNATURE_DEVICE_AUTHENTICATION_CODE = "$knx-ip-secure-device-authentication-code$"; +static const char *SALT_DEVICE_AUTHENTICATION_CODE = "device-authentication-code.1.secure.ip.knx.org"; +static const int ROUNDS_DEVICE_AUTHENTICATION_CODE = 65536; + +char* module_jit_build_options(MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra, MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const hc_device_param_t *device_param) +{ + char* jit_build_options = NULL; + + // Extra treatment for Apple systems + if (device_param->opencl_platform_vendor_id == VENDOR_ID_APPLE) + { + return jit_build_options; + } + + // NVIDIA GPU + if (device_param->opencl_device_vendor_id == VENDOR_ID_NV) + { + hc_asprintf(&jit_build_options, "-D _unroll"); + } + + // ROCM + if ((device_param->opencl_device_vendor_id == VENDOR_ID_AMD) && (device_param->has_vperm == true)) + { + hc_asprintf(&jit_build_options, "-D _unroll"); + } + + return jit_build_options; +} + +u64 module_esalt_size(MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u64 esalt_size = (const u64) sizeof (blocks_t); + + return esalt_size; +} + +u64 module_tmp_size(MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u64 tmp_size = (const u64) sizeof (pbkdf2_sha256_tmp_t); + + return tmp_size; +} + +u32 module_pw_min(MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u32 pw_min = 0; + + return pw_min; +} + +u32 module_pw_max(MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + // The ETS 5 allows a maximum of 20 ASCII characters for the password used to derive the device authentication code. + const u32 pw_max = 20; + + return pw_max; +} + +int module_hash_decode(MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len) +{ + u32 *digest = (u32 *) digest_buf; + + blocks_t *blocks = (blocks_t *) esalt_buf; + + token_t token; + + token.token_cnt = 4; + + token.signatures_cnt = 1; + token.signatures_buf[0] = SIGNATURE_DEVICE_AUTHENTICATION_CODE; + + // Signature + token.sep[0] = '*'; + token.len_min[0] = 42; + token.len_max[0] = 42; + token.attr[0] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_SIGNATURE; + + // Secure Session Identifier (from SESSION_RESPONSE) + token.sep[1] = '*'; + token.len_min[1] = 4; + token.len_max[1] = 4; + token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_HEX; + + // XOR of Client Public Value X (from SESSION_REQUEST) + // and Server Public Value Y (from SESSION_RESPONSE) + token.sep[2] = '*'; + token.len_min[2] = 64; + token.len_max[2] = 64; + token.attr[2] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_HEX; + + // Message Authentication Code (from SESSION_RESPONSE) + token.sep[3] = '*'; + token.len_min[3] = 32; + token.len_max[3] = 32; + token.attr[3] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_HEX; + + const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token); + + if (rc_tokenizer != PARSER_OK) return (rc_tokenizer); + + const u8 *secure_session_identifier_pos = token.buf[1]; + const int secure_session_identifier_len = token.len[1]; + + const u8 *public_value_xor_pos = token.buf[2]; + const int public_value_xor_len = token.len[2]; + + const u8 *mac_pos = token.buf[3]; + + u8 secure_session_identifier[2]; + u8 public_value_xor[32]; + + hex_decode (secure_session_identifier_pos, secure_session_identifier_len, (u8 *) &secure_session_identifier); + hex_decode (public_value_xor_pos, public_value_xor_len, (u8 *) &public_value_xor); + + digest[0] = hex_to_u32 ((const u8 *) &mac_pos[0]); + digest[1] = hex_to_u32 ((const u8 *) &mac_pos[8]); + digest[2] = hex_to_u32 ((const u8 *) &mac_pos[16]); + digest[3] = hex_to_u32 ((const u8 *) &mac_pos[24]); + + u8 b1[16] = { 0x00, //-x Length of the associated data + 0x28, //_| + 0x06, //-x KNX IP Secure header of SESSION_RESPONSE + 0x10, // | + 0x09, // | + 0x52, // | + 0x00, // | + 0x38, //_| + secure_session_identifier[0], + secure_session_identifier[1], + public_value_xor[0], + public_value_xor[1], + public_value_xor[2], + public_value_xor[3], + public_value_xor[4], + public_value_xor[5] }; + memcpy (blocks->b1, b1, sizeof(b1)); + + memcpy (blocks->b2, &public_value_xor[6], 16); + + // The bytes that don't get set are zero padding + memset (blocks->b3, 0, 16); + memcpy (blocks->b3, &public_value_xor[22], 10); + + // The salt used in the derivation of the device authentication code is constant + size_t salt_len = strlen(SALT_DEVICE_AUTHENTICATION_CODE); // exclude the null byte + memcpy (salt->salt_buf, SALT_DEVICE_AUTHENTICATION_CODE, salt_len); + salt->salt_len = salt_len; + salt->salt_iter = ROUNDS_DEVICE_AUTHENTICATION_CODE - 1; + + return (PARSER_OK); +} + +int module_hash_encode(MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size) +{ + const u32 *digest = (const u32 *) digest_buf; + + blocks_t *blocks = (blocks_t *) esalt_buf; + + u8 secure_session_identifier[2]; + u8 secure_session_identifier_hex[5] = { 0 }; + u8 public_value_xor[32]; + u8 public_value_xor_hex[65] = { 0 }; + + memcpy (secure_session_identifier, &(blocks->b1[2]), 2); + + memcpy (&public_value_xor[ 0], ((u8 *) &blocks->b1[2]) + 2, 6); + memcpy (&public_value_xor[ 6], &(blocks->b2[0]), 16); + memcpy (&public_value_xor[22], &(blocks->b3[0]), 10); + + hex_encode(secure_session_identifier, 2, secure_session_identifier_hex); + hex_encode(public_value_xor, 32, public_value_xor_hex); + + const int line_len = snprintf (line_buf, line_size, "%s*%s*%s*%08x%08x%08x%08x", + SIGNATURE_DEVICE_AUTHENTICATION_CODE, + secure_session_identifier_hex, + public_value_xor_hex, + byte_swap_32 (digest[0]), + byte_swap_32 (digest[1]), + byte_swap_32 (digest[2]), + byte_swap_32 (digest[3]) + ); + + return line_len; +} + +void module_init(module_ctx_t *module_ctx) +{ + module_ctx->module_context_size = MODULE_CONTEXT_SIZE_CURRENT; + module_ctx->module_interface_version = MODULE_INTERFACE_VERSION_CURRENT; + + module_ctx->module_attack_exec = module_attack_exec; + module_ctx->module_benchmark_esalt = MODULE_DEFAULT; + module_ctx->module_benchmark_hook_salt = MODULE_DEFAULT; + module_ctx->module_benchmark_mask = MODULE_DEFAULT; + module_ctx->module_benchmark_salt = MODULE_DEFAULT; + module_ctx->module_build_plain_postprocess = MODULE_DEFAULT; + module_ctx->module_deep_comp_kernel = MODULE_DEFAULT; + module_ctx->module_dgst_pos0 = module_dgst_pos0; + module_ctx->module_dgst_pos1 = module_dgst_pos1; + module_ctx->module_dgst_pos2 = module_dgst_pos2; + module_ctx->module_dgst_pos3 = module_dgst_pos3; + module_ctx->module_dgst_size = module_dgst_size; + module_ctx->module_dictstat_disable = MODULE_DEFAULT; + module_ctx->module_esalt_size = module_esalt_size; + module_ctx->module_extra_buffer_size = MODULE_DEFAULT; + module_ctx->module_extra_tmp_size = MODULE_DEFAULT; + module_ctx->module_forced_outfile_format = MODULE_DEFAULT; + module_ctx->module_hash_binary_count = MODULE_DEFAULT; + module_ctx->module_hash_binary_parse = MODULE_DEFAULT; + module_ctx->module_hash_binary_save = MODULE_DEFAULT; + module_ctx->module_hash_decode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_decode_zero_hash = MODULE_DEFAULT; + module_ctx->module_hash_decode = module_hash_decode; + module_ctx->module_hash_encode_status = MODULE_DEFAULT; + module_ctx->module_hash_encode_potfile = MODULE_DEFAULT; + module_ctx->module_hash_encode = module_hash_encode; + module_ctx->module_hash_init_selftest = MODULE_DEFAULT; + module_ctx->module_hash_mode = MODULE_DEFAULT; + module_ctx->module_hash_category = module_hash_category; + module_ctx->module_hash_name = module_hash_name; + module_ctx->module_hashes_count_min = MODULE_DEFAULT; + module_ctx->module_hashes_count_max = MODULE_DEFAULT; + module_ctx->module_hlfmt_disable = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_size = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_init = MODULE_DEFAULT; + module_ctx->module_hook_extra_param_term = MODULE_DEFAULT; + module_ctx->module_hook12 = MODULE_DEFAULT; + module_ctx->module_hook23 = MODULE_DEFAULT; + module_ctx->module_hook_salt_size = MODULE_DEFAULT; + module_ctx->module_hook_size = MODULE_DEFAULT; + module_ctx->module_jit_build_options = module_jit_build_options; + module_ctx->module_jit_cache_disable = MODULE_DEFAULT; + module_ctx->module_kernel_accel_max = MODULE_DEFAULT; + module_ctx->module_kernel_accel_min = MODULE_DEFAULT; + module_ctx->module_kernel_loops_max = MODULE_DEFAULT; + module_ctx->module_kernel_loops_min = MODULE_DEFAULT; + module_ctx->module_kernel_threads_max = MODULE_DEFAULT; + module_ctx->module_kernel_threads_min = MODULE_DEFAULT; + module_ctx->module_kern_type = module_kern_type; + module_ctx->module_kern_type_dynamic = MODULE_DEFAULT; + module_ctx->module_opti_type = module_opti_type; + module_ctx->module_opts_type = module_opts_type; + module_ctx->module_outfile_check_disable = MODULE_DEFAULT; + module_ctx->module_outfile_check_nocomp = MODULE_DEFAULT; + module_ctx->module_potfile_custom_check = MODULE_DEFAULT; + module_ctx->module_potfile_disable = MODULE_DEFAULT; + module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT; + module_ctx->module_pwdump_column = MODULE_DEFAULT; + module_ctx->module_pw_max = module_pw_max; + module_ctx->module_pw_min = module_pw_min; + module_ctx->module_salt_max = MODULE_DEFAULT; + module_ctx->module_salt_min = MODULE_DEFAULT; + module_ctx->module_salt_type = module_salt_type; + module_ctx->module_separator = MODULE_DEFAULT; + module_ctx->module_st_hash = module_st_hash; + module_ctx->module_st_pass = module_st_pass; + module_ctx->module_tmp_size = module_tmp_size; + module_ctx->module_unstable_warning = MODULE_DEFAULT; + module_ctx->module_warmup_disable = MODULE_DEFAULT; +} diff --git a/tools/test_modules/m25900.pm b/tools/test_modules/m25900.pm new file mode 100644 index 000000000..db16728ea --- /dev/null +++ b/tools/test_modules/m25900.pm @@ -0,0 +1,144 @@ +#!/usr/bin/env perl + +## +## Author......: Robert Guetzkow +## License.....: MIT +## + +use strict; +use warnings; + +use Crypt::PBKDF2; +use Crypt::Mode::CBC; +use Crypt::Mode::ECB; + +# Details of the protocol design can be found in ISO 22510:2019 and +# application notes published by the KNX Association. + +# ETS 5 allows a maximum of 20 characters in a password. +# The salt is used as Secure Session Identifier, which is 2 Bytes long. +sub module_constraints { [[0, 20], [2, 2], [-1, -1], [-1, -1], [-1, -1]] } + +sub device_authentication_code +{ + my $password = shift; + + my $pbkdf2 = Crypt::PBKDF2->new + ( + hasher => Crypt::PBKDF2->hasher_from_algorithm ("HMACSHA2", 256), + iterations => 65536, + output_len => 16 + ); + + my $device_authentication_code = $pbkdf2->PBKDF2 ("device-authentication-code.1.secure.ip.knx.org", + $password); + + return $device_authentication_code; +} + +sub block_formatting +{ + # Simplified block formatting function, where payload is always empty + my $b0 = shift; + my $associated_data = shift; + my $associated_data_length = pack ("s>", length ($associated_data)); + my $blocks_unpadded = $associated_data_length . $associated_data; + my $pad_len = int ((length ($blocks_unpadded) + 16 - 1) / 16) * 16; + my $blocks_padded = $blocks_unpadded . "\0" x ($pad_len - length ($blocks_unpadded)); + + return $b0 . $blocks_padded; +} + +sub encrypt +{ + # Simplified encryption that only performs steps required for the MAC, not full CCM + my $blocks = shift; + my $nonce = shift; + my $key = shift; + my $iv = "\0" x 16; + + my $aes_cbc = Crypt::Mode::CBC->new ("AES", 0); + my $ciphertext = $aes_cbc->encrypt ($blocks, $key, $iv); + my $y_n = substr ($ciphertext, length ($ciphertext) - 16, 16); + + my $aes_ecb = Crypt::Mode::ECB->new ("AES", 0); + my $s_0 = $aes_ecb->encrypt ($nonce, $key); + + return $y_n ^ $s_0; +} + +sub generate_session_response_mac +{ + my $secure_session_identifier = shift; + my $public_value_xor = shift; + my $key = shift; + + # Constants used for the cryptography in Session_Response frames + my $knx_ip_header = pack ("H*", "061009520038"); + my $b0 = pack ("H*", "00000000000000000000000000000000"); + my $nonce = pack ("H*", "0000000000000000000000000000ff00"); + + my $associated_data = $knx_ip_header . $secure_session_identifier . $public_value_xor; + + my $blocks = block_formatting ($b0, $associated_data); + + return encrypt ($blocks, $nonce, $key); +} + +sub module_generate_hash +{ + my $word = shift; + + # Parameters that would be found in the Session_Request and Session_Response frames + my $secure_session_identifier = shift; + my $public_value_xor = shift // random_bytes (32); + + my $device_authentication_code = device_authentication_code ($word); + + my $mac = generate_session_response_mac ($secure_session_identifier, + $public_value_xor, + $device_authentication_code); + + my $hash = sprintf ("\$knx-ip-secure-device-authentication-code\$*%s*%s*%s", + unpack ("H*", $secure_session_identifier), + unpack ("H*", $public_value_xor), + unpack ("H*", $mac)); + + return $hash; +} + +sub module_verify_hash +{ + my $line = shift; + + my ($hash, $word) = split (':', $line); + + return unless defined $hash; + return unless defined $word; + + my @data = split ('\*', $hash); + + return unless scalar (@data) == 4; + + my $signature = shift @data; + + return unless ($signature eq "\$knx-ip-secure-device-authentication-code\$"); + + my $secure_session_identifier = pack ("H*", shift @data); # 2 Bytes expected (using the "salt" for this purpose) + my $public_value_xor = pack ("H*", shift @data); # 32 Bytes expected (xor of client's and server's public value) + my $mac = pack ("H*", shift @data); # 16 Bytes expected + + return unless (length ($secure_session_identifier) == 2); + return unless (length ($public_value_xor) == 32); + return unless (length ($mac) == 16); + + my $word_packed = pack_if_HEX_notation ($word); + + my $new_hash = module_generate_hash ($word_packed, + $secure_session_identifier, + $public_value_xor); + + return ($new_hash, $word); +} + +1; \ No newline at end of file From c7aaf07b412b24ad44c70498e7d848d4e0f61496 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20G=C3=BCtzkow?= Date: Sun, 11 Apr 2021 14:13:47 +0200 Subject: [PATCH 2/4] Optimizations for m25900 --- OpenCL/m25900-optimized.cl | 724 +++++++++++++++++++++++++++++++++++++ 1 file changed, 724 insertions(+) create mode 100644 OpenCL/m25900-optimized.cl diff --git a/OpenCL/m25900-optimized.cl b/OpenCL/m25900-optimized.cl new file mode 100644 index 000000000..2856b54c8 --- /dev/null +++ b/OpenCL/m25900-optimized.cl @@ -0,0 +1,724 @@ +/** + * Author......: See docs/credits.txt and Robert Guetzkow + * License.....: MIT + */ + +/* + * This code implement PBKDF2-HMAC-SHA256 but makes assumptions about the input length for optimizations. + * Please keep this in mind when trying to reuse code. The comments explain what those assumptions are. + * + * The implementation is based on inc_hash_sha256.cl and m10900-pure.cl + */ + +#define NEW_SIMD_CODE + +#ifdef KERNEL_STATIC +#include "inc_vendor.h" +#include "inc_types.h" +#include "inc_platform.cl" +#include "inc_common.cl" +#include "inc_simd.cl" +#include "inc_hash_sha256.cl" +#include "inc_cipher_aes.cl" +#endif + +#define COMPARE_S "inc_comp_single.cl" +#define COMPARE_M "inc_comp_multi.cl" + +typedef struct blocks +{ + u32 b1[4]; + u32 b2[4]; + u32 b3[4]; + +} blocks_t; + +typedef struct pbkdf2_sha256_tmp +{ + u32x ipad_partial_hash[8]; + u32x opad_partial_hash[8]; + + u32x digest[32]; + u32x out[32]; + +} pbkdf2_sha256_tmp_t; + +#define SHA256_STEP_NO_Wt(F0,F1,a,b,c,d,e,f,g,h,K) \ +{ \ + h += K; \ + h = hc_add3 (h, SHA256_S3 (e), F1 (e,f,g)); \ + d += h; \ + h = hc_add3 (h, SHA256_S2 (a), F0 (a,b,c)); \ +} + +/* + * h = h + Kt + Wt -x => T1 (with Wt being 0) + * h + BSIG1(e) + CH(e,f,g) _| + * d += h - => d + T1 (d is used as e in the next step by switching the arguments.) + * h = h + BSIG0(a) + MAJ(a,b,c) - => T1 + T2 (h is used as a in the next step by switching the arguments.) + */ + +#define ROUND_EXPAND() \ +{ \ + w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \ + w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \ + w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \ + w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \ + w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \ + w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \ + w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \ + w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \ + w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \ + w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \ + wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \ + wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \ + wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \ + wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \ + we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \ + wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \ +} + +#define ROUND_STEP(i) \ +{ \ + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \ + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \ + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \ + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \ + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \ + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \ + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \ + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \ + SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \ + SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \ + SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \ + SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \ + SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \ + SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \ + SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \ + SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \ +} + +DECLSPEC void init_sha256_ctx(sha256_ctx_vector_t *ctx) +{ + ctx->h[0] = SHA256M_A; + ctx->h[1] = SHA256M_B; + ctx->h[2] = SHA256M_C; + ctx->h[3] = SHA256M_D; + ctx->h[4] = SHA256M_E; + ctx->h[5] = SHA256M_F; + ctx->h[6] = SHA256M_G; + ctx->h[7] = SHA256M_H; +} + +DECLSPEC void init_ipad(sha256_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3) +{ + init_sha256_ctx(ctx); + + ctx->w0[0] = w0[0] ^ 0x36363636; + ctx->w0[1] = w0[1] ^ 0x36363636; + ctx->w0[2] = w0[2] ^ 0x36363636; + ctx->w0[3] = w0[3] ^ 0x36363636; + ctx->w1[0] = w1[0] ^ 0x36363636; + ctx->w1[1] = w1[1] ^ 0x36363636; + ctx->w1[2] = w1[2] ^ 0x36363636; + ctx->w1[3] = w1[3] ^ 0x36363636; + ctx->w2[0] = w2[0] ^ 0x36363636; + ctx->w2[1] = w2[1] ^ 0x36363636; + ctx->w2[2] = w2[2] ^ 0x36363636; + ctx->w2[3] = w2[3] ^ 0x36363636; + ctx->w3[0] = w3[0] ^ 0x36363636; + ctx->w3[1] = w3[1] ^ 0x36363636; + ctx->w3[2] = w3[2] ^ 0x36363636; + ctx->w3[3] = w3[3] ^ 0x36363636; +} + +DECLSPEC void init_opad(sha256_ctx_vector_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3) +{ + init_sha256_ctx(ctx); + + ctx->w0[0] = w0[0] ^ 0x5c5c5c5c; + ctx->w0[1] = w0[1] ^ 0x5c5c5c5c; + ctx->w0[2] = w0[2] ^ 0x5c5c5c5c; + ctx->w0[3] = w0[3] ^ 0x5c5c5c5c; + ctx->w1[0] = w1[0] ^ 0x5c5c5c5c; + ctx->w1[1] = w1[1] ^ 0x5c5c5c5c; + ctx->w1[2] = w1[2] ^ 0x5c5c5c5c; + ctx->w1[3] = w1[3] ^ 0x5c5c5c5c; + ctx->w2[0] = w2[0] ^ 0x5c5c5c5c; + ctx->w2[1] = w2[1] ^ 0x5c5c5c5c; + ctx->w2[2] = w2[2] ^ 0x5c5c5c5c; + ctx->w2[3] = w2[3] ^ 0x5c5c5c5c; + ctx->w3[0] = w3[0] ^ 0x5c5c5c5c; + ctx->w3[1] = w3[1] ^ 0x5c5c5c5c; + ctx->w3[2] = w3[2] ^ 0x5c5c5c5c; + ctx->w3[3] = w3[3] ^ 0x5c5c5c5c; +} + +DECLSPEC void sha256_transform_hash(const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest) +{ + /* + * This function assumes that the input is a hash of length 256 bit with padding applied and that the total length + * of all blocks is 768 bit. This allows to perform optimizations in the message schedule and hash round since some + * words are known to be all zero bits, thus not contributing to some of the calculation. Additionally, calculations + * for words that are known to be constant have been precomputed. + * + * The 256 bit hash is located in the first 8 words (index 0 to 7), followed by one word that has one bit set. + * The length is represented as a 128 bit integer in the last 4 words. However, since for the HMAC calculation + * the total size of all blocks doesn't exceed 768 bit, including ipad and opad respectively, only the last + * word (index 15) contains the length bits. Thus the 32 bit words from index 9 to 14 are all zero bits. + * Whenever these words would be used by the message schedule in + * Wt = SSIG1(W(t-2)) + W(t-7) + SSIG0(W(t-15)) + W(t-16) [1] + * or in the hash round in + * T1 = h + BSIG1(e) + CH(e,f,g) + Kt + Wt [1] + * the calculation can be simplified to remove the operand. + * + * The word at index 8, with one bit set, and the word at index 15, containing the length, are know to be constant. + * Therefore, the operations where they are used as an operand can be partially precomputed. For the message schedule + * this is possible for SSIG1(W(t-2)) and SSIG0(W(t-15)). In the hash round the Kt + Wt can be precomputed when Wt + * is constant. + * + * Like sha256_transform_vector it performs the message schedule and hash round calculation jointly for 16 of the + * 32 bit words. This requires fewer variables and thus less memory to hold the state, compared to calculating + * the whole message schedule first and then performing the hash round. + * + * [1] RFC 6234, section 6.2, https://tools.ietf.org/html/rfc6234#section-6.2 + */ + + u32x a = digest[0]; + u32x b = digest[1]; + u32x c = digest[2]; + u32x d = digest[3]; + u32x e = digest[4]; + u32x f = digest[5]; + u32x g = digest[6]; + u32x h = digest[7]; + + // This assignment is equivalent to the message schedule for the first 16 words. + u32x w0_t = w0[0]; + u32x w1_t = w0[1]; + u32x w2_t = w0[2]; + u32x w3_t = w0[3]; + u32x w4_t = w1[0]; + u32x w5_t = w1[1]; + u32x w6_t = w1[2]; + u32x w7_t = w1[3]; + u32x w8_t = w2[0]; + u32x w9_t = w2[1]; + u32x wa_t = w2[2]; + u32x wb_t = w2[3]; + u32x wc_t = w3[0]; + u32x wd_t = w3[1]; + u32x we_t = w3[2]; + u32x wf_t = w3[3]; + + // The first 16 words have already been assigned, perform the first hash round. Don't use W_t when zero. + SHA256_STEP(SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[0]); + SHA256_STEP(SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[1]); + SHA256_STEP(SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[2]); + SHA256_STEP(SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[3]); + SHA256_STEP(SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[4]); + SHA256_STEP(SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[5]); + SHA256_STEP(SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[6]); + SHA256_STEP(SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[7]); + SHA256_STEP_NO_Wt(SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, 0x5807aa98); + SHA256_STEP_NO_Wt(SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, k_sha256[9]); + SHA256_STEP_NO_Wt(SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, k_sha256[10]); + SHA256_STEP_NO_Wt(SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, k_sha256[11]); + SHA256_STEP_NO_Wt(SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, k_sha256[12]); + SHA256_STEP_NO_Wt(SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, k_sha256[13]); + SHA256_STEP_NO_Wt(SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, k_sha256[14]); + SHA256_STEP_NO_Wt(SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, 0xc19bf474); + + // The message schedule for words 16 to 32 can skip calculations when W_t is zero + w0_t = SHA256_S0(w1_t) + w0_t; + w1_t = 0x01e00000 + SHA256_S0(w2_t) + w1_t; + w2_t = SHA256_S1(w0_t) + SHA256_S0(w3_t) + w2_t; + w3_t = SHA256_S1(w1_t) + SHA256_S0(w4_t) + w3_t; + w4_t = SHA256_S1(w2_t) + SHA256_S0(w5_t) + w4_t; + w5_t = SHA256_S1(w3_t) + SHA256_S0(w6_t) + w5_t; + w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; + w7_t = SHA256_S1(w5_t) + w0_t + 0x11002000 + w7_t; + w8_t = SHA256_S1(w6_t) + w1_t + w8_t; + w9_t = SHA256_S1(w7_t) + w2_t; + wa_t = SHA256_S1(w8_t) + w3_t; + wb_t = SHA256_S1(w9_t) + w4_t; + wc_t = SHA256_S1(wa_t) + w5_t; + wd_t = SHA256_S1(wb_t) + w6_t; + we_t = SHA256_S1(wc_t) + w7_t + 0x00c00066; + wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; + + // Following rounds do not have words that are guaranteed to be zero or constant, thus perform full calculations. + ROUND_STEP(16); + ROUND_EXPAND(); + ROUND_STEP(32); + ROUND_EXPAND(); + ROUND_STEP(48); + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; +} + +DECLSPEC void partial_hashes_ipad_opad(pbkdf2_sha256_tmp *tmp, GLOBAL_AS const u32 *pwd) +{ + /* + * This functions assumes that passwords are smaller than 512 bit, which is the case for KNX IP Secure as the ETS 5 limits + * the maximum length to 20 characters. + * + * Both ipad and opad remain constant for a given password throughout the PBKDF2 computation. Futhermore they are both + * 512 bit long, which is exactly the block size of SHA-256. Thus, it is possible to compute a partial hash for both + * without knowing what will be concatenated to ipad and opad, as the processing in SHA-256 happens in blocks of 512 bit. + * The resulting intermediate result can be stored and reused in all rounds of the PBKDF. + */ + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = make_u32x (hc_swap32_S (pwd[ 0])); + w0[1] = make_u32x (hc_swap32_S (pwd[ 1])); + w0[2] = make_u32x (hc_swap32_S (pwd[ 2])); + w0[3] = make_u32x (hc_swap32_S (pwd[ 3])); + w1[0] = make_u32x (hc_swap32_S (pwd[ 4])); + w1[1] = make_u32x (hc_swap32_S (pwd[ 5])); + w1[2] = make_u32x (hc_swap32_S (pwd[ 6])); + w1[3] = make_u32x (hc_swap32_S (pwd[ 7])); + w2[0] = make_u32x (hc_swap32_S (pwd[ 8])); + w2[1] = make_u32x (hc_swap32_S (pwd[ 9])); + w2[2] = make_u32x (hc_swap32_S (pwd[10])); + w2[3] = make_u32x (hc_swap32_S (pwd[11])); + w3[0] = make_u32x (hc_swap32_S (pwd[12])); + w3[1] = make_u32x (hc_swap32_S (pwd[13])); + w3[2] = make_u32x (hc_swap32_S (pwd[14])); + w3[3] = make_u32x (hc_swap32_S (pwd[15])); + + sha256_hmac_ctx_vector_t sha256_hmac_ctx_vector; + + // The partial hash is equivalent to computing the hash of just that one block + init_ipad (&sha256_hmac_ctx_vector.ipad, w0, w1, w2, w3); + init_opad (&sha256_hmac_ctx_vector.opad, w0, w1, w2, w3); + + sha256_transform_vector (sha256_hmac_ctx_vector.ipad.w0, + sha256_hmac_ctx_vector.ipad.w1, + sha256_hmac_ctx_vector.ipad.w2, + sha256_hmac_ctx_vector.ipad.w3, + sha256_hmac_ctx_vector.ipad.h); + + sha256_transform_vector (sha256_hmac_ctx_vector.opad.w0, + sha256_hmac_ctx_vector.opad.w1, + sha256_hmac_ctx_vector.opad.w2, + sha256_hmac_ctx_vector.opad.w3, + sha256_hmac_ctx_vector.opad.h); + + tmp->ipad_partial_hash[0] = sha256_hmac_ctx_vector.ipad.h[0]; + tmp->ipad_partial_hash[1] = sha256_hmac_ctx_vector.ipad.h[1]; + tmp->ipad_partial_hash[2] = sha256_hmac_ctx_vector.ipad.h[2]; + tmp->ipad_partial_hash[3] = sha256_hmac_ctx_vector.ipad.h[3]; + tmp->ipad_partial_hash[4] = sha256_hmac_ctx_vector.ipad.h[4]; + tmp->ipad_partial_hash[5] = sha256_hmac_ctx_vector.ipad.h[5]; + tmp->ipad_partial_hash[6] = sha256_hmac_ctx_vector.ipad.h[6]; + tmp->ipad_partial_hash[7] = sha256_hmac_ctx_vector.ipad.h[7]; + + tmp->opad_partial_hash[0] = sha256_hmac_ctx_vector.opad.h[0]; + tmp->opad_partial_hash[1] = sha256_hmac_ctx_vector.opad.h[1]; + tmp->opad_partial_hash[2] = sha256_hmac_ctx_vector.opad.h[2]; + tmp->opad_partial_hash[3] = sha256_hmac_ctx_vector.opad.h[3]; + tmp->opad_partial_hash[4] = sha256_hmac_ctx_vector.opad.h[4]; + tmp->opad_partial_hash[5] = sha256_hmac_ctx_vector.opad.h[5]; + tmp->opad_partial_hash[6] = sha256_hmac_ctx_vector.opad.h[6]; + tmp->opad_partial_hash[7] = sha256_hmac_ctx_vector.opad.h[7]; +} + +DECLSPEC void hmac_sha256(u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad_partial_hash, u32x *opad_partial_hash, u32x *digest) +{ + /* + * This function assumes that the input has been padded according to RFC 6234 [3]. + * + * [3] RFC 6234, section 4.1, https://tools.ietf.org/html/rfc6234#section-4.1 + */ + + digest[0] = ipad_partial_hash[0]; + digest[1] = ipad_partial_hash[1]; + digest[2] = ipad_partial_hash[2]; + digest[3] = ipad_partial_hash[3]; + digest[4] = ipad_partial_hash[4]; + digest[5] = ipad_partial_hash[5]; + digest[6] = ipad_partial_hash[6]; + digest[7] = ipad_partial_hash[7]; + + sha256_transform_vector (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; + w1[3] = digest[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 768; // 512 bit for they ipad and 256 bit for the previous hash + + digest[0] = opad_partial_hash[0]; + digest[1] = opad_partial_hash[1]; + digest[2] = opad_partial_hash[2]; + digest[3] = opad_partial_hash[3]; + digest[4] = opad_partial_hash[4]; + digest[5] = opad_partial_hash[5]; + digest[6] = opad_partial_hash[6]; + digest[7] = opad_partial_hash[7]; + + sha256_transform_hash (w0, w1, w2, w3, digest); +} + +DECLSPEC void hmac_sha256_for_hash(u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad_partial_hash, u32x *opad_partial_hash, u32x *digest) +{ + /* + * This function assumes that the input is the block containing the hash of 256 bit length and has been padded according to RFC 6234 [3] + * + * [3] RFC 6234, section 4.1, https://tools.ietf.org/html/rfc6234#section-4.1 + */ + + digest[0] = ipad_partial_hash[0]; + digest[1] = ipad_partial_hash[1]; + digest[2] = ipad_partial_hash[2]; + digest[3] = ipad_partial_hash[3]; + digest[4] = ipad_partial_hash[4]; + digest[5] = ipad_partial_hash[5]; + digest[6] = ipad_partial_hash[6]; + digest[7] = ipad_partial_hash[7]; + + sha256_transform_hash (w0, w1, w2, w3, digest); + + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; + w1[3] = digest[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 768; // 512 bit for they ipad and 256 bit for the previous hash + + digest[0] = opad_partial_hash[0]; + digest[1] = opad_partial_hash[1]; + digest[2] = opad_partial_hash[2]; + digest[3] = opad_partial_hash[3]; + digest[4] = opad_partial_hash[4]; + digest[5] = opad_partial_hash[5]; + digest[6] = opad_partial_hash[6]; + digest[7] = opad_partial_hash[7]; + + sha256_transform_hash (w0, w1, w2, w3, digest); +} + +DECLSPEC void hmac_sha256_first_round(pbkdf2_sha256_tmp *tmp, GLOBAL_AS const u32 *salt, const int len) +{ + /* + * This function assumes that the salt is less than 56 byte (448 bit), which is the case for + * KNX IP Secure as the salt is constant and 46 byte (368 bit) long. + */ + + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + w0[0] = make_u32x (hc_swap32_S (salt[ 0])); + w0[1] = make_u32x (hc_swap32_S (salt[ 1])); + w0[2] = make_u32x (hc_swap32_S (salt[ 2])); + w0[3] = make_u32x (hc_swap32_S (salt[ 3])); + w1[0] = make_u32x (hc_swap32_S (salt[ 4])); + w1[1] = make_u32x (hc_swap32_S (salt[ 5])); + w1[2] = make_u32x (hc_swap32_S (salt[ 6])); + w1[3] = make_u32x (hc_swap32_S (salt[ 7])); + w2[0] = make_u32x (hc_swap32_S (salt[ 8])); + w2[1] = make_u32x (hc_swap32_S (salt[ 9])); + w2[2] = make_u32x (hc_swap32_S (salt[10])); + w2[3] = make_u32x (hc_swap32_S (salt[11])); + w3[0] = make_u32x (hc_swap32_S (salt[12])); + w3[1] = make_u32x (hc_swap32_S (salt[13])); + w3[2] = make_u32x (hc_swap32_S (salt[14])); + w3[3] = make_u32x (hc_swap32_S (salt[15])); + + /* + * PBKDF2 requires the one-based 32 bit big-endian block index to be appended to the salt [2]. + * Since the salt is used in the first block, that integer is 1. + * + * [2] RFC 8018, section 5.2, item 3, https://tools.ietf.org/html/rfc8018#section-5.2 + */ + + u32x i0[4]; + u32x i1[4]; + u32x i2[4]; + u32x i3[4]; + + i0[0] = 1; + i0[1] = 0; + i0[2] = 0; + i0[3] = 0; + i1[0] = 0; + i1[1] = 0; + i1[2] = 0; + i1[3] = 0; + i2[0] = 0; + i2[1] = 0; + i2[2] = 0; + i2[3] = 0; + i3[0] = 0; + i3[1] = 0; + i3[2] = 0; + i3[3] = 0; + + switch_buffer_by_offset_be(i0, i1, i2, i3, len & 63); // Shift to the correct position after the end of the salt + + w0[0] |= i0[0]; + w0[1] |= i0[1]; + w0[2] |= i0[2]; + w0[3] |= i0[3]; + w1[0] |= i1[0]; + w1[1] |= i1[1]; + w1[2] |= i1[2]; + w1[3] |= i1[3]; + w2[0] |= i2[0]; + w2[1] |= i2[1]; + w2[2] |= i2[2]; + w2[3] |= i2[3]; + w3[0] |= i3[0]; + w3[1] |= i3[1]; + w3[2] |= i3[2]; + w3[3] |= i3[3]; + + // Updated length with the 32 bit block index appended + MAYBE_VOLATILE const int len_updated = len + 4; + + /* + * Pad salt to 512 bit using the padding scheme described in RFC 6234 [3] + * + * [3] RFC 6234, section 4.1, https://tools.ietf.org/html/rfc6234#section-4.1 + */ + append_0x80_4x4 (w0, w1, w2, w3, (len_updated & 63) ^ 3); + w3[2] = 0; + w3[3] = len_updated * 8 + 512; // Length in bits, ipad is 512 bit + + hmac_sha256 (w0, w1, w2, w3, tmp->ipad_partial_hash, tmp->opad_partial_hash, tmp->digest); + + tmp->out[0] = tmp->digest[0]; + tmp->out[1] = tmp->digest[1]; + tmp->out[2] = tmp->digest[2]; + tmp->out[3] = tmp->digest[3]; + tmp->out[4] = tmp->digest[4]; + tmp->out[5] = tmp->digest[5]; + tmp->out[6] = tmp->digest[6]; + tmp->out[7] = tmp->digest[7]; +} + +DECLSPEC void aes128_encrypt_cbc (const u32 *aes_ks, u32 *aes_iv, const u32 *in, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) +{ + u32 in_s[4]; + + in_s[0] = in[0]; + in_s[1] = in[1]; + in_s[2] = in[2]; + in_s[3] = in[3]; + + in_s[0] ^= aes_iv[0]; + in_s[1] ^= aes_iv[1]; + in_s[2] ^= aes_iv[2]; + in_s[3] ^= aes_iv[3]; + + aes128_encrypt (aes_ks, in_s, out, s_te0, s_te1, s_te2, s_te3, s_te4); + + aes_iv[0] = out[0]; + aes_iv[1] = out[1]; + aes_iv[2] = out[2]; + aes_iv[3] = out[3]; +} + +KERNEL_FQ void m25900_init(KERN_ATTR_TMPS(pbkdf2_sha256_tmp_t)) +{ + const u64 gid = get_global_id(0); + + if (gid >= gid_max) return; + + partial_hashes_ipad_opad(&tmps[gid], pws[gid].i); + + hmac_sha256_first_round(&tmps[gid], salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); +} + +KERNEL_FQ void m25900_loop(KERN_ATTR_TMPS(pbkdf2_sha256_tmp_t)) +{ + const u64 gid = get_global_id(0); + + if ((gid * VECT_SIZE) >= gid_max) return; + + u32x* ipad_partial_hash = tmps[gid].ipad_partial_hash; + u32x* opad_partial_hash = tmps[gid].opad_partial_hash; + u32x* digest = tmps[gid].digest; + u32x* out = tmps[gid].out; + + for (u32 j = 0; j < loop_cnt; j++) + { + u32x w0[4]; + u32x w1[4]; + u32x w2[4]; + u32x w3[4]; + + // Pad the 256 bit hash from the previous PBKDF2-HMAC-SHA256 round to 512 bit + w0[0] = digest[0]; + w0[1] = digest[1]; + w0[2] = digest[2]; + w0[3] = digest[3]; + w1[0] = digest[4]; + w1[1] = digest[5]; + w1[2] = digest[6]; + w1[3] = digest[7]; + w2[0] = 0x80000000; + w2[1] = 0; + w2[2] = 0; + w2[3] = 0; + w3[0] = 0; + w3[1] = 0; + w3[2] = 0; + w3[3] = 768; // 512 bit for they ipad and 256 bit for the previous hash + + hmac_sha256_for_hash (w0, w1, w2, w3, ipad_partial_hash, opad_partial_hash, digest); + + // XOR digest created by HMAC-SHA256 for the PBKDF2 round + out[0] ^= digest[0]; + out[1] ^= digest[1]; + out[2] ^= digest[2]; + out[3] ^= digest[3]; + out[4] ^= digest[4]; + out[5] ^= digest[5]; + out[6] ^= digest[6]; + out[7] ^= digest[7]; + } +} + +KERNEL_FQ void m25900_comp(KERN_ATTR_TMPS_ESALT(pbkdf2_sha256_tmp_t, blocks_t)) +{ + /** + * base + */ + + const u64 gid = get_global_id(0); + const u64 lid = get_local_id(0); + const u64 lsz = get_local_size(0); + + /** + * aes shared + */ + + #ifdef REAL_SHM + + LOCAL_VK u32 s_td0[256]; + LOCAL_VK u32 s_td1[256]; + LOCAL_VK u32 s_td2[256]; + LOCAL_VK u32 s_td3[256]; + LOCAL_VK u32 s_td4[256]; + + LOCAL_VK u32 s_te0[256]; + LOCAL_VK u32 s_te1[256]; + LOCAL_VK u32 s_te2[256]; + LOCAL_VK u32 s_te3[256]; + LOCAL_VK u32 s_te4[256]; + + for (u32 i = lid; i < 256; i += lsz) + { + s_td0[i] = td0[i]; + s_td1[i] = td1[i]; + s_td2[i] = td2[i]; + s_td3[i] = td3[i]; + s_td4[i] = td4[i]; + + s_te0[i] = te0[i]; + s_te1[i] = te1[i]; + s_te2[i] = te2[i]; + s_te3[i] = te3[i]; + s_te4[i] = te4[i]; + } + + SYNC_THREADS(); + + #else + + CONSTANT_AS u32a* s_td0 = td0; + CONSTANT_AS u32a* s_td1 = td1; + CONSTANT_AS u32a* s_td2 = td2; + CONSTANT_AS u32a* s_td3 = td3; + CONSTANT_AS u32a* s_td4 = td4; + + CONSTANT_AS u32a* s_te0 = te0; + CONSTANT_AS u32a* s_te1 = te1; + CONSTANT_AS u32a* s_te2 = te2; + CONSTANT_AS u32a* s_te3 = te3; + CONSTANT_AS u32a* s_te4 = te4; + + #endif + + if (gid >= gid_max) return; + + u32 key[4]; + + key[0] = tmps[gid].out[DGST_R0]; + key[1] = tmps[gid].out[DGST_R1]; + key[2] = tmps[gid].out[DGST_R2]; + key[3] = tmps[gid].out[DGST_R3]; + + u32 aes_ks[44]; + + AES128_set_encrypt_key (aes_ks, key, s_te0, s_te1, s_te2, s_te3); + + u32 b0[4] = { 0 }; + + u32 aes_cbc_iv[4] = { 0 }; + + u32 yn[4]; + + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, b0, yn, s_te0, s_te1, s_te2, s_te3, s_te4); + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, esalt_bufs[DIGESTS_OFFSET].b1, yn, s_te0, s_te1, s_te2, s_te3, s_te4); + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, esalt_bufs[DIGESTS_OFFSET].b2, yn, s_te0, s_te1, s_te2, s_te3, s_te4); + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, esalt_bufs[DIGESTS_OFFSET].b3, yn, s_te0, s_te1, s_te2, s_te3, s_te4); + + u32 nonce[4]; + + nonce[0] = 0; + nonce[1] = 0; + nonce[2] = 0; + nonce[3] = 0x00ff0000; // already swapped + + u32 s0[4]; + + aes128_encrypt(aes_ks, nonce, s0, s_te0, s_te1, s_te2, s_te3, s_te4); + + const u32 r0 = yn[0] ^ s0[0]; + const u32 r1 = yn[1] ^ s0[1]; + const u32 r2 = yn[2] ^ s0[2]; + const u32 r3 = yn[3] ^ s0[3]; + +#define il_pos 0 + +#ifdef KERNEL_STATIC +#include COMPARE_M +#endif +} From 6928b9569353ad20ca840dde6673aac5d977c34e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20G=C3=BCtzkow?= Date: Sun, 11 Apr 2021 14:41:16 +0200 Subject: [PATCH 3/4] Add missing new line test module --- tools/test_modules/m25900.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/test_modules/m25900.pm b/tools/test_modules/m25900.pm index db16728ea..a161f662f 100644 --- a/tools/test_modules/m25900.pm +++ b/tools/test_modules/m25900.pm @@ -141,4 +141,4 @@ sub module_verify_hash return ($new_hash, $word); } -1; \ No newline at end of file +1; From ada829fa20693a06c2ce9a00ce9e636ae9b0de7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20G=C3=BCtzkow?= Date: Mon, 12 Apr 2021 09:48:38 +0200 Subject: [PATCH 4/4] Fix the processing for constant salt KNX IP Secure uses a constant salt, which require the use of `OPTS_TYPE_DEEP_COMP_KERNEL`. This commit adds the required options and adjusts the indexing of the esalt accordingly. The attempt at an optimized kernel has been removed as requested in the PR feedback. Additionally, minor formatting improvements have been made. --- OpenCL/m25900-optimized.cl | 724 ------------------------------------- OpenCL/m25900-pure.cl | 136 ++++--- src/modules/module_25900.c | 10 +- 3 files changed, 87 insertions(+), 783 deletions(-) delete mode 100644 OpenCL/m25900-optimized.cl diff --git a/OpenCL/m25900-optimized.cl b/OpenCL/m25900-optimized.cl deleted file mode 100644 index 2856b54c8..000000000 --- a/OpenCL/m25900-optimized.cl +++ /dev/null @@ -1,724 +0,0 @@ -/** - * Author......: See docs/credits.txt and Robert Guetzkow - * License.....: MIT - */ - -/* - * This code implement PBKDF2-HMAC-SHA256 but makes assumptions about the input length for optimizations. - * Please keep this in mind when trying to reuse code. The comments explain what those assumptions are. - * - * The implementation is based on inc_hash_sha256.cl and m10900-pure.cl - */ - -#define NEW_SIMD_CODE - -#ifdef KERNEL_STATIC -#include "inc_vendor.h" -#include "inc_types.h" -#include "inc_platform.cl" -#include "inc_common.cl" -#include "inc_simd.cl" -#include "inc_hash_sha256.cl" -#include "inc_cipher_aes.cl" -#endif - -#define COMPARE_S "inc_comp_single.cl" -#define COMPARE_M "inc_comp_multi.cl" - -typedef struct blocks -{ - u32 b1[4]; - u32 b2[4]; - u32 b3[4]; - -} blocks_t; - -typedef struct pbkdf2_sha256_tmp -{ - u32x ipad_partial_hash[8]; - u32x opad_partial_hash[8]; - - u32x digest[32]; - u32x out[32]; - -} pbkdf2_sha256_tmp_t; - -#define SHA256_STEP_NO_Wt(F0,F1,a,b,c,d,e,f,g,h,K) \ -{ \ - h += K; \ - h = hc_add3 (h, SHA256_S3 (e), F1 (e,f,g)); \ - d += h; \ - h = hc_add3 (h, SHA256_S2 (a), F0 (a,b,c)); \ -} - -/* - * h = h + Kt + Wt -x => T1 (with Wt being 0) - * h + BSIG1(e) + CH(e,f,g) _| - * d += h - => d + T1 (d is used as e in the next step by switching the arguments.) - * h = h + BSIG0(a) + MAJ(a,b,c) - => T1 + T2 (h is used as a in the next step by switching the arguments.) - */ - -#define ROUND_EXPAND() \ -{ \ - w0_t = SHA256_EXPAND (we_t, w9_t, w1_t, w0_t); \ - w1_t = SHA256_EXPAND (wf_t, wa_t, w2_t, w1_t); \ - w2_t = SHA256_EXPAND (w0_t, wb_t, w3_t, w2_t); \ - w3_t = SHA256_EXPAND (w1_t, wc_t, w4_t, w3_t); \ - w4_t = SHA256_EXPAND (w2_t, wd_t, w5_t, w4_t); \ - w5_t = SHA256_EXPAND (w3_t, we_t, w6_t, w5_t); \ - w6_t = SHA256_EXPAND (w4_t, wf_t, w7_t, w6_t); \ - w7_t = SHA256_EXPAND (w5_t, w0_t, w8_t, w7_t); \ - w8_t = SHA256_EXPAND (w6_t, w1_t, w9_t, w8_t); \ - w9_t = SHA256_EXPAND (w7_t, w2_t, wa_t, w9_t); \ - wa_t = SHA256_EXPAND (w8_t, w3_t, wb_t, wa_t); \ - wb_t = SHA256_EXPAND (w9_t, w4_t, wc_t, wb_t); \ - wc_t = SHA256_EXPAND (wa_t, w5_t, wd_t, wc_t); \ - wd_t = SHA256_EXPAND (wb_t, w6_t, we_t, wd_t); \ - we_t = SHA256_EXPAND (wc_t, w7_t, wf_t, we_t); \ - wf_t = SHA256_EXPAND (wd_t, w8_t, w0_t, wf_t); \ -} - -#define ROUND_STEP(i) \ -{ \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[i + 0]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[i + 1]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[i + 2]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[i + 3]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[i + 4]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[i + 5]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[i + 6]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[i + 7]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w8_t, k_sha256[i + 8]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w9_t, k_sha256[i + 9]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, wa_t, k_sha256[i + 10]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, wb_t, k_sha256[i + 11]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, wc_t, k_sha256[i + 12]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, wd_t, k_sha256[i + 13]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, we_t, k_sha256[i + 14]); \ - SHA256_STEP (SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, wf_t, k_sha256[i + 15]); \ -} - -DECLSPEC void init_sha256_ctx(sha256_ctx_vector_t *ctx) -{ - ctx->h[0] = SHA256M_A; - ctx->h[1] = SHA256M_B; - ctx->h[2] = SHA256M_C; - ctx->h[3] = SHA256M_D; - ctx->h[4] = SHA256M_E; - ctx->h[5] = SHA256M_F; - ctx->h[6] = SHA256M_G; - ctx->h[7] = SHA256M_H; -} - -DECLSPEC void init_ipad(sha256_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3) -{ - init_sha256_ctx(ctx); - - ctx->w0[0] = w0[0] ^ 0x36363636; - ctx->w0[1] = w0[1] ^ 0x36363636; - ctx->w0[2] = w0[2] ^ 0x36363636; - ctx->w0[3] = w0[3] ^ 0x36363636; - ctx->w1[0] = w1[0] ^ 0x36363636; - ctx->w1[1] = w1[1] ^ 0x36363636; - ctx->w1[2] = w1[2] ^ 0x36363636; - ctx->w1[3] = w1[3] ^ 0x36363636; - ctx->w2[0] = w2[0] ^ 0x36363636; - ctx->w2[1] = w2[1] ^ 0x36363636; - ctx->w2[2] = w2[2] ^ 0x36363636; - ctx->w2[3] = w2[3] ^ 0x36363636; - ctx->w3[0] = w3[0] ^ 0x36363636; - ctx->w3[1] = w3[1] ^ 0x36363636; - ctx->w3[2] = w3[2] ^ 0x36363636; - ctx->w3[3] = w3[3] ^ 0x36363636; -} - -DECLSPEC void init_opad(sha256_ctx_vector_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3) -{ - init_sha256_ctx(ctx); - - ctx->w0[0] = w0[0] ^ 0x5c5c5c5c; - ctx->w0[1] = w0[1] ^ 0x5c5c5c5c; - ctx->w0[2] = w0[2] ^ 0x5c5c5c5c; - ctx->w0[3] = w0[3] ^ 0x5c5c5c5c; - ctx->w1[0] = w1[0] ^ 0x5c5c5c5c; - ctx->w1[1] = w1[1] ^ 0x5c5c5c5c; - ctx->w1[2] = w1[2] ^ 0x5c5c5c5c; - ctx->w1[3] = w1[3] ^ 0x5c5c5c5c; - ctx->w2[0] = w2[0] ^ 0x5c5c5c5c; - ctx->w2[1] = w2[1] ^ 0x5c5c5c5c; - ctx->w2[2] = w2[2] ^ 0x5c5c5c5c; - ctx->w2[3] = w2[3] ^ 0x5c5c5c5c; - ctx->w3[0] = w3[0] ^ 0x5c5c5c5c; - ctx->w3[1] = w3[1] ^ 0x5c5c5c5c; - ctx->w3[2] = w3[2] ^ 0x5c5c5c5c; - ctx->w3[3] = w3[3] ^ 0x5c5c5c5c; -} - -DECLSPEC void sha256_transform_hash(const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest) -{ - /* - * This function assumes that the input is a hash of length 256 bit with padding applied and that the total length - * of all blocks is 768 bit. This allows to perform optimizations in the message schedule and hash round since some - * words are known to be all zero bits, thus not contributing to some of the calculation. Additionally, calculations - * for words that are known to be constant have been precomputed. - * - * The 256 bit hash is located in the first 8 words (index 0 to 7), followed by one word that has one bit set. - * The length is represented as a 128 bit integer in the last 4 words. However, since for the HMAC calculation - * the total size of all blocks doesn't exceed 768 bit, including ipad and opad respectively, only the last - * word (index 15) contains the length bits. Thus the 32 bit words from index 9 to 14 are all zero bits. - * Whenever these words would be used by the message schedule in - * Wt = SSIG1(W(t-2)) + W(t-7) + SSIG0(W(t-15)) + W(t-16) [1] - * or in the hash round in - * T1 = h + BSIG1(e) + CH(e,f,g) + Kt + Wt [1] - * the calculation can be simplified to remove the operand. - * - * The word at index 8, with one bit set, and the word at index 15, containing the length, are know to be constant. - * Therefore, the operations where they are used as an operand can be partially precomputed. For the message schedule - * this is possible for SSIG1(W(t-2)) and SSIG0(W(t-15)). In the hash round the Kt + Wt can be precomputed when Wt - * is constant. - * - * Like sha256_transform_vector it performs the message schedule and hash round calculation jointly for 16 of the - * 32 bit words. This requires fewer variables and thus less memory to hold the state, compared to calculating - * the whole message schedule first and then performing the hash round. - * - * [1] RFC 6234, section 6.2, https://tools.ietf.org/html/rfc6234#section-6.2 - */ - - u32x a = digest[0]; - u32x b = digest[1]; - u32x c = digest[2]; - u32x d = digest[3]; - u32x e = digest[4]; - u32x f = digest[5]; - u32x g = digest[6]; - u32x h = digest[7]; - - // This assignment is equivalent to the message schedule for the first 16 words. - u32x w0_t = w0[0]; - u32x w1_t = w0[1]; - u32x w2_t = w0[2]; - u32x w3_t = w0[3]; - u32x w4_t = w1[0]; - u32x w5_t = w1[1]; - u32x w6_t = w1[2]; - u32x w7_t = w1[3]; - u32x w8_t = w2[0]; - u32x w9_t = w2[1]; - u32x wa_t = w2[2]; - u32x wb_t = w2[3]; - u32x wc_t = w3[0]; - u32x wd_t = w3[1]; - u32x we_t = w3[2]; - u32x wf_t = w3[3]; - - // The first 16 words have already been assigned, perform the first hash round. Don't use W_t when zero. - SHA256_STEP(SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, w0_t, k_sha256[0]); - SHA256_STEP(SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, w1_t, k_sha256[1]); - SHA256_STEP(SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, w2_t, k_sha256[2]); - SHA256_STEP(SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, w3_t, k_sha256[3]); - SHA256_STEP(SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, w4_t, k_sha256[4]); - SHA256_STEP(SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, w5_t, k_sha256[5]); - SHA256_STEP(SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, w6_t, k_sha256[6]); - SHA256_STEP(SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, w7_t, k_sha256[7]); - SHA256_STEP_NO_Wt(SHA256_F0o, SHA256_F1o, a, b, c, d, e, f, g, h, 0x5807aa98); - SHA256_STEP_NO_Wt(SHA256_F0o, SHA256_F1o, h, a, b, c, d, e, f, g, k_sha256[9]); - SHA256_STEP_NO_Wt(SHA256_F0o, SHA256_F1o, g, h, a, b, c, d, e, f, k_sha256[10]); - SHA256_STEP_NO_Wt(SHA256_F0o, SHA256_F1o, f, g, h, a, b, c, d, e, k_sha256[11]); - SHA256_STEP_NO_Wt(SHA256_F0o, SHA256_F1o, e, f, g, h, a, b, c, d, k_sha256[12]); - SHA256_STEP_NO_Wt(SHA256_F0o, SHA256_F1o, d, e, f, g, h, a, b, c, k_sha256[13]); - SHA256_STEP_NO_Wt(SHA256_F0o, SHA256_F1o, c, d, e, f, g, h, a, b, k_sha256[14]); - SHA256_STEP_NO_Wt(SHA256_F0o, SHA256_F1o, b, c, d, e, f, g, h, a, 0xc19bf474); - - // The message schedule for words 16 to 32 can skip calculations when W_t is zero - w0_t = SHA256_S0(w1_t) + w0_t; - w1_t = 0x01e00000 + SHA256_S0(w2_t) + w1_t; - w2_t = SHA256_S1(w0_t) + SHA256_S0(w3_t) + w2_t; - w3_t = SHA256_S1(w1_t) + SHA256_S0(w4_t) + w3_t; - w4_t = SHA256_S1(w2_t) + SHA256_S0(w5_t) + w4_t; - w5_t = SHA256_S1(w3_t) + SHA256_S0(w6_t) + w5_t; - w6_t = SHA256_S1(w4_t) + wf_t + SHA256_S0(w7_t) + w6_t; - w7_t = SHA256_S1(w5_t) + w0_t + 0x11002000 + w7_t; - w8_t = SHA256_S1(w6_t) + w1_t + w8_t; - w9_t = SHA256_S1(w7_t) + w2_t; - wa_t = SHA256_S1(w8_t) + w3_t; - wb_t = SHA256_S1(w9_t) + w4_t; - wc_t = SHA256_S1(wa_t) + w5_t; - wd_t = SHA256_S1(wb_t) + w6_t; - we_t = SHA256_S1(wc_t) + w7_t + 0x00c00066; - wf_t = SHA256_S1(wd_t) + w8_t + SHA256_S0(w0_t) + wf_t; - - // Following rounds do not have words that are guaranteed to be zero or constant, thus perform full calculations. - ROUND_STEP(16); - ROUND_EXPAND(); - ROUND_STEP(32); - ROUND_EXPAND(); - ROUND_STEP(48); - - digest[0] += a; - digest[1] += b; - digest[2] += c; - digest[3] += d; - digest[4] += e; - digest[5] += f; - digest[6] += g; - digest[7] += h; -} - -DECLSPEC void partial_hashes_ipad_opad(pbkdf2_sha256_tmp *tmp, GLOBAL_AS const u32 *pwd) -{ - /* - * This functions assumes that passwords are smaller than 512 bit, which is the case for KNX IP Secure as the ETS 5 limits - * the maximum length to 20 characters. - * - * Both ipad and opad remain constant for a given password throughout the PBKDF2 computation. Futhermore they are both - * 512 bit long, which is exactly the block size of SHA-256. Thus, it is possible to compute a partial hash for both - * without knowing what will be concatenated to ipad and opad, as the processing in SHA-256 happens in blocks of 512 bit. - * The resulting intermediate result can be stored and reused in all rounds of the PBKDF. - */ - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = make_u32x (hc_swap32_S (pwd[ 0])); - w0[1] = make_u32x (hc_swap32_S (pwd[ 1])); - w0[2] = make_u32x (hc_swap32_S (pwd[ 2])); - w0[3] = make_u32x (hc_swap32_S (pwd[ 3])); - w1[0] = make_u32x (hc_swap32_S (pwd[ 4])); - w1[1] = make_u32x (hc_swap32_S (pwd[ 5])); - w1[2] = make_u32x (hc_swap32_S (pwd[ 6])); - w1[3] = make_u32x (hc_swap32_S (pwd[ 7])); - w2[0] = make_u32x (hc_swap32_S (pwd[ 8])); - w2[1] = make_u32x (hc_swap32_S (pwd[ 9])); - w2[2] = make_u32x (hc_swap32_S (pwd[10])); - w2[3] = make_u32x (hc_swap32_S (pwd[11])); - w3[0] = make_u32x (hc_swap32_S (pwd[12])); - w3[1] = make_u32x (hc_swap32_S (pwd[13])); - w3[2] = make_u32x (hc_swap32_S (pwd[14])); - w3[3] = make_u32x (hc_swap32_S (pwd[15])); - - sha256_hmac_ctx_vector_t sha256_hmac_ctx_vector; - - // The partial hash is equivalent to computing the hash of just that one block - init_ipad (&sha256_hmac_ctx_vector.ipad, w0, w1, w2, w3); - init_opad (&sha256_hmac_ctx_vector.opad, w0, w1, w2, w3); - - sha256_transform_vector (sha256_hmac_ctx_vector.ipad.w0, - sha256_hmac_ctx_vector.ipad.w1, - sha256_hmac_ctx_vector.ipad.w2, - sha256_hmac_ctx_vector.ipad.w3, - sha256_hmac_ctx_vector.ipad.h); - - sha256_transform_vector (sha256_hmac_ctx_vector.opad.w0, - sha256_hmac_ctx_vector.opad.w1, - sha256_hmac_ctx_vector.opad.w2, - sha256_hmac_ctx_vector.opad.w3, - sha256_hmac_ctx_vector.opad.h); - - tmp->ipad_partial_hash[0] = sha256_hmac_ctx_vector.ipad.h[0]; - tmp->ipad_partial_hash[1] = sha256_hmac_ctx_vector.ipad.h[1]; - tmp->ipad_partial_hash[2] = sha256_hmac_ctx_vector.ipad.h[2]; - tmp->ipad_partial_hash[3] = sha256_hmac_ctx_vector.ipad.h[3]; - tmp->ipad_partial_hash[4] = sha256_hmac_ctx_vector.ipad.h[4]; - tmp->ipad_partial_hash[5] = sha256_hmac_ctx_vector.ipad.h[5]; - tmp->ipad_partial_hash[6] = sha256_hmac_ctx_vector.ipad.h[6]; - tmp->ipad_partial_hash[7] = sha256_hmac_ctx_vector.ipad.h[7]; - - tmp->opad_partial_hash[0] = sha256_hmac_ctx_vector.opad.h[0]; - tmp->opad_partial_hash[1] = sha256_hmac_ctx_vector.opad.h[1]; - tmp->opad_partial_hash[2] = sha256_hmac_ctx_vector.opad.h[2]; - tmp->opad_partial_hash[3] = sha256_hmac_ctx_vector.opad.h[3]; - tmp->opad_partial_hash[4] = sha256_hmac_ctx_vector.opad.h[4]; - tmp->opad_partial_hash[5] = sha256_hmac_ctx_vector.opad.h[5]; - tmp->opad_partial_hash[6] = sha256_hmac_ctx_vector.opad.h[6]; - tmp->opad_partial_hash[7] = sha256_hmac_ctx_vector.opad.h[7]; -} - -DECLSPEC void hmac_sha256(u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad_partial_hash, u32x *opad_partial_hash, u32x *digest) -{ - /* - * This function assumes that the input has been padded according to RFC 6234 [3]. - * - * [3] RFC 6234, section 4.1, https://tools.ietf.org/html/rfc6234#section-4.1 - */ - - digest[0] = ipad_partial_hash[0]; - digest[1] = ipad_partial_hash[1]; - digest[2] = ipad_partial_hash[2]; - digest[3] = ipad_partial_hash[3]; - digest[4] = ipad_partial_hash[4]; - digest[5] = ipad_partial_hash[5]; - digest[6] = ipad_partial_hash[6]; - digest[7] = ipad_partial_hash[7]; - - sha256_transform_vector (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = digest[5]; - w1[2] = digest[6]; - w1[3] = digest[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 768; // 512 bit for they ipad and 256 bit for the previous hash - - digest[0] = opad_partial_hash[0]; - digest[1] = opad_partial_hash[1]; - digest[2] = opad_partial_hash[2]; - digest[3] = opad_partial_hash[3]; - digest[4] = opad_partial_hash[4]; - digest[5] = opad_partial_hash[5]; - digest[6] = opad_partial_hash[6]; - digest[7] = opad_partial_hash[7]; - - sha256_transform_hash (w0, w1, w2, w3, digest); -} - -DECLSPEC void hmac_sha256_for_hash(u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad_partial_hash, u32x *opad_partial_hash, u32x *digest) -{ - /* - * This function assumes that the input is the block containing the hash of 256 bit length and has been padded according to RFC 6234 [3] - * - * [3] RFC 6234, section 4.1, https://tools.ietf.org/html/rfc6234#section-4.1 - */ - - digest[0] = ipad_partial_hash[0]; - digest[1] = ipad_partial_hash[1]; - digest[2] = ipad_partial_hash[2]; - digest[3] = ipad_partial_hash[3]; - digest[4] = ipad_partial_hash[4]; - digest[5] = ipad_partial_hash[5]; - digest[6] = ipad_partial_hash[6]; - digest[7] = ipad_partial_hash[7]; - - sha256_transform_hash (w0, w1, w2, w3, digest); - - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = digest[5]; - w1[2] = digest[6]; - w1[3] = digest[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 768; // 512 bit for they ipad and 256 bit for the previous hash - - digest[0] = opad_partial_hash[0]; - digest[1] = opad_partial_hash[1]; - digest[2] = opad_partial_hash[2]; - digest[3] = opad_partial_hash[3]; - digest[4] = opad_partial_hash[4]; - digest[5] = opad_partial_hash[5]; - digest[6] = opad_partial_hash[6]; - digest[7] = opad_partial_hash[7]; - - sha256_transform_hash (w0, w1, w2, w3, digest); -} - -DECLSPEC void hmac_sha256_first_round(pbkdf2_sha256_tmp *tmp, GLOBAL_AS const u32 *salt, const int len) -{ - /* - * This function assumes that the salt is less than 56 byte (448 bit), which is the case for - * KNX IP Secure as the salt is constant and 46 byte (368 bit) long. - */ - - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - w0[0] = make_u32x (hc_swap32_S (salt[ 0])); - w0[1] = make_u32x (hc_swap32_S (salt[ 1])); - w0[2] = make_u32x (hc_swap32_S (salt[ 2])); - w0[3] = make_u32x (hc_swap32_S (salt[ 3])); - w1[0] = make_u32x (hc_swap32_S (salt[ 4])); - w1[1] = make_u32x (hc_swap32_S (salt[ 5])); - w1[2] = make_u32x (hc_swap32_S (salt[ 6])); - w1[3] = make_u32x (hc_swap32_S (salt[ 7])); - w2[0] = make_u32x (hc_swap32_S (salt[ 8])); - w2[1] = make_u32x (hc_swap32_S (salt[ 9])); - w2[2] = make_u32x (hc_swap32_S (salt[10])); - w2[3] = make_u32x (hc_swap32_S (salt[11])); - w3[0] = make_u32x (hc_swap32_S (salt[12])); - w3[1] = make_u32x (hc_swap32_S (salt[13])); - w3[2] = make_u32x (hc_swap32_S (salt[14])); - w3[3] = make_u32x (hc_swap32_S (salt[15])); - - /* - * PBKDF2 requires the one-based 32 bit big-endian block index to be appended to the salt [2]. - * Since the salt is used in the first block, that integer is 1. - * - * [2] RFC 8018, section 5.2, item 3, https://tools.ietf.org/html/rfc8018#section-5.2 - */ - - u32x i0[4]; - u32x i1[4]; - u32x i2[4]; - u32x i3[4]; - - i0[0] = 1; - i0[1] = 0; - i0[2] = 0; - i0[3] = 0; - i1[0] = 0; - i1[1] = 0; - i1[2] = 0; - i1[3] = 0; - i2[0] = 0; - i2[1] = 0; - i2[2] = 0; - i2[3] = 0; - i3[0] = 0; - i3[1] = 0; - i3[2] = 0; - i3[3] = 0; - - switch_buffer_by_offset_be(i0, i1, i2, i3, len & 63); // Shift to the correct position after the end of the salt - - w0[0] |= i0[0]; - w0[1] |= i0[1]; - w0[2] |= i0[2]; - w0[3] |= i0[3]; - w1[0] |= i1[0]; - w1[1] |= i1[1]; - w1[2] |= i1[2]; - w1[3] |= i1[3]; - w2[0] |= i2[0]; - w2[1] |= i2[1]; - w2[2] |= i2[2]; - w2[3] |= i2[3]; - w3[0] |= i3[0]; - w3[1] |= i3[1]; - w3[2] |= i3[2]; - w3[3] |= i3[3]; - - // Updated length with the 32 bit block index appended - MAYBE_VOLATILE const int len_updated = len + 4; - - /* - * Pad salt to 512 bit using the padding scheme described in RFC 6234 [3] - * - * [3] RFC 6234, section 4.1, https://tools.ietf.org/html/rfc6234#section-4.1 - */ - append_0x80_4x4 (w0, w1, w2, w3, (len_updated & 63) ^ 3); - w3[2] = 0; - w3[3] = len_updated * 8 + 512; // Length in bits, ipad is 512 bit - - hmac_sha256 (w0, w1, w2, w3, tmp->ipad_partial_hash, tmp->opad_partial_hash, tmp->digest); - - tmp->out[0] = tmp->digest[0]; - tmp->out[1] = tmp->digest[1]; - tmp->out[2] = tmp->digest[2]; - tmp->out[3] = tmp->digest[3]; - tmp->out[4] = tmp->digest[4]; - tmp->out[5] = tmp->digest[5]; - tmp->out[6] = tmp->digest[6]; - tmp->out[7] = tmp->digest[7]; -} - -DECLSPEC void aes128_encrypt_cbc (const u32 *aes_ks, u32 *aes_iv, const u32 *in, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) -{ - u32 in_s[4]; - - in_s[0] = in[0]; - in_s[1] = in[1]; - in_s[2] = in[2]; - in_s[3] = in[3]; - - in_s[0] ^= aes_iv[0]; - in_s[1] ^= aes_iv[1]; - in_s[2] ^= aes_iv[2]; - in_s[3] ^= aes_iv[3]; - - aes128_encrypt (aes_ks, in_s, out, s_te0, s_te1, s_te2, s_te3, s_te4); - - aes_iv[0] = out[0]; - aes_iv[1] = out[1]; - aes_iv[2] = out[2]; - aes_iv[3] = out[3]; -} - -KERNEL_FQ void m25900_init(KERN_ATTR_TMPS(pbkdf2_sha256_tmp_t)) -{ - const u64 gid = get_global_id(0); - - if (gid >= gid_max) return; - - partial_hashes_ipad_opad(&tmps[gid], pws[gid].i); - - hmac_sha256_first_round(&tmps[gid], salt_bufs[SALT_POS].salt_buf, salt_bufs[SALT_POS].salt_len); -} - -KERNEL_FQ void m25900_loop(KERN_ATTR_TMPS(pbkdf2_sha256_tmp_t)) -{ - const u64 gid = get_global_id(0); - - if ((gid * VECT_SIZE) >= gid_max) return; - - u32x* ipad_partial_hash = tmps[gid].ipad_partial_hash; - u32x* opad_partial_hash = tmps[gid].opad_partial_hash; - u32x* digest = tmps[gid].digest; - u32x* out = tmps[gid].out; - - for (u32 j = 0; j < loop_cnt; j++) - { - u32x w0[4]; - u32x w1[4]; - u32x w2[4]; - u32x w3[4]; - - // Pad the 256 bit hash from the previous PBKDF2-HMAC-SHA256 round to 512 bit - w0[0] = digest[0]; - w0[1] = digest[1]; - w0[2] = digest[2]; - w0[3] = digest[3]; - w1[0] = digest[4]; - w1[1] = digest[5]; - w1[2] = digest[6]; - w1[3] = digest[7]; - w2[0] = 0x80000000; - w2[1] = 0; - w2[2] = 0; - w2[3] = 0; - w3[0] = 0; - w3[1] = 0; - w3[2] = 0; - w3[3] = 768; // 512 bit for they ipad and 256 bit for the previous hash - - hmac_sha256_for_hash (w0, w1, w2, w3, ipad_partial_hash, opad_partial_hash, digest); - - // XOR digest created by HMAC-SHA256 for the PBKDF2 round - out[0] ^= digest[0]; - out[1] ^= digest[1]; - out[2] ^= digest[2]; - out[3] ^= digest[3]; - out[4] ^= digest[4]; - out[5] ^= digest[5]; - out[6] ^= digest[6]; - out[7] ^= digest[7]; - } -} - -KERNEL_FQ void m25900_comp(KERN_ATTR_TMPS_ESALT(pbkdf2_sha256_tmp_t, blocks_t)) -{ - /** - * base - */ - - const u64 gid = get_global_id(0); - const u64 lid = get_local_id(0); - const u64 lsz = get_local_size(0); - - /** - * aes shared - */ - - #ifdef REAL_SHM - - LOCAL_VK u32 s_td0[256]; - LOCAL_VK u32 s_td1[256]; - LOCAL_VK u32 s_td2[256]; - LOCAL_VK u32 s_td3[256]; - LOCAL_VK u32 s_td4[256]; - - LOCAL_VK u32 s_te0[256]; - LOCAL_VK u32 s_te1[256]; - LOCAL_VK u32 s_te2[256]; - LOCAL_VK u32 s_te3[256]; - LOCAL_VK u32 s_te4[256]; - - for (u32 i = lid; i < 256; i += lsz) - { - s_td0[i] = td0[i]; - s_td1[i] = td1[i]; - s_td2[i] = td2[i]; - s_td3[i] = td3[i]; - s_td4[i] = td4[i]; - - s_te0[i] = te0[i]; - s_te1[i] = te1[i]; - s_te2[i] = te2[i]; - s_te3[i] = te3[i]; - s_te4[i] = te4[i]; - } - - SYNC_THREADS(); - - #else - - CONSTANT_AS u32a* s_td0 = td0; - CONSTANT_AS u32a* s_td1 = td1; - CONSTANT_AS u32a* s_td2 = td2; - CONSTANT_AS u32a* s_td3 = td3; - CONSTANT_AS u32a* s_td4 = td4; - - CONSTANT_AS u32a* s_te0 = te0; - CONSTANT_AS u32a* s_te1 = te1; - CONSTANT_AS u32a* s_te2 = te2; - CONSTANT_AS u32a* s_te3 = te3; - CONSTANT_AS u32a* s_te4 = te4; - - #endif - - if (gid >= gid_max) return; - - u32 key[4]; - - key[0] = tmps[gid].out[DGST_R0]; - key[1] = tmps[gid].out[DGST_R1]; - key[2] = tmps[gid].out[DGST_R2]; - key[3] = tmps[gid].out[DGST_R3]; - - u32 aes_ks[44]; - - AES128_set_encrypt_key (aes_ks, key, s_te0, s_te1, s_te2, s_te3); - - u32 b0[4] = { 0 }; - - u32 aes_cbc_iv[4] = { 0 }; - - u32 yn[4]; - - aes128_encrypt_cbc (aes_ks, aes_cbc_iv, b0, yn, s_te0, s_te1, s_te2, s_te3, s_te4); - aes128_encrypt_cbc (aes_ks, aes_cbc_iv, esalt_bufs[DIGESTS_OFFSET].b1, yn, s_te0, s_te1, s_te2, s_te3, s_te4); - aes128_encrypt_cbc (aes_ks, aes_cbc_iv, esalt_bufs[DIGESTS_OFFSET].b2, yn, s_te0, s_te1, s_te2, s_te3, s_te4); - aes128_encrypt_cbc (aes_ks, aes_cbc_iv, esalt_bufs[DIGESTS_OFFSET].b3, yn, s_te0, s_te1, s_te2, s_te3, s_te4); - - u32 nonce[4]; - - nonce[0] = 0; - nonce[1] = 0; - nonce[2] = 0; - nonce[3] = 0x00ff0000; // already swapped - - u32 s0[4]; - - aes128_encrypt(aes_ks, nonce, s0, s_te0, s_te1, s_te2, s_te3, s_te4); - - const u32 r0 = yn[0] ^ s0[0]; - const u32 r1 = yn[1] ^ s0[1]; - const u32 r2 = yn[2] ^ s0[2]; - const u32 r3 = yn[3] ^ s0[3]; - -#define il_pos 0 - -#ifdef KERNEL_STATIC -#include COMPARE_M -#endif -} diff --git a/OpenCL/m25900-pure.cl b/OpenCL/m25900-pure.cl index 6ab59ad2e..36ecd286a 100644 --- a/OpenCL/m25900-pure.cl +++ b/OpenCL/m25900-pure.cl @@ -1,10 +1,8 @@ /** - * Author......: See docs/credits.txt and Robert Guetzkow + * Author......: See docs/credits.txt * License.....: MIT */ -// The code is mostly reused from m10900-pure.cl and m19800-pure.cl - #define NEW_SIMD_CODE #ifdef KERNEL_STATIC @@ -49,7 +47,7 @@ DECLSPEC void hmac_sha256_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *i digest[6] = ipad[6]; digest[7] = ipad[7]; - sha256_transform_vector(w0, w1, w2, w3, digest); + sha256_transform_vector (w0, w1, w2, w3, digest); w0[0] = digest[0]; w0[1] = digest[1]; @@ -77,7 +75,7 @@ DECLSPEC void hmac_sha256_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *i digest[6] = opad[6]; digest[7] = opad[7]; - sha256_transform_vector(w0, w1, w2, w3, digest); + sha256_transform_vector (w0, w1, w2, w3, digest); } DECLSPEC void aes128_encrypt_cbc (const u32 *aes_ks, u32 *aes_iv, const u32 *in, u32 *out, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4) @@ -195,46 +193,46 @@ KERNEL_FQ void m25900_loop(KERN_ATTR_TMPS(pbkdf2_sha256_tmp_t)) u32x ipad[8]; u32x opad[8]; - ipad[0] = packv(tmps, ipad, gid, 0); - ipad[1] = packv(tmps, ipad, gid, 1); - ipad[2] = packv(tmps, ipad, gid, 2); - ipad[3] = packv(tmps, ipad, gid, 3); - ipad[4] = packv(tmps, ipad, gid, 4); - ipad[5] = packv(tmps, ipad, gid, 5); - ipad[6] = packv(tmps, ipad, gid, 6); - ipad[7] = packv(tmps, ipad, gid, 7); + ipad[0] = packv (tmps, ipad, gid, 0); + ipad[1] = packv (tmps, ipad, gid, 1); + ipad[2] = packv (tmps, ipad, gid, 2); + ipad[3] = packv (tmps, ipad, gid, 3); + ipad[4] = packv (tmps, ipad, gid, 4); + ipad[5] = packv (tmps, ipad, gid, 5); + ipad[6] = packv (tmps, ipad, gid, 6); + ipad[7] = packv (tmps, ipad, gid, 7); - opad[0] = packv(tmps, opad, gid, 0); - opad[1] = packv(tmps, opad, gid, 1); - opad[2] = packv(tmps, opad, gid, 2); - opad[3] = packv(tmps, opad, gid, 3); - opad[4] = packv(tmps, opad, gid, 4); - opad[5] = packv(tmps, opad, gid, 5); - opad[6] = packv(tmps, opad, gid, 6); - opad[7] = packv(tmps, opad, gid, 7); + opad[0] = packv (tmps, opad, gid, 0); + opad[1] = packv (tmps, opad, gid, 1); + opad[2] = packv (tmps, opad, gid, 2); + opad[3] = packv (tmps, opad, gid, 3); + opad[4] = packv (tmps, opad, gid, 4); + opad[5] = packv (tmps, opad, gid, 5); + opad[6] = packv (tmps, opad, gid, 6); + opad[7] = packv (tmps, opad, gid, 7); for (u32 i = 0; i < 8; i += 8) { u32x dgst[8]; u32x out[8]; - dgst[0] = packv(tmps, dgst, gid, i + 0); - dgst[1] = packv(tmps, dgst, gid, i + 1); - dgst[2] = packv(tmps, dgst, gid, i + 2); - dgst[3] = packv(tmps, dgst, gid, i + 3); - dgst[4] = packv(tmps, dgst, gid, i + 4); - dgst[5] = packv(tmps, dgst, gid, i + 5); - dgst[6] = packv(tmps, dgst, gid, i + 6); - dgst[7] = packv(tmps, dgst, gid, i + 7); + dgst[0] = packv (tmps, dgst, gid, i + 0); + dgst[1] = packv (tmps, dgst, gid, i + 1); + dgst[2] = packv (tmps, dgst, gid, i + 2); + dgst[3] = packv (tmps, dgst, gid, i + 3); + dgst[4] = packv (tmps, dgst, gid, i + 4); + dgst[5] = packv (tmps, dgst, gid, i + 5); + dgst[6] = packv (tmps, dgst, gid, i + 6); + dgst[7] = packv (tmps, dgst, gid, i + 7); - out[0] = packv(tmps, out, gid, i + 0); - out[1] = packv(tmps, out, gid, i + 1); - out[2] = packv(tmps, out, gid, i + 2); - out[3] = packv(tmps, out, gid, i + 3); - out[4] = packv(tmps, out, gid, i + 4); - out[5] = packv(tmps, out, gid, i + 5); - out[6] = packv(tmps, out, gid, i + 6); - out[7] = packv(tmps, out, gid, i + 7); + out[0] = packv (tmps, out, gid, i + 0); + out[1] = packv (tmps, out, gid, i + 1); + out[2] = packv (tmps, out, gid, i + 2); + out[3] = packv (tmps, out, gid, i + 3); + out[4] = packv (tmps, out, gid, i + 4); + out[5] = packv (tmps, out, gid, i + 5); + out[6] = packv (tmps, out, gid, i + 6); + out[7] = packv (tmps, out, gid, i + 7); for (u32 j = 0; j < loop_cnt; j++) { @@ -260,7 +258,7 @@ KERNEL_FQ void m25900_loop(KERN_ATTR_TMPS(pbkdf2_sha256_tmp_t)) w3[2] = 0; w3[3] = (64 + 32) * 8; - hmac_sha256_run_V(w0, w1, w2, w3, ipad, opad, dgst); + hmac_sha256_run_V (w0, w1, w2, w3, ipad, opad, dgst); out[0] ^= dgst[0]; out[1] ^= dgst[1]; @@ -272,23 +270,23 @@ KERNEL_FQ void m25900_loop(KERN_ATTR_TMPS(pbkdf2_sha256_tmp_t)) out[7] ^= dgst[7]; } - unpackv(tmps, dgst, gid, i + 0, dgst[0]); - unpackv(tmps, dgst, gid, i + 1, dgst[1]); - unpackv(tmps, dgst, gid, i + 2, dgst[2]); - unpackv(tmps, dgst, gid, i + 3, dgst[3]); - unpackv(tmps, dgst, gid, i + 4, dgst[4]); - unpackv(tmps, dgst, gid, i + 5, dgst[5]); - unpackv(tmps, dgst, gid, i + 6, dgst[6]); - unpackv(tmps, dgst, gid, i + 7, dgst[7]); + unpackv (tmps, dgst, gid, i + 0, dgst[0]); + unpackv (tmps, dgst, gid, i + 1, dgst[1]); + unpackv (tmps, dgst, gid, i + 2, dgst[2]); + unpackv (tmps, dgst, gid, i + 3, dgst[3]); + unpackv (tmps, dgst, gid, i + 4, dgst[4]); + unpackv (tmps, dgst, gid, i + 5, dgst[5]); + unpackv (tmps, dgst, gid, i + 6, dgst[6]); + unpackv (tmps, dgst, gid, i + 7, dgst[7]); - unpackv(tmps, out, gid, i + 0, out[0]); - unpackv(tmps, out, gid, i + 1, out[1]); - unpackv(tmps, out, gid, i + 2, out[2]); - unpackv(tmps, out, gid, i + 3, out[3]); - unpackv(tmps, out, gid, i + 4, out[4]); - unpackv(tmps, out, gid, i + 5, out[5]); - unpackv(tmps, out, gid, i + 6, out[6]); - unpackv(tmps, out, gid, i + 7, out[7]); + unpackv (tmps, out, gid, i + 0, out[0]); + unpackv (tmps, out, gid, i + 1, out[1]); + unpackv (tmps, out, gid, i + 2, out[2]); + unpackv (tmps, out, gid, i + 3, out[3]); + unpackv (tmps, out, gid, i + 4, out[4]); + unpackv (tmps, out, gid, i + 5, out[5]); + unpackv (tmps, out, gid, i + 6, out[6]); + unpackv (tmps, out, gid, i + 7, out[7]); } } @@ -372,10 +370,34 @@ KERNEL_FQ void m25900_comp(KERN_ATTR_TMPS_ESALT(pbkdf2_sha256_tmp_t, blocks_t)) u32 yn[4]; + const u32 digest_pos = loop_pos; + const u32 digest_cur = DIGESTS_OFFSET + digest_pos; + + u32 b1[4]; + + b1[0] = esalt_bufs[digest_cur].b1[0]; + b1[1] = esalt_bufs[digest_cur].b1[1]; + b1[2] = esalt_bufs[digest_cur].b1[2]; + b1[3] = esalt_bufs[digest_cur].b1[3]; + + u32 b2[4]; + + b2[0] = esalt_bufs[digest_cur].b2[0]; + b2[1] = esalt_bufs[digest_cur].b2[1]; + b2[2] = esalt_bufs[digest_cur].b2[2]; + b2[3] = esalt_bufs[digest_cur].b2[3]; + + u32 b3[4]; + + b3[0] = esalt_bufs[digest_cur].b3[0]; + b3[1] = esalt_bufs[digest_cur].b3[1]; + b3[2] = esalt_bufs[digest_cur].b3[2]; + b3[3] = esalt_bufs[digest_cur].b3[3]; + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, b0, yn, s_te0, s_te1, s_te2, s_te3, s_te4); - aes128_encrypt_cbc (aes_ks, aes_cbc_iv, esalt_bufs[DIGESTS_OFFSET].b1, yn, s_te0, s_te1, s_te2, s_te3, s_te4); - aes128_encrypt_cbc (aes_ks, aes_cbc_iv, esalt_bufs[DIGESTS_OFFSET].b2, yn, s_te0, s_te1, s_te2, s_te3, s_te4); - aes128_encrypt_cbc (aes_ks, aes_cbc_iv, esalt_bufs[DIGESTS_OFFSET].b3, yn, s_te0, s_te1, s_te2, s_te3, s_te4); + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, b1, yn, s_te0, s_te1, s_te2, s_te3, s_te4); + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, b2, yn, s_te0, s_te1, s_te2, s_te3, s_te4); + aes128_encrypt_cbc (aes_ks, aes_cbc_iv, b3, yn, s_te0, s_te1, s_te2, s_te3, s_te4); u32 nonce[4]; diff --git a/src/modules/module_25900.c b/src/modules/module_25900.c index 9150a614e..e02d323c9 100644 --- a/src/modules/module_25900.c +++ b/src/modules/module_25900.c @@ -20,7 +20,8 @@ static const u32 HASH_CATEGORY = HASH_CATEGORY_NETWORK_PROTOCOL; static const char *HASH_NAME = "KNX IP Secure - Device Authentication Code"; static const u64 KERN_TYPE = 25900; static const u32 OPTI_TYPE = OPTI_TYPE_SLOW_HASH_SIMD_LOOP; -static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE; +static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_DEEP_COMP_KERNEL; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *ST_PASS = "hashcat"; static const char *ST_HASH = "$knx-ip-secure-device-authentication-code$*3033*fa7c0d787a9467c209f0a6e7cf16069ed704f3959dce19e45d7935c0a91bce41*f927640d9bbe9a4b0b74dd3289ad41ec"; @@ -89,6 +90,11 @@ char* module_jit_build_options(MAYBE_UNUSED const hashconfig_t *hashconfig, MAYB return jit_build_options; } +u32 module_deep_comp_kernel(MAYBE_UNUSED const hashes_t *hashes, MAYBE_UNUSED const u32 salt_pos, MAYBE_UNUSED const u32 digest_pos) +{ + return KERN_RUN_3; +} + u64 module_esalt_size(MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { const u64 esalt_size = (const u64) sizeof (blocks_t); @@ -260,7 +266,7 @@ void module_init(module_ctx_t *module_ctx) module_ctx->module_benchmark_mask = MODULE_DEFAULT; module_ctx->module_benchmark_salt = MODULE_DEFAULT; module_ctx->module_build_plain_postprocess = MODULE_DEFAULT; - module_ctx->module_deep_comp_kernel = MODULE_DEFAULT; + module_ctx->module_deep_comp_kernel = module_deep_comp_kernel; module_ctx->module_dgst_pos0 = module_dgst_pos0; module_ctx->module_dgst_pos1 = module_dgst_pos1; module_ctx->module_dgst_pos2 = module_dgst_pos2;