From 8c6bb1094f8c6d5a5087e3969c7f98fdaca99a87 Mon Sep 17 00:00:00 2001 From: jsteube Date: Mon, 13 Aug 2018 12:09:21 +0200 Subject: [PATCH 1/3] Update results from CMIYC 2018 --- docs/team.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/team.txt b/docs/team.txt index 7597c7614..78841431c 100644 --- a/docs/team.txt +++ b/docs/team.txt @@ -13,6 +13,7 @@ We're a group of people participating in the yearly repeating password cracking | Crack Me If You Can | DEF CON, Las Vegas | 2014 | 1st | | Crack Me If You Can | DEF CON, Las Vegas | 2015 | 1st | | Crack Me If You Can | DerbyCon, Louisville | 2017 | 1st | +| Crack Me If You Can | DEF CON, Las Vegas | 2018 | 2nd | | Competition | Conference | Year | Placed | |---------------------|----------------------------|------|--------| From 6469357c74d141e1eda1d760c518267c55de065a Mon Sep 17 00:00:00 2001 From: jsteube Date: Mon, 13 Aug 2018 12:10:03 +0200 Subject: [PATCH 2/3] Remove SCR_TYPE macro from OpenCL code Disable REAL_SHM access to AMD platform devices --- OpenCL/inc_hash_whirlpool.cl | 30 +++++++++--------- OpenCL/inc_vendor.cl | 29 +++++++++-------- OpenCL/m06100_a0-optimized.cl | 20 +----------- OpenCL/m06100_a0-pure.cl | 18 ----------- OpenCL/m06100_a1-optimized.cl | 20 +----------- OpenCL/m06100_a1-pure.cl | 18 ----------- OpenCL/m06100_a3-optimized.cl | 60 ++--------------------------------- OpenCL/m06100_a3-pure.cl | 18 ----------- OpenCL/m06231-pure.cl | 42 +++++++----------------- OpenCL/m06232-pure.cl | 42 +++++++----------------- OpenCL/m06233-pure.cl | 42 +++++++----------------- OpenCL/m07500_a0-optimized.cl | 30 +++--------------- OpenCL/m07500_a0-pure.cl | 30 +++--------------- OpenCL/m07500_a1-optimized.cl | 30 +++--------------- OpenCL/m07500_a1-pure.cl | 30 +++--------------- OpenCL/m07500_a3-optimized.cl | 52 ++++-------------------------- OpenCL/m07500_a3-pure.cl | 30 +++--------------- OpenCL/m13100_a0-optimized.cl | 30 +++--------------- OpenCL/m13100_a0-pure.cl | 30 +++--------------- OpenCL/m13100_a1-optimized.cl | 30 +++--------------- OpenCL/m13100_a1-pure.cl | 30 +++--------------- OpenCL/m13100_a3-optimized.cl | 52 ++++-------------------------- OpenCL/m13100_a3-pure.cl | 30 +++--------------- 23 files changed, 133 insertions(+), 610 deletions(-) diff --git a/OpenCL/inc_hash_whirlpool.cl b/OpenCL/inc_hash_whirlpool.cl index bff268064..9c1d7c294 100644 --- a/OpenCL/inc_hash_whirlpool.cl +++ b/OpenCL/inc_hash_whirlpool.cl @@ -1125,12 +1125,12 @@ typedef struct whirlpool_ctx int len; - SHM_TYPE u32 (*s_Ch)[256]; - SHM_TYPE u32 (*s_Cl)[256]; + __local u32 (*s_Ch)[256]; + __local u32 (*s_Cl)[256]; } whirlpool_ctx_t; -DECLSPEC void whirlpool_transform (const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, u32 *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_transform (const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, u32 *digest, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { u32 Kh[8]; u32 Kl[8]; @@ -1300,7 +1300,7 @@ DECLSPEC void whirlpool_transform (const u32 *w0, const u32 *w1, const u32 *w2, digest[15] ^= statel[7] ^ w3[3]; } -DECLSPEC void whirlpool_init (whirlpool_ctx_t *ctx, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_init (whirlpool_ctx_t *ctx, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { ctx->h[ 0] = 0; ctx->h[ 1] = 0; @@ -1971,7 +1971,7 @@ typedef struct whirlpool_hmac_ctx } whirlpool_hmac_ctx_t; -DECLSPEC void whirlpool_hmac_init_64 (whirlpool_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_hmac_init_64 (whirlpool_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { u32 t0[4]; u32 t1[4]; @@ -2025,7 +2025,7 @@ DECLSPEC void whirlpool_hmac_init_64 (whirlpool_hmac_ctx_t *ctx, const u32 *w0, whirlpool_update_64 (&ctx->opad, t0, t1, t2, t3, 64); } -DECLSPEC void whirlpool_hmac_init (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_hmac_init (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { u32 w0[4]; u32 w1[4]; @@ -2082,7 +2082,7 @@ DECLSPEC void whirlpool_hmac_init (whirlpool_hmac_ctx_t *ctx, const u32 *w, cons whirlpool_hmac_init_64 (ctx, w0, w1, w2, w3, s_Ch, s_Cl); } -DECLSPEC void whirlpool_hmac_init_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_hmac_init_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { u32 w0[4]; u32 w1[4]; @@ -2139,7 +2139,7 @@ DECLSPEC void whirlpool_hmac_init_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, whirlpool_hmac_init_64 (ctx, w0, w1, w2, w3, s_Ch, s_Cl); } -DECLSPEC void whirlpool_hmac_init_global (whirlpool_hmac_ctx_t *ctx, __global const u32 *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_hmac_init_global (whirlpool_hmac_ctx_t *ctx, __global const u32 *w, const int len, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { u32 w0[4]; u32 w1[4]; @@ -2196,7 +2196,7 @@ DECLSPEC void whirlpool_hmac_init_global (whirlpool_hmac_ctx_t *ctx, __global co whirlpool_hmac_init_64 (ctx, w0, w1, w2, w3, s_Ch, s_Cl); } -DECLSPEC void whirlpool_hmac_init_global_swap (whirlpool_hmac_ctx_t *ctx, __global const u32 *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_hmac_init_global_swap (whirlpool_hmac_ctx_t *ctx, __global const u32 *w, const int len, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { u32 w0[4]; u32 w1[4]; @@ -2342,12 +2342,12 @@ typedef struct whirlpool_ctx_vector int len; - SHM_TYPE u32 (*s_Ch)[256]; - SHM_TYPE u32 (*s_Cl)[256]; + __local u32 (*s_Ch)[256]; + __local u32 (*s_Cl)[256]; } whirlpool_ctx_vector_t; -DECLSPEC void whirlpool_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { u32x Kh[8]; u32x Kl[8]; @@ -2517,7 +2517,7 @@ DECLSPEC void whirlpool_transform_vector (const u32x *w0, const u32x *w1, const digest[15] ^= statel[7] ^ w3[3]; } -DECLSPEC void whirlpool_init_vector (whirlpool_ctx_vector_t *ctx, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_init_vector (whirlpool_ctx_vector_t *ctx, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { ctx->h[ 0] = 0; ctx->h[ 1] = 0; @@ -2974,7 +2974,7 @@ typedef struct whirlpool_hmac_ctx_vector } whirlpool_hmac_ctx_vector_t; -DECLSPEC void whirlpool_hmac_init_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_hmac_init_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { u32x t0[4]; u32x t1[4]; @@ -3028,7 +3028,7 @@ DECLSPEC void whirlpool_hmac_init_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, c whirlpool_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 64); } -DECLSPEC void whirlpool_hmac_init_vector (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_hmac_init_vector (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w, const int len, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { u32x w0[4]; u32x w1[4]; diff --git a/OpenCL/inc_vendor.cl b/OpenCL/inc_vendor.cl index da7166d93..2095cd60c 100644 --- a/OpenCL/inc_vendor.cl +++ b/OpenCL/inc_vendor.cl @@ -19,20 +19,6 @@ #define IS_ACCEL #endif -#if DEVICE_TYPE == DEVICE_TYPE_CPU -#elif DEVICE_TYPE == DEVICE_TYPE_GPU -#define REAL_SHM -#elif DEVICE_TYPE == DEVICE_TYPE_ACCEL -#endif - -#ifdef REAL_SHM -#define SHM_TYPE __local -#define SCR_TYPE __local -#else -#define SHM_TYPE __constant -#define SCR_TYPE -#endif - /** * vendor specific */ @@ -80,6 +66,21 @@ #define IS_GENERIC #endif +#if DEVICE_TYPE == DEVICE_TYPE_CPU +#elif DEVICE_TYPE == DEVICE_TYPE_GPU +// AMD fails with mode 6211 +#ifdef IS_NV +#define REAL_SHM +#endif +#elif DEVICE_TYPE == DEVICE_TYPE_ACCEL +#endif + +#ifdef REAL_SHM +#define SHM_TYPE __local +#else +#define SHM_TYPE __constant +#endif + /** * function declarations can have a large influence depending on the opencl runtime */ diff --git a/OpenCL/m06100_a0-optimized.cl b/OpenCL/m06100_a0-optimized.cl index be3fe2ee0..5a8e06463 100644 --- a/OpenCL/m06100_a0-optimized.cl +++ b/OpenCL/m06100_a0-optimized.cl @@ -15,7 +15,7 @@ #include "inc_simd.cl" #include "inc_hash_whirlpool.cl" -DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_Ch, s_Cl); } @@ -34,8 +34,6 @@ __kernel void m06100_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -62,13 +60,6 @@ __kernel void m06100_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -174,8 +165,6 @@ __kernel void m06100_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -202,13 +191,6 @@ __kernel void m06100_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** diff --git a/OpenCL/m06100_a0-pure.cl b/OpenCL/m06100_a0-pure.cl index 3bab742bc..779ff92ec 100644 --- a/OpenCL/m06100_a0-pure.cl +++ b/OpenCL/m06100_a0-pure.cl @@ -29,8 +29,6 @@ __kernel void m06100_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -57,13 +55,6 @@ __kernel void m06100_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -113,8 +104,6 @@ __kernel void m06100_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -141,13 +130,6 @@ __kernel void m06100_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** diff --git a/OpenCL/m06100_a1-optimized.cl b/OpenCL/m06100_a1-optimized.cl index 1325ac1b3..1bbc9c2dc 100644 --- a/OpenCL/m06100_a1-optimized.cl +++ b/OpenCL/m06100_a1-optimized.cl @@ -13,7 +13,7 @@ #include "inc_simd.cl" #include "inc_hash_whirlpool.cl" -DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_Ch, s_Cl); } @@ -32,8 +32,6 @@ __kernel void m06100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -60,13 +58,6 @@ __kernel void m06100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -230,8 +221,6 @@ __kernel void m06100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -258,13 +247,6 @@ __kernel void m06100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** diff --git a/OpenCL/m06100_a1-pure.cl b/OpenCL/m06100_a1-pure.cl index b170ca66e..2c7dbbdc2 100644 --- a/OpenCL/m06100_a1-pure.cl +++ b/OpenCL/m06100_a1-pure.cl @@ -27,8 +27,6 @@ __kernel void m06100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -55,13 +53,6 @@ __kernel void m06100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -109,8 +100,6 @@ __kernel void m06100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -137,13 +126,6 @@ __kernel void m06100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** diff --git a/OpenCL/m06100_a3-optimized.cl b/OpenCL/m06100_a3-optimized.cl index 11fa4c7fe..5883edea7 100644 --- a/OpenCL/m06100_a3-optimized.cl +++ b/OpenCL/m06100_a3-optimized.cl @@ -13,12 +13,12 @@ #include "inc_simd.cl" #include "inc_hash_whirlpool.cl" -DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_Ch, s_Cl); } -DECLSPEC void m06100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, SHM_TYPE u32 (*s_Cl)[256], SHM_TYPE u32 (*s_Ch)[256]) +DECLSPEC void m06100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 (*s_Cl)[256], __local u32 (*s_Ch)[256]) { /** * modifier @@ -87,7 +87,7 @@ DECLSPEC void m06100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __g } } -DECLSPEC void m06100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, SHM_TYPE u32 (*s_Cl)[256], SHM_TYPE u32 (*s_Ch)[256]) +DECLSPEC void m06100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 (*s_Cl)[256], __local u32 (*s_Ch)[256]) { /** * modifier @@ -182,8 +182,6 @@ __kernel void m06100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -210,13 +208,6 @@ __kernel void m06100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -274,8 +265,6 @@ __kernel void m06100_m08 (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -302,13 +291,6 @@ __kernel void m06100_m08 (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -366,8 +348,6 @@ __kernel void m06100_m16 (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -394,13 +374,6 @@ __kernel void m06100_m16 (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -458,8 +431,6 @@ __kernel void m06100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -486,13 +457,6 @@ __kernel void m06100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -550,8 +514,6 @@ __kernel void m06100_s08 (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -578,13 +540,6 @@ __kernel void m06100_s08 (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -642,8 +597,6 @@ __kernel void m06100_s16 (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -670,13 +623,6 @@ __kernel void m06100_s16 (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** diff --git a/OpenCL/m06100_a3-pure.cl b/OpenCL/m06100_a3-pure.cl index 564d39d5e..de353deec 100644 --- a/OpenCL/m06100_a3-pure.cl +++ b/OpenCL/m06100_a3-pure.cl @@ -27,8 +27,6 @@ __kernel void m06100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -55,13 +53,6 @@ __kernel void m06100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -122,8 +113,6 @@ __kernel void m06100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -150,13 +139,6 @@ __kernel void m06100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** diff --git a/OpenCL/m06231-pure.cl b/OpenCL/m06231-pure.cl index 06a82204f..07804cda7 100644 --- a/OpenCL/m06231-pure.cl +++ b/OpenCL/m06231-pure.cl @@ -45,7 +45,7 @@ DECLSPEC u32 u8add (const u32 a, const u32 b) return r; } -DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { digest[ 0] = ipad[ 0]; digest[ 1] = ipad[ 1]; @@ -155,8 +155,6 @@ __kernel void m06231_init (__global pw_t *pws, __global const kernel_rule_t *rul * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -183,13 +181,6 @@ __kernel void m06231_init (__global pw_t *pws, __global const kernel_rule_t *rul barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; u32 w0[4]; @@ -363,8 +354,6 @@ __kernel void m06231_loop (__global pw_t *pws, __global const kernel_rule_t *rul * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -391,13 +380,6 @@ __kernel void m06231_loop (__global pw_t *pws, __global const kernel_rule_t *rul barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if ((gid * VECT_SIZE) >= gid_max) return; u32x ipad[16]; @@ -568,17 +550,17 @@ __kernel void m06231_comp (__global pw_t *pws, __global const kernel_rule_t *rul #ifdef REAL_SHM - SHM_TYPE u32 s_td0[256]; - SHM_TYPE u32 s_td1[256]; - SHM_TYPE u32 s_td2[256]; - SHM_TYPE u32 s_td3[256]; - SHM_TYPE u32 s_td4[256]; - - SHM_TYPE u32 s_te0[256]; - SHM_TYPE u32 s_te1[256]; - SHM_TYPE u32 s_te2[256]; - SHM_TYPE u32 s_te3[256]; - SHM_TYPE u32 s_te4[256]; + __local u32 s_td0[256]; + __local u32 s_td1[256]; + __local u32 s_td2[256]; + __local u32 s_td3[256]; + __local u32 s_td4[256]; + + __local u32 s_te0[256]; + __local u32 s_te1[256]; + __local u32 s_te2[256]; + __local u32 s_te3[256]; + __local u32 s_te4[256]; for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { diff --git a/OpenCL/m06232-pure.cl b/OpenCL/m06232-pure.cl index 8af85c314..db1e591f2 100644 --- a/OpenCL/m06232-pure.cl +++ b/OpenCL/m06232-pure.cl @@ -45,7 +45,7 @@ DECLSPEC u32 u8add (const u32 a, const u32 b) return r; } -DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { digest[ 0] = ipad[ 0]; digest[ 1] = ipad[ 1]; @@ -155,8 +155,6 @@ __kernel void m06232_init (__global pw_t *pws, __global const kernel_rule_t *rul * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -183,13 +181,6 @@ __kernel void m06232_init (__global pw_t *pws, __global const kernel_rule_t *rul barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; u32 w0[4]; @@ -363,8 +354,6 @@ __kernel void m06232_loop (__global pw_t *pws, __global const kernel_rule_t *rul * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -391,13 +380,6 @@ __kernel void m06232_loop (__global pw_t *pws, __global const kernel_rule_t *rul barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if ((gid * VECT_SIZE) >= gid_max) return; u32x ipad[16]; @@ -568,17 +550,17 @@ __kernel void m06232_comp (__global pw_t *pws, __global const kernel_rule_t *rul #ifdef REAL_SHM - SHM_TYPE u32 s_td0[256]; - SHM_TYPE u32 s_td1[256]; - SHM_TYPE u32 s_td2[256]; - SHM_TYPE u32 s_td3[256]; - SHM_TYPE u32 s_td4[256]; - - SHM_TYPE u32 s_te0[256]; - SHM_TYPE u32 s_te1[256]; - SHM_TYPE u32 s_te2[256]; - SHM_TYPE u32 s_te3[256]; - SHM_TYPE u32 s_te4[256]; + __local u32 s_td0[256]; + __local u32 s_td1[256]; + __local u32 s_td2[256]; + __local u32 s_td3[256]; + __local u32 s_td4[256]; + + __local u32 s_te0[256]; + __local u32 s_te1[256]; + __local u32 s_te2[256]; + __local u32 s_te3[256]; + __local u32 s_te4[256]; for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { diff --git a/OpenCL/m06233-pure.cl b/OpenCL/m06233-pure.cl index 37412dca2..957cbf227 100644 --- a/OpenCL/m06233-pure.cl +++ b/OpenCL/m06233-pure.cl @@ -45,7 +45,7 @@ DECLSPEC u32 u8add (const u32 a, const u32 b) return r; } -DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { digest[ 0] = ipad[ 0]; digest[ 1] = ipad[ 1]; @@ -155,8 +155,6 @@ __kernel void m06233_init (__global pw_t *pws, __global const kernel_rule_t *rul * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -183,13 +181,6 @@ __kernel void m06233_init (__global pw_t *pws, __global const kernel_rule_t *rul barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; u32 w0[4]; @@ -363,8 +354,6 @@ __kernel void m06233_loop (__global pw_t *pws, __global const kernel_rule_t *rul * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -391,13 +380,6 @@ __kernel void m06233_loop (__global pw_t *pws, __global const kernel_rule_t *rul barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if ((gid * VECT_SIZE) >= gid_max) return; u32x ipad[16]; @@ -568,17 +550,17 @@ __kernel void m06233_comp (__global pw_t *pws, __global const kernel_rule_t *rul #ifdef REAL_SHM - SHM_TYPE u32 s_td0[256]; - SHM_TYPE u32 s_td1[256]; - SHM_TYPE u32 s_td2[256]; - SHM_TYPE u32 s_td3[256]; - SHM_TYPE u32 s_td4[256]; - - SHM_TYPE u32 s_te0[256]; - SHM_TYPE u32 s_te1[256]; - SHM_TYPE u32 s_te2[256]; - SHM_TYPE u32 s_te3[256]; - SHM_TYPE u32 s_te4[256]; + __local u32 s_td0[256]; + __local u32 s_td1[256]; + __local u32 s_td2[256]; + __local u32 s_td3[256]; + __local u32 s_td4[256]; + + __local u32 s_te0[256]; + __local u32 s_te1[256]; + __local u32 s_te2[256]; + __local u32 s_te3[256]; + __local u32 s_te4[256]; for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { diff --git a/OpenCL/m07500_a0-optimized.cl b/OpenCL/m07500_a0-optimized.cl index b4712d593..7de7d4ec8 100644 --- a/OpenCL/m07500_a0-optimized.cl +++ b/OpenCL/m07500_a0-optimized.cl @@ -25,7 +25,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -34,12 +34,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -87,7 +87,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -140,7 +140,7 @@ DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u return j; } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) { rc4_init_16 (rc4_key, data); @@ -440,20 +440,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_m04 (__glob * shared */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - /** * loop */ @@ -556,20 +546,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_s04 (__glob * shared */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - /** * loop */ diff --git a/OpenCL/m07500_a0-pure.cl b/OpenCL/m07500_a0-pure.cl index 151b797f0..73dd7b341 100644 --- a/OpenCL/m07500_a0-pure.cl +++ b/OpenCL/m07500_a0-pure.cl @@ -24,7 +24,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -33,12 +33,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -86,7 +86,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -139,7 +139,7 @@ DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u return j; } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) { rc4_init_16 (rc4_key, data); @@ -285,20 +285,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_mxx (__glob COPY_PW (pws[gid]); - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5pa_bufs[digests_offset].checksum[0]; @@ -366,20 +356,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_sxx (__glob COPY_PW (pws[gid]); - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5pa_bufs[digests_offset].checksum[0]; diff --git a/OpenCL/m07500_a1-optimized.cl b/OpenCL/m07500_a1-optimized.cl index d4d52bf59..7afd8906a 100644 --- a/OpenCL/m07500_a1-optimized.cl +++ b/OpenCL/m07500_a1-optimized.cl @@ -23,7 +23,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -32,12 +32,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -85,7 +85,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -138,7 +138,7 @@ DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u return j; } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) { rc4_init_16 (rc4_key, data); @@ -438,20 +438,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_m04 (__glob * shared */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - /** * loop */ @@ -604,20 +594,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_s04 (__glob * shared */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - /** * loop */ diff --git a/OpenCL/m07500_a1-pure.cl b/OpenCL/m07500_a1-pure.cl index ed3cfdcc2..f5438ddd0 100644 --- a/OpenCL/m07500_a1-pure.cl +++ b/OpenCL/m07500_a1-pure.cl @@ -22,7 +22,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -31,12 +31,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -84,7 +84,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -137,7 +137,7 @@ DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u return j; } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) { rc4_init_16 (rc4_key, data); @@ -281,20 +281,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_mxx (__glob * base */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5pa_bufs[digests_offset].checksum[0]; @@ -360,20 +350,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_sxx (__glob * base */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5pa_bufs[digests_offset].checksum[0]; diff --git a/OpenCL/m07500_a3-optimized.cl b/OpenCL/m07500_a3-optimized.cl index a1a2dbe63..5c753c75d 100644 --- a/OpenCL/m07500_a3-optimized.cl +++ b/OpenCL/m07500_a3-optimized.cl @@ -23,7 +23,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -32,12 +32,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -85,7 +85,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -138,7 +138,7 @@ DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u return j; } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) { rc4_init_16 (rc4_key, data); @@ -382,7 +382,7 @@ DECLSPEC void kerb_prepare (const u32 *w0, const u32 *w1, const u32 pw_len, cons hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); } -DECLSPEC void m07500 (SCR_TYPE RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global krb5pa_t *krb5pa_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) +DECLSPEC void m07500 (__local RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global krb5pa_t *krb5pa_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -517,20 +517,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_m04 (__glob * main */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5pa_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } @@ -579,20 +569,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_m08 (__glob * main */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5pa_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } @@ -645,20 +625,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_s04 (__glob * main */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5pa_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } @@ -707,20 +677,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_s08 (__glob * main */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5pa_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } diff --git a/OpenCL/m07500_a3-pure.cl b/OpenCL/m07500_a3-pure.cl index c666fb9b1..cebeb8a3d 100644 --- a/OpenCL/m07500_a3-pure.cl +++ b/OpenCL/m07500_a3-pure.cl @@ -22,7 +22,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -31,12 +31,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -84,7 +84,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -137,7 +137,7 @@ DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u return j; } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) { rc4_init_16 (rc4_key, data); @@ -302,20 +302,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_mxx (__glob w[idx] = pws[gid].i[idx]; } - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5pa_bufs[digests_offset].checksum[0]; @@ -406,20 +396,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_sxx (__glob w[idx] = pws[gid].i[idx]; } - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5pa_bufs[digests_offset].checksum[0]; diff --git a/OpenCL/m13100_a0-optimized.cl b/OpenCL/m13100_a0-optimized.cl index 3ca427acf..b0a3a7f43 100644 --- a/OpenCL/m13100_a0-optimized.cl +++ b/OpenCL/m13100_a0-optimized.cl @@ -25,7 +25,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -34,12 +34,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -87,7 +87,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -225,7 +225,7 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 * md5_transform (w0, w1, w2, w3, digest); } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { rc4_init_16 (rc4_key, data); @@ -600,20 +600,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_m04 (__glob * shared */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - /** * salt */ @@ -708,20 +698,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_s04 (__glob * shared */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - /** * salt */ diff --git a/OpenCL/m13100_a0-pure.cl b/OpenCL/m13100_a0-pure.cl index 0b972d1e0..c70be4242 100644 --- a/OpenCL/m13100_a0-pure.cl +++ b/OpenCL/m13100_a0-pure.cl @@ -24,7 +24,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -33,12 +33,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -86,7 +86,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -139,7 +139,7 @@ DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const __global u return j; } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { rc4_init_16 (rc4_key, data); @@ -394,20 +394,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_mxx (__glob COPY_PW (pws[gid]); - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5tgs_bufs[digests_offset].checksum[0]; @@ -466,20 +456,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_sxx (__glob COPY_PW (pws[gid]); - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5tgs_bufs[digests_offset].checksum[0]; diff --git a/OpenCL/m13100_a1-optimized.cl b/OpenCL/m13100_a1-optimized.cl index e2ac9ecdb..3f079e111 100644 --- a/OpenCL/m13100_a1-optimized.cl +++ b/OpenCL/m13100_a1-optimized.cl @@ -23,7 +23,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -32,12 +32,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -85,7 +85,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -223,7 +223,7 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 * md5_transform (w0, w1, w2, w3, digest); } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { rc4_init_16 (rc4_key, data); @@ -597,20 +597,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_m04 (__glob * shared */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - /** * salt */ @@ -754,20 +744,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_s04 (__glob * shared */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - /** * salt */ diff --git a/OpenCL/m13100_a1-pure.cl b/OpenCL/m13100_a1-pure.cl index 636406c4c..e84795714 100644 --- a/OpenCL/m13100_a1-pure.cl +++ b/OpenCL/m13100_a1-pure.cl @@ -22,7 +22,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -31,12 +31,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -84,7 +84,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -137,7 +137,7 @@ DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const __global u return j; } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { rc4_init_16 (rc4_key, data); @@ -390,20 +390,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_mxx (__glob * base */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5tgs_bufs[digests_offset].checksum[0]; @@ -460,20 +450,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_sxx (__glob * base */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5tgs_bufs[digests_offset].checksum[0]; diff --git a/OpenCL/m13100_a3-optimized.cl b/OpenCL/m13100_a3-optimized.cl index d57cbb399..400ef0929 100644 --- a/OpenCL/m13100_a3-optimized.cl +++ b/OpenCL/m13100_a3-optimized.cl @@ -23,7 +23,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -32,12 +32,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -85,7 +85,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -223,7 +223,7 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 * md5_transform (w0, w1, w2, w3, digest); } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { rc4_init_16 (rc4_key, data); @@ -563,7 +563,7 @@ DECLSPEC void kerb_prepare (const u32 *w0, const u32 *w1, const u32 pw_len, cons hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); } -DECLSPEC void m13100 (SCR_TYPE RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global krb5tgs_t *krb5tgs_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) +DECLSPEC void m13100 (__local RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global krb5tgs_t *krb5tgs_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -667,20 +667,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_m04 (__glob * main */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5tgs_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } @@ -729,20 +719,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_m08 (__glob * main */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5tgs_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } @@ -795,20 +775,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_s04 (__glob * main */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5tgs_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } @@ -857,20 +827,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_s08 (__glob * main */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5tgs_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } diff --git a/OpenCL/m13100_a3-pure.cl b/OpenCL/m13100_a3-pure.cl index 704341820..926d9be22 100644 --- a/OpenCL/m13100_a3-pure.cl +++ b/OpenCL/m13100_a3-pure.cl @@ -22,7 +22,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -31,12 +31,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -84,7 +84,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -137,7 +137,7 @@ DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const __global u return j; } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { rc4_init_16 (rc4_key, data); @@ -399,20 +399,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_mxx (__glob w[idx] = pws[gid].i[idx]; } - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5tgs_bufs[digests_offset].checksum[0]; @@ -482,20 +472,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_sxx (__glob w[idx] = pws[gid].i[idx]; } - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5tgs_bufs[digests_offset].checksum[0]; From a5746548e8f860a41f05637a8ffd4414bd4676db Mon Sep 17 00:00:00 2001 From: jsteube Date: Mon, 13 Aug 2018 13:41:43 +0200 Subject: [PATCH 3/3] Allow use of hash-mode 7900, 10700 and 13731 on AMD devices after workaround --- OpenCL/inc_cipher_aes.cl | 22 +++++++++++----------- OpenCL/inc_cipher_serpent.cl | 10 +++++----- OpenCL/inc_cipher_twofish.cl | 6 +++--- OpenCL/inc_truecrypt_xts.cl | 32 ++++++++++++++++---------------- src/opencl.c | 13 ------------- 5 files changed, 35 insertions(+), 48 deletions(-) diff --git a/OpenCL/inc_cipher_aes.cl b/OpenCL/inc_cipher_aes.cl index 86cebf04d..a9d1eb111 100644 --- a/OpenCL/inc_cipher_aes.cl +++ b/OpenCL/inc_cipher_aes.cl @@ -683,7 +683,7 @@ __constant u32a td4[256] = 0x55555555, 0x21212121, 0x0c0c0c0c, 0x7d7d7d7d, }; -__constant u32a rcon[] = +__constant u32a rcon[10] = { 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000, @@ -699,7 +699,7 @@ DECLSPEC void aes128_ExpandKey (u32 *ks, const u32 *ukey, SHM_TYPE u32 *s_te0, S ks[2] = ukey[2]; ks[3] = ukey[3]; - for (u32 i = 0, j = 0; i < 10; i += 1, j += 4) + for (volatile int i = 0, j = 0; i < 10; i += 1, j += 4) { u32 temp = ks[j + 3]; @@ -720,7 +720,7 @@ DECLSPEC void aes128_ExpandKey (u32 *ks, const u32 *ukey, SHM_TYPE u32 *s_te0, S DECLSPEC void aes128_InvertKey (u32 *ks, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4, SHM_TYPE u32 *s_td0, SHM_TYPE u32 *s_td1, SHM_TYPE u32 *s_td2, SHM_TYPE u32 *s_td3, SHM_TYPE u32 *s_td4) { - for (u32 i = 0, j = 40; i < j; i += 4, j -= 4) + for (volatile int i = 0, j = 40; i < j; i += 4, j -= 4) { u32 temp; @@ -730,7 +730,7 @@ DECLSPEC void aes128_InvertKey (u32 *ks, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te temp = ks[i + 3]; ks[i + 3] = ks[j + 3]; ks[j + 3] = temp; } - for (u32 i = 1, j = 4; i < 10; i += 1, j += 4) + for (volatile int i = 1, j = 4; i < 10; i += 1, j += 4) { ks[j + 0] = s_td0[s_te1[(ks[j + 0] >> 24) & 0xff] & 0xff] ^ @@ -799,7 +799,7 @@ DECLSPEC void aes128_encrypt (const u32 *ks, const u32 *in, u32 *out, SHM_TYPE u #ifdef _unroll #pragma unroll #endif - for (int i = 4; i < 40; i += 4) + for (volatile int i = 4; i < 40; i += 4) { const uchar4 x0 = as_uchar4 (t0); const uchar4 x1 = as_uchar4 (t1); @@ -862,7 +862,7 @@ DECLSPEC void aes128_decrypt (const u32 *ks, const u32 *in, u32 *out, SHM_TYPE u #ifdef _unroll #pragma unroll #endif - for (int i = 4; i < 40; i += 4) + for (volatile int i = 4; i < 40; i += 4) { const uchar4 x0 = as_uchar4 (t0); const uchar4 x1 = as_uchar4 (t1); @@ -926,7 +926,7 @@ DECLSPEC void aes256_ExpandKey (u32 *ks, const u32 *ukey, SHM_TYPE u32 *s_te0, S int i; int j; - for (int i = 0, j = 0; i < 7; i += 1, j += 8) + for (volatile int i = 0, j = 0; i < 7; i += 1, j += 8) { const u32 temp1 = ks[j + 7]; @@ -959,7 +959,7 @@ DECLSPEC void aes256_ExpandKey (u32 *ks, const u32 *ukey, SHM_TYPE u32 *s_te0, S DECLSPEC void aes256_InvertKey (u32 *ks, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4, SHM_TYPE u32 *s_td0, SHM_TYPE u32 *s_td1, SHM_TYPE u32 *s_td2, SHM_TYPE u32 *s_td3, SHM_TYPE u32 *s_td4) { - for (u32 i = 0, j = 56; i < j; i += 4, j -= 4) + for (volatile int i = 0, j = 56; i < j; i += 4, j -= 4) { u32 temp; @@ -969,7 +969,7 @@ DECLSPEC void aes256_InvertKey (u32 *ks, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te temp = ks[i + 3]; ks[i + 3] = ks[j + 3]; ks[j + 3] = temp; } - for (u32 i = 1, j = 4; i < 14; i += 1, j += 4) + for (volatile int i = 1, j = 4; i < 14; i += 1, j += 4) { ks[j + 0] = s_td0[s_te1[(ks[j + 0] >> 24) & 0xff] & 0xff] ^ @@ -1046,7 +1046,7 @@ DECLSPEC void aes256_encrypt (const u32 *ks, const u32 *in, u32 *out, SHM_TYPE u #ifdef _unroll #pragma unroll #endif - for (int i = 4; i < 56; i += 4) + for (volatile int i = 4; i < 56; i += 4) { const uchar4 x0 = as_uchar4 (t0); const uchar4 x1 = as_uchar4 (t1); @@ -1109,7 +1109,7 @@ DECLSPEC void aes256_decrypt (const u32 *ks, const u32 *in, u32 *out, SHM_TYPE u #ifdef _unroll #pragma unroll #endif - for (int i = 4; i < 56; i += 4) + for (volatile int i = 4; i < 56; i += 4) { const uchar4 x0 = as_uchar4 (t0); const uchar4 x1 = as_uchar4 (t1); diff --git a/OpenCL/inc_cipher_serpent.cl b/OpenCL/inc_cipher_serpent.cl index 20a077a38..a32cf65bc 100644 --- a/OpenCL/inc_cipher_serpent.cl +++ b/OpenCL/inc_cipher_serpent.cl @@ -408,7 +408,7 @@ DECLSPEC void serpent128_set_key (u32 *ks, const u32 *ukey) #ifdef _unroll #pragma unroll #endif - for (int i = 0; i < 4; i++) + for (volatile int i = 0; i < 4; i++) { ks[i] = ukey[i]; } @@ -416,7 +416,7 @@ DECLSPEC void serpent128_set_key (u32 *ks, const u32 *ukey) #ifdef _unroll #pragma unroll #endif - for (int i = 4; i < 8; i++) + for (volatile int i = 4; i < 8; i++) { ks[i] = 0; } @@ -426,7 +426,7 @@ DECLSPEC void serpent128_set_key (u32 *ks, const u32 *ukey) #ifdef _unroll #pragma unroll #endif - for (int i = 0; i < 132; i++) + for (volatile int i = 0; i < 132; i++) { ks[i + 8] = rotl32_S (ks[i + 7] ^ ks[i + 5] ^ ks[i + 3] ^ ks[i + 0] ^ 0x9e3779b9 ^ i, 11); } @@ -576,7 +576,7 @@ DECLSPEC void serpent256_set_key (u32 *ks, const u32 *ukey) #ifdef _unroll #pragma unroll #endif - for (int i = 0; i < 8; i++) + for (volatile int i = 0; i < 8; i++) { ks[i] = ukey[i]; } @@ -584,7 +584,7 @@ DECLSPEC void serpent256_set_key (u32 *ks, const u32 *ukey) #ifdef _unroll #pragma unroll #endif - for (int i = 0; i < 132; i++) + for (volatile int i = 0; i < 132; i++) { ks[i + 8] = rotl32_S (ks[i + 7] ^ ks[i + 5] ^ ks[i + 3] ^ ks[i + 0] ^ 0x9e3779b9 ^ i, 11); } diff --git a/OpenCL/inc_cipher_twofish.cl b/OpenCL/inc_cipher_twofish.cl index 68dcda56f..ad4d1a6ba 100644 --- a/OpenCL/inc_cipher_twofish.cl +++ b/OpenCL/inc_cipher_twofish.cl @@ -270,7 +270,7 @@ DECLSPEC u32 mds_rem (u32 p0, u32 p1) { #define G_MOD 0x14d - for (int i = 0; i < 8; i++) + for (volatile int i = 0; i < 8; i++) { u32 t = p1 >> 24; @@ -364,7 +364,7 @@ DECLSPEC void twofish128_set_key (u32 *sk, u32 *lk, const u32 *ukey) sk[1] = mds_rem (me_key[0], mo_key[0]); sk[0] = mds_rem (me_key[1], mo_key[1]); - for (int i = 0; i < 40; i += 2) + for (volatile int i = 0; i < 40; i += 2) { u32 a = 0x01010101 * i; u32 b = 0x01010101 + a; @@ -513,7 +513,7 @@ DECLSPEC void twofish256_set_key (u32 *sk, u32 *lk, const u32 *ukey) sk[1] = mds_rem (me_key[2], mo_key[2]); sk[0] = mds_rem (me_key[3], mo_key[3]); - for (int i = 0; i < 40; i += 2) + for (volatile int i = 0; i < 40; i += 2) { u32 a = 0x01010101 * i; u32 b = 0x01010101 + a; diff --git a/OpenCL/inc_truecrypt_xts.cl b/OpenCL/inc_truecrypt_xts.cl index bc6f319d0..393f71592 100644 --- a/OpenCL/inc_truecrypt_xts.cl +++ b/OpenCL/inc_truecrypt_xts.cl @@ -177,7 +177,7 @@ DECLSPEC int verify_header_aes (__global const tc_t *esalt_bufs, const u32 *ukey // seek to byte 256 - for (int i = 4; i < 64 - 16; i += 4) + for (volatile int i = 4; i < 64 - 16; i += 4) { xts_mul2 (T_aes, T_aes); } @@ -186,7 +186,7 @@ DECLSPEC int verify_header_aes (__global const tc_t *esalt_bufs, const u32 *ukey u32 crc32 = ~0; - for (int i = 64 - 16; i < 128 - 16; i += 4) + for (volatile int i = 64 - 16; i < 128 - 16; i += 4) { data[0] = esalt_bufs[0].data_buf[i + 0]; data[1] = esalt_bufs[0].data_buf[i + 1]; @@ -233,7 +233,7 @@ DECLSPEC int verify_header_serpent (__global const tc_t *esalt_bufs, const u32 * // seek to byte 256 - for (int i = 4; i < 64 - 16; i += 4) + for (volatile int i = 4; i < 64 - 16; i += 4) { xts_mul2 (T_serpent, T_serpent); } @@ -242,7 +242,7 @@ DECLSPEC int verify_header_serpent (__global const tc_t *esalt_bufs, const u32 * u32 crc32 = ~0; - for (int i = 64 - 16; i < 128 - 16; i += 4) + for (volatile int i = 64 - 16; i < 128 - 16; i += 4) { data[0] = esalt_bufs[0].data_buf[i + 0]; data[1] = esalt_bufs[0].data_buf[i + 1]; @@ -290,7 +290,7 @@ DECLSPEC int verify_header_twofish (__global const tc_t *esalt_bufs, const u32 * // seek to byte 256 - for (int i = 4; i < 64 - 16; i += 4) + for (volatile int i = 4; i < 64 - 16; i += 4) { xts_mul2 (T_twofish, T_twofish); } @@ -299,7 +299,7 @@ DECLSPEC int verify_header_twofish (__global const tc_t *esalt_bufs, const u32 * u32 crc32 = ~0; - for (int i = 64 - 16; i < 128 - 16; i += 4) + for (volatile int i = 64 - 16; i < 128 - 16; i += 4) { data[0] = esalt_bufs[0].data_buf[i + 0]; data[1] = esalt_bufs[0].data_buf[i + 1]; @@ -353,7 +353,7 @@ DECLSPEC int verify_header_aes_twofish (__global const tc_t *esalt_bufs, const u // seek to byte 256 - for (int i = 4; i < 64 - 16; i += 4) + for (volatile int i = 4; i < 64 - 16; i += 4) { xts_mul2 (T_aes, T_aes); xts_mul2 (T_twofish, T_twofish); @@ -363,7 +363,7 @@ DECLSPEC int verify_header_aes_twofish (__global const tc_t *esalt_bufs, const u u32 crc32 = ~0; - for (int i = 64 - 16; i < 128 - 16; i += 4) + for (volatile int i = 64 - 16; i < 128 - 16; i += 4) { data[0] = esalt_bufs[0].data_buf[i + 0]; data[1] = esalt_bufs[0].data_buf[i + 1]; @@ -414,7 +414,7 @@ DECLSPEC int verify_header_serpent_aes (__global const tc_t *esalt_bufs, const u // seek to byte 256 - for (int i = 4; i < 64 - 16; i += 4) + for (volatile int i = 4; i < 64 - 16; i += 4) { xts_mul2 (T_serpent, T_serpent); xts_mul2 (T_aes, T_aes); @@ -424,7 +424,7 @@ DECLSPEC int verify_header_serpent_aes (__global const tc_t *esalt_bufs, const u u32 crc32 = ~0; - for (int i = 64 - 16; i < 128 - 16; i += 4) + for (volatile int i = 64 - 16; i < 128 - 16; i += 4) { data[0] = esalt_bufs[0].data_buf[i + 0]; data[1] = esalt_bufs[0].data_buf[i + 1]; @@ -477,7 +477,7 @@ DECLSPEC int verify_header_twofish_serpent (__global const tc_t *esalt_bufs, con // seek to byte 256 - for (int i = 4; i < 64 - 16; i += 4) + for (volatile int i = 4; i < 64 - 16; i += 4) { xts_mul2 (T_twofish, T_twofish); xts_mul2 (T_serpent, T_serpent); @@ -487,7 +487,7 @@ DECLSPEC int verify_header_twofish_serpent (__global const tc_t *esalt_bufs, con u32 crc32 = ~0; - for (int i = 64 - 16; i < 128 - 16; i += 4) + for (volatile int i = 64 - 16; i < 128 - 16; i += 4) { data[0] = esalt_bufs[0].data_buf[i + 0]; data[1] = esalt_bufs[0].data_buf[i + 1]; @@ -546,7 +546,7 @@ DECLSPEC int verify_header_aes_twofish_serpent (__global const tc_t *esalt_bufs, // seek to byte 256 - for (int i = 4; i < 64 - 16; i += 4) + for (volatile int i = 4; i < 64 - 16; i += 4) { xts_mul2 (T_aes, T_aes); xts_mul2 (T_twofish, T_twofish); @@ -557,7 +557,7 @@ DECLSPEC int verify_header_aes_twofish_serpent (__global const tc_t *esalt_bufs, u32 crc32 = ~0; - for (int i = 64 - 16; i < 128 - 16; i += 4) + for (volatile int i = 64 - 16; i < 128 - 16; i += 4) { data[0] = esalt_bufs[0].data_buf[i + 0]; data[1] = esalt_bufs[0].data_buf[i + 1]; @@ -615,7 +615,7 @@ DECLSPEC int verify_header_serpent_twofish_aes (__global const tc_t *esalt_bufs, // seek to byte 256 - for (int i = 4; i < 64 - 16; i += 4) + for (volatile int i = 4; i < 64 - 16; i += 4) { xts_mul2 (T_serpent, T_serpent); xts_mul2 (T_twofish, T_twofish); @@ -626,7 +626,7 @@ DECLSPEC int verify_header_serpent_twofish_aes (__global const tc_t *esalt_bufs, u32 crc32 = ~0; - for (int i = 64 - 16; i < 128 - 16; i += 4) + for (volatile int i = 64 - 16; i < 128 - 16; i += 4) { data[0] = esalt_bufs[0].data_buf[i + 0]; data[1] = esalt_bufs[0].data_buf[i + 1]; diff --git a/src/opencl.c b/src/opencl.c index 53fcc7bc7..0a7189a6c 100644 --- a/src/opencl.c +++ b/src/opencl.c @@ -4069,19 +4069,6 @@ int opencl_session_begin (hashcat_ctx_t *hashcat_ctx) #endif // __APPLE__ - if (device_param->platform_vendor_id == VENDOR_ID_AMD) - { - if (device_param->is_rocm == false) - { - if ((user_options->hash_mode == 7900) - || (user_options->hash_mode == 10700) - || (user_options->hash_mode == 13731)) - { - skipped_temp = true; - } - } - } - if ((skipped_temp == true) && (user_options->force == false)) { event_log_warning (hashcat_ctx, "* Device #%u: Skipping unstable hash-mode %u for this device.", device_id + 1, user_options->hash_mode);