diff --git a/OpenCL/inc_hash_whirlpool.cl b/OpenCL/inc_hash_whirlpool.cl index bff268064..9c1d7c294 100644 --- a/OpenCL/inc_hash_whirlpool.cl +++ b/OpenCL/inc_hash_whirlpool.cl @@ -1125,12 +1125,12 @@ typedef struct whirlpool_ctx int len; - SHM_TYPE u32 (*s_Ch)[256]; - SHM_TYPE u32 (*s_Cl)[256]; + __local u32 (*s_Ch)[256]; + __local u32 (*s_Cl)[256]; } whirlpool_ctx_t; -DECLSPEC void whirlpool_transform (const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, u32 *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_transform (const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, u32 *digest, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { u32 Kh[8]; u32 Kl[8]; @@ -1300,7 +1300,7 @@ DECLSPEC void whirlpool_transform (const u32 *w0, const u32 *w1, const u32 *w2, digest[15] ^= statel[7] ^ w3[3]; } -DECLSPEC void whirlpool_init (whirlpool_ctx_t *ctx, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_init (whirlpool_ctx_t *ctx, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { ctx->h[ 0] = 0; ctx->h[ 1] = 0; @@ -1971,7 +1971,7 @@ typedef struct whirlpool_hmac_ctx } whirlpool_hmac_ctx_t; -DECLSPEC void whirlpool_hmac_init_64 (whirlpool_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_hmac_init_64 (whirlpool_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { u32 t0[4]; u32 t1[4]; @@ -2025,7 +2025,7 @@ DECLSPEC void whirlpool_hmac_init_64 (whirlpool_hmac_ctx_t *ctx, const u32 *w0, whirlpool_update_64 (&ctx->opad, t0, t1, t2, t3, 64); } -DECLSPEC void whirlpool_hmac_init (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_hmac_init (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { u32 w0[4]; u32 w1[4]; @@ -2082,7 +2082,7 @@ DECLSPEC void whirlpool_hmac_init (whirlpool_hmac_ctx_t *ctx, const u32 *w, cons whirlpool_hmac_init_64 (ctx, w0, w1, w2, w3, s_Ch, s_Cl); } -DECLSPEC void whirlpool_hmac_init_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_hmac_init_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { u32 w0[4]; u32 w1[4]; @@ -2139,7 +2139,7 @@ DECLSPEC void whirlpool_hmac_init_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, whirlpool_hmac_init_64 (ctx, w0, w1, w2, w3, s_Ch, s_Cl); } -DECLSPEC void whirlpool_hmac_init_global (whirlpool_hmac_ctx_t *ctx, __global const u32 *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_hmac_init_global (whirlpool_hmac_ctx_t *ctx, __global const u32 *w, const int len, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { u32 w0[4]; u32 w1[4]; @@ -2196,7 +2196,7 @@ DECLSPEC void whirlpool_hmac_init_global (whirlpool_hmac_ctx_t *ctx, __global co whirlpool_hmac_init_64 (ctx, w0, w1, w2, w3, s_Ch, s_Cl); } -DECLSPEC void whirlpool_hmac_init_global_swap (whirlpool_hmac_ctx_t *ctx, __global const u32 *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_hmac_init_global_swap (whirlpool_hmac_ctx_t *ctx, __global const u32 *w, const int len, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { u32 w0[4]; u32 w1[4]; @@ -2342,12 +2342,12 @@ typedef struct whirlpool_ctx_vector int len; - SHM_TYPE u32 (*s_Ch)[256]; - SHM_TYPE u32 (*s_Cl)[256]; + __local u32 (*s_Ch)[256]; + __local u32 (*s_Cl)[256]; } whirlpool_ctx_vector_t; -DECLSPEC void whirlpool_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { u32x Kh[8]; u32x Kl[8]; @@ -2517,7 +2517,7 @@ DECLSPEC void whirlpool_transform_vector (const u32x *w0, const u32x *w1, const digest[15] ^= statel[7] ^ w3[3]; } -DECLSPEC void whirlpool_init_vector (whirlpool_ctx_vector_t *ctx, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_init_vector (whirlpool_ctx_vector_t *ctx, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { ctx->h[ 0] = 0; ctx->h[ 1] = 0; @@ -2974,7 +2974,7 @@ typedef struct whirlpool_hmac_ctx_vector } whirlpool_hmac_ctx_vector_t; -DECLSPEC void whirlpool_hmac_init_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_hmac_init_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { u32x t0[4]; u32x t1[4]; @@ -3028,7 +3028,7 @@ DECLSPEC void whirlpool_hmac_init_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, c whirlpool_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 64); } -DECLSPEC void whirlpool_hmac_init_vector (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_hmac_init_vector (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w, const int len, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { u32x w0[4]; u32x w1[4]; diff --git a/OpenCL/inc_vendor.cl b/OpenCL/inc_vendor.cl index da7166d93..2095cd60c 100644 --- a/OpenCL/inc_vendor.cl +++ b/OpenCL/inc_vendor.cl @@ -19,20 +19,6 @@ #define IS_ACCEL #endif -#if DEVICE_TYPE == DEVICE_TYPE_CPU -#elif DEVICE_TYPE == DEVICE_TYPE_GPU -#define REAL_SHM -#elif DEVICE_TYPE == DEVICE_TYPE_ACCEL -#endif - -#ifdef REAL_SHM -#define SHM_TYPE __local -#define SCR_TYPE __local -#else -#define SHM_TYPE __constant -#define SCR_TYPE -#endif - /** * vendor specific */ @@ -80,6 +66,21 @@ #define IS_GENERIC #endif +#if DEVICE_TYPE == DEVICE_TYPE_CPU +#elif DEVICE_TYPE == DEVICE_TYPE_GPU +// AMD fails with mode 6211 +#ifdef IS_NV +#define REAL_SHM +#endif +#elif DEVICE_TYPE == DEVICE_TYPE_ACCEL +#endif + +#ifdef REAL_SHM +#define SHM_TYPE __local +#else +#define SHM_TYPE __constant +#endif + /** * function declarations can have a large influence depending on the opencl runtime */ diff --git a/OpenCL/m06100_a0-optimized.cl b/OpenCL/m06100_a0-optimized.cl index be3fe2ee0..5a8e06463 100644 --- a/OpenCL/m06100_a0-optimized.cl +++ b/OpenCL/m06100_a0-optimized.cl @@ -15,7 +15,7 @@ #include "inc_simd.cl" #include "inc_hash_whirlpool.cl" -DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_Ch, s_Cl); } @@ -34,8 +34,6 @@ __kernel void m06100_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -62,13 +60,6 @@ __kernel void m06100_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -174,8 +165,6 @@ __kernel void m06100_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -202,13 +191,6 @@ __kernel void m06100_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** diff --git a/OpenCL/m06100_a0-pure.cl b/OpenCL/m06100_a0-pure.cl index 3bab742bc..779ff92ec 100644 --- a/OpenCL/m06100_a0-pure.cl +++ b/OpenCL/m06100_a0-pure.cl @@ -29,8 +29,6 @@ __kernel void m06100_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -57,13 +55,6 @@ __kernel void m06100_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -113,8 +104,6 @@ __kernel void m06100_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -141,13 +130,6 @@ __kernel void m06100_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** diff --git a/OpenCL/m06100_a1-optimized.cl b/OpenCL/m06100_a1-optimized.cl index 1325ac1b3..1bbc9c2dc 100644 --- a/OpenCL/m06100_a1-optimized.cl +++ b/OpenCL/m06100_a1-optimized.cl @@ -13,7 +13,7 @@ #include "inc_simd.cl" #include "inc_hash_whirlpool.cl" -DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_Ch, s_Cl); } @@ -32,8 +32,6 @@ __kernel void m06100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -60,13 +58,6 @@ __kernel void m06100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -230,8 +221,6 @@ __kernel void m06100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -258,13 +247,6 @@ __kernel void m06100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** diff --git a/OpenCL/m06100_a1-pure.cl b/OpenCL/m06100_a1-pure.cl index b170ca66e..2c7dbbdc2 100644 --- a/OpenCL/m06100_a1-pure.cl +++ b/OpenCL/m06100_a1-pure.cl @@ -27,8 +27,6 @@ __kernel void m06100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -55,13 +53,6 @@ __kernel void m06100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -109,8 +100,6 @@ __kernel void m06100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -137,13 +126,6 @@ __kernel void m06100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** diff --git a/OpenCL/m06100_a3-optimized.cl b/OpenCL/m06100_a3-optimized.cl index 11fa4c7fe..5883edea7 100644 --- a/OpenCL/m06100_a3-optimized.cl +++ b/OpenCL/m06100_a3-optimized.cl @@ -13,12 +13,12 @@ #include "inc_simd.cl" #include "inc_hash_whirlpool.cl" -DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_Ch, s_Cl); } -DECLSPEC void m06100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, SHM_TYPE u32 (*s_Cl)[256], SHM_TYPE u32 (*s_Ch)[256]) +DECLSPEC void m06100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 (*s_Cl)[256], __local u32 (*s_Ch)[256]) { /** * modifier @@ -87,7 +87,7 @@ DECLSPEC void m06100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __g } } -DECLSPEC void m06100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, SHM_TYPE u32 (*s_Cl)[256], SHM_TYPE u32 (*s_Ch)[256]) +DECLSPEC void m06100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 (*s_Cl)[256], __local u32 (*s_Ch)[256]) { /** * modifier @@ -182,8 +182,6 @@ __kernel void m06100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -210,13 +208,6 @@ __kernel void m06100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -274,8 +265,6 @@ __kernel void m06100_m08 (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -302,13 +291,6 @@ __kernel void m06100_m08 (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -366,8 +348,6 @@ __kernel void m06100_m16 (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -394,13 +374,6 @@ __kernel void m06100_m16 (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -458,8 +431,6 @@ __kernel void m06100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -486,13 +457,6 @@ __kernel void m06100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -550,8 +514,6 @@ __kernel void m06100_s08 (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -578,13 +540,6 @@ __kernel void m06100_s08 (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -642,8 +597,6 @@ __kernel void m06100_s16 (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -670,13 +623,6 @@ __kernel void m06100_s16 (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** diff --git a/OpenCL/m06100_a3-pure.cl b/OpenCL/m06100_a3-pure.cl index 564d39d5e..de353deec 100644 --- a/OpenCL/m06100_a3-pure.cl +++ b/OpenCL/m06100_a3-pure.cl @@ -27,8 +27,6 @@ __kernel void m06100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -55,13 +53,6 @@ __kernel void m06100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** @@ -122,8 +113,6 @@ __kernel void m06100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -150,13 +139,6 @@ __kernel void m06100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; /** diff --git a/OpenCL/m06231-pure.cl b/OpenCL/m06231-pure.cl index 06a82204f..07804cda7 100644 --- a/OpenCL/m06231-pure.cl +++ b/OpenCL/m06231-pure.cl @@ -45,7 +45,7 @@ DECLSPEC u32 u8add (const u32 a, const u32 b) return r; } -DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { digest[ 0] = ipad[ 0]; digest[ 1] = ipad[ 1]; @@ -155,8 +155,6 @@ __kernel void m06231_init (__global pw_t *pws, __global const kernel_rule_t *rul * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -183,13 +181,6 @@ __kernel void m06231_init (__global pw_t *pws, __global const kernel_rule_t *rul barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; u32 w0[4]; @@ -363,8 +354,6 @@ __kernel void m06231_loop (__global pw_t *pws, __global const kernel_rule_t *rul * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -391,13 +380,6 @@ __kernel void m06231_loop (__global pw_t *pws, __global const kernel_rule_t *rul barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if ((gid * VECT_SIZE) >= gid_max) return; u32x ipad[16]; @@ -568,17 +550,17 @@ __kernel void m06231_comp (__global pw_t *pws, __global const kernel_rule_t *rul #ifdef REAL_SHM - SHM_TYPE u32 s_td0[256]; - SHM_TYPE u32 s_td1[256]; - SHM_TYPE u32 s_td2[256]; - SHM_TYPE u32 s_td3[256]; - SHM_TYPE u32 s_td4[256]; - - SHM_TYPE u32 s_te0[256]; - SHM_TYPE u32 s_te1[256]; - SHM_TYPE u32 s_te2[256]; - SHM_TYPE u32 s_te3[256]; - SHM_TYPE u32 s_te4[256]; + __local u32 s_td0[256]; + __local u32 s_td1[256]; + __local u32 s_td2[256]; + __local u32 s_td3[256]; + __local u32 s_td4[256]; + + __local u32 s_te0[256]; + __local u32 s_te1[256]; + __local u32 s_te2[256]; + __local u32 s_te3[256]; + __local u32 s_te4[256]; for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { diff --git a/OpenCL/m06232-pure.cl b/OpenCL/m06232-pure.cl index 8af85c314..db1e591f2 100644 --- a/OpenCL/m06232-pure.cl +++ b/OpenCL/m06232-pure.cl @@ -45,7 +45,7 @@ DECLSPEC u32 u8add (const u32 a, const u32 b) return r; } -DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { digest[ 0] = ipad[ 0]; digest[ 1] = ipad[ 1]; @@ -155,8 +155,6 @@ __kernel void m06232_init (__global pw_t *pws, __global const kernel_rule_t *rul * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -183,13 +181,6 @@ __kernel void m06232_init (__global pw_t *pws, __global const kernel_rule_t *rul barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; u32 w0[4]; @@ -363,8 +354,6 @@ __kernel void m06232_loop (__global pw_t *pws, __global const kernel_rule_t *rul * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -391,13 +380,6 @@ __kernel void m06232_loop (__global pw_t *pws, __global const kernel_rule_t *rul barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if ((gid * VECT_SIZE) >= gid_max) return; u32x ipad[16]; @@ -568,17 +550,17 @@ __kernel void m06232_comp (__global pw_t *pws, __global const kernel_rule_t *rul #ifdef REAL_SHM - SHM_TYPE u32 s_td0[256]; - SHM_TYPE u32 s_td1[256]; - SHM_TYPE u32 s_td2[256]; - SHM_TYPE u32 s_td3[256]; - SHM_TYPE u32 s_td4[256]; - - SHM_TYPE u32 s_te0[256]; - SHM_TYPE u32 s_te1[256]; - SHM_TYPE u32 s_te2[256]; - SHM_TYPE u32 s_te3[256]; - SHM_TYPE u32 s_te4[256]; + __local u32 s_td0[256]; + __local u32 s_td1[256]; + __local u32 s_td2[256]; + __local u32 s_td3[256]; + __local u32 s_td4[256]; + + __local u32 s_te0[256]; + __local u32 s_te1[256]; + __local u32 s_te2[256]; + __local u32 s_te3[256]; + __local u32 s_te4[256]; for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { diff --git a/OpenCL/m06233-pure.cl b/OpenCL/m06233-pure.cl index 37412dca2..957cbf227 100644 --- a/OpenCL/m06233-pure.cl +++ b/OpenCL/m06233-pure.cl @@ -45,7 +45,7 @@ DECLSPEC u32 u8add (const u32 a, const u32 b) return r; } -DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256]) +DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256]) { digest[ 0] = ipad[ 0]; digest[ 1] = ipad[ 1]; @@ -155,8 +155,6 @@ __kernel void m06233_init (__global pw_t *pws, __global const kernel_rule_t *rul * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -183,13 +181,6 @@ __kernel void m06233_init (__global pw_t *pws, __global const kernel_rule_t *rul barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if (gid >= gid_max) return; u32 w0[4]; @@ -363,8 +354,6 @@ __kernel void m06233_loop (__global pw_t *pws, __global const kernel_rule_t *rul * shared */ - #ifdef REAL_SHM - __local u32 s_Ch[8][256]; __local u32 s_Cl[8][256]; @@ -391,13 +380,6 @@ __kernel void m06233_loop (__global pw_t *pws, __global const kernel_rule_t *rul barrier (CLK_LOCAL_MEM_FENCE); - #else - - __constant u32 (*s_Ch)[256] = Ch; - __constant u32 (*s_Cl)[256] = Cl; - - #endif - if ((gid * VECT_SIZE) >= gid_max) return; u32x ipad[16]; @@ -568,17 +550,17 @@ __kernel void m06233_comp (__global pw_t *pws, __global const kernel_rule_t *rul #ifdef REAL_SHM - SHM_TYPE u32 s_td0[256]; - SHM_TYPE u32 s_td1[256]; - SHM_TYPE u32 s_td2[256]; - SHM_TYPE u32 s_td3[256]; - SHM_TYPE u32 s_td4[256]; - - SHM_TYPE u32 s_te0[256]; - SHM_TYPE u32 s_te1[256]; - SHM_TYPE u32 s_te2[256]; - SHM_TYPE u32 s_te3[256]; - SHM_TYPE u32 s_te4[256]; + __local u32 s_td0[256]; + __local u32 s_td1[256]; + __local u32 s_td2[256]; + __local u32 s_td3[256]; + __local u32 s_td4[256]; + + __local u32 s_te0[256]; + __local u32 s_te1[256]; + __local u32 s_te2[256]; + __local u32 s_te3[256]; + __local u32 s_te4[256]; for (MAYBE_VOLATILE u32 i = lid; i < 256; i += lsz) { diff --git a/OpenCL/m07500_a0-optimized.cl b/OpenCL/m07500_a0-optimized.cl index b4712d593..7de7d4ec8 100644 --- a/OpenCL/m07500_a0-optimized.cl +++ b/OpenCL/m07500_a0-optimized.cl @@ -25,7 +25,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -34,12 +34,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -87,7 +87,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -140,7 +140,7 @@ DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u return j; } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) { rc4_init_16 (rc4_key, data); @@ -440,20 +440,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_m04 (__glob * shared */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - /** * loop */ @@ -556,20 +546,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_s04 (__glob * shared */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - /** * loop */ diff --git a/OpenCL/m07500_a0-pure.cl b/OpenCL/m07500_a0-pure.cl index 151b797f0..73dd7b341 100644 --- a/OpenCL/m07500_a0-pure.cl +++ b/OpenCL/m07500_a0-pure.cl @@ -24,7 +24,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -33,12 +33,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -86,7 +86,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -139,7 +139,7 @@ DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u return j; } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) { rc4_init_16 (rc4_key, data); @@ -285,20 +285,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_mxx (__glob COPY_PW (pws[gid]); - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5pa_bufs[digests_offset].checksum[0]; @@ -366,20 +356,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_sxx (__glob COPY_PW (pws[gid]); - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5pa_bufs[digests_offset].checksum[0]; diff --git a/OpenCL/m07500_a1-optimized.cl b/OpenCL/m07500_a1-optimized.cl index d4d52bf59..7afd8906a 100644 --- a/OpenCL/m07500_a1-optimized.cl +++ b/OpenCL/m07500_a1-optimized.cl @@ -23,7 +23,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -32,12 +32,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -85,7 +85,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -138,7 +138,7 @@ DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u return j; } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) { rc4_init_16 (rc4_key, data); @@ -438,20 +438,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_m04 (__glob * shared */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - /** * loop */ @@ -604,20 +594,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_s04 (__glob * shared */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - /** * loop */ diff --git a/OpenCL/m07500_a1-pure.cl b/OpenCL/m07500_a1-pure.cl index ed3cfdcc2..f5438ddd0 100644 --- a/OpenCL/m07500_a1-pure.cl +++ b/OpenCL/m07500_a1-pure.cl @@ -22,7 +22,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -31,12 +31,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -84,7 +84,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -137,7 +137,7 @@ DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u return j; } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) { rc4_init_16 (rc4_key, data); @@ -281,20 +281,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_mxx (__glob * base */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5pa_bufs[digests_offset].checksum[0]; @@ -360,20 +350,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_sxx (__glob * base */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5pa_bufs[digests_offset].checksum[0]; diff --git a/OpenCL/m07500_a3-optimized.cl b/OpenCL/m07500_a3-optimized.cl index a1a2dbe63..5c753c75d 100644 --- a/OpenCL/m07500_a3-optimized.cl +++ b/OpenCL/m07500_a3-optimized.cl @@ -23,7 +23,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -32,12 +32,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -85,7 +85,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -138,7 +138,7 @@ DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u return j; } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) { rc4_init_16 (rc4_key, data); @@ -382,7 +382,7 @@ DECLSPEC void kerb_prepare (const u32 *w0, const u32 *w1, const u32 pw_len, cons hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); } -DECLSPEC void m07500 (SCR_TYPE RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global krb5pa_t *krb5pa_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) +DECLSPEC void m07500 (__local RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global krb5pa_t *krb5pa_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -517,20 +517,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_m04 (__glob * main */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5pa_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } @@ -579,20 +569,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_m08 (__glob * main */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5pa_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } @@ -645,20 +625,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_s04 (__glob * main */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5pa_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } @@ -707,20 +677,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_s08 (__glob * main */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - m07500 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5pa_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } diff --git a/OpenCL/m07500_a3-pure.cl b/OpenCL/m07500_a3-pure.cl index c666fb9b1..cebeb8a3d 100644 --- a/OpenCL/m07500_a3-pure.cl +++ b/OpenCL/m07500_a3-pure.cl @@ -22,7 +22,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -31,12 +31,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -84,7 +84,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -137,7 +137,7 @@ DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const u32 *in, u return j; } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, u32 *timestamp_ct) { rc4_init_16 (rc4_key, data); @@ -302,20 +302,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_mxx (__glob w[idx] = pws[gid].i[idx]; } - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5pa_bufs[digests_offset].checksum[0]; @@ -406,20 +396,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m07500_sxx (__glob w[idx] = pws[gid].i[idx]; } - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5pa_bufs[digests_offset].checksum[0]; diff --git a/OpenCL/m13100_a0-optimized.cl b/OpenCL/m13100_a0-optimized.cl index 3ca427acf..b0a3a7f43 100644 --- a/OpenCL/m13100_a0-optimized.cl +++ b/OpenCL/m13100_a0-optimized.cl @@ -25,7 +25,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -34,12 +34,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -87,7 +87,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -225,7 +225,7 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 * md5_transform (w0, w1, w2, w3, digest); } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { rc4_init_16 (rc4_key, data); @@ -600,20 +600,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_m04 (__glob * shared */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - /** * salt */ @@ -708,20 +698,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_s04 (__glob * shared */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - /** * salt */ diff --git a/OpenCL/m13100_a0-pure.cl b/OpenCL/m13100_a0-pure.cl index 0b972d1e0..c70be4242 100644 --- a/OpenCL/m13100_a0-pure.cl +++ b/OpenCL/m13100_a0-pure.cl @@ -24,7 +24,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -33,12 +33,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -86,7 +86,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -139,7 +139,7 @@ DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const __global u return j; } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { rc4_init_16 (rc4_key, data); @@ -394,20 +394,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_mxx (__glob COPY_PW (pws[gid]); - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5tgs_bufs[digests_offset].checksum[0]; @@ -466,20 +456,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_sxx (__glob COPY_PW (pws[gid]); - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5tgs_bufs[digests_offset].checksum[0]; diff --git a/OpenCL/m13100_a1-optimized.cl b/OpenCL/m13100_a1-optimized.cl index e2ac9ecdb..3f079e111 100644 --- a/OpenCL/m13100_a1-optimized.cl +++ b/OpenCL/m13100_a1-optimized.cl @@ -23,7 +23,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -32,12 +32,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -85,7 +85,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -223,7 +223,7 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 * md5_transform (w0, w1, w2, w3, digest); } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { rc4_init_16 (rc4_key, data); @@ -597,20 +597,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_m04 (__glob * shared */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - /** * salt */ @@ -754,20 +744,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_s04 (__glob * shared */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - /** * salt */ diff --git a/OpenCL/m13100_a1-pure.cl b/OpenCL/m13100_a1-pure.cl index 636406c4c..e84795714 100644 --- a/OpenCL/m13100_a1-pure.cl +++ b/OpenCL/m13100_a1-pure.cl @@ -22,7 +22,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -31,12 +31,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -84,7 +84,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -137,7 +137,7 @@ DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const __global u return j; } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { rc4_init_16 (rc4_key, data); @@ -390,20 +390,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_mxx (__glob * base */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5tgs_bufs[digests_offset].checksum[0]; @@ -460,20 +450,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_sxx (__glob * base */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5tgs_bufs[digests_offset].checksum[0]; diff --git a/OpenCL/m13100_a3-optimized.cl b/OpenCL/m13100_a3-optimized.cl index d57cbb399..400ef0929 100644 --- a/OpenCL/m13100_a3-optimized.cl +++ b/OpenCL/m13100_a3-optimized.cl @@ -23,7 +23,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -32,12 +32,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -85,7 +85,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -223,7 +223,7 @@ DECLSPEC void hmac_md5_run (u32 *w0, u32 *w1, u32 *w2, u32 *w3, u32 *ipad, u32 * md5_transform (w0, w1, w2, w3, digest); } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { rc4_init_16 (rc4_key, data); @@ -563,7 +563,7 @@ DECLSPEC void kerb_prepare (const u32 *w0, const u32 *w1, const u32 pw_len, cons hmac_md5_run (w0_t, w1_t, w2_t, w3_t, ipad, opad, digest); } -DECLSPEC void m13100 (SCR_TYPE RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global krb5tgs_t *krb5tgs_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) +DECLSPEC void m13100 (__local RC4_KEY *rc4_key, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global krb5tgs_t *krb5tgs_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset) { /** * modifier @@ -667,20 +667,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_m04 (__glob * main */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5tgs_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } @@ -729,20 +719,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_m08 (__glob * main */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5tgs_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } @@ -795,20 +775,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_s04 (__glob * main */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5tgs_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } @@ -857,20 +827,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_s08 (__glob * main */ - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - m13100 (rc4_key, w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, krb5tgs_bufs, d_return_buf, d_scryptV0_buf, d_scryptV1_buf, d_scryptV2_buf, d_scryptV3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset); } diff --git a/OpenCL/m13100_a3-pure.cl b/OpenCL/m13100_a3-pure.cl index 704341820..926d9be22 100644 --- a/OpenCL/m13100_a3-pure.cl +++ b/OpenCL/m13100_a3-pure.cl @@ -22,7 +22,7 @@ typedef struct } RC4_KEY; -DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) +DECLSPEC void swap (__local RC4_KEY *rc4_key, const u8 i, const u8 j) { u8 tmp; @@ -31,12 +31,12 @@ DECLSPEC void swap (SCR_TYPE RC4_KEY *rc4_key, const u8 i, const u8 j) rc4_key->S[j] = tmp; } -DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) +DECLSPEC void rc4_init_16 (__local RC4_KEY *rc4_key, const u32 *data) { u32 v = 0x03020100; u32 a = 0x04040404; - SCR_TYPE u32 *ptr = (SCR_TYPE u32 *) rc4_key->S; + __local u32 *ptr = (__local u32 *) rc4_key->S; #ifdef _unroll #pragma unroll @@ -84,7 +84,7 @@ DECLSPEC void rc4_init_16 (SCR_TYPE RC4_KEY *rc4_key, const u32 *data) } } -DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) +DECLSPEC u8 rc4_next_16 (__local RC4_KEY *rc4_key, u8 i, u8 j, const __global u32 *in, u32 *out) { #ifdef _unroll #pragma unroll @@ -137,7 +137,7 @@ DECLSPEC u8 rc4_next_16 (SCR_TYPE RC4_KEY *rc4_key, u8 i, u8 j, const __global u return j; } -DECLSPEC int decrypt_and_check (SCR_TYPE RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) +DECLSPEC int decrypt_and_check (__local RC4_KEY *rc4_key, u32 *data, __global const u32 *edata2, const u32 edata2_len, const u32 *K2, const u32 *checksum) { rc4_init_16 (rc4_key, data); @@ -399,20 +399,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_mxx (__glob w[idx] = pws[gid].i[idx]; } - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5tgs_bufs[digests_offset].checksum[0]; @@ -482,20 +472,10 @@ __kernel void __attribute__((reqd_work_group_size(64, 1, 1))) m13100_sxx (__glob w[idx] = pws[gid].i[idx]; } - #ifdef REAL_SHM - __local RC4_KEY rc4_keys[64]; __local RC4_KEY *rc4_key = &rc4_keys[lid]; - #else - - RC4_KEY rc4_keys[1]; - - RC4_KEY *rc4_key = &rc4_keys[0]; - - #endif - u32 checksum[4]; checksum[0] = krb5tgs_bufs[digests_offset].checksum[0];