Do not use __local memory for whirlpool if running on a device without physical shared memory

pull/1358/head
Jens Steube 7 years ago
parent 0a0522cf76
commit 63f6ca5114

@ -690,12 +690,6 @@ __constant u32a rcon[] =
0x1b000000, 0x36000000,
};
#ifdef REAL_SHM
#define SHM_TYPE __local
#else
#define SHM_TYPE __constant
#endif
// 128 bit key
static void aes128_ExpandKey (u32 *ks, const u32 *ukey, SHM_TYPE u32 *s_te0, SHM_TYPE u32 *s_te1, SHM_TYPE u32 *s_te2, SHM_TYPE u32 *s_te3, SHM_TYPE u32 *s_te4)

@ -1125,12 +1125,12 @@ typedef struct whirlpool_ctx
int len;
__local u32 (*s_Ch)[256];
__local u32 (*s_Cl)[256];
SHM_TYPE u32 (*s_Ch)[256];
SHM_TYPE u32 (*s_Cl)[256];
} whirlpool_ctx_t;
static void whirlpool_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[16], __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
static void whirlpool_transform (const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], u32 digest[16], SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
{
u32 Kh[8];
u32 Kl[8];
@ -1300,7 +1300,7 @@ static void whirlpool_transform (const u32 w0[4], const u32 w1[4], const u32 w2[
digest[15] ^= statel[7] ^ w3[3];
}
static void whirlpool_init (whirlpool_ctx_t *ctx, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
static void whirlpool_init (whirlpool_ctx_t *ctx, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
{
ctx->h[ 0] = 0;
ctx->h[ 1] = 0;
@ -1975,7 +1975,7 @@ typedef struct whirlpool_hmac_ctx
} whirlpool_hmac_ctx_t;
static void whirlpool_hmac_init_64 (whirlpool_hmac_ctx_t *ctx, const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
static void whirlpool_hmac_init_64 (whirlpool_hmac_ctx_t *ctx, const u32 w0[4], const u32 w1[4], const u32 w2[4], const u32 w3[4], SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
{
u32 t0[4];
u32 t1[4];
@ -2029,7 +2029,7 @@ static void whirlpool_hmac_init_64 (whirlpool_hmac_ctx_t *ctx, const u32 w0[4],
whirlpool_update_64 (&ctx->opad, t0, t1, t2, t3, 64);
}
static void whirlpool_hmac_init (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
static void whirlpool_hmac_init (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
{
u32 w0[4];
u32 w1[4];
@ -2086,7 +2086,7 @@ static void whirlpool_hmac_init (whirlpool_hmac_ctx_t *ctx, const u32 *w, const
whirlpool_hmac_init_64 (ctx, w0, w1, w2, w3, s_Ch, s_Cl);
}
static void whirlpool_hmac_init_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
static void whirlpool_hmac_init_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
{
u32 w0[4];
u32 w1[4];
@ -2143,7 +2143,7 @@ static void whirlpool_hmac_init_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, c
whirlpool_hmac_init_64 (ctx, w0, w1, w2, w3, s_Ch, s_Cl);
}
static void whirlpool_hmac_init_global (whirlpool_hmac_ctx_t *ctx, __global const u32 *w, const int len, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
static void whirlpool_hmac_init_global (whirlpool_hmac_ctx_t *ctx, __global const u32 *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
{
u32 w0[4];
u32 w1[4];
@ -2200,7 +2200,7 @@ static void whirlpool_hmac_init_global (whirlpool_hmac_ctx_t *ctx, __global cons
whirlpool_hmac_init_64 (ctx, w0, w1, w2, w3, s_Ch, s_Cl);
}
static void whirlpool_hmac_init_global_swap (whirlpool_hmac_ctx_t *ctx, __global const u32 *w, const int len, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
static void whirlpool_hmac_init_global_swap (whirlpool_hmac_ctx_t *ctx, __global const u32 *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
{
u32 w0[4];
u32 w1[4];
@ -2346,12 +2346,12 @@ typedef struct whirlpool_ctx_vector
int len;
__local u32 (*s_Ch)[256];
__local u32 (*s_Cl)[256];
SHM_TYPE u32 (*s_Ch)[256];
SHM_TYPE u32 (*s_Cl)[256];
} whirlpool_ctx_vector_t;
static void whirlpool_transform_vector (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[16], __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
static void whirlpool_transform_vector (const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], u32x digest[16], SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
{
u32x Kh[8];
u32x Kl[8];
@ -2521,7 +2521,7 @@ static void whirlpool_transform_vector (const u32x w0[4], const u32x w1[4], cons
digest[15] ^= statel[7] ^ w3[3];
}
static void whirlpool_init_vector (whirlpool_ctx_vector_t *ctx, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
static void whirlpool_init_vector (whirlpool_ctx_vector_t *ctx, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
{
ctx->h[ 0] = 0;
ctx->h[ 1] = 0;
@ -2982,7 +2982,7 @@ typedef struct whirlpool_hmac_ctx_vector
} whirlpool_hmac_ctx_vector_t;
static void whirlpool_hmac_init_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
static void whirlpool_hmac_init_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, const u32x w0[4], const u32x w1[4], const u32x w2[4], const u32x w3[4], SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
{
u32x t0[4];
u32x t1[4];
@ -3036,7 +3036,7 @@ static void whirlpool_hmac_init_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, con
whirlpool_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 64);
}
static void whirlpool_hmac_init_vector (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w, const int len, __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
static void whirlpool_hmac_init_vector (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w, const int len, SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
{
u32x w0[4];
u32x w1[4];

@ -25,6 +25,12 @@
#elif DEVICE_TYPE == DEVICE_TYPE_ACCEL
#endif
#ifdef REAL_SHM
#define SHM_TYPE __local
#else
#define SHM_TYPE __constant
#endif
/**
* vendor specific
*/

@ -15,7 +15,7 @@
#include "inc_simd.cl"
#include "inc_hash_whirlpool.cl"
void whirlpool_transform_transport_vector (const u32x w[16], u32x digest[16], __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
void whirlpool_transform_transport_vector (const u32x w[16], u32x digest[16], SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
{
whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_Ch, s_Cl);
}
@ -34,6 +34,8 @@ __kernel void m06100_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -60,6 +62,13 @@ __kernel void m06100_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if (gid >= gid_max) return;
/**
@ -165,6 +174,8 @@ __kernel void m06100_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -191,6 +202,13 @@ __kernel void m06100_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if (gid >= gid_max) return;
/**

@ -29,6 +29,8 @@ __kernel void m06100_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -55,6 +57,13 @@ __kernel void m06100_mxx (__global pw_t *pws, __constant const kernel_rule_t *ru
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if (gid >= gid_max) return;
/**
@ -104,6 +113,8 @@ __kernel void m06100_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -130,6 +141,13 @@ __kernel void m06100_sxx (__global pw_t *pws, __constant const kernel_rule_t *ru
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if (gid >= gid_max) return;
/**

@ -13,7 +13,7 @@
#include "inc_simd.cl"
#include "inc_hash_whirlpool.cl"
void whirlpool_transform_transport_vector (const u32x w[16], u32x digest[16], __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
void whirlpool_transform_transport_vector (const u32x w[16], u32x digest[16], SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
{
whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_Ch, s_Cl);
}
@ -32,6 +32,8 @@ __kernel void m06100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -58,6 +60,13 @@ __kernel void m06100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if (gid >= gid_max) return;
/**
@ -221,6 +230,8 @@ __kernel void m06100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -247,6 +258,13 @@ __kernel void m06100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if (gid >= gid_max) return;
/**

@ -27,6 +27,8 @@ __kernel void m06100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -53,6 +55,13 @@ __kernel void m06100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if (gid >= gid_max) return;
/**
@ -100,6 +109,8 @@ __kernel void m06100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -126,6 +137,13 @@ __kernel void m06100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if (gid >= gid_max) return;
/**

@ -13,12 +13,12 @@
#include "inc_simd.cl"
#include "inc_hash_whirlpool.cl"
void whirlpool_transform_transport_vector (const u32x w[16], u32x digest[16], __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
void whirlpool_transform_transport_vector (const u32x w[16], u32x digest[16], SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
{
whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_Ch, s_Cl);
}
void m06100m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 (*s_Cl)[256], __local u32 (*s_Ch)[256])
void m06100m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, SHM_TYPE u32 (*s_Cl)[256], SHM_TYPE u32 (*s_Ch)[256])
{
/**
* modifier
@ -87,7 +87,7 @@ void m06100m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __gl
}
}
void m06100s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, __local u32 (*s_Cl)[256], __local u32 (*s_Ch)[256])
void m06100s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_len, __global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, SHM_TYPE u32 (*s_Cl)[256], SHM_TYPE u32 (*s_Ch)[256])
{
/**
* modifier
@ -182,6 +182,8 @@ __kernel void m06100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -208,6 +210,13 @@ __kernel void m06100_m04 (__global pw_t *pws, __global const kernel_rule_t *rule
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if (gid >= gid_max) return;
/**
@ -265,6 +274,8 @@ __kernel void m06100_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -291,6 +302,13 @@ __kernel void m06100_m08 (__global pw_t *pws, __global const kernel_rule_t *rule
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if (gid >= gid_max) return;
/**
@ -348,6 +366,8 @@ __kernel void m06100_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -374,6 +394,13 @@ __kernel void m06100_m16 (__global pw_t *pws, __global const kernel_rule_t *rule
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if (gid >= gid_max) return;
/**
@ -431,6 +458,8 @@ __kernel void m06100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -457,6 +486,13 @@ __kernel void m06100_s04 (__global pw_t *pws, __global const kernel_rule_t *rule
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if (gid >= gid_max) return;
/**
@ -514,6 +550,8 @@ __kernel void m06100_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -540,6 +578,13 @@ __kernel void m06100_s08 (__global pw_t *pws, __global const kernel_rule_t *rule
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if (gid >= gid_max) return;
/**
@ -597,6 +642,8 @@ __kernel void m06100_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -623,6 +670,13 @@ __kernel void m06100_s16 (__global pw_t *pws, __global const kernel_rule_t *rule
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if (gid >= gid_max) return;
/**

@ -27,6 +27,8 @@ __kernel void m06100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -53,6 +55,13 @@ __kernel void m06100_mxx (__global pw_t *pws, __global const kernel_rule_t *rule
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if (gid >= gid_max) return;
/**
@ -113,6 +122,8 @@ __kernel void m06100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -139,6 +150,13 @@ __kernel void m06100_sxx (__global pw_t *pws, __global const kernel_rule_t *rule
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if (gid >= gid_max) return;
/**

@ -45,7 +45,7 @@ u32 u8add (const u32 a, const u32 b)
return r;
}
void hmac_whirlpool_run_V (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[16], u32x opad[16], u32x digest[16], __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
void hmac_whirlpool_run_V (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[16], u32x opad[16], u32x digest[16], SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
{
digest[ 0] = ipad[ 0];
digest[ 1] = ipad[ 1];
@ -155,6 +155,8 @@ __kernel void m06231_init (__global pw_t *pws, __global const kernel_rule_t *rul
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -181,6 +183,13 @@ __kernel void m06231_init (__global pw_t *pws, __global const kernel_rule_t *rul
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if (gid >= gid_max) return;
u32 w0[4];
@ -354,6 +363,8 @@ __kernel void m06231_loop (__global pw_t *pws, __global const kernel_rule_t *rul
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -380,6 +391,13 @@ __kernel void m06231_loop (__global pw_t *pws, __global const kernel_rule_t *rul
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if ((gid * VECT_SIZE) >= gid_max) return;
u32x ipad[16];
@ -550,17 +568,17 @@ __kernel void m06231_comp (__global pw_t *pws, __global const kernel_rule_t *rul
#ifdef REAL_SHM
__local u32 s_td0[256];
__local u32 s_td1[256];
__local u32 s_td2[256];
__local u32 s_td3[256];
__local u32 s_td4[256];
__local u32 s_te0[256];
__local u32 s_te1[256];
__local u32 s_te2[256];
__local u32 s_te3[256];
__local u32 s_te4[256];
SHM_TYPE u32 s_td0[256];
SHM_TYPE u32 s_td1[256];
SHM_TYPE u32 s_td2[256];
SHM_TYPE u32 s_td3[256];
SHM_TYPE u32 s_td4[256];
SHM_TYPE u32 s_te0[256];
SHM_TYPE u32 s_te1[256];
SHM_TYPE u32 s_te2[256];
SHM_TYPE u32 s_te3[256];
SHM_TYPE u32 s_te4[256];
for (u32 i = lid; i < 256; i += lsz)
{

@ -45,7 +45,7 @@ u32 u8add (const u32 a, const u32 b)
return r;
}
void hmac_whirlpool_run_V (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[16], u32x opad[16], u32x digest[16], __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
void hmac_whirlpool_run_V (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[16], u32x opad[16], u32x digest[16], SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
{
digest[ 0] = ipad[ 0];
digest[ 1] = ipad[ 1];
@ -155,6 +155,8 @@ __kernel void m06232_init (__global pw_t *pws, __global const kernel_rule_t *rul
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -181,6 +183,13 @@ __kernel void m06232_init (__global pw_t *pws, __global const kernel_rule_t *rul
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if (gid >= gid_max) return;
u32 w0[4];
@ -354,6 +363,8 @@ __kernel void m06232_loop (__global pw_t *pws, __global const kernel_rule_t *rul
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -380,6 +391,13 @@ __kernel void m06232_loop (__global pw_t *pws, __global const kernel_rule_t *rul
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if ((gid * VECT_SIZE) >= gid_max) return;
u32x ipad[16];
@ -550,17 +568,17 @@ __kernel void m06232_comp (__global pw_t *pws, __global const kernel_rule_t *rul
#ifdef REAL_SHM
__local u32 s_td0[256];
__local u32 s_td1[256];
__local u32 s_td2[256];
__local u32 s_td3[256];
__local u32 s_td4[256];
__local u32 s_te0[256];
__local u32 s_te1[256];
__local u32 s_te2[256];
__local u32 s_te3[256];
__local u32 s_te4[256];
SHM_TYPE u32 s_td0[256];
SHM_TYPE u32 s_td1[256];
SHM_TYPE u32 s_td2[256];
SHM_TYPE u32 s_td3[256];
SHM_TYPE u32 s_td4[256];
SHM_TYPE u32 s_te0[256];
SHM_TYPE u32 s_te1[256];
SHM_TYPE u32 s_te2[256];
SHM_TYPE u32 s_te3[256];
SHM_TYPE u32 s_te4[256];
for (u32 i = lid; i < 256; i += lsz)
{

@ -45,7 +45,7 @@ u32 u8add (const u32 a, const u32 b)
return r;
}
void hmac_whirlpool_run_V (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[16], u32x opad[16], u32x digest[16], __local u32 (*s_Ch)[256], __local u32 (*s_Cl)[256])
void hmac_whirlpool_run_V (u32x w0[4], u32x w1[4], u32x w2[4], u32x w3[4], u32x ipad[16], u32x opad[16], u32x digest[16], SHM_TYPE u32 (*s_Ch)[256], SHM_TYPE u32 (*s_Cl)[256])
{
digest[ 0] = ipad[ 0];
digest[ 1] = ipad[ 1];
@ -155,6 +155,8 @@ __kernel void m06233_init (__global pw_t *pws, __global const kernel_rule_t *rul
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -181,6 +183,13 @@ __kernel void m06233_init (__global pw_t *pws, __global const kernel_rule_t *rul
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if (gid >= gid_max) return;
u32 w0[4];
@ -354,6 +363,8 @@ __kernel void m06233_loop (__global pw_t *pws, __global const kernel_rule_t *rul
* shared
*/
#ifdef REAL_SHM
__local u32 s_Ch[8][256];
__local u32 s_Cl[8][256];
@ -380,6 +391,13 @@ __kernel void m06233_loop (__global pw_t *pws, __global const kernel_rule_t *rul
barrier (CLK_LOCAL_MEM_FENCE);
#else
__constant u32 (*s_Ch)[256] = Ch;
__constant u32 (*s_Cl)[256] = Cl;
#endif
if ((gid * VECT_SIZE) >= gid_max) return;
u32x ipad[16];
@ -550,17 +568,17 @@ __kernel void m06233_comp (__global pw_t *pws, __global const kernel_rule_t *rul
#ifdef REAL_SHM
__local u32 s_td0[256];
__local u32 s_td1[256];
__local u32 s_td2[256];
__local u32 s_td3[256];
__local u32 s_td4[256];
__local u32 s_te0[256];
__local u32 s_te1[256];
__local u32 s_te2[256];
__local u32 s_te3[256];
__local u32 s_te4[256];
SHM_TYPE u32 s_td0[256];
SHM_TYPE u32 s_td1[256];
SHM_TYPE u32 s_td2[256];
SHM_TYPE u32 s_td3[256];
SHM_TYPE u32 s_td4[256];
SHM_TYPE u32 s_te0[256];
SHM_TYPE u32 s_te1[256];
SHM_TYPE u32 s_te2[256];
SHM_TYPE u32 s_te3[256];
SHM_TYPE u32 s_te4[256];
for (u32 i = lid; i < 256; i += lsz)
{

Loading…
Cancel
Save