diff --git a/OpenCL/m01800-optimized.cl b/OpenCL/m01800-optimized.cl index b4a2d856d..29b636b64 100644 --- a/OpenCL/m01800-optimized.cl +++ b/OpenCL/m01800-optimized.cl @@ -194,13 +194,13 @@ __kernel void m01800_init (__global pw_t *pws, __global const kernel_rule_t *rul u64 pw[2]; - pw[0] = swap64 (hl32_to_64 (w0[1], w0[0])); - pw[1] = swap64 (hl32_to_64 (w0[3], w0[2])); + pw[0] = swap64_S (hl32_to_64 (w0[1], w0[0])); + pw[1] = swap64_S (hl32_to_64 (w0[3], w0[2])); u64 salt[2]; - salt[0] = swap64 (hl32_to_64 (salt_buf[1], salt_buf[0])); - salt[1] = swap64 (hl32_to_64 (salt_buf[3], salt_buf[2])); + salt[0] = swap64_S (hl32_to_64 (salt_buf[1], salt_buf[0])); + salt[1] = swap64_S (hl32_to_64 (salt_buf[3], salt_buf[2])); /** * begin @@ -474,8 +474,8 @@ __kernel void m01800_comp (__global pw_t *pws, __global const kernel_rule_t *rul const u64 lid = get_local_id (0); - const u64 a = swap64 (tmps[gid].l_alt_result[0]); - const u64 b = swap64 (tmps[gid].l_alt_result[1]); + const u64 a = swap64_S (tmps[gid].l_alt_result[0]); + const u64 b = swap64_S (tmps[gid].l_alt_result[1]); const u32 r0 = l32_from_64_S (a); const u32 r1 = h32_from_64_S (a); diff --git a/OpenCL/m01800.cl b/OpenCL/m01800.cl index 309cebde1..82e5cc4c5 100644 --- a/OpenCL/m01800.cl +++ b/OpenCL/m01800.cl @@ -38,7 +38,7 @@ __kernel void m01800_init (__global pw_t *pws, __global const kernel_rule_t *rul for (int i = 0, idx = 0; i < pw_len; i += 4, idx += 1) { - w[idx] = swap32 (w[idx]); + w[idx] = swap32_S (w[idx]); } const u32 salt_len = salt_bufs[salt_pos].salt_len; @@ -52,7 +52,7 @@ __kernel void m01800_init (__global pw_t *pws, __global const kernel_rule_t *rul for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = swap32 (s[idx]); + s[idx] = swap32_S (s[idx]); } /** @@ -394,10 +394,10 @@ __kernel void m01800_comp (__global pw_t *pws, __global const kernel_rule_t *rul const u64 lid = get_local_id (0); - const u32 r0 = swap32 (tmps[gid].alt_result[0]); - const u32 r1 = swap32 (tmps[gid].alt_result[1]); - const u32 r2 = swap32 (tmps[gid].alt_result[2]); - const u32 r3 = swap32 (tmps[gid].alt_result[3]); + const u32 r0 = swap32_S (tmps[gid].alt_result[0]); + const u32 r1 = swap32_S (tmps[gid].alt_result[1]); + const u32 r2 = swap32_S (tmps[gid].alt_result[2]); + const u32 r3 = swap32_S (tmps[gid].alt_result[3]); #define il_pos 0 diff --git a/OpenCL/m03200.cl b/OpenCL/m03200.cl index a4705cb49..355569a21 100644 --- a/OpenCL/m03200.cl +++ b/OpenCL/m03200.cl @@ -429,24 +429,24 @@ __kernel void m03200_init (__global pw_t *pws, __global const kernel_rule_t *rul expand_key (E, w, pw_len); - E[ 0] = swap32 (E[ 0]); - E[ 1] = swap32 (E[ 1]); - E[ 2] = swap32 (E[ 2]); - E[ 3] = swap32 (E[ 3]); - E[ 4] = swap32 (E[ 4]); - E[ 5] = swap32 (E[ 5]); - E[ 6] = swap32 (E[ 6]); - E[ 7] = swap32 (E[ 7]); - E[ 8] = swap32 (E[ 8]); - E[ 9] = swap32 (E[ 9]); - E[10] = swap32 (E[10]); - E[11] = swap32 (E[11]); - E[12] = swap32 (E[12]); - E[13] = swap32 (E[13]); - E[14] = swap32 (E[14]); - E[15] = swap32 (E[15]); - E[16] = swap32 (E[16]); - E[17] = swap32 (E[17]); + E[ 0] = swap32_S (E[ 0]); + E[ 1] = swap32_S (E[ 1]); + E[ 2] = swap32_S (E[ 2]); + E[ 3] = swap32_S (E[ 3]); + E[ 4] = swap32_S (E[ 4]); + E[ 5] = swap32_S (E[ 5]); + E[ 6] = swap32_S (E[ 6]); + E[ 7] = swap32_S (E[ 7]); + E[ 8] = swap32_S (E[ 8]); + E[ 9] = swap32_S (E[ 9]); + E[10] = swap32_S (E[10]); + E[11] = swap32_S (E[11]); + E[12] = swap32_S (E[12]); + E[13] = swap32_S (E[13]); + E[14] = swap32_S (E[14]); + E[15] = swap32_S (E[15]); + E[16] = swap32_S (E[16]); + E[17] = swap32_S (E[17]); for (u32 i = 0; i < 18; i++) { diff --git a/OpenCL/m05800-optimized.cl b/OpenCL/m05800-optimized.cl index 35507e198..08623e738 100644 --- a/OpenCL/m05800-optimized.cl +++ b/OpenCL/m05800-optimized.cl @@ -2252,16 +2252,16 @@ __kernel void m05800_init (__global pw_t *pws, __global const kernel_rule_t *rul u32 w2[4]; u32 w3[4]; - w0[0] = swap32 (data0[0]); - w0[1] = swap32 (data0[1]); - w0[2] = swap32 (data0[2]); - w0[3] = swap32 (data0[3]); - w1[0] = swap32 (data1[0]); - w1[1] = swap32 (data1[1]); - w1[2] = swap32 (data1[2]); - w1[3] = swap32 (data1[3]); - w2[0] = swap32 (data2[0]); - w2[1] = swap32 (data2[1]); + w0[0] = swap32_S (data0[0]); + w0[1] = swap32_S (data0[1]); + w0[2] = swap32_S (data0[2]); + w0[3] = swap32_S (data0[3]); + w1[0] = swap32_S (data1[0]); + w1[1] = swap32_S (data1[1]); + w1[2] = swap32_S (data1[2]); + w1[3] = swap32_S (data1[3]); + w2[0] = swap32_S (data2[0]); + w2[1] = swap32_S (data2[1]); w2[2] = 0; w2[3] = 0; w3[0] = 0; @@ -2377,15 +2377,15 @@ __kernel void m05800_loop (__global pw_t *pws, __global const kernel_rule_t *rul w0[2] = digest[2]; w0[3] = digest[3]; w1[0] = digest[4]; - w1[1] = swap32 (data0[0]); - w1[2] = swap32 (data0[1]); - w1[3] = swap32 (data0[2]); - w2[0] = swap32 (data0[3]); - w2[1] = swap32 (data1[0]); - w2[2] = swap32 (data1[1]); - w2[3] = swap32 (data1[2]); - w3[0] = swap32 (data1[3]); - w3[1] = swap32 (data2[0]); + w1[1] = swap32_S (data0[0]); + w1[2] = swap32_S (data0[1]); + w1[3] = swap32_S (data0[2]); + w2[0] = swap32_S (data0[3]); + w2[1] = swap32_S (data1[0]); + w2[2] = swap32_S (data1[1]); + w2[3] = swap32_S (data1[2]); + w3[0] = swap32_S (data1[3]); + w3[1] = swap32_S (data2[0]); w3[2] = 0; w3[3] = (20 + pc_len + pw_len + salt_len) * 8; diff --git a/OpenCL/m05800.cl b/OpenCL/m05800.cl index 6f6835ed4..5afdf1eeb 100644 --- a/OpenCL/m05800.cl +++ b/OpenCL/m05800.cl @@ -2264,7 +2264,7 @@ __kernel void m05800_loop (__global pw_t *pws, __global const kernel_rule_t *rul for (int i = 0, idx = 0; i < pw_len; i += 4, idx += 1) { - w[idx] = swap32 (pws[gid].i[idx]); + w[idx] = swap32_S (pws[gid].i[idx]); } const u32 salt_len = salt_bufs[salt_pos].salt_len; @@ -2273,7 +2273,7 @@ __kernel void m05800_loop (__global pw_t *pws, __global const kernel_rule_t *rul for (int i = 0, idx = 0; i < salt_len; i += 4, idx += 1) { - s[idx] = swap32 (salt_bufs[salt_pos].salt_buf[idx]); + s[idx] = swap32_S (salt_bufs[salt_pos].salt_buf[idx]); } u32 digest[5]; @@ -2302,7 +2302,7 @@ __kernel void m05800_loop (__global pw_t *pws, __global const kernel_rule_t *rul ctx.w0[2] = digest[2]; ctx.w0[3] = digest[3]; ctx.w1[0] = digest[4]; - ctx.w1[1] = swap32 (pc_dec); + ctx.w1[1] = swap32_S (pc_dec); ctx.len = 20 + pc_len; diff --git a/OpenCL/m07400-optimized.cl b/OpenCL/m07400-optimized.cl index 9ab079cd9..c6f0c2bb1 100644 --- a/OpenCL/m07400-optimized.cl +++ b/OpenCL/m07400-optimized.cl @@ -74,14 +74,14 @@ DECLSPEC void bzero16 (u32 block[16]) DECLSPEC void bswap8 (u32 block[16]) { - block[ 0] = swap32 (block[ 0]); - block[ 1] = swap32 (block[ 1]); - block[ 2] = swap32 (block[ 2]); - block[ 3] = swap32 (block[ 3]); - block[ 4] = swap32 (block[ 4]); - block[ 5] = swap32 (block[ 5]); - block[ 6] = swap32 (block[ 6]); - block[ 7] = swap32 (block[ 7]); + block[ 0] = swap32_S (block[ 0]); + block[ 1] = swap32_S (block[ 1]); + block[ 2] = swap32_S (block[ 2]); + block[ 3] = swap32_S (block[ 3]); + block[ 4] = swap32_S (block[ 4]); + block[ 5] = swap32_S (block[ 5]); + block[ 6] = swap32_S (block[ 6]); + block[ 7] = swap32_S (block[ 7]); } DECLSPEC u32 memcat16 (u32 block[16], const u32 offset, const u32 append[4], const u32 append_len) @@ -757,7 +757,7 @@ __kernel void m07400_init (__global pw_t *pws, __global const kernel_rule_t *rul append_0x80_1x16 (block, block_len); - block[15] = swap32 (block_len * 8); + block[15] = swap32_S (block_len * 8); init_ctx (alt_result); @@ -839,7 +839,7 @@ __kernel void m07400_init (__global pw_t *pws, __global const kernel_rule_t *rul bzero16 (block); } - block[15] = swap32 (transform_len * 8); + block[15] = swap32_S (transform_len * 8); sha256_transform_transport (block, alt_result); @@ -884,7 +884,7 @@ __kernel void m07400_init (__global pw_t *pws, __global const kernel_rule_t *rul bzero16 (block); } - block[15] = swap32 (transform_len * 8); + block[15] = swap32_S (transform_len * 8); sha256_transform_transport (block, p_bytes); @@ -927,7 +927,7 @@ __kernel void m07400_init (__global pw_t *pws, __global const kernel_rule_t *rul bzero16 (block); } - block[15] = swap32 (transform_len * 8); + block[15] = swap32_S (transform_len * 8); sha256_transform_transport (block, s_bytes); @@ -1078,7 +1078,7 @@ __kernel void m07400_loop (__global pw_t *pws, __global const kernel_rule_t *rul block[15] = 0; } - block[15] = swap32 (block_len * 8); + block[15] = swap32_S (block_len * 8); sha256_transform_transport (block, tmp); diff --git a/OpenCL/m07800_a0-optimized.cl b/OpenCL/m07800_a0-optimized.cl index 8ac6f9a52..baf12775e 100644 --- a/OpenCL/m07800_a0-optimized.cl +++ b/OpenCL/m07800_a0-optimized.cl @@ -149,20 +149,20 @@ __kernel void m07800_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru u32 final[32]; - final[ 0] = swap32 (w0[0] | s0[0]); - final[ 1] = swap32 (w0[1] | s0[1]); - final[ 2] = swap32 (w0[2] | s0[2]); - final[ 3] = swap32 (w0[3] | s0[3]); - final[ 4] = swap32 (w1[0] | s1[0]); - final[ 5] = swap32 (w1[1] | s1[1]); - final[ 6] = swap32 (w1[2] | s1[2]); - final[ 7] = swap32 (w1[3] | s1[3]); - final[ 8] = swap32 (w2[0] | s2[0]); - final[ 9] = swap32 (w2[1] | s2[1]); - final[10] = swap32 (w2[2] | s2[2]); - final[11] = swap32 (w2[3] | s2[3]); - final[12] = swap32 (w3[0] | s3[0]); - final[13] = swap32 (w3[1] | s3[1]); + final[ 0] = swap32_S (w0[0] | s0[0]); + final[ 1] = swap32_S (w0[1] | s0[1]); + final[ 2] = swap32_S (w0[2] | s0[2]); + final[ 3] = swap32_S (w0[3] | s0[3]); + final[ 4] = swap32_S (w1[0] | s1[0]); + final[ 5] = swap32_S (w1[1] | s1[1]); + final[ 6] = swap32_S (w1[2] | s1[2]); + final[ 7] = swap32_S (w1[3] | s1[3]); + final[ 8] = swap32_S (w2[0] | s2[0]); + final[ 9] = swap32_S (w2[1] | s2[1]); + final[10] = swap32_S (w2[2] | s2[2]); + final[11] = swap32_S (w2[3] | s2[3]); + final[12] = swap32_S (w3[0] | s3[0]); + final[13] = swap32_S (w3[1] | s3[1]); final[14] = 0; final[15] = pw_salt_len * 8; final[16] = 0; @@ -409,20 +409,20 @@ __kernel void m07800_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru u32 final[32]; - final[ 0] = swap32 (w0[0] | s0[0]); - final[ 1] = swap32 (w0[1] | s0[1]); - final[ 2] = swap32 (w0[2] | s0[2]); - final[ 3] = swap32 (w0[3] | s0[3]); - final[ 4] = swap32 (w1[0] | s1[0]); - final[ 5] = swap32 (w1[1] | s1[1]); - final[ 6] = swap32 (w1[2] | s1[2]); - final[ 7] = swap32 (w1[3] | s1[3]); - final[ 8] = swap32 (w2[0] | s2[0]); - final[ 9] = swap32 (w2[1] | s2[1]); - final[10] = swap32 (w2[2] | s2[2]); - final[11] = swap32 (w2[3] | s2[3]); - final[12] = swap32 (w3[0] | s3[0]); - final[13] = swap32 (w3[1] | s3[1]); + final[ 0] = swap32_S (w0[0] | s0[0]); + final[ 1] = swap32_S (w0[1] | s0[1]); + final[ 2] = swap32_S (w0[2] | s0[2]); + final[ 3] = swap32_S (w0[3] | s0[3]); + final[ 4] = swap32_S (w1[0] | s1[0]); + final[ 5] = swap32_S (w1[1] | s1[1]); + final[ 6] = swap32_S (w1[2] | s1[2]); + final[ 7] = swap32_S (w1[3] | s1[3]); + final[ 8] = swap32_S (w2[0] | s2[0]); + final[ 9] = swap32_S (w2[1] | s2[1]); + final[10] = swap32_S (w2[2] | s2[2]); + final[11] = swap32_S (w2[3] | s2[3]); + final[12] = swap32_S (w3[0] | s3[0]); + final[13] = swap32_S (w3[1] | s3[1]); final[14] = 0; final[15] = pw_salt_len * 8; final[16] = 0; diff --git a/OpenCL/m07800_a1-optimized.cl b/OpenCL/m07800_a1-optimized.cl index a997ce382..dbc22f79b 100644 --- a/OpenCL/m07800_a1-optimized.cl +++ b/OpenCL/m07800_a1-optimized.cl @@ -207,20 +207,20 @@ __kernel void m07800_m04 (__global pw_t *pws, __global const kernel_rule_t *rule u32 final[32]; - final[ 0] = swap32 (w0[0] | s0[0]); - final[ 1] = swap32 (w0[1] | s0[1]); - final[ 2] = swap32 (w0[2] | s0[2]); - final[ 3] = swap32 (w0[3] | s0[3]); - final[ 4] = swap32 (w1[0] | s1[0]); - final[ 5] = swap32 (w1[1] | s1[1]); - final[ 6] = swap32 (w1[2] | s1[2]); - final[ 7] = swap32 (w1[3] | s1[3]); - final[ 8] = swap32 (w2[0] | s2[0]); - final[ 9] = swap32 (w2[1] | s2[1]); - final[10] = swap32 (w2[2] | s2[2]); - final[11] = swap32 (w2[3] | s2[3]); - final[12] = swap32 (w3[0] | s3[0]); - final[13] = swap32 (w3[1] | s3[1]); + final[ 0] = swap32_S (w0[0] | s0[0]); + final[ 1] = swap32_S (w0[1] | s0[1]); + final[ 2] = swap32_S (w0[2] | s0[2]); + final[ 3] = swap32_S (w0[3] | s0[3]); + final[ 4] = swap32_S (w1[0] | s1[0]); + final[ 5] = swap32_S (w1[1] | s1[1]); + final[ 6] = swap32_S (w1[2] | s1[2]); + final[ 7] = swap32_S (w1[3] | s1[3]); + final[ 8] = swap32_S (w2[0] | s2[0]); + final[ 9] = swap32_S (w2[1] | s2[1]); + final[10] = swap32_S (w2[2] | s2[2]); + final[11] = swap32_S (w2[3] | s2[3]); + final[12] = swap32_S (w3[0] | s3[0]); + final[13] = swap32_S (w3[1] | s3[1]); final[14] = 0; final[15] = pw_salt_len * 8; final[16] = 0; @@ -527,20 +527,20 @@ __kernel void m07800_s04 (__global pw_t *pws, __global const kernel_rule_t *rule u32 final[32]; - final[ 0] = swap32 (w0[0] | s0[0]); - final[ 1] = swap32 (w0[1] | s0[1]); - final[ 2] = swap32 (w0[2] | s0[2]); - final[ 3] = swap32 (w0[3] | s0[3]); - final[ 4] = swap32 (w1[0] | s1[0]); - final[ 5] = swap32 (w1[1] | s1[1]); - final[ 6] = swap32 (w1[2] | s1[2]); - final[ 7] = swap32 (w1[3] | s1[3]); - final[ 8] = swap32 (w2[0] | s2[0]); - final[ 9] = swap32 (w2[1] | s2[1]); - final[10] = swap32 (w2[2] | s2[2]); - final[11] = swap32 (w2[3] | s2[3]); - final[12] = swap32 (w3[0] | s3[0]); - final[13] = swap32 (w3[1] | s3[1]); + final[ 0] = swap32_S (w0[0] | s0[0]); + final[ 1] = swap32_S (w0[1] | s0[1]); + final[ 2] = swap32_S (w0[2] | s0[2]); + final[ 3] = swap32_S (w0[3] | s0[3]); + final[ 4] = swap32_S (w1[0] | s1[0]); + final[ 5] = swap32_S (w1[1] | s1[1]); + final[ 6] = swap32_S (w1[2] | s1[2]); + final[ 7] = swap32_S (w1[3] | s1[3]); + final[ 8] = swap32_S (w2[0] | s2[0]); + final[ 9] = swap32_S (w2[1] | s2[1]); + final[10] = swap32_S (w2[2] | s2[2]); + final[11] = swap32_S (w2[3] | s2[3]); + final[12] = swap32_S (w3[0] | s3[0]); + final[13] = swap32_S (w3[1] | s3[1]); final[14] = 0; final[15] = pw_salt_len * 8; final[16] = 0; diff --git a/OpenCL/m08900.cl b/OpenCL/m08900.cl index e72919601..e1d234deb 100644 --- a/OpenCL/m08900.cl +++ b/OpenCL/m08900.cl @@ -383,10 +383,10 @@ __kernel void m08900_comp (__global pw_t *pws, __global const kernel_rule_t *rul sha256_hmac_final (&ctx); - const u32 r0 = swap32 (ctx.opad.h[DGST_R0]); - const u32 r1 = swap32 (ctx.opad.h[DGST_R1]); - const u32 r2 = swap32 (ctx.opad.h[DGST_R2]); - const u32 r3 = swap32 (ctx.opad.h[DGST_R3]); + const u32 r0 = swap32_S (ctx.opad.h[DGST_R0]); + const u32 r1 = swap32_S (ctx.opad.h[DGST_R1]); + const u32 r2 = swap32_S (ctx.opad.h[DGST_R2]); + const u32 r3 = swap32_S (ctx.opad.h[DGST_R3]); #define il_pos 0 diff --git a/OpenCL/m09000.cl b/OpenCL/m09000.cl index abd1748bb..22866cb1e 100644 --- a/OpenCL/m09000.cl +++ b/OpenCL/m09000.cl @@ -572,20 +572,20 @@ __kernel void m09000_init (__global pw_t *pws, __global const kernel_rule_t *rul w0[1] = salt_buf[1]; w0[0] = salt_buf[0]; - w0[0] = swap32 (w0[0]); - w0[1] = swap32 (w0[1]); - w0[2] = swap32 (w0[2]); - w0[3] = swap32 (w0[3]); - w1[0] = swap32 (w1[0]); - w1[1] = swap32 (w1[1]); - w1[2] = swap32 (w1[2]); - w1[3] = swap32 (w1[3]); - w2[0] = swap32 (w2[0]); - w2[1] = swap32 (w2[1]); - w2[2] = swap32 (w2[2]); - w2[3] = swap32 (w2[3]); - w3[0] = swap32 (w3[0]); - w3[1] = swap32 (w3[1]); + w0[0] = swap32_S (w0[0]); + w0[1] = swap32_S (w0[1]); + w0[2] = swap32_S (w0[2]); + w0[3] = swap32_S (w0[3]); + w1[0] = swap32_S (w1[0]); + w1[1] = swap32_S (w1[1]); + w1[2] = swap32_S (w1[2]); + w1[3] = swap32_S (w1[3]); + w2[0] = swap32_S (w2[0]); + w2[1] = swap32_S (w2[1]); + w2[2] = swap32_S (w2[2]); + w2[3] = swap32_S (w2[3]); + w3[0] = swap32_S (w3[0]); + w3[1] = swap32_S (w3[1]); const u32 block_len = salt_len + 2 + pw_len; @@ -806,8 +806,8 @@ __kernel void m09000_comp (__global pw_t *pws, __global const kernel_rule_t *rul u32 w2[4]; u32 w3[4]; - w0[0] = swap32 (digest[0]); - w0[1] = swap32 (digest[1]); + w0[0] = swap32_S (digest[0]); + w0[1] = swap32_S (digest[1]); w0[2] = 0x00008000; w0[3] = 0; w1[0] = 0; diff --git a/OpenCL/m09800_a0-optimized.cl b/OpenCL/m09800_a0-optimized.cl index 817172658..451f6bbab 100644 --- a/OpenCL/m09800_a0-optimized.cl +++ b/OpenCL/m09800_a0-optimized.cl @@ -277,10 +277,10 @@ __kernel void m09800_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru sha1_transform (w0, w1, w2, w3, digest); - digest[0] = swap32 (digest[0]); - digest[1] = swap32 (digest[1]); - digest[2] = swap32 (digest[2]); - digest[3] = swap32 (digest[3]); + digest[0] = swap32_S (digest[0]); + digest[1] = swap32_S (digest[1]); + digest[2] = swap32_S (digest[2]); + digest[3] = swap32_S (digest[3]); if (version == 3) { @@ -320,10 +320,10 @@ __kernel void m09800_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru sha1_transform (w0, w1, w2, w3, digest); - digest[0] = swap32 (digest[0]); - digest[1] = swap32 (digest[1]); - digest[2] = swap32 (digest[2]); - digest[3] = swap32 (digest[3]); + digest[0] = swap32_S (digest[0]); + digest[1] = swap32_S (digest[1]); + digest[2] = swap32_S (digest[2]); + digest[3] = swap32_S (digest[3]); rc4_next_16 (rc4_key, 16, j, digest, out); @@ -489,10 +489,10 @@ __kernel void m09800_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru sha1_transform (w0, w1, w2, w3, digest); - digest[0] = swap32 (digest[0]); - digest[1] = swap32 (digest[1]); - digest[2] = swap32 (digest[2]); - digest[3] = swap32 (digest[3]); + digest[0] = swap32_S (digest[0]); + digest[1] = swap32_S (digest[1]); + digest[2] = swap32_S (digest[2]); + digest[3] = swap32_S (digest[3]); if (version == 3) { @@ -532,10 +532,10 @@ __kernel void m09800_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru sha1_transform (w0, w1, w2, w3, digest); - digest[0] = swap32 (digest[0]); - digest[1] = swap32 (digest[1]); - digest[2] = swap32 (digest[2]); - digest[3] = swap32 (digest[3]); + digest[0] = swap32_S (digest[0]); + digest[1] = swap32_S (digest[1]); + digest[2] = swap32_S (digest[2]); + digest[3] = swap32_S (digest[3]); rc4_next_16 (rc4_key, 16, j, digest, out); diff --git a/OpenCL/m09800_a1-optimized.cl b/OpenCL/m09800_a1-optimized.cl index 9151a94cb..a43c4e218 100644 --- a/OpenCL/m09800_a1-optimized.cl +++ b/OpenCL/m09800_a1-optimized.cl @@ -325,10 +325,10 @@ __kernel void m09800_m04 (__global pw_t *pws, __global const kernel_rule_t *rule sha1_transform (w0, w1, w2, w3, digest); - digest[0] = swap32 (digest[0]); - digest[1] = swap32 (digest[1]); - digest[2] = swap32 (digest[2]); - digest[3] = swap32 (digest[3]); + digest[0] = swap32_S (digest[0]); + digest[1] = swap32_S (digest[1]); + digest[2] = swap32_S (digest[2]); + digest[3] = swap32_S (digest[3]); if (version == 3) { @@ -368,10 +368,10 @@ __kernel void m09800_m04 (__global pw_t *pws, __global const kernel_rule_t *rule sha1_transform (w0, w1, w2, w3, digest); - digest[0] = swap32 (digest[0]); - digest[1] = swap32 (digest[1]); - digest[2] = swap32 (digest[2]); - digest[3] = swap32 (digest[3]); + digest[0] = swap32_S (digest[0]); + digest[1] = swap32_S (digest[1]); + digest[2] = swap32_S (digest[2]); + digest[3] = swap32_S (digest[3]); rc4_next_16 (rc4_key, 16, j, digest, out); @@ -587,10 +587,10 @@ __kernel void m09800_s04 (__global pw_t *pws, __global const kernel_rule_t *rule sha1_transform (w0, w1, w2, w3, digest); - digest[0] = swap32 (digest[0]); - digest[1] = swap32 (digest[1]); - digest[2] = swap32 (digest[2]); - digest[3] = swap32 (digest[3]); + digest[0] = swap32_S (digest[0]); + digest[1] = swap32_S (digest[1]); + digest[2] = swap32_S (digest[2]); + digest[3] = swap32_S (digest[3]); if (version == 3) { @@ -630,10 +630,10 @@ __kernel void m09800_s04 (__global pw_t *pws, __global const kernel_rule_t *rule sha1_transform (w0, w1, w2, w3, digest); - digest[0] = swap32 (digest[0]); - digest[1] = swap32 (digest[1]); - digest[2] = swap32 (digest[2]); - digest[3] = swap32 (digest[3]); + digest[0] = swap32_S (digest[0]); + digest[1] = swap32_S (digest[1]); + digest[2] = swap32_S (digest[2]); + digest[3] = swap32_S (digest[3]); rc4_next_16 (rc4_key, 16, j, digest, out); diff --git a/OpenCL/m09800_a3-optimized.cl b/OpenCL/m09800_a3-optimized.cl index 787ead666..3f711df60 100644 --- a/OpenCL/m09800_a3-optimized.cl +++ b/OpenCL/m09800_a3-optimized.cl @@ -248,10 +248,10 @@ DECLSPEC void m09800m (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4 sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - digest[0] = swap32 (digest[0]); - digest[1] = swap32 (digest[1]); - digest[2] = swap32 (digest[2]); - digest[3] = swap32 (digest[3]); + digest[0] = swap32_S (digest[0]); + digest[1] = swap32_S (digest[1]); + digest[2] = swap32_S (digest[2]); + digest[3] = swap32_S (digest[3]); if (version == 3) { @@ -266,10 +266,10 @@ DECLSPEC void m09800m (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4 u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - w0_t[0] = swap32 (out[0]); - w0_t[1] = swap32 (out[1]); - w0_t[2] = swap32 (out[2]); - w0_t[3] = swap32 (out[3]); + w0_t[0] = swap32_S (out[0]); + w0_t[1] = swap32_S (out[1]); + w0_t[2] = swap32_S (out[2]); + w0_t[3] = swap32_S (out[3]); w1_t[0] = 0x80000000; w1_t[1] = 0; w1_t[2] = 0; @@ -291,10 +291,10 @@ DECLSPEC void m09800m (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4 sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - digest[0] = swap32 (digest[0]); - digest[1] = swap32 (digest[1]); - digest[2] = swap32 (digest[2]); - digest[3] = swap32 (digest[3]); + digest[0] = swap32_S (digest[0]); + digest[1] = swap32_S (digest[1]); + digest[2] = swap32_S (digest[2]); + digest[3] = swap32_S (digest[3]); rc4_next_16 (rc4_key, 16, j, digest, out); @@ -428,10 +428,10 @@ DECLSPEC void m09800s (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4 sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - digest[0] = swap32 (digest[0]); - digest[1] = swap32 (digest[1]); - digest[2] = swap32 (digest[2]); - digest[3] = swap32 (digest[3]); + digest[0] = swap32_S (digest[0]); + digest[1] = swap32_S (digest[1]); + digest[2] = swap32_S (digest[2]); + digest[3] = swap32_S (digest[3]); if (version == 3) { @@ -446,10 +446,10 @@ DECLSPEC void m09800s (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4 u8 j = rc4_next_16 (rc4_key, 0, 0, encryptedVerifier, out); - w0_t[0] = swap32 (out[0]); - w0_t[1] = swap32 (out[1]); - w0_t[2] = swap32 (out[2]); - w0_t[3] = swap32 (out[3]); + w0_t[0] = swap32_S (out[0]); + w0_t[1] = swap32_S (out[1]); + w0_t[2] = swap32_S (out[2]); + w0_t[3] = swap32_S (out[3]); w1_t[0] = 0x80000000; w1_t[1] = 0; w1_t[2] = 0; @@ -471,10 +471,10 @@ DECLSPEC void m09800s (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4 sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - digest[0] = swap32 (digest[0]); - digest[1] = swap32 (digest[1]); - digest[2] = swap32 (digest[2]); - digest[3] = swap32 (digest[3]); + digest[0] = swap32_S (digest[0]); + digest[1] = swap32_S (digest[1]); + digest[2] = swap32_S (digest[2]); + digest[3] = swap32_S (digest[3]); rc4_next_16 (rc4_key, 16, j, digest, out); diff --git a/OpenCL/m09810_a0-optimized.cl b/OpenCL/m09810_a0-optimized.cl index 510f4ea61..5a8e2c6d7 100644 --- a/OpenCL/m09810_a0-optimized.cl +++ b/OpenCL/m09810_a0-optimized.cl @@ -247,10 +247,10 @@ __kernel void m09810_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru sha1_transform (w0, w1, w2, w3, digest); - digest[0] = swap32 (digest[0]); - digest[1] = swap32 (digest[1]); - digest[2] = swap32 (digest[2]); - digest[3] = swap32 (digest[3]); + digest[0] = swap32_S (digest[0]); + digest[1] = swap32_S (digest[1]); + digest[2] = swap32_S (digest[2]); + digest[3] = swap32_S (digest[3]); rc4_next_16 (rc4_key, 16, j, digest, out); @@ -386,10 +386,10 @@ __kernel void m09810_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru sha1_transform (w0, w1, w2, w3, digest); - digest[0] = swap32 (digest[0]); - digest[1] = swap32 (digest[1]); - digest[2] = swap32 (digest[2]); - digest[3] = swap32 (digest[3]); + digest[0] = swap32_S (digest[0]); + digest[1] = swap32_S (digest[1]); + digest[2] = swap32_S (digest[2]); + digest[3] = swap32_S (digest[3]); rc4_next_16 (rc4_key, 16, j, digest, out); diff --git a/OpenCL/m09810_a1-optimized.cl b/OpenCL/m09810_a1-optimized.cl index 412c266ec..6b78eebc3 100644 --- a/OpenCL/m09810_a1-optimized.cl +++ b/OpenCL/m09810_a1-optimized.cl @@ -291,10 +291,10 @@ __kernel void m09810_m04 (__global pw_t *pws, __global const kernel_rule_t *rule sha1_transform (w0, w1, w2, w3, digest); - digest[0] = swap32 (digest[0]); - digest[1] = swap32 (digest[1]); - digest[2] = swap32 (digest[2]); - digest[3] = swap32 (digest[3]); + digest[0] = swap32_S (digest[0]); + digest[1] = swap32_S (digest[1]); + digest[2] = swap32_S (digest[2]); + digest[3] = swap32_S (digest[3]); rc4_next_16 (rc4_key, 16, j, digest, out); @@ -476,10 +476,10 @@ __kernel void m09810_s04 (__global pw_t *pws, __global const kernel_rule_t *rule sha1_transform (w0, w1, w2, w3, digest); - digest[0] = swap32 (digest[0]); - digest[1] = swap32 (digest[1]); - digest[2] = swap32 (digest[2]); - digest[3] = swap32 (digest[3]); + digest[0] = swap32_S (digest[0]); + digest[1] = swap32_S (digest[1]); + digest[2] = swap32_S (digest[2]); + digest[3] = swap32_S (digest[3]); rc4_next_16 (rc4_key, 16, j, digest, out); diff --git a/OpenCL/m09810_a3-optimized.cl b/OpenCL/m09810_a3-optimized.cl index e415e2616..09f10192e 100644 --- a/OpenCL/m09810_a3-optimized.cl +++ b/OpenCL/m09810_a3-optimized.cl @@ -195,10 +195,10 @@ DECLSPEC void m09810m (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4 u32 w2_t[4]; u32 w3_t[4]; - w0_t[0] = swap32 (out[0]); - w0_t[1] = swap32 (out[1]); - w0_t[2] = swap32 (out[2]); - w0_t[3] = swap32 (out[3]); + w0_t[0] = swap32_S (out[0]); + w0_t[1] = swap32_S (out[1]); + w0_t[2] = swap32_S (out[2]); + w0_t[3] = swap32_S (out[3]); w1_t[0] = 0x80000000; w1_t[1] = 0; w1_t[2] = 0; @@ -222,10 +222,10 @@ DECLSPEC void m09810m (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4 sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - digest[0] = swap32 (digest[0]); - digest[1] = swap32 (digest[1]); - digest[2] = swap32 (digest[2]); - digest[3] = swap32 (digest[3]); + digest[0] = swap32_S (digest[0]); + digest[1] = swap32_S (digest[1]); + digest[2] = swap32_S (digest[2]); + digest[3] = swap32_S (digest[3]); rc4_next_16 (rc4_key, 16, j, digest, out); @@ -303,10 +303,10 @@ DECLSPEC void m09810s (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4 u32 w2_t[4]; u32 w3_t[4]; - w0_t[0] = swap32 (out[0]); - w0_t[1] = swap32 (out[1]); - w0_t[2] = swap32 (out[2]); - w0_t[3] = swap32 (out[3]); + w0_t[0] = swap32_S (out[0]); + w0_t[1] = swap32_S (out[1]); + w0_t[2] = swap32_S (out[2]); + w0_t[3] = swap32_S (out[3]); w1_t[0] = 0x80000000; w1_t[1] = 0; w1_t[2] = 0; @@ -330,10 +330,10 @@ DECLSPEC void m09810s (__local RC4_KEY *rc4_keys, u32 w0[4], u32 w1[4], u32 w2[4 sha1_transform (w0_t, w1_t, w2_t, w3_t, digest); - digest[0] = swap32 (digest[0]); - digest[1] = swap32 (digest[1]); - digest[2] = swap32 (digest[2]); - digest[3] = swap32 (digest[3]); + digest[0] = swap32_S (digest[0]); + digest[1] = swap32_S (digest[1]); + digest[2] = swap32_S (digest[2]); + digest[3] = swap32_S (digest[3]); rc4_next_16 (rc4_key, 16, j, digest, out); diff --git a/OpenCL/m09820_a0-optimized.cl b/OpenCL/m09820_a0-optimized.cl index 15bfc79ca..b9368ac1a 100644 --- a/OpenCL/m09820_a0-optimized.cl +++ b/OpenCL/m09820_a0-optimized.cl @@ -97,7 +97,7 @@ __kernel void m09820_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru w0[1] = salt_buf[1]; w0[0] = salt_buf[0]; - u32 digest[5]; + u32x digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -243,7 +243,7 @@ __kernel void m09820_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru w0[1] = salt_buf[1]; w0[0] = salt_buf[0]; - u32 digest[5]; + u32x digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; diff --git a/OpenCL/m09820_a1-optimized.cl b/OpenCL/m09820_a1-optimized.cl index 04cf549fa..5b0e6bab2 100644 --- a/OpenCL/m09820_a1-optimized.cl +++ b/OpenCL/m09820_a1-optimized.cl @@ -145,7 +145,7 @@ __kernel void m09820_m04 (__global pw_t *pws, __global const kernel_rule_t *rule w0[1] = salt_buf[1]; w0[0] = salt_buf[0]; - u32 digest[5]; + u32x digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; @@ -341,7 +341,7 @@ __kernel void m09820_s04 (__global pw_t *pws, __global const kernel_rule_t *rule w0[1] = salt_buf[1]; w0[0] = salt_buf[0]; - u32 digest[5]; + u32x digest[5]; digest[0] = SHA1M_A; digest[1] = SHA1M_B; diff --git a/OpenCL/m10700-optimized.cl b/OpenCL/m10700-optimized.cl index e2871f390..93310abf6 100644 --- a/OpenCL/m10700-optimized.cl +++ b/OpenCL/m10700-optimized.cl @@ -393,7 +393,7 @@ DECLSPEC u32 do_round (const u32 *pw, const u32 pw_len, ctx_t *ctx, SHM_TYPE u32 ctx->W64[12] = 0; ctx->W64[13] = 0; ctx->W64[14] = 0; - ctx->W64[15] = swap64 ((u64) (final_len * 8)); + ctx->W64[15] = swap64_S ((u64) (final_len * 8)); ex = ctx->W64[7] >> 56; break; case BLSZ512: make_w_with_offset (ctx, 64, offset, sc, pwbl_len, iv, ks, s_te0, s_te1, s_te2, s_te3, s_te4); @@ -404,7 +404,7 @@ DECLSPEC u32 do_round (const u32 *pw, const u32 pw_len, ctx_t *ctx, SHM_TYPE u32 ctx->W64[12] = 0; ctx->W64[13] = 0; ctx->W64[14] = 0; - ctx->W64[15] = swap64 ((u64) (final_len * 8)); + ctx->W64[15] = swap64_S ((u64) (final_len * 8)); ex = ctx->W64[7] >> 56; break; } @@ -429,7 +429,7 @@ DECLSPEC u32 do_round (const u32 *pw, const u32 pw_len, ctx_t *ctx, SHM_TYPE u32 ctx->W32[12] = 0; ctx->W32[13] = 0; ctx->W32[14] = 0; - ctx->W32[15] = swap32 (final_len * 8); + ctx->W32[15] = swap32_S (final_len * 8); break; case BLSZ384: ex = ctx->W64[15] >> 56; ctx->W64[ 0] = 0x80; @@ -447,7 +447,7 @@ DECLSPEC u32 do_round (const u32 *pw, const u32 pw_len, ctx_t *ctx, SHM_TYPE u32 ctx->W64[12] = 0; ctx->W64[13] = 0; ctx->W64[14] = 0; - ctx->W64[15] = swap64 ((u64) (final_len * 8)); + ctx->W64[15] = swap64_S ((u64) (final_len * 8)); break; case BLSZ512: ex = ctx->W64[15] >> 56; ctx->W64[ 0] = 0x80; @@ -465,7 +465,7 @@ DECLSPEC u32 do_round (const u32 *pw, const u32 pw_len, ctx_t *ctx, SHM_TYPE u32 ctx->W64[12] = 0; ctx->W64[13] = 0; ctx->W64[14] = 0; - ctx->W64[15] = swap64 ((u64) (final_len * 8)); + ctx->W64[15] = swap64_S ((u64) (final_len * 8)); break; } } @@ -473,14 +473,14 @@ DECLSPEC u32 do_round (const u32 *pw, const u32 pw_len, ctx_t *ctx, SHM_TYPE u32 switch (ctx->dgst_len) { case BLSZ256: orig_sha256_transform (&ctx->W32[ 0], &ctx->W32[ 4], &ctx->W32[ 8], &ctx->W32[12], ctx->dgst32); - ctx->dgst32[ 0] = swap32 (ctx->dgst32[0]); - ctx->dgst32[ 1] = swap32 (ctx->dgst32[1]); - ctx->dgst32[ 2] = swap32 (ctx->dgst32[2]); - ctx->dgst32[ 3] = swap32 (ctx->dgst32[3]); - ctx->dgst32[ 4] = swap32 (ctx->dgst32[4]); - ctx->dgst32[ 5] = swap32 (ctx->dgst32[5]); - ctx->dgst32[ 6] = swap32 (ctx->dgst32[6]); - ctx->dgst32[ 7] = swap32 (ctx->dgst32[7]); + ctx->dgst32[ 0] = swap32_S (ctx->dgst32[0]); + ctx->dgst32[ 1] = swap32_S (ctx->dgst32[1]); + ctx->dgst32[ 2] = swap32_S (ctx->dgst32[2]); + ctx->dgst32[ 3] = swap32_S (ctx->dgst32[3]); + ctx->dgst32[ 4] = swap32_S (ctx->dgst32[4]); + ctx->dgst32[ 5] = swap32_S (ctx->dgst32[5]); + ctx->dgst32[ 6] = swap32_S (ctx->dgst32[6]); + ctx->dgst32[ 7] = swap32_S (ctx->dgst32[7]); ctx->dgst32[ 8] = 0; ctx->dgst32[ 9] = 0; ctx->dgst32[10] = 0; @@ -491,24 +491,24 @@ DECLSPEC u32 do_round (const u32 *pw, const u32 pw_len, ctx_t *ctx, SHM_TYPE u32 ctx->dgst32[15] = 0; break; case BLSZ384: orig_sha384_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64); - ctx->dgst64[0] = swap64 (ctx->dgst64[0]); - ctx->dgst64[1] = swap64 (ctx->dgst64[1]); - ctx->dgst64[2] = swap64 (ctx->dgst64[2]); - ctx->dgst64[3] = swap64 (ctx->dgst64[3]); - ctx->dgst64[4] = swap64 (ctx->dgst64[4]); - ctx->dgst64[5] = swap64 (ctx->dgst64[5]); + ctx->dgst64[0] = swap64_S (ctx->dgst64[0]); + ctx->dgst64[1] = swap64_S (ctx->dgst64[1]); + ctx->dgst64[2] = swap64_S (ctx->dgst64[2]); + ctx->dgst64[3] = swap64_S (ctx->dgst64[3]); + ctx->dgst64[4] = swap64_S (ctx->dgst64[4]); + ctx->dgst64[5] = swap64_S (ctx->dgst64[5]); ctx->dgst64[6] = 0; ctx->dgst64[7] = 0; break; case BLSZ512: orig_sha512_transform (&ctx->W64[ 0], &ctx->W64[ 4], &ctx->W64[ 8], &ctx->W64[12], ctx->dgst64); - ctx->dgst64[0] = swap64 (ctx->dgst64[0]); - ctx->dgst64[1] = swap64 (ctx->dgst64[1]); - ctx->dgst64[2] = swap64 (ctx->dgst64[2]); - ctx->dgst64[3] = swap64 (ctx->dgst64[3]); - ctx->dgst64[4] = swap64 (ctx->dgst64[4]); - ctx->dgst64[5] = swap64 (ctx->dgst64[5]); - ctx->dgst64[6] = swap64 (ctx->dgst64[6]); - ctx->dgst64[7] = swap64 (ctx->dgst64[7]); + ctx->dgst64[0] = swap64_S (ctx->dgst64[0]); + ctx->dgst64[1] = swap64_S (ctx->dgst64[1]); + ctx->dgst64[2] = swap64_S (ctx->dgst64[2]); + ctx->dgst64[3] = swap64_S (ctx->dgst64[3]); + ctx->dgst64[4] = swap64_S (ctx->dgst64[4]); + ctx->dgst64[5] = swap64_S (ctx->dgst64[5]); + ctx->dgst64[6] = swap64_S (ctx->dgst64[6]); + ctx->dgst64[7] = swap64_S (ctx->dgst64[7]); break; } diff --git a/OpenCL/m11600.cl b/OpenCL/m11600.cl index ed3e6c2bc..c80743232 100644 --- a/OpenCL/m11600.cl +++ b/OpenCL/m11600.cl @@ -301,14 +301,14 @@ __kernel void m11600_hook23 (__global pw_t *pws, __global const kernel_rule_t *r sha256_final (&ctx); - seven_zip_hook[gid].ukey[0] = swap32 (ctx.h[0]); - seven_zip_hook[gid].ukey[1] = swap32 (ctx.h[1]); - seven_zip_hook[gid].ukey[2] = swap32 (ctx.h[2]); - seven_zip_hook[gid].ukey[3] = swap32 (ctx.h[3]); - seven_zip_hook[gid].ukey[4] = swap32 (ctx.h[4]); - seven_zip_hook[gid].ukey[5] = swap32 (ctx.h[5]); - seven_zip_hook[gid].ukey[6] = swap32 (ctx.h[6]); - seven_zip_hook[gid].ukey[7] = swap32 (ctx.h[7]); + seven_zip_hook[gid].ukey[0] = swap32_S (ctx.h[0]); + seven_zip_hook[gid].ukey[1] = swap32_S (ctx.h[1]); + seven_zip_hook[gid].ukey[2] = swap32_S (ctx.h[2]); + seven_zip_hook[gid].ukey[3] = swap32_S (ctx.h[3]); + seven_zip_hook[gid].ukey[4] = swap32_S (ctx.h[4]); + seven_zip_hook[gid].ukey[5] = swap32_S (ctx.h[5]); + seven_zip_hook[gid].ukey[6] = swap32_S (ctx.h[6]); + seven_zip_hook[gid].ukey[7] = swap32_S (ctx.h[7]); } __kernel void m11600_comp (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const pw_t *combs_buf, __global const bf_t *bfs_buf, __global seven_zip_tmp_t *tmps, __global seven_zip_hook_t *seven_zip_hook, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global void *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u64 gid_max) diff --git a/OpenCL/m14400_a0-optimized.cl b/OpenCL/m14400_a0-optimized.cl index 0a00e5b7b..5cdf13ea6 100644 --- a/OpenCL/m14400_a0-optimized.cl +++ b/OpenCL/m14400_a0-optimized.cl @@ -197,22 +197,22 @@ __kernel void m14400_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru salt_buf0[0] |= dashes >> 16; salt_buf1[1] |= dashes << 16; - salt_buf0[0] = swap32 (salt_buf0[0]); - salt_buf0[1] = swap32 (salt_buf0[1]); - salt_buf0[2] = swap32 (salt_buf0[2]); - salt_buf0[3] = swap32 (salt_buf0[3]); - salt_buf1[0] = swap32 (salt_buf1[0]); - salt_buf1[1] = swap32 (salt_buf1[1]); - salt_buf1[2] = swap32 (salt_buf1[2]); - salt_buf1[3] = swap32 (salt_buf1[3]); - salt_buf2[0] = swap32 (salt_buf2[0]); - salt_buf2[1] = swap32 (salt_buf2[1]); - salt_buf2[2] = swap32 (salt_buf2[2]); - salt_buf2[3] = swap32 (salt_buf2[3]); - salt_buf3[0] = swap32 (salt_buf3[0]); - salt_buf3[1] = swap32 (salt_buf3[1]); - salt_buf3[2] = swap32 (salt_buf3[2]); - salt_buf3[3] = swap32 (salt_buf3[3]); + salt_buf0[0] = swap32_S (salt_buf0[0]); + salt_buf0[1] = swap32_S (salt_buf0[1]); + salt_buf0[2] = swap32_S (salt_buf0[2]); + salt_buf0[3] = swap32_S (salt_buf0[3]); + salt_buf1[0] = swap32_S (salt_buf1[0]); + salt_buf1[1] = swap32_S (salt_buf1[1]); + salt_buf1[2] = swap32_S (salt_buf1[2]); + salt_buf1[3] = swap32_S (salt_buf1[3]); + salt_buf2[0] = swap32_S (salt_buf2[0]); + salt_buf2[1] = swap32_S (salt_buf2[1]); + salt_buf2[2] = swap32_S (salt_buf2[2]); + salt_buf2[3] = swap32_S (salt_buf2[3]); + salt_buf3[0] = swap32_S (salt_buf3[0]); + salt_buf3[1] = swap32_S (salt_buf3[1]); + salt_buf3[2] = swap32_S (salt_buf3[2]); + salt_buf3[3] = swap32_S (salt_buf3[3]); const u32 salt_len_orig = salt_bufs[salt_pos].salt_len; @@ -468,22 +468,22 @@ __kernel void m14400_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru salt_buf0[0] |= dashes >> 16; salt_buf1[1] |= dashes << 16; - salt_buf0[0] = swap32 (salt_buf0[0]); - salt_buf0[1] = swap32 (salt_buf0[1]); - salt_buf0[2] = swap32 (salt_buf0[2]); - salt_buf0[3] = swap32 (salt_buf0[3]); - salt_buf1[0] = swap32 (salt_buf1[0]); - salt_buf1[1] = swap32 (salt_buf1[1]); - salt_buf1[2] = swap32 (salt_buf1[2]); - salt_buf1[3] = swap32 (salt_buf1[3]); - salt_buf2[0] = swap32 (salt_buf2[0]); - salt_buf2[1] = swap32 (salt_buf2[1]); - salt_buf2[2] = swap32 (salt_buf2[2]); - salt_buf2[3] = swap32 (salt_buf2[3]); - salt_buf3[0] = swap32 (salt_buf3[0]); - salt_buf3[1] = swap32 (salt_buf3[1]); - salt_buf3[2] = swap32 (salt_buf3[2]); - salt_buf3[3] = swap32 (salt_buf3[3]); + salt_buf0[0] = swap32_S (salt_buf0[0]); + salt_buf0[1] = swap32_S (salt_buf0[1]); + salt_buf0[2] = swap32_S (salt_buf0[2]); + salt_buf0[3] = swap32_S (salt_buf0[3]); + salt_buf1[0] = swap32_S (salt_buf1[0]); + salt_buf1[1] = swap32_S (salt_buf1[1]); + salt_buf1[2] = swap32_S (salt_buf1[2]); + salt_buf1[3] = swap32_S (salt_buf1[3]); + salt_buf2[0] = swap32_S (salt_buf2[0]); + salt_buf2[1] = swap32_S (salt_buf2[1]); + salt_buf2[2] = swap32_S (salt_buf2[2]); + salt_buf2[3] = swap32_S (salt_buf2[3]); + salt_buf3[0] = swap32_S (salt_buf3[0]); + salt_buf3[1] = swap32_S (salt_buf3[1]); + salt_buf3[2] = swap32_S (salt_buf3[2]); + salt_buf3[3] = swap32_S (salt_buf3[3]); const u32 salt_len_orig = salt_bufs[salt_pos].salt_len; diff --git a/OpenCL/m14400_a1-optimized.cl b/OpenCL/m14400_a1-optimized.cl index 57b35d5df..d453da36e 100644 --- a/OpenCL/m14400_a1-optimized.cl +++ b/OpenCL/m14400_a1-optimized.cl @@ -197,22 +197,22 @@ __kernel void m14400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule salt_buf0[0] |= dashes >> 16; salt_buf1[1] |= dashes << 16; - salt_buf0[0] = swap32 (salt_buf0[0]); - salt_buf0[1] = swap32 (salt_buf0[1]); - salt_buf0[2] = swap32 (salt_buf0[2]); - salt_buf0[3] = swap32 (salt_buf0[3]); - salt_buf1[0] = swap32 (salt_buf1[0]); - salt_buf1[1] = swap32 (salt_buf1[1]); - salt_buf1[2] = swap32 (salt_buf1[2]); - salt_buf1[3] = swap32 (salt_buf1[3]); - salt_buf2[0] = swap32 (salt_buf2[0]); - salt_buf2[1] = swap32 (salt_buf2[1]); - salt_buf2[2] = swap32 (salt_buf2[2]); - salt_buf2[3] = swap32 (salt_buf2[3]); - salt_buf3[0] = swap32 (salt_buf3[0]); - salt_buf3[1] = swap32 (salt_buf3[1]); - salt_buf3[2] = swap32 (salt_buf3[2]); - salt_buf3[3] = swap32 (salt_buf3[3]); + salt_buf0[0] = swap32_S (salt_buf0[0]); + salt_buf0[1] = swap32_S (salt_buf0[1]); + salt_buf0[2] = swap32_S (salt_buf0[2]); + salt_buf0[3] = swap32_S (salt_buf0[3]); + salt_buf1[0] = swap32_S (salt_buf1[0]); + salt_buf1[1] = swap32_S (salt_buf1[1]); + salt_buf1[2] = swap32_S (salt_buf1[2]); + salt_buf1[3] = swap32_S (salt_buf1[3]); + salt_buf2[0] = swap32_S (salt_buf2[0]); + salt_buf2[1] = swap32_S (salt_buf2[1]); + salt_buf2[2] = swap32_S (salt_buf2[2]); + salt_buf2[3] = swap32_S (salt_buf2[3]); + salt_buf3[0] = swap32_S (salt_buf3[0]); + salt_buf3[1] = swap32_S (salt_buf3[1]); + salt_buf3[2] = swap32_S (salt_buf3[2]); + salt_buf3[3] = swap32_S (salt_buf3[3]); const u32 salt_len_orig = salt_bufs[salt_pos].salt_len; @@ -532,22 +532,22 @@ __kernel void m14400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule salt_buf0[0] |= dashes >> 16; salt_buf1[1] |= dashes << 16; - salt_buf0[0] = swap32 (salt_buf0[0]); - salt_buf0[1] = swap32 (salt_buf0[1]); - salt_buf0[2] = swap32 (salt_buf0[2]); - salt_buf0[3] = swap32 (salt_buf0[3]); - salt_buf1[0] = swap32 (salt_buf1[0]); - salt_buf1[1] = swap32 (salt_buf1[1]); - salt_buf1[2] = swap32 (salt_buf1[2]); - salt_buf1[3] = swap32 (salt_buf1[3]); - salt_buf2[0] = swap32 (salt_buf2[0]); - salt_buf2[1] = swap32 (salt_buf2[1]); - salt_buf2[2] = swap32 (salt_buf2[2]); - salt_buf2[3] = swap32 (salt_buf2[3]); - salt_buf3[0] = swap32 (salt_buf3[0]); - salt_buf3[1] = swap32 (salt_buf3[1]); - salt_buf3[2] = swap32 (salt_buf3[2]); - salt_buf3[3] = swap32 (salt_buf3[3]); + salt_buf0[0] = swap32_S (salt_buf0[0]); + salt_buf0[1] = swap32_S (salt_buf0[1]); + salt_buf0[2] = swap32_S (salt_buf0[2]); + salt_buf0[3] = swap32_S (salt_buf0[3]); + salt_buf1[0] = swap32_S (salt_buf1[0]); + salt_buf1[1] = swap32_S (salt_buf1[1]); + salt_buf1[2] = swap32_S (salt_buf1[2]); + salt_buf1[3] = swap32_S (salt_buf1[3]); + salt_buf2[0] = swap32_S (salt_buf2[0]); + salt_buf2[1] = swap32_S (salt_buf2[1]); + salt_buf2[2] = swap32_S (salt_buf2[2]); + salt_buf2[3] = swap32_S (salt_buf2[3]); + salt_buf3[0] = swap32_S (salt_buf3[0]); + salt_buf3[1] = swap32_S (salt_buf3[1]); + salt_buf3[2] = swap32_S (salt_buf3[2]); + salt_buf3[3] = swap32_S (salt_buf3[3]); const u32 salt_len_orig = salt_bufs[salt_pos].salt_len; diff --git a/OpenCL/m14400_a3-optimized.cl b/OpenCL/m14400_a3-optimized.cl index b52061d2b..02a1af86a 100644 --- a/OpenCL/m14400_a3-optimized.cl +++ b/OpenCL/m14400_a3-optimized.cl @@ -159,22 +159,22 @@ DECLSPEC void m14400m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_ salt_buf0[0] |= dashes >> 16; salt_buf1[1] |= dashes << 16; - salt_buf0[0] = swap32 (salt_buf0[0]); - salt_buf0[1] = swap32 (salt_buf0[1]); - salt_buf0[2] = swap32 (salt_buf0[2]); - salt_buf0[3] = swap32 (salt_buf0[3]); - salt_buf1[0] = swap32 (salt_buf1[0]); - salt_buf1[1] = swap32 (salt_buf1[1]); - salt_buf1[2] = swap32 (salt_buf1[2]); - salt_buf1[3] = swap32 (salt_buf1[3]); - salt_buf2[0] = swap32 (salt_buf2[0]); - salt_buf2[1] = swap32 (salt_buf2[1]); - salt_buf2[2] = swap32 (salt_buf2[2]); - salt_buf2[3] = swap32 (salt_buf2[3]); - salt_buf3[0] = swap32 (salt_buf3[0]); - salt_buf3[1] = swap32 (salt_buf3[1]); - salt_buf3[2] = swap32 (salt_buf3[2]); - salt_buf3[3] = swap32 (salt_buf3[3]); + salt_buf0[0] = swap32_S (salt_buf0[0]); + salt_buf0[1] = swap32_S (salt_buf0[1]); + salt_buf0[2] = swap32_S (salt_buf0[2]); + salt_buf0[3] = swap32_S (salt_buf0[3]); + salt_buf1[0] = swap32_S (salt_buf1[0]); + salt_buf1[1] = swap32_S (salt_buf1[1]); + salt_buf1[2] = swap32_S (salt_buf1[2]); + salt_buf1[3] = swap32_S (salt_buf1[3]); + salt_buf2[0] = swap32_S (salt_buf2[0]); + salt_buf2[1] = swap32_S (salt_buf2[1]); + salt_buf2[2] = swap32_S (salt_buf2[2]); + salt_buf2[3] = swap32_S (salt_buf2[3]); + salt_buf3[0] = swap32_S (salt_buf3[0]); + salt_buf3[1] = swap32_S (salt_buf3[1]); + salt_buf3[2] = swap32_S (salt_buf3[2]); + salt_buf3[3] = swap32_S (salt_buf3[3]); const u32 salt_len_orig = salt_bufs[salt_pos].salt_len; @@ -225,14 +225,14 @@ DECLSPEC void m14400m (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_ append_0x80_2x4_VV (w0_t, w1_t, pw_len_new); - w0_t[0] = swap32 (w0_t[0]); - w0_t[1] = swap32 (w0_t[1]); - w0_t[2] = swap32 (w0_t[2]); - w0_t[3] = swap32 (w0_t[3]); - w1_t[0] = swap32 (w1_t[0]); - w1_t[1] = swap32 (w1_t[1]); - w1_t[2] = swap32 (w1_t[2]); - w1_t[3] = swap32 (w1_t[3]); + w0_t[0] = swap32_S (w0_t[0]); + w0_t[1] = swap32_S (w0_t[1]); + w0_t[2] = swap32_S (w0_t[2]); + w0_t[3] = swap32_S (w0_t[3]); + w1_t[0] = swap32_S (w1_t[0]); + w1_t[1] = swap32_S (w1_t[1]); + w1_t[2] = swap32_S (w1_t[2]); + w1_t[3] = swap32_S (w1_t[3]); /** * loop @@ -420,22 +420,22 @@ DECLSPEC void m14400s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_ salt_buf0[0] |= dashes >> 16; salt_buf1[1] |= dashes << 16; - salt_buf0[0] = swap32 (salt_buf0[0]); - salt_buf0[1] = swap32 (salt_buf0[1]); - salt_buf0[2] = swap32 (salt_buf0[2]); - salt_buf0[3] = swap32 (salt_buf0[3]); - salt_buf1[0] = swap32 (salt_buf1[0]); - salt_buf1[1] = swap32 (salt_buf1[1]); - salt_buf1[2] = swap32 (salt_buf1[2]); - salt_buf1[3] = swap32 (salt_buf1[3]); - salt_buf2[0] = swap32 (salt_buf2[0]); - salt_buf2[1] = swap32 (salt_buf2[1]); - salt_buf2[2] = swap32 (salt_buf2[2]); - salt_buf2[3] = swap32 (salt_buf2[3]); - salt_buf3[0] = swap32 (salt_buf3[0]); - salt_buf3[1] = swap32 (salt_buf3[1]); - salt_buf3[2] = swap32 (salt_buf3[2]); - salt_buf3[3] = swap32 (salt_buf3[3]); + salt_buf0[0] = swap32_S (salt_buf0[0]); + salt_buf0[1] = swap32_S (salt_buf0[1]); + salt_buf0[2] = swap32_S (salt_buf0[2]); + salt_buf0[3] = swap32_S (salt_buf0[3]); + salt_buf1[0] = swap32_S (salt_buf1[0]); + salt_buf1[1] = swap32_S (salt_buf1[1]); + salt_buf1[2] = swap32_S (salt_buf1[2]); + salt_buf1[3] = swap32_S (salt_buf1[3]); + salt_buf2[0] = swap32_S (salt_buf2[0]); + salt_buf2[1] = swap32_S (salt_buf2[1]); + salt_buf2[2] = swap32_S (salt_buf2[2]); + salt_buf2[3] = swap32_S (salt_buf2[3]); + salt_buf3[0] = swap32_S (salt_buf3[0]); + salt_buf3[1] = swap32_S (salt_buf3[1]); + salt_buf3[2] = swap32_S (salt_buf3[2]); + salt_buf3[3] = swap32_S (salt_buf3[3]); const u32 salt_len_orig = salt_bufs[salt_pos].salt_len; @@ -486,14 +486,14 @@ DECLSPEC void m14400s (u32 w0[4], u32 w1[4], u32 w2[4], u32 w3[4], const u32 pw_ append_0x80_2x4_VV (w0_t, w1_t, pw_len_new); - w0_t[0] = swap32 (w0_t[0]); - w0_t[1] = swap32 (w0_t[1]); - w0_t[2] = swap32 (w0_t[2]); - w0_t[3] = swap32 (w0_t[3]); - w1_t[0] = swap32 (w1_t[0]); - w1_t[1] = swap32 (w1_t[1]); - w1_t[2] = swap32 (w1_t[2]); - w1_t[3] = swap32 (w1_t[3]); + w0_t[0] = swap32_S (w0_t[0]); + w0_t[1] = swap32_S (w0_t[1]); + w0_t[2] = swap32_S (w0_t[2]); + w0_t[3] = swap32_S (w0_t[3]); + w1_t[0] = swap32_S (w1_t[0]); + w1_t[1] = swap32_S (w1_t[1]); + w1_t[2] = swap32_S (w1_t[2]); + w1_t[3] = swap32_S (w1_t[3]); /** * loop diff --git a/OpenCL/m16600_a0-optimized.cl b/OpenCL/m16600_a0-optimized.cl index 91baafed5..ebce8c3b4 100644 --- a/OpenCL/m16600_a0-optimized.cl +++ b/OpenCL/m16600_a0-optimized.cl @@ -115,20 +115,20 @@ __kernel void m16600_m04 (__global pw_t *pws, __constant const kernel_rule_t *ru * sha256 */ - u32x w0_t = swap32_S (w0[0]); - u32x w1_t = swap32_S (w0[1]); - u32x w2_t = swap32_S (w0[2]); - u32x w3_t = swap32_S (w0[3]); - u32x w4_t = swap32_S (w1[0]); - u32x w5_t = swap32_S (w1[1]); - u32x w6_t = swap32_S (w1[2]); - u32x w7_t = swap32_S (w1[3]); - u32x w8_t = swap32_S (w2[0]); - u32x w9_t = swap32_S (w2[1]); - u32x wa_t = swap32_S (w2[2]); - u32x wb_t = swap32_S (w2[3]); - u32x wc_t = swap32_S (w3[0]); - u32x wd_t = swap32_S (w3[1]); + u32x w0_t = swap32 (w0[0]); + u32x w1_t = swap32 (w0[1]); + u32x w2_t = swap32 (w0[2]); + u32x w3_t = swap32 (w0[3]); + u32x w4_t = swap32 (w1[0]); + u32x w5_t = swap32 (w1[1]); + u32x w6_t = swap32 (w1[2]); + u32x w7_t = swap32 (w1[3]); + u32x w8_t = swap32 (w2[0]); + u32x w9_t = swap32 (w2[1]); + u32x wa_t = swap32 (w2[2]); + u32x wb_t = swap32 (w2[3]); + u32x wc_t = swap32 (w3[0]); + u32x wd_t = swap32 (w3[1]); u32x we_t = 0; u32x wf_t = out_len * 8; @@ -484,20 +484,20 @@ __kernel void m16600_s04 (__global pw_t *pws, __constant const kernel_rule_t *ru * sha256 */ - u32x w0_t = swap32_S (w0[0]); - u32x w1_t = swap32_S (w0[1]); - u32x w2_t = swap32_S (w0[2]); - u32x w3_t = swap32_S (w0[3]); - u32x w4_t = swap32_S (w1[0]); - u32x w5_t = swap32_S (w1[1]); - u32x w6_t = swap32_S (w1[2]); - u32x w7_t = swap32_S (w1[3]); - u32x w8_t = swap32_S (w2[0]); - u32x w9_t = swap32_S (w2[1]); - u32x wa_t = swap32_S (w2[2]); - u32x wb_t = swap32_S (w2[3]); - u32x wc_t = swap32_S (w3[0]); - u32x wd_t = swap32_S (w3[1]); + u32x w0_t = swap32 (w0[0]); + u32x w1_t = swap32 (w0[1]); + u32x w2_t = swap32 (w0[2]); + u32x w3_t = swap32 (w0[3]); + u32x w4_t = swap32 (w1[0]); + u32x w5_t = swap32 (w1[1]); + u32x w6_t = swap32 (w1[2]); + u32x w7_t = swap32 (w1[3]); + u32x w8_t = swap32 (w2[0]); + u32x w9_t = swap32 (w2[1]); + u32x wa_t = swap32 (w2[2]); + u32x wb_t = swap32 (w2[3]); + u32x wc_t = swap32 (w3[0]); + u32x wd_t = swap32 (w3[1]); u32x we_t = 0; u32x wf_t = out_len * 8; diff --git a/OpenCL/m16600_a1-optimized.cl b/OpenCL/m16600_a1-optimized.cl index 67827756b..5e657a9e3 100644 --- a/OpenCL/m16600_a1-optimized.cl +++ b/OpenCL/m16600_a1-optimized.cl @@ -171,20 +171,20 @@ __kernel void m16600_m04 (__global pw_t *pws, __global const kernel_rule_t *rule * sha256 */ - u32x w0_t = swap32_S (w0[0]); - u32x w1_t = swap32_S (w0[1]); - u32x w2_t = swap32_S (w0[2]); - u32x w3_t = swap32_S (w0[3]); - u32x w4_t = swap32_S (w1[0]); - u32x w5_t = swap32_S (w1[1]); - u32x w6_t = swap32_S (w1[2]); - u32x w7_t = swap32_S (w1[3]); - u32x w8_t = swap32_S (w2[0]); - u32x w9_t = swap32_S (w2[1]); - u32x wa_t = swap32_S (w2[2]); - u32x wb_t = swap32_S (w2[3]); - u32x wc_t = swap32_S (w3[0]); - u32x wd_t = swap32_S (w3[1]); + u32x w0_t = swap32 (w0[0]); + u32x w1_t = swap32 (w0[1]); + u32x w2_t = swap32 (w0[2]); + u32x w3_t = swap32 (w0[3]); + u32x w4_t = swap32 (w1[0]); + u32x w5_t = swap32 (w1[1]); + u32x w6_t = swap32 (w1[2]); + u32x w7_t = swap32 (w1[3]); + u32x w8_t = swap32 (w2[0]); + u32x w9_t = swap32 (w2[1]); + u32x wa_t = swap32 (w2[2]); + u32x wb_t = swap32 (w2[3]); + u32x wc_t = swap32 (w3[0]); + u32x wd_t = swap32 (w3[1]); u32x we_t = 0; u32x wf_t = pw_len * 8; @@ -598,20 +598,20 @@ __kernel void m16600_s04 (__global pw_t *pws, __global const kernel_rule_t *rule * sha256 */ - u32x w0_t = swap32_S (w0[0]); - u32x w1_t = swap32_S (w0[1]); - u32x w2_t = swap32_S (w0[2]); - u32x w3_t = swap32_S (w0[3]); - u32x w4_t = swap32_S (w1[0]); - u32x w5_t = swap32_S (w1[1]); - u32x w6_t = swap32_S (w1[2]); - u32x w7_t = swap32_S (w1[3]); - u32x w8_t = swap32_S (w2[0]); - u32x w9_t = swap32_S (w2[1]); - u32x wa_t = swap32_S (w2[2]); - u32x wb_t = swap32_S (w2[3]); - u32x wc_t = swap32_S (w3[0]); - u32x wd_t = swap32_S (w3[1]); + u32x w0_t = swap32 (w0[0]); + u32x w1_t = swap32 (w0[1]); + u32x w2_t = swap32 (w0[2]); + u32x w3_t = swap32 (w0[3]); + u32x w4_t = swap32 (w1[0]); + u32x w5_t = swap32 (w1[1]); + u32x w6_t = swap32 (w1[2]); + u32x w7_t = swap32 (w1[3]); + u32x w8_t = swap32 (w2[0]); + u32x w9_t = swap32 (w2[1]); + u32x wa_t = swap32 (w2[2]); + u32x wb_t = swap32 (w2[3]); + u32x wc_t = swap32 (w3[0]); + u32x wd_t = swap32 (w3[1]); u32x we_t = 0; u32x wf_t = pw_len * 8;