diff --git a/OpenCL/m17220_a0-pure.cl b/OpenCL/m17220_a0-pure.cl index 13bfbe70e..430060525 100644 --- a/OpenCL/m17220_a0-pure.cl +++ b/OpenCL/m17220_a0-pure.cl @@ -87,6 +87,7 @@ Related publication: https://scitepress.org/PublicationsDetail.aspx?ID=KLPzPqStp #include "inc_vendor.h" #include "inc_types.h" +#include "inc_platform.cl" #include "inc_common.cl" #include "inc_simd.cl" #include "inc_rp.h" @@ -131,7 +132,7 @@ struct pkzip_hash u32 data_length; u16 checksum_from_crc; u16 checksum_from_timestamp; - u8 data[MAX_DATA]; + u32 data[MAX_DATA]; } __attribute__((packed)); @@ -229,7 +230,7 @@ typedef struct { u16 val; /* offset in table or code value */ } code; -CONSTANT_AS code lenfix[512] = { +CONSTANT_VK code lenfix[512] = { {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, @@ -306,7 +307,7 @@ CONSTANT_AS code lenfix[512] = { {0,9,255} }; -CONSTANT_AS code distfix[32] = { +CONSTANT_VK code distfix[32] = { {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, @@ -526,7 +527,7 @@ DECLSPEC int check_inflate_code1 (u8 *next, int left) } } -__kernel void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) +KERNEL_FQ void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) { /** * modifier @@ -540,14 +541,14 @@ __kernel void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) * sbox, kbox */ - LOCAL_AS u32 l_crc32tab[256]; + LOCAL_VK u32 l_crc32tab[256]; for (u64 i = lid; i < 256; i += lsz) { l_crc32tab[i] = crc32tab[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); if (gid >= gid_max) return; @@ -592,13 +593,11 @@ __kernel void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (u32 idx = 0; idx < hash_count; idx++) { - __global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hashes[idx].data; - u32x key0 = key0init; u32x key1 = key1init; u32x key2 = key2init; - next = data_ptr[0]; + next = esalt_bufs[digests_offset].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -616,7 +615,7 @@ __kernel void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[1]; + next = esalt_bufs[digests_offset].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -634,7 +633,7 @@ __kernel void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[2]; + next = esalt_bufs[digests_offset].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -660,7 +659,7 @@ __kernel void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) u8 tmp[TMPSIZ]; - next = data_ptr[3]; + next = esalt_bufs[digests_offset].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -685,7 +684,7 @@ __kernel void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (int i = 16; i < 36; i += 4) { - next = data_ptr[i / 4]; + next = esalt_bufs[digests_offset].hashes[idx].data[i / 4]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -721,8 +720,8 @@ __kernel void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input - infstream.next_in = esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array + infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -779,7 +778,7 @@ __kernel void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) } } -__kernel void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) +KERNEL_FQ void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) { /** * modifier @@ -793,14 +792,14 @@ __kernel void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) * sbox, kbox */ - LOCAL_AS u32 l_crc32tab[256]; + LOCAL_VK u32 l_crc32tab[256]; for (u64 i = lid; i < 256; i += lsz) { l_crc32tab[i] = crc32tab[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); if (gid >= gid_max) return; @@ -845,13 +844,11 @@ __kernel void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (u32 idx = 0; idx < hash_count; idx++) { - __global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hashes[idx].data; - u32x key0 = key0init; u32x key1 = key1init; u32x key2 = key2init; - next = data_ptr[0]; + next = esalt_bufs[digests_offset].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -869,7 +866,7 @@ __kernel void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[1]; + next = esalt_bufs[digests_offset].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -887,7 +884,7 @@ __kernel void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[2]; + next = esalt_bufs[digests_offset].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -913,7 +910,7 @@ __kernel void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) u8 tmp[TMPSIZ]; - next = data_ptr[3]; + next = esalt_bufs[digests_offset].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -938,7 +935,7 @@ __kernel void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (int i = 16; i < 36; i += 4) { - next = data_ptr[i / 4]; + next = esalt_bufs[digests_offset].hashes[idx].data[i / 4]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -974,8 +971,8 @@ __kernel void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input - infstream.next_in = esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array + infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array diff --git a/OpenCL/m17220_a1-pure.cl b/OpenCL/m17220_a1-pure.cl index 68c3c2f7c..0c0cec887 100644 --- a/OpenCL/m17220_a1-pure.cl +++ b/OpenCL/m17220_a1-pure.cl @@ -87,6 +87,7 @@ Related publication: https://scitepress.org/PublicationsDetail.aspx?ID=KLPzPqStp #include "inc_vendor.h" #include "inc_types.h" +#include "inc_platform.cl" #include "inc_common.cl" #include "inc_simd.cl" @@ -129,7 +130,7 @@ struct pkzip_hash u32 data_length; u16 checksum_from_crc; u16 checksum_from_timestamp; - u8 data[MAX_DATA]; + u32 data[MAX_DATA]; } __attribute__((packed)); @@ -227,7 +228,7 @@ typedef struct { u16 val; /* offset in table or code value */ } code; -CONSTANT_AS code lenfix[512] = { +CONSTANT_VK code lenfix[512] = { {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, @@ -304,7 +305,7 @@ CONSTANT_AS code lenfix[512] = { {0,9,255} }; -CONSTANT_AS code distfix[32] = { +CONSTANT_VK code distfix[32] = { {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, @@ -524,7 +525,7 @@ DECLSPEC int check_inflate_code1 (u8 *next, int left) } } -__kernel void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) +KERNEL_FQ void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) { /** * modifier @@ -538,14 +539,14 @@ __kernel void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) * sbox, kbox */ - LOCAL_AS u32 l_crc32tab[256]; + LOCAL_VK u32 l_crc32tab[256]; for (u64 i = lid; i < 256; i += lsz) { l_crc32tab[i] = crc32tab[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); if (gid >= gid_max) return; @@ -592,13 +593,11 @@ __kernel void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) for (u32 idx = 0; idx < hash_count; idx++) { - __global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hashes[idx].data; - u32x key0 = key0init2; u32x key1 = key1init2; u32x key2 = key2init2; - next = data_ptr[0]; + next = esalt_bufs[digests_offset].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -616,7 +615,7 @@ __kernel void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[1]; + next = esalt_bufs[digests_offset].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -634,7 +633,7 @@ __kernel void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[2]; + next = esalt_bufs[digests_offset].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -660,7 +659,7 @@ __kernel void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) u8 tmp[TMPSIZ]; - next = data_ptr[3]; + next = esalt_bufs[digests_offset].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -685,7 +684,7 @@ __kernel void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) for (int i = 16; i < 36; i += 4) { - next = data_ptr[i / 4]; + next = esalt_bufs[digests_offset].hashes[idx].data[i / 4]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -721,8 +720,8 @@ __kernel void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input - infstream.next_in = esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array + infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -779,7 +778,7 @@ __kernel void m17220_sxx (KERN_ATTR_ESALT (pkzip_t)) } } -__kernel void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) +KERNEL_FQ void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) { /** * modifier @@ -793,14 +792,14 @@ __kernel void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) * sbox, kbox */ - LOCAL_AS u32 l_crc32tab[256]; + LOCAL_VK u32 l_crc32tab[256]; for (u64 i = lid; i < 256; i += lsz) { l_crc32tab[i] = crc32tab[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); if (gid >= gid_max) return; @@ -847,13 +846,11 @@ __kernel void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) for (u32 idx = 0; idx < hash_count; idx++) { - __global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hashes[idx].data; - u32x key0 = key0init2; u32x key1 = key1init2; u32x key2 = key2init2; - next = data_ptr[0]; + next = esalt_bufs[digests_offset].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -871,7 +868,7 @@ __kernel void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[1]; + next = esalt_bufs[digests_offset].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -889,7 +886,7 @@ __kernel void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[2]; + next = esalt_bufs[digests_offset].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -915,7 +912,7 @@ __kernel void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) u8 tmp[TMPSIZ]; - next = data_ptr[3]; + next = esalt_bufs[digests_offset].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -940,7 +937,7 @@ __kernel void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) for (int i = 16; i < 36; i += 4) { - next = data_ptr[i / 4]; + next = esalt_bufs[digests_offset].hashes[idx].data[i / 4]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -976,8 +973,8 @@ __kernel void m17220_mxx (KERN_ATTR_ESALT (pkzip_t)) inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input - infstream.next_in = esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array + infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array diff --git a/OpenCL/m17220_a3-pure.cl b/OpenCL/m17220_a3-pure.cl index f92675d47..73a9b4f84 100644 --- a/OpenCL/m17220_a3-pure.cl +++ b/OpenCL/m17220_a3-pure.cl @@ -525,7 +525,7 @@ DECLSPEC int check_inflate_code1 (u8 *next, int left) } } -__kernel void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) +KERNEL_FQ void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) { /** * modifier @@ -732,8 +732,8 @@ __kernel void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array + infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -790,7 +790,7 @@ __kernel void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) } } -__kernel void m17220_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) +KERNEL_FQ void m17220_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) { /** * modifier @@ -997,8 +997,8 @@ __kernel void m17220_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input - infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array + infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array