From d9e5a86765e67a61682cc0f9e70ae279188462b2 Mon Sep 17 00:00:00 2001 From: Sein Coray Date: Tue, 14 May 2019 14:56:37 +0200 Subject: [PATCH] updated m17230 to be cuda compatible --- OpenCL/m17230_a0-pure.cl | 35 ++++++++++++++++------------------- OpenCL/m17230_a1-pure.cl | 35 ++++++++++++++++------------------- OpenCL/m17230_a3-pure.cl | 35 ++++++++++++++++------------------- 3 files changed, 48 insertions(+), 57 deletions(-) diff --git a/OpenCL/m17230_a0-pure.cl b/OpenCL/m17230_a0-pure.cl index 0d8428b5c..d3b1121ff 100644 --- a/OpenCL/m17230_a0-pure.cl +++ b/OpenCL/m17230_a0-pure.cl @@ -87,6 +87,7 @@ Related publication: https://scitepress.org/PublicationsDetail.aspx?ID=KLPzPqStp #include "inc_vendor.h" #include "inc_types.h" +#include "inc_platform.cl" #include "inc_common.cl" #include "inc_simd.cl" #include "inc_rp.h" @@ -131,7 +132,7 @@ struct pkzip_hash u32 data_length; u16 checksum_from_crc; u16 checksum_from_timestamp; - u8 data[MAX_DATA]; + u32 data[MAX_DATA]; } __attribute__((packed)); @@ -219,7 +220,7 @@ CONSTANT_AS u32a crc32tab[256] = 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d }; -__kernel void m17230_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) +KERNEL_FQ void m17230_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) { /** * modifier @@ -233,14 +234,14 @@ __kernel void m17230_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) * sbox, kbox */ - LOCAL_AS u32 l_crc32tab[256]; + LOCAL_VK u32 l_crc32tab[256]; for (u64 i = lid; i < 256; i += lsz) { l_crc32tab[i] = crc32tab[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); if (gid >= gid_max) return; @@ -285,13 +286,11 @@ __kernel void m17230_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (u32 idx = 0; idx < hash_count; idx++) { - __global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hashes[idx].data; - u32x key0 = key0init; u32x key1 = key1init; u32x key2 = key2init; - next = data_ptr[0]; + next = esalt_bufs[digests_offset].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -309,7 +308,7 @@ __kernel void m17230_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[1]; + next = esalt_bufs[digests_offset].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -327,7 +326,7 @@ __kernel void m17230_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[2]; + next = esalt_bufs[digests_offset].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -347,7 +346,7 @@ __kernel void m17230_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[3]; + next = esalt_bufs[digests_offset].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -378,7 +377,7 @@ __kernel void m17230_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) } } -__kernel void m17230_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) +KERNEL_FQ void m17230_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) { /** * modifier @@ -392,14 +391,14 @@ __kernel void m17230_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) * sbox, kbox */ - LOCAL_AS u32 l_crc32tab[256]; + LOCAL_VK u32 l_crc32tab[256]; for (u64 i = lid; i < 256; i += lsz) { l_crc32tab[i] = crc32tab[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); if (gid >= gid_max) return; @@ -444,13 +443,11 @@ __kernel void m17230_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) for (u32 idx = 0; idx < hash_count; idx++) { - __global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hashes[idx].data; - u32x key0 = key0init; u32x key1 = key1init; u32x key2 = key2init; - next = data_ptr[0]; + next = esalt_bufs[digests_offset].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -468,7 +465,7 @@ __kernel void m17230_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[1]; + next = esalt_bufs[digests_offset].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -486,7 +483,7 @@ __kernel void m17230_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[2]; + next = esalt_bufs[digests_offset].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -506,7 +503,7 @@ __kernel void m17230_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[3]; + next = esalt_bufs[digests_offset].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; diff --git a/OpenCL/m17230_a1-pure.cl b/OpenCL/m17230_a1-pure.cl index 327773f74..2f79da216 100644 --- a/OpenCL/m17230_a1-pure.cl +++ b/OpenCL/m17230_a1-pure.cl @@ -87,6 +87,7 @@ Related publication: https://scitepress.org/PublicationsDetail.aspx?ID=KLPzPqStp #include "inc_vendor.h" #include "inc_types.h" +#include "inc_platform.cl" #include "inc_common.cl" #include "inc_simd.cl" @@ -129,7 +130,7 @@ struct pkzip_hash u32 data_length; u16 checksum_from_crc; u16 checksum_from_timestamp; - u8 data[MAX_DATA]; + u32 data[MAX_DATA]; } __attribute__((packed)); @@ -217,7 +218,7 @@ CONSTANT_AS u32a crc32tab[256] = 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d }; -__kernel void m17230_sxx (KERN_ATTR_ESALT (pkzip_t)) +KERNEL_FQ void m17230_sxx (KERN_ATTR_ESALT (pkzip_t)) { /** * modifier @@ -231,14 +232,14 @@ __kernel void m17230_sxx (KERN_ATTR_ESALT (pkzip_t)) * sbox, kbox */ - LOCAL_AS u32 l_crc32tab[256]; + LOCAL_VK u32 l_crc32tab[256]; for (u64 i = lid; i < 256; i += lsz) { l_crc32tab[i] = crc32tab[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); if (gid >= gid_max) return; @@ -285,13 +286,11 @@ __kernel void m17230_sxx (KERN_ATTR_ESALT (pkzip_t)) for (u32 idx = 0; idx < hash_count; idx++) { - __global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hashes[idx].data; - u32x key0 = key0init2; u32x key1 = key1init2; u32x key2 = key2init2; - next = data_ptr[0]; + next = esalt_bufs[digests_offset].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -309,7 +308,7 @@ __kernel void m17230_sxx (KERN_ATTR_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[1]; + next = esalt_bufs[digests_offset].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -327,7 +326,7 @@ __kernel void m17230_sxx (KERN_ATTR_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[2]; + next = esalt_bufs[digests_offset].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -347,7 +346,7 @@ __kernel void m17230_sxx (KERN_ATTR_ESALT (pkzip_t)) if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[3]; + next = esalt_bufs[digests_offset].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -378,7 +377,7 @@ __kernel void m17230_sxx (KERN_ATTR_ESALT (pkzip_t)) } } -__kernel void m17230_mxx (KERN_ATTR_ESALT (pkzip_t)) +KERNEL_FQ void m17230_mxx (KERN_ATTR_ESALT (pkzip_t)) { /** * modifier @@ -392,14 +391,14 @@ __kernel void m17230_mxx (KERN_ATTR_ESALT (pkzip_t)) * sbox, kbox */ - LOCAL_AS u32 l_crc32tab[256]; + LOCAL_VK u32 l_crc32tab[256]; for (u64 i = lid; i < 256; i += lsz) { l_crc32tab[i] = crc32tab[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); if (gid >= gid_max) return; @@ -446,13 +445,11 @@ __kernel void m17230_mxx (KERN_ATTR_ESALT (pkzip_t)) for (u32 idx = 0; idx < hash_count; idx++) { - __global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hashes[idx].data; - u32x key0 = key0init2; u32x key1 = key1init2; u32x key2 = key2init2; - next = data_ptr[0]; + next = esalt_bufs[digests_offset].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -470,7 +467,7 @@ __kernel void m17230_mxx (KERN_ATTR_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[1]; + next = esalt_bufs[digests_offset].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -488,7 +485,7 @@ __kernel void m17230_mxx (KERN_ATTR_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[2]; + next = esalt_bufs[digests_offset].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -508,7 +505,7 @@ __kernel void m17230_mxx (KERN_ATTR_ESALT (pkzip_t)) if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[3]; + next = esalt_bufs[digests_offset].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; diff --git a/OpenCL/m17230_a3-pure.cl b/OpenCL/m17230_a3-pure.cl index 2ae2e3d3a..5da181f34 100644 --- a/OpenCL/m17230_a3-pure.cl +++ b/OpenCL/m17230_a3-pure.cl @@ -87,6 +87,7 @@ Related publication: https://scitepress.org/PublicationsDetail.aspx?ID=KLPzPqStp #include "inc_vendor.h" #include "inc_types.h" +#include "inc_platform.cl" #include "inc_common.cl" #include "inc_simd.cl" @@ -129,7 +130,7 @@ struct pkzip_hash u32 data_length; u16 checksum_from_crc; u16 checksum_from_timestamp; - u8 data[MAX_DATA]; + u32 data[MAX_DATA]; } __attribute__((packed)); @@ -217,7 +218,7 @@ CONSTANT_AS u32a crc32tab[256] = 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d }; -__kernel void m17230_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) +KERNEL_FQ void m17230_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) { /** * modifier @@ -231,14 +232,14 @@ __kernel void m17230_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) * sbox, kbox */ - LOCAL_AS u32 l_crc32tab[256]; + LOCAL_VK u32 l_crc32tab[256]; for (u64 i = lid; i < 256; i += lsz) { l_crc32tab[i] = crc32tab[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); if (gid >= gid_max) return; @@ -299,13 +300,11 @@ __kernel void m17230_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (u32 idx = 0; idx < hash_count; idx++) { - __global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hashes[idx].data; - u32x key0 = key0init; u32x key1 = key1init; u32x key2 = key2init; - next = data_ptr[0]; + next = esalt_bufs[digests_offset].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -323,7 +322,7 @@ __kernel void m17230_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[1]; + next = esalt_bufs[digests_offset].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -341,7 +340,7 @@ __kernel void m17230_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[2]; + next = esalt_bufs[digests_offset].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -361,7 +360,7 @@ __kernel void m17230_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[3]; + next = esalt_bufs[digests_offset].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -392,7 +391,7 @@ __kernel void m17230_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) } } -__kernel void m17230_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) +KERNEL_FQ void m17230_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) { /** * modifier @@ -406,14 +405,14 @@ __kernel void m17230_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) * sbox, kbox */ - LOCAL_AS u32 l_crc32tab[256]; + LOCAL_VK u32 l_crc32tab[256]; for (u64 i = lid; i < 256; i += lsz) { l_crc32tab[i] = crc32tab[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); if (gid >= gid_max) return; @@ -474,13 +473,11 @@ __kernel void m17230_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) for (u32 idx = 0; idx < hash_count; idx++) { - __global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hashes[idx].data; - u32x key0 = key0init; u32x key1 = key1init; u32x key2 = key2init; - next = data_ptr[0]; + next = esalt_bufs[digests_offset].hashes[idx].data[0]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -498,7 +495,7 @@ __kernel void m17230_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[1]; + next = esalt_bufs[digests_offset].hashes[idx].data[1]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -516,7 +513,7 @@ __kernel void m17230_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) plain = unpack_v8d_from_v32_S (next) ^ key3; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[2]; + next = esalt_bufs[digests_offset].hashes[idx].data[2]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3; @@ -536,7 +533,7 @@ __kernel void m17230_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) if ((plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_crc >> 8)) && (plain != (esalt_bufs[digests_offset].hashes[idx].checksum_from_timestamp >> 8))) break; update_key012 (key0, key1, key2, plain, l_crc32tab); - next = data_ptr[3]; + next = esalt_bufs[digests_offset].hashes[idx].data[3]; update_key3 (key2, key3); plain = unpack_v8a_from_v32_S (next) ^ key3;