From b29019ae75c977df3b129663b071f21108323583 Mon Sep 17 00:00:00 2001 From: Sein Coray Date: Tue, 14 May 2019 14:21:41 +0200 Subject: [PATCH] updated m17200 to be cuda compatible --- OpenCL/m17200_a0-pure.cl | 43 +++++++++++++++++++--------------------- OpenCL/m17200_a1-pure.cl | 43 +++++++++++++++++++--------------------- OpenCL/m17200_a3-pure.cl | 43 +++++++++++++++++++--------------------- 3 files changed, 60 insertions(+), 69 deletions(-) diff --git a/OpenCL/m17200_a0-pure.cl b/OpenCL/m17200_a0-pure.cl index 84b1c85d7..bf212a813 100644 --- a/OpenCL/m17200_a0-pure.cl +++ b/OpenCL/m17200_a0-pure.cl @@ -87,6 +87,7 @@ Related publication: https://scitepress.org/PublicationsDetail.aspx?ID=KLPzPqStp #include "inc_vendor.h" #include "inc_types.h" +#include "inc_platform.cl" #include "inc_common.cl" #include "inc_simd.cl" #include "inc_rp.h" @@ -131,7 +132,7 @@ struct pkzip_hash u32 data_length; u16 checksum_from_crc; u16 checksum_from_timestamp; - u8 data[MAX_DATA]; + u32 data[MAX_DATA]; } __attribute__((packed)); @@ -231,7 +232,7 @@ typedef struct } code; -CONSTANT_AS code lenfix[512] = { +CONSTANT_VK code lenfix[512] = { {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, @@ -308,7 +309,7 @@ CONSTANT_AS code lenfix[512] = { {0,9,255} }; -CONSTANT_AS code distfix[32] = { +CONSTANT_VK code distfix[32] = { {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, @@ -528,7 +529,7 @@ DECLSPEC int check_inflate_code1 (u8 *next, int left) } } -__kernel void m17200_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) +KERNEL_FQ void m17200_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) { /** * modifier @@ -542,25 +543,23 @@ __kernel void m17200_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) * sbox, kbox */ - LOCAL_AS u32 l_crc32tab[256]; + LOCAL_VK u32 l_crc32tab[256]; for (u64 i = lid; i < 256; i += lsz) { l_crc32tab[i] = crc32tab[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); - __global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hash.data; - - LOCAL_AS u32 l_data[MAX_LOCAL]; + LOCAL_VK u32 l_data[MAX_LOCAL]; for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = data_ptr[i]; + l_data[i] = esalt_bufs[digests_offset].hash.data[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); if (gid >= gid_max) return; @@ -734,8 +733,8 @@ __kernel void m17200_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hash.data_length - 12; // size of input - infstream.next_in = esalt_bufs[digests_offset].hash.data + 12; // input char array + infstream.avail_in = esalt_bufs[digests_offset].hash.data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hash.data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -768,7 +767,7 @@ __kernel void m17200_sxx (KERN_ATTR_RULES_ESALT (pkzip_t)) } } -__kernel void m17200_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) +KERNEL_FQ void m17200_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) { /** * modifier @@ -782,25 +781,23 @@ __kernel void m17200_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) * sbox, kbox */ - LOCAL_AS u32 l_crc32tab[256]; + LOCAL_VK u32 l_crc32tab[256]; for (u64 i = lid; i < 256; i += lsz) { l_crc32tab[i] = crc32tab[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); - __global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hash.data; - - LOCAL_AS u32 l_data[MAX_LOCAL]; + LOCAL_VK u32 l_data[MAX_LOCAL]; for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = data_ptr[i]; + l_data[i] = esalt_bufs[digests_offset].hash.data[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); if (gid >= gid_max) return; @@ -962,8 +959,8 @@ __kernel void m17200_mxx (KERN_ATTR_RULES_ESALT (pkzip_t)) inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hash.data_length - 12; // size of input - infstream.next_in = esalt_bufs[digests_offset].hash.data + 12; // input char array + infstream.avail_in = esalt_bufs[digests_offset].hash.data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hash.data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array diff --git a/OpenCL/m17200_a1-pure.cl b/OpenCL/m17200_a1-pure.cl index 3d8aaf662..08c4beeb1 100644 --- a/OpenCL/m17200_a1-pure.cl +++ b/OpenCL/m17200_a1-pure.cl @@ -87,6 +87,7 @@ Related publication: https://scitepress.org/PublicationsDetail.aspx?ID=KLPzPqStp #include "inc_vendor.h" #include "inc_types.h" +#include "inc_platform.cl" #include "inc_common.cl" #include "inc_simd.cl" @@ -129,7 +130,7 @@ struct pkzip_hash u32 data_length; u16 checksum_from_crc; u16 checksum_from_timestamp; - u8 data[MAX_DATA]; + u32 data[MAX_DATA]; } __attribute__((packed)); @@ -229,7 +230,7 @@ typedef struct } code; -CONSTANT_AS code lenfix[512] = { +CONSTANT_VK code lenfix[512] = { {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, @@ -306,7 +307,7 @@ CONSTANT_AS code lenfix[512] = { {0,9,255} }; -CONSTANT_AS code distfix[32] = { +CONSTANT_VK code distfix[32] = { {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, @@ -526,7 +527,7 @@ DECLSPEC int check_inflate_code1 (u8 *next, int left) } } -__kernel void m17200_sxx (KERN_ATTR_ESALT (pkzip_t)) +KERNEL_FQ void m17200_sxx (KERN_ATTR_ESALT (pkzip_t)) { /** * modifier @@ -540,25 +541,23 @@ __kernel void m17200_sxx (KERN_ATTR_ESALT (pkzip_t)) * sbox, kbox */ - LOCAL_AS u32 l_crc32tab[256]; + LOCAL_VK u32 l_crc32tab[256]; for (u64 i = lid; i < 256; i += lsz) { l_crc32tab[i] = crc32tab[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); - __global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hash.data; - - LOCAL_AS u32 l_data[MAX_LOCAL]; + LOCAL_VK u32 l_data[MAX_LOCAL]; for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = data_ptr[i]; + l_data[i] = esalt_bufs[digests_offset].hash.data[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); if (gid >= gid_max) return; @@ -736,8 +735,8 @@ __kernel void m17200_sxx (KERN_ATTR_ESALT (pkzip_t)) inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hash.data_length - 12; // size of input - infstream.next_in = esalt_bufs[digests_offset].hash.data + 12; // input char array + infstream.avail_in = esalt_bufs[digests_offset].hash.data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hash.data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -770,7 +769,7 @@ __kernel void m17200_sxx (KERN_ATTR_ESALT (pkzip_t)) } } -__kernel void m17200_mxx (KERN_ATTR_ESALT (pkzip_t)) +KERNEL_FQ void m17200_mxx (KERN_ATTR_ESALT (pkzip_t)) { /** * modifier @@ -784,25 +783,23 @@ __kernel void m17200_mxx (KERN_ATTR_ESALT (pkzip_t)) * sbox, kbox */ - LOCAL_AS u32 l_crc32tab[256]; + LOCAL_VK u32 l_crc32tab[256]; for (u64 i = lid; i < 256; i += lsz) { l_crc32tab[i] = crc32tab[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); - __global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hash.data; - - LOCAL_AS u32 l_data[MAX_LOCAL]; + LOCAL_VK u32 l_data[MAX_LOCAL]; for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = data_ptr[i]; + l_data[i] = esalt_bufs[digests_offset].hash.data[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); if (gid >= gid_max) return; @@ -968,8 +965,8 @@ __kernel void m17200_mxx (KERN_ATTR_ESALT (pkzip_t)) inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hash.data_length - 12; // size of input - infstream.next_in = esalt_bufs[digests_offset].hash.data + 12; // input char array + infstream.avail_in = esalt_bufs[digests_offset].hash.data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hash.data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array diff --git a/OpenCL/m17200_a3-pure.cl b/OpenCL/m17200_a3-pure.cl index 4f0c278a3..60e01bfbc 100644 --- a/OpenCL/m17200_a3-pure.cl +++ b/OpenCL/m17200_a3-pure.cl @@ -87,6 +87,7 @@ Related publication: https://scitepress.org/PublicationsDetail.aspx?ID=KLPzPqStp #include "inc_vendor.h" #include "inc_types.h" +#include "inc_platform.cl" #include "inc_common.cl" #include "inc_simd.cl" @@ -129,7 +130,7 @@ struct pkzip_hash u32 data_length; u16 checksum_from_crc; u16 checksum_from_timestamp; - u8 data[MAX_DATA]; + u32 data[MAX_DATA]; } __attribute__((packed)); @@ -229,7 +230,7 @@ typedef struct } code; -CONSTANT_AS code lenfix[512] = +CONSTANT_VK code lenfix[512] = { {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, @@ -307,7 +308,7 @@ CONSTANT_AS code lenfix[512] = {0,9,255} }; -CONSTANT_AS code distfix[32] = +CONSTANT_VK code distfix[32] = { {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, @@ -527,7 +528,7 @@ DECLSPEC int check_inflate_code1 (u8 *next, int left) } } -__kernel void m17200_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) +KERNEL_FQ void m17200_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) { /** * modifier @@ -541,25 +542,23 @@ __kernel void m17200_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) * sbox, kbox */ - LOCAL_AS u32 l_crc32tab[256]; + LOCAL_VK u32 l_crc32tab[256]; for (u64 i = lid; i < 256; i += lsz) { l_crc32tab[i] = crc32tab[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); - __global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hash.data; - - LOCAL_AS u32 l_data[MAX_LOCAL]; + LOCAL_VK u32 l_data[MAX_LOCAL]; for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = data_ptr[i]; + l_data[i] = esalt_bufs[digests_offset].hash.data[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); if (gid >= gid_max) return; @@ -749,8 +748,8 @@ __kernel void m17200_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hash.data_length - 12; // size of input - infstream.next_in = esalt_bufs[digests_offset].hash.data + 12; // input char array + infstream.avail_in = esalt_bufs[digests_offset].hash.data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hash.data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array @@ -783,7 +782,7 @@ __kernel void m17200_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) } } -__kernel void m17200_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) +KERNEL_FQ void m17200_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) { /** * modifier @@ -797,25 +796,23 @@ __kernel void m17200_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) * sbox, kbox */ - LOCAL_AS u32 l_crc32tab[256]; + LOCAL_VK u32 l_crc32tab[256]; for (u64 i = lid; i < 256; i += lsz) { l_crc32tab[i] = crc32tab[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); - __global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hash.data; - - LOCAL_AS u32 l_data[MAX_LOCAL]; + LOCAL_VK u32 l_data[MAX_LOCAL]; for (u64 i = lid; i < MAX_LOCAL; i += lsz) { - l_data[i] = data_ptr[i]; + l_data[i] = esalt_bufs[digests_offset].hash.data[i]; } - barrier (CLK_LOCAL_MEM_FENCE); + SYNC_THREADS(); if (gid >= gid_max) return; @@ -993,8 +990,8 @@ __kernel void m17200_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t)) inflate_state pStream; infstream.opaque = Z_NULL; - infstream.avail_in = esalt_bufs[digests_offset].hash.data_length - 12; // size of input - infstream.next_in = esalt_bufs[digests_offset].hash.data + 12; // input char array + infstream.avail_in = esalt_bufs[digests_offset].hash.data_length - 12; // size of input + infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hash.data + 12; // input char array infstream.avail_out = TMPSIZ; // size of output infstream.next_out = tmp; // output char array