updated m17220 to be cuda compatible

pull/1962/head
Sein Coray 5 years ago
parent c9c7261a05
commit 8d1e737d60
No known key found for this signature in database
GPG Key ID: 44C4180EA69758EC

@ -87,6 +87,7 @@ Related publication: https://scitepress.org/PublicationsDetail.aspx?ID=KLPzPqStp
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
#include "inc_rp.h"
@ -131,7 +132,7 @@ struct pkzip_hash
u32 data_length;
u16 checksum_from_crc;
u16 checksum_from_timestamp;
u8 data[MAX_DATA];
u32 data[MAX_DATA];
} __attribute__((packed));
@ -229,7 +230,7 @@ typedef struct {
u16 val; /* offset in table or code value */
} code;
CONSTANT_AS code lenfix[512] = {
CONSTANT_VK code lenfix[512] = {
{96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48},
{0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128},
{0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59},
@ -306,7 +307,7 @@ CONSTANT_AS code lenfix[512] = {
{0,9,255}
};
CONSTANT_AS code distfix[32] = {
CONSTANT_VK code distfix[32] = {
{16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025},
{21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193},
{18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385},
@ -526,7 +527,7 @@ DECLSPEC int check_inflate_code1 (u8 *next, int left)
}
}
__kernel void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t))
KERNEL_FQ void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t))
{
/**
* modifier
@ -540,14 +541,14 @@ __kernel void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t))
* sbox, kbox
*/
LOCAL_AS u32 l_crc32tab[256];
LOCAL_VK u32 l_crc32tab[256];
for (u64 i = lid; i < 256; i += lsz)
{
l_crc32tab[i] = crc32tab[i];
}
barrier (CLK_LOCAL_MEM_FENCE);
SYNC_THREADS();
if (gid >= gid_max) return;
@ -592,13 +593,11 @@ __kernel void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t))
for (u32 idx = 0; idx < hash_count; idx++)
{
__global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hashes[idx].data;
u32x key0 = key0init;
u32x key1 = key1init;
u32x key2 = key2init;
next = data_ptr[0];
next = esalt_bufs[digests_offset].hashes[idx].data[0];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -616,7 +615,7 @@ __kernel void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t))
plain = unpack_v8d_from_v32_S (next) ^ key3;
update_key012 (key0, key1, key2, plain, l_crc32tab);
next = data_ptr[1];
next = esalt_bufs[digests_offset].hashes[idx].data[1];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -634,7 +633,7 @@ __kernel void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t))
plain = unpack_v8d_from_v32_S (next) ^ key3;
update_key012 (key0, key1, key2, plain, l_crc32tab);
next = data_ptr[2];
next = esalt_bufs[digests_offset].hashes[idx].data[2];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -660,7 +659,7 @@ __kernel void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t))
u8 tmp[TMPSIZ];
next = data_ptr[3];
next = esalt_bufs[digests_offset].hashes[idx].data[3];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -685,7 +684,7 @@ __kernel void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t))
for (int i = 16; i < 36; i += 4)
{
next = data_ptr[i / 4];
next = esalt_bufs[digests_offset].hashes[idx].data[i / 4];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -721,8 +720,8 @@ __kernel void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t))
inflate_state pStream;
infstream.opaque = Z_NULL;
infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input
infstream.next_in = esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array
infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input
infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array
infstream.avail_out = TMPSIZ; // size of output
infstream.next_out = tmp; // output char array
@ -779,7 +778,7 @@ __kernel void m17220_sxx (KERN_ATTR_RULES_ESALT (pkzip_t))
}
}
__kernel void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t))
KERNEL_FQ void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t))
{
/**
* modifier
@ -793,14 +792,14 @@ __kernel void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t))
* sbox, kbox
*/
LOCAL_AS u32 l_crc32tab[256];
LOCAL_VK u32 l_crc32tab[256];
for (u64 i = lid; i < 256; i += lsz)
{
l_crc32tab[i] = crc32tab[i];
}
barrier (CLK_LOCAL_MEM_FENCE);
SYNC_THREADS();
if (gid >= gid_max) return;
@ -845,13 +844,11 @@ __kernel void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t))
for (u32 idx = 0; idx < hash_count; idx++)
{
__global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hashes[idx].data;
u32x key0 = key0init;
u32x key1 = key1init;
u32x key2 = key2init;
next = data_ptr[0];
next = esalt_bufs[digests_offset].hashes[idx].data[0];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -869,7 +866,7 @@ __kernel void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t))
plain = unpack_v8d_from_v32_S (next) ^ key3;
update_key012 (key0, key1, key2, plain, l_crc32tab);
next = data_ptr[1];
next = esalt_bufs[digests_offset].hashes[idx].data[1];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -887,7 +884,7 @@ __kernel void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t))
plain = unpack_v8d_from_v32_S (next) ^ key3;
update_key012 (key0, key1, key2, plain, l_crc32tab);
next = data_ptr[2];
next = esalt_bufs[digests_offset].hashes[idx].data[2];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -913,7 +910,7 @@ __kernel void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t))
u8 tmp[TMPSIZ];
next = data_ptr[3];
next = esalt_bufs[digests_offset].hashes[idx].data[3];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -938,7 +935,7 @@ __kernel void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t))
for (int i = 16; i < 36; i += 4)
{
next = data_ptr[i / 4];
next = esalt_bufs[digests_offset].hashes[idx].data[i / 4];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -974,8 +971,8 @@ __kernel void m17220_mxx (KERN_ATTR_RULES_ESALT (pkzip_t))
inflate_state pStream;
infstream.opaque = Z_NULL;
infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input
infstream.next_in = esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array
infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input
infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array
infstream.avail_out = TMPSIZ; // size of output
infstream.next_out = tmp; // output char array

@ -87,6 +87,7 @@ Related publication: https://scitepress.org/PublicationsDetail.aspx?ID=KLPzPqStp
#include "inc_vendor.h"
#include "inc_types.h"
#include "inc_platform.cl"
#include "inc_common.cl"
#include "inc_simd.cl"
@ -129,7 +130,7 @@ struct pkzip_hash
u32 data_length;
u16 checksum_from_crc;
u16 checksum_from_timestamp;
u8 data[MAX_DATA];
u32 data[MAX_DATA];
} __attribute__((packed));
@ -227,7 +228,7 @@ typedef struct {
u16 val; /* offset in table or code value */
} code;
CONSTANT_AS code lenfix[512] = {
CONSTANT_VK code lenfix[512] = {
{96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48},
{0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128},
{0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59},
@ -304,7 +305,7 @@ CONSTANT_AS code lenfix[512] = {
{0,9,255}
};
CONSTANT_AS code distfix[32] = {
CONSTANT_VK code distfix[32] = {
{16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025},
{21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193},
{18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385},
@ -524,7 +525,7 @@ DECLSPEC int check_inflate_code1 (u8 *next, int left)
}
}
__kernel void m17220_sxx (KERN_ATTR_ESALT (pkzip_t))
KERNEL_FQ void m17220_sxx (KERN_ATTR_ESALT (pkzip_t))
{
/**
* modifier
@ -538,14 +539,14 @@ __kernel void m17220_sxx (KERN_ATTR_ESALT (pkzip_t))
* sbox, kbox
*/
LOCAL_AS u32 l_crc32tab[256];
LOCAL_VK u32 l_crc32tab[256];
for (u64 i = lid; i < 256; i += lsz)
{
l_crc32tab[i] = crc32tab[i];
}
barrier (CLK_LOCAL_MEM_FENCE);
SYNC_THREADS();
if (gid >= gid_max) return;
@ -592,13 +593,11 @@ __kernel void m17220_sxx (KERN_ATTR_ESALT (pkzip_t))
for (u32 idx = 0; idx < hash_count; idx++)
{
__global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hashes[idx].data;
u32x key0 = key0init2;
u32x key1 = key1init2;
u32x key2 = key2init2;
next = data_ptr[0];
next = esalt_bufs[digests_offset].hashes[idx].data[0];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -616,7 +615,7 @@ __kernel void m17220_sxx (KERN_ATTR_ESALT (pkzip_t))
plain = unpack_v8d_from_v32_S (next) ^ key3;
update_key012 (key0, key1, key2, plain, l_crc32tab);
next = data_ptr[1];
next = esalt_bufs[digests_offset].hashes[idx].data[1];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -634,7 +633,7 @@ __kernel void m17220_sxx (KERN_ATTR_ESALT (pkzip_t))
plain = unpack_v8d_from_v32_S (next) ^ key3;
update_key012 (key0, key1, key2, plain, l_crc32tab);
next = data_ptr[2];
next = esalt_bufs[digests_offset].hashes[idx].data[2];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -660,7 +659,7 @@ __kernel void m17220_sxx (KERN_ATTR_ESALT (pkzip_t))
u8 tmp[TMPSIZ];
next = data_ptr[3];
next = esalt_bufs[digests_offset].hashes[idx].data[3];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -685,7 +684,7 @@ __kernel void m17220_sxx (KERN_ATTR_ESALT (pkzip_t))
for (int i = 16; i < 36; i += 4)
{
next = data_ptr[i / 4];
next = esalt_bufs[digests_offset].hashes[idx].data[i / 4];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -721,8 +720,8 @@ __kernel void m17220_sxx (KERN_ATTR_ESALT (pkzip_t))
inflate_state pStream;
infstream.opaque = Z_NULL;
infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input
infstream.next_in = esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array
infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input
infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array
infstream.avail_out = TMPSIZ; // size of output
infstream.next_out = tmp; // output char array
@ -779,7 +778,7 @@ __kernel void m17220_sxx (KERN_ATTR_ESALT (pkzip_t))
}
}
__kernel void m17220_mxx (KERN_ATTR_ESALT (pkzip_t))
KERNEL_FQ void m17220_mxx (KERN_ATTR_ESALT (pkzip_t))
{
/**
* modifier
@ -793,14 +792,14 @@ __kernel void m17220_mxx (KERN_ATTR_ESALT (pkzip_t))
* sbox, kbox
*/
LOCAL_AS u32 l_crc32tab[256];
LOCAL_VK u32 l_crc32tab[256];
for (u64 i = lid; i < 256; i += lsz)
{
l_crc32tab[i] = crc32tab[i];
}
barrier (CLK_LOCAL_MEM_FENCE);
SYNC_THREADS();
if (gid >= gid_max) return;
@ -847,13 +846,11 @@ __kernel void m17220_mxx (KERN_ATTR_ESALT (pkzip_t))
for (u32 idx = 0; idx < hash_count; idx++)
{
__global u32 *data_ptr = (__global u32 *) esalt_bufs[digests_offset].hashes[idx].data;
u32x key0 = key0init2;
u32x key1 = key1init2;
u32x key2 = key2init2;
next = data_ptr[0];
next = esalt_bufs[digests_offset].hashes[idx].data[0];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -871,7 +868,7 @@ __kernel void m17220_mxx (KERN_ATTR_ESALT (pkzip_t))
plain = unpack_v8d_from_v32_S (next) ^ key3;
update_key012 (key0, key1, key2, plain, l_crc32tab);
next = data_ptr[1];
next = esalt_bufs[digests_offset].hashes[idx].data[1];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -889,7 +886,7 @@ __kernel void m17220_mxx (KERN_ATTR_ESALT (pkzip_t))
plain = unpack_v8d_from_v32_S (next) ^ key3;
update_key012 (key0, key1, key2, plain, l_crc32tab);
next = data_ptr[2];
next = esalt_bufs[digests_offset].hashes[idx].data[2];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -915,7 +912,7 @@ __kernel void m17220_mxx (KERN_ATTR_ESALT (pkzip_t))
u8 tmp[TMPSIZ];
next = data_ptr[3];
next = esalt_bufs[digests_offset].hashes[idx].data[3];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -940,7 +937,7 @@ __kernel void m17220_mxx (KERN_ATTR_ESALT (pkzip_t))
for (int i = 16; i < 36; i += 4)
{
next = data_ptr[i / 4];
next = esalt_bufs[digests_offset].hashes[idx].data[i / 4];
update_key3 (key2, key3);
plain = unpack_v8a_from_v32_S (next) ^ key3;
@ -976,8 +973,8 @@ __kernel void m17220_mxx (KERN_ATTR_ESALT (pkzip_t))
inflate_state pStream;
infstream.opaque = Z_NULL;
infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input
infstream.next_in = esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array
infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input
infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array
infstream.avail_out = TMPSIZ; // size of output
infstream.next_out = tmp; // output char array

@ -525,7 +525,7 @@ DECLSPEC int check_inflate_code1 (u8 *next, int left)
}
}
__kernel void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t))
KERNEL_FQ void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t))
{
/**
* modifier
@ -732,8 +732,8 @@ __kernel void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t))
inflate_state pStream;
infstream.opaque = Z_NULL;
infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input
infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array
infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input
infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array
infstream.avail_out = TMPSIZ; // size of output
infstream.next_out = tmp; // output char array
@ -790,7 +790,7 @@ __kernel void m17220_sxx (KERN_ATTR_VECTOR_ESALT (pkzip_t))
}
}
__kernel void m17220_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t))
KERNEL_FQ void m17220_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t))
{
/**
* modifier
@ -997,8 +997,8 @@ __kernel void m17220_mxx (KERN_ATTR_VECTOR_ESALT (pkzip_t))
inflate_state pStream;
infstream.opaque = Z_NULL;
infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input
infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array
infstream.avail_in = esalt_bufs[digests_offset].hashes[idx].data_length - 12; // size of input
infstream.next_in = (GLOBAL_AS u8 *) esalt_bufs[digests_offset].hashes[idx].data + 12; // input char array
infstream.avail_out = TMPSIZ; // size of output
infstream.next_out = tmp; // output char array

Loading…
Cancel
Save