Merge pull request #1151 from philsmd/master

osx: some more volatile variables are required for luks/tc
Jens Steube committed 7 years ago (via GitHub)
commit d57b493d13
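
For reference, every hunk below applies the same pattern: the temporaries holding the derived key material are declared volatile, but only on Apple GPU builds, so other platforms keep the plain, fully optimizable declaration. A minimal sketch of that pattern, assuming hashcat's u32 typedef and a 256-bit master key already present in mk[] (the copy loop is illustrative; the real kernels assign each word explicitly):

#if defined (IS_APPLE) && defined (IS_GPU)
// Apple's OpenCL compiler mis-optimizes these temporaries, so force real loads/stores
volatile u32 ukey[8];
#else
u32 ukey[8];
#endif

// copy the master key into the temporary before it feeds the AES key schedule
for (int i = 0; i < 8; i++) ukey[i] = mk[i];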

@@ -2757,7 +2757,11 @@ void luks_af_sha1_then_aes_decrypt (__global luks_t *luks_bufs, __global luks_tm
{
if (key_size == HC_LUKS_KEY_SIZE_128)
{
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey[4];
#else
u32 ukey[4];
#endif
ukey[0] = mk[0];
ukey[1] = mk[1];
@@ -2778,7 +2782,11 @@ void luks_af_sha1_then_aes_decrypt (__global luks_t *luks_bufs, __global luks_tm
}
else if (key_size == HC_LUKS_KEY_SIZE_256)
{
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey[8];
#else
u32 ukey[8];
#endif
ukey[0] = mk[0];
ukey[1] = mk[1];
@@ -2806,7 +2814,11 @@ void luks_af_sha1_then_aes_decrypt (__global luks_t *luks_bufs, __global luks_tm
{
if (key_size == HC_LUKS_KEY_SIZE_128)
{
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey[4];
#else
u32 ukey[4];
#endif
ukey[0] = mk[0];
ukey[1] = mk[1];
@@ -2821,7 +2833,11 @@ void luks_af_sha1_then_aes_decrypt (__global luks_t *luks_bufs, __global luks_tm
}
else if (key_size == HC_LUKS_KEY_SIZE_256)
{
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey[8];
#else
u32 ukey[8];
#endif
ukey[0] = mk[0];
ukey[1] = mk[1];
@@ -2843,14 +2859,22 @@ void luks_af_sha1_then_aes_decrypt (__global luks_t *luks_bufs, __global luks_tm
{
if (key_size == HC_LUKS_KEY_SIZE_256)
{
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey1[4];
#else
u32 ukey1[4];
#endif
ukey1[0] = mk[0];
ukey1[1] = mk[1];
ukey1[2] = mk[2];
ukey1[3] = mk[3];
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey2[4];
#else
u32 ukey2[4];
#endif
ukey2[0] = mk[4];
ukey2[1] = mk[5];
@@ -2867,7 +2891,11 @@ void luks_af_sha1_then_aes_decrypt (__global luks_t *luks_bufs, __global luks_tm
}
else if (key_size == HC_LUKS_KEY_SIZE_512)
{
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey1[8];
#else
u32 ukey1[8];
#endif
ukey1[0] = mk[ 0];
ukey1[1] = mk[ 1];
@@ -2878,7 +2906,11 @@ void luks_af_sha1_then_aes_decrypt (__global luks_t *luks_bufs, __global luks_tm
ukey1[6] = mk[ 6];
ukey1[7] = mk[ 7];
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey2[8];
#else
u32 ukey2[8];
#endif
ukey2[0] = mk[ 8];
ukey2[1] = mk[ 9];

@@ -334,7 +334,7 @@ __constant u32a c_sbox3[256] =
// temporary hack for Apple Iris GPUs (with as little performance drop as possible)
-#ifdef IS_APPLE
+#if defined (IS_APPLE) && defined (IS_GPU)
#define TMP_TYPE volatile u32
#else
#define TMP_TYPE u32
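
With the guard narrowed to Apple GPUs, any temporary declared with TMP_TYPE picks up the volatile qualifier only on the affected runtime and stays a plain u32 everywhere else. A minimal usage sketch, assuming the macro block above is closed with #endif; the variable names are illustrative, not taken from the kernel:

TMP_TYPE tmp;    // expands to "volatile u32 tmp;" on Apple GPU builds, plain "u32 tmp;" otherwise
tmp = a ^ b;     // illustrative step; volatile keeps the intermediate store from being optimized away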

@@ -666,7 +666,11 @@ __kernel void m06212_comp (__global pw_t *pws, __global const kernel_rule_t *rul
if (gid >= gid_max) return;
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey1[8];
#else
u32 ukey1[8];
#endif
ukey1[0] = tmps[gid].out[ 0];
ukey1[1] = tmps[gid].out[ 1];
@@ -677,7 +681,11 @@ __kernel void m06212_comp (__global pw_t *pws, __global const kernel_rule_t *rul
ukey1[6] = tmps[gid].out[ 6];
ukey1[7] = tmps[gid].out[ 7];
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey2[8];
#else
u32 ukey2[8];
#endif
ukey2[0] = tmps[gid].out[ 8];
ukey2[1] = tmps[gid].out[ 9];
@@ -703,7 +711,11 @@ __kernel void m06212_comp (__global pw_t *pws, __global const kernel_rule_t *rul
mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0);
}
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey3[8];
#else
u32 ukey3[8];
#endif
ukey3[0] = tmps[gid].out[16];
ukey3[1] = tmps[gid].out[17];
@@ -714,7 +726,11 @@ __kernel void m06212_comp (__global pw_t *pws, __global const kernel_rule_t *rul
ukey3[6] = tmps[gid].out[22];
ukey3[7] = tmps[gid].out[23];
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey4[8];
#else
u32 ukey4[8];
#endif
ukey4[0] = tmps[gid].out[24];
ukey4[1] = tmps[gid].out[25];

@@ -666,7 +666,11 @@ __kernel void m06213_comp (__global pw_t *pws, __global const kernel_rule_t *rul
if (gid >= gid_max) return;
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey1[8];
#else
u32 ukey1[8];
#endif
ukey1[0] = tmps[gid].out[ 0];
ukey1[1] = tmps[gid].out[ 1];
@@ -677,7 +681,11 @@ __kernel void m06213_comp (__global pw_t *pws, __global const kernel_rule_t *rul
ukey1[6] = tmps[gid].out[ 6];
ukey1[7] = tmps[gid].out[ 7];
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey2[8];
#else
u32 ukey2[8];
#endif
ukey2[0] = tmps[gid].out[ 8];
ukey2[1] = tmps[gid].out[ 9];
@@ -703,7 +711,11 @@ __kernel void m06213_comp (__global pw_t *pws, __global const kernel_rule_t *rul
mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0);
}
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey3[8];
#else
u32 ukey3[8];
#endif
ukey3[0] = tmps[gid].out[16];
ukey3[1] = tmps[gid].out[17];

@@ -574,7 +574,11 @@ __kernel void m06222_comp (__global pw_t *pws, __global const kernel_rule_t *rul
if (gid >= gid_max) return;
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey1[8];
#else
u32 ukey1[8];
#endif
ukey1[0] = swap32 (h32_from_64 (tmps[gid].out[ 0]));
ukey1[1] = swap32 (l32_from_64 (tmps[gid].out[ 0]));
@@ -585,7 +589,11 @@ __kernel void m06222_comp (__global pw_t *pws, __global const kernel_rule_t *rul
ukey1[6] = swap32 (h32_from_64 (tmps[gid].out[ 3]));
ukey1[7] = swap32 (l32_from_64 (tmps[gid].out[ 3]));
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey2[8];
#else
u32 ukey2[8];
#endif
ukey2[0] = swap32 (h32_from_64 (tmps[gid].out[ 4]));
ukey2[1] = swap32 (l32_from_64 (tmps[gid].out[ 4]));
@@ -611,7 +619,11 @@ __kernel void m06222_comp (__global pw_t *pws, __global const kernel_rule_t *rul
mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0);
}
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey3[8];
#else
u32 ukey3[8];
#endif
ukey3[0] = swap32 (h32_from_64 (tmps[gid].out[ 8]));
ukey3[1] = swap32 (l32_from_64 (tmps[gid].out[ 8]));
@@ -622,7 +634,11 @@ __kernel void m06222_comp (__global pw_t *pws, __global const kernel_rule_t *rul
ukey3[6] = swap32 (h32_from_64 (tmps[gid].out[11]));
ukey3[7] = swap32 (l32_from_64 (tmps[gid].out[11]));
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey4[8];
#else
u32 ukey4[8];
#endif
ukey4[0] = swap32 (h32_from_64 (tmps[gid].out[12]));
ukey4[1] = swap32 (l32_from_64 (tmps[gid].out[12]));

@@ -623,7 +623,11 @@ __kernel void m06223_comp (__global pw_t *pws, __global const kernel_rule_t *rul
if (gid >= gid_max) return;
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey1[8];
#else
u32 ukey1[8];
#endif
ukey1[0] = swap32 (h32_from_64 (tmps[gid].out[ 0]));
ukey1[1] = swap32 (l32_from_64 (tmps[gid].out[ 0]));
@@ -634,7 +638,11 @@ __kernel void m06223_comp (__global pw_t *pws, __global const kernel_rule_t *rul
ukey1[6] = swap32 (h32_from_64 (tmps[gid].out[ 3]));
ukey1[7] = swap32 (l32_from_64 (tmps[gid].out[ 3]));
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey2[8];
#else
u32 ukey2[8];
#endif
ukey2[0] = swap32 (h32_from_64 (tmps[gid].out[ 4]));
ukey2[1] = swap32 (l32_from_64 (tmps[gid].out[ 4]));
@@ -660,7 +668,11 @@ __kernel void m06223_comp (__global pw_t *pws, __global const kernel_rule_t *rul
mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0);
}
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey3[8];
#else
u32 ukey3[8];
#endif
ukey3[0] = swap32 (h32_from_64 (tmps[gid].out[ 8]));
ukey3[1] = swap32 (l32_from_64 (tmps[gid].out[ 8]));
@@ -701,7 +713,11 @@ __kernel void m06223_comp (__global pw_t *pws, __global const kernel_rule_t *rul
mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0);
}
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey5[8];
#else
u32 ukey5[8];
#endif
ukey5[0] = swap32 (h32_from_64 (tmps[gid].out[16]));
ukey5[1] = swap32 (l32_from_64 (tmps[gid].out[16]));
@@ -712,7 +728,11 @@ __kernel void m06223_comp (__global pw_t *pws, __global const kernel_rule_t *rul
ukey5[6] = swap32 (h32_from_64 (tmps[gid].out[19]));
ukey5[7] = swap32 (l32_from_64 (tmps[gid].out[19]));
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey6[8];
#else
u32 ukey6[8];
#endif
ukey6[0] = swap32 (h32_from_64 (tmps[gid].out[20]));
ukey6[1] = swap32 (l32_from_64 (tmps[gid].out[20]));

@@ -1978,7 +1978,11 @@ __kernel void m06232_comp (__global pw_t *pws, __global const kernel_rule_t *rul
if (gid >= gid_max) return;
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey1[8];
#else
u32 ukey1[8];
#endif
ukey1[0] = swap32 (tmps[gid].out[ 0]);
ukey1[1] = swap32 (tmps[gid].out[ 1]);
@@ -1989,7 +1993,11 @@ __kernel void m06232_comp (__global pw_t *pws, __global const kernel_rule_t *rul
ukey1[6] = swap32 (tmps[gid].out[ 6]);
ukey1[7] = swap32 (tmps[gid].out[ 7]);
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey2[8];
#else
u32 ukey2[8];
#endif
ukey2[0] = swap32 (tmps[gid].out[ 8]);
ukey2[1] = swap32 (tmps[gid].out[ 9]);
@@ -2015,7 +2023,11 @@ __kernel void m06232_comp (__global pw_t *pws, __global const kernel_rule_t *rul
mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0);
}
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey3[8];
#else
u32 ukey3[8];
#endif
ukey3[0] = swap32 (tmps[gid].out[16]);
ukey3[1] = swap32 (tmps[gid].out[17]);
@@ -2026,7 +2038,11 @@ __kernel void m06232_comp (__global pw_t *pws, __global const kernel_rule_t *rul
ukey3[6] = swap32 (tmps[gid].out[22]);
ukey3[7] = swap32 (tmps[gid].out[23]);
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey4[8];
#else
u32 ukey4[8];
#endif
ukey4[0] = swap32 (tmps[gid].out[24]);
ukey4[1] = swap32 (tmps[gid].out[25]);

@@ -1978,7 +1978,11 @@ __kernel void m06233_comp (__global pw_t *pws, __global const kernel_rule_t *rul
if (gid >= gid_max) return;
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey1[8];
#else
u32 ukey1[8];
#endif
ukey1[0] = swap32 (tmps[gid].out[ 0]);
ukey1[1] = swap32 (tmps[gid].out[ 1]);
@@ -1989,7 +1993,11 @@ __kernel void m06233_comp (__global pw_t *pws, __global const kernel_rule_t *rul
ukey1[6] = swap32 (tmps[gid].out[ 6]);
ukey1[7] = swap32 (tmps[gid].out[ 7]);
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey2[8];
#else
u32 ukey2[8];
#endif
ukey2[0] = swap32 (tmps[gid].out[ 8]);
ukey2[1] = swap32 (tmps[gid].out[ 9]);
@@ -2015,7 +2023,11 @@ __kernel void m06233_comp (__global pw_t *pws, __global const kernel_rule_t *rul
mark_hash (plains_buf, d_return_buf, salt_pos, digests_cnt, 0, 0, gid, 0);
}
#if defined (IS_APPLE) && defined (IS_GPU)
volatile u32 ukey3[8];
#else
u32 ukey3[8];
#endif
ukey3[0] = swap32 (tmps[gid].out[16]);
ukey3[1] = swap32 (tmps[gid].out[17]);

@@ -37,6 +37,8 @@
- Workaround added for AMDGPU-Pro OpenCL runtime: AES encrypt and decrypt Invertkey function was calculated wrong in certain cases
- Workaround added for AMDGPU-Pro OpenCL runtime: RAR3 kernel requires a volatile variable to work correctly
- Workaround added for Apple OpenCL runtime: bcrypt kernel requires a volatile variable because of a compiler optimization bug
- Workaround added for Apple OpenCL runtime: LUKS kernel requires some volatile variables because of a compiler optimization bug
- Workaround added for Apple OpenCL runtime: TrueCrypt kernel requires some volatile variables because of a compiler optimization bug
- Workaround added for NVidia OpenCL runtime: RACF kernel requires EBCDIC lookup to be done on shared memory
##
