Workaround added for AMDGPU-Pro OpenCL runtime: AES encrypt and decrypt Invertkey function was calculated wrong in certain cases

pull/961/head
jsteube 7 years ago
parent bb5663e439
commit a1c13778c7

@ -732,7 +732,7 @@ static void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca
static void AES128_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
{
for (u32 i = 0, j = 40; i < j; i += 4, j -= 4)
for (u32 i = 0, j = 40; i < 20; i += 4, j -= 4)
{
u32 temp;

@ -759,7 +759,7 @@ static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca
static void AES256_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
{
for (u32 i = 0, j = 56; i < j; i += 4, j -= 4)
for (u32 i = 0, j = 56; i < 28; i += 4, j -= 4)
{
u32 temp;

@ -726,7 +726,7 @@ static void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca
static void AES128_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
{
for (u32 i = 0, j = 40; i < j; i += 4, j -= 4)
for (u32 i = 0, j = 40; i < 20; i += 4, j -= 4)
{
u32 temp;
@ -896,7 +896,7 @@ static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca
static void AES256_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
{
for (u32 i = 0, j = 56; i < j; i += 4, j -= 4)
for (u32 i = 0, j = 56; i < 28; i += 4, j -= 4)
{
u32 temp;

@ -729,7 +729,7 @@ static void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca
static void AES128_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
{
for (u32 i = 0, j = 40; i < j; i += 4, j -= 4)
for (u32 i = 0, j = 40; i < 20; i += 4, j -= 4)
{
u32 temp;
@ -967,7 +967,7 @@ static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca
static void AES256_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
{
for (u32 i = 0, j = 56; i < j; i += 4, j -= 4)
for (u32 i = 0, j = 56; i < 28; i += 4, j -= 4)
{
u32 temp;

@ -729,7 +729,7 @@ static void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca
static void AES128_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
{
for (u32 i = 0, j = 40; i < j; i += 4, j -= 4)
for (u32 i = 0, j = 40; i < 20; i += 4, j -= 4)
{
u32 temp;

@ -753,7 +753,7 @@ static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca
static void AES256_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
{
for (u32 i = 0, j = 56; i < j; i += 4, j -= 4)
for (u32 i = 0, j = 56; i < 28; i += 4, j -= 4)
{
u32 temp;

@ -756,7 +756,7 @@ static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca
static void AES256_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
{
for (u32 i = 0, j = 56; i < j; i += 4, j -= 4)
for (u32 i = 0, j = 56; i < 28; i += 4, j -= 4)
{
u32 temp;

@ -759,7 +759,7 @@ static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca
static void AES256_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
{
for (u32 i = 0, j = 56; i < j; i += 4, j -= 4)
for (u32 i = 0, j = 56; i < 28; i += 4, j -= 4)
{
u32 temp;

@ -740,7 +740,7 @@ static void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca
static void AES128_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
{
for (u32 i = 0, j = 40; i < j; i += 4, j -= 4)
for (u32 i = 0, j = 40; i < 20; i += 4, j -= 4)
{
u32 temp;

@ -759,7 +759,7 @@ static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca
static void AES256_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
{
for (u32 i = 0, j = 56; i < j; i += 4, j -= 4)
for (u32 i = 0, j = 56; i < 28; i += 4, j -= 4)
{
u32 temp;

@ -729,7 +729,7 @@ static void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca
static void AES128_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4)
{
for (u32 i = 0, j = 40; i < j; i += 4, j -= 4)
for (u32 i = 0, j = 40; i < 20; i += 4, j -= 4)
{
u32 temp;

@ -748,7 +748,7 @@ static void AES256_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1,
#ifdef _unroll
#pragma unroll
#endif
for (u32 i = 0, j = 56; i < j; i += 4, j -= 4)
for (u32 i = 0, j = 56; i < 28; i += 4, j -= 4)
{
u32 temp;

@ -1,3 +1,11 @@
* changes v3.30 -> v3.xx:
##
## Workarounds
##
- Workaround added for AMDGPU-Pro OpenCL runtime: AES encrypt and decrypt Invertkey function was calculated wrong in certain cases
* changes v3.20 -> v3.30:
##

Loading…
Cancel
Save