From a1c13778c7ff7519852539960169b85f8a5906e8 Mon Sep 17 00:00:00 2001 From: jsteube Date: Wed, 11 Jan 2017 16:04:05 +0100 Subject: [PATCH] Workaround added for AMDGPU-Pro OpenCL runtime: AES encrypt and decrypt Invertkey function was calculated wrong in certain cases --- OpenCL/m06600.cl | 2 +- OpenCL/m06800.cl | 2 +- OpenCL/m08800.cl | 4 ++-- OpenCL/m09400.cl | 4 ++-- OpenCL/m09500.cl | 2 +- OpenCL/m09600.cl | 2 +- OpenCL/m11300.cl | 2 +- OpenCL/m11600.cl | 2 +- OpenCL/m12500.cl | 2 +- OpenCL/m12700.cl | 2 +- OpenCL/m13200.cl | 2 +- OpenCL/m13400.cl | 2 +- docs/changes.txt | 8 ++++++++ 13 files changed, 22 insertions(+), 14 deletions(-) diff --git a/OpenCL/m06600.cl b/OpenCL/m06600.cl index a1798030b..16b9ec6f9 100644 --- a/OpenCL/m06600.cl +++ b/OpenCL/m06600.cl @@ -732,7 +732,7 @@ static void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca static void AES128_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4) { - for (u32 i = 0, j = 40; i < j; i += 4, j -= 4) + for (u32 i = 0, j = 40; i < 20; i += 4, j -= 4) { u32 temp; diff --git a/OpenCL/m06800.cl b/OpenCL/m06800.cl index b55eb5c6c..04ff27df3 100644 --- a/OpenCL/m06800.cl +++ b/OpenCL/m06800.cl @@ -759,7 +759,7 @@ static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca static void AES256_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4) { - for (u32 i = 0, j = 56; i < j; i += 4, j -= 4) + for (u32 i = 0, j = 56; i < 28; i += 4, j -= 4) { u32 temp; diff --git a/OpenCL/m08800.cl b/OpenCL/m08800.cl index 490558e1c..e91d5aa78 100644 --- a/OpenCL/m08800.cl +++ b/OpenCL/m08800.cl @@ -726,7 +726,7 @@ static void AES128_ExpandKey (u32 
*userkey, u32 *rek, __local u32 *s_te0, __loca static void AES128_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4) { - for (u32 i = 0, j = 40; i < j; i += 4, j -= 4) + for (u32 i = 0, j = 40; i < 20; i += 4, j -= 4) { u32 temp; @@ -896,7 +896,7 @@ static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca static void AES256_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4) { - for (u32 i = 0, j = 56; i < j; i += 4, j -= 4) + for (u32 i = 0, j = 56; i < 28; i += 4, j -= 4) { u32 temp; diff --git a/OpenCL/m09400.cl b/OpenCL/m09400.cl index cdce14852..aa6c60a2b 100644 --- a/OpenCL/m09400.cl +++ b/OpenCL/m09400.cl @@ -729,7 +729,7 @@ static void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca static void AES128_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4) { - for (u32 i = 0, j = 40; i < j; i += 4, j -= 4) + for (u32 i = 0, j = 40; i < 20; i += 4, j -= 4) { u32 temp; @@ -967,7 +967,7 @@ static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca static void AES256_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4) { - for (u32 i = 0, j = 56; i < j; i += 4, j -= 4) + for (u32 i = 0, j = 56; i < 28; i += 4, j -= 4) { u32 temp; diff --git a/OpenCL/m09500.cl b/OpenCL/m09500.cl index 699276826..58576a563 100644 --- a/OpenCL/m09500.cl 
+++ b/OpenCL/m09500.cl @@ -729,7 +729,7 @@ static void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca static void AES128_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4) { - for (u32 i = 0, j = 40; i < j; i += 4, j -= 4) + for (u32 i = 0, j = 40; i < 20; i += 4, j -= 4) { u32 temp; diff --git a/OpenCL/m09600.cl b/OpenCL/m09600.cl index 875c333e0..828de7396 100644 --- a/OpenCL/m09600.cl +++ b/OpenCL/m09600.cl @@ -753,7 +753,7 @@ static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca static void AES256_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4) { - for (u32 i = 0, j = 56; i < j; i += 4, j -= 4) + for (u32 i = 0, j = 56; i < 28; i += 4, j -= 4) { u32 temp; diff --git a/OpenCL/m11300.cl b/OpenCL/m11300.cl index 7c30831fe..9429e8213 100644 --- a/OpenCL/m11300.cl +++ b/OpenCL/m11300.cl @@ -756,7 +756,7 @@ static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca static void AES256_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4) { - for (u32 i = 0, j = 56; i < j; i += 4, j -= 4) + for (u32 i = 0, j = 56; i < 28; i += 4, j -= 4) { u32 temp; diff --git a/OpenCL/m11600.cl b/OpenCL/m11600.cl index 8057a6ae7..80b04e574 100644 --- a/OpenCL/m11600.cl +++ b/OpenCL/m11600.cl @@ -759,7 +759,7 @@ static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca static void AES256_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, 
__local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4) { - for (u32 i = 0, j = 56; i < j; i += 4, j -= 4) + for (u32 i = 0, j = 56; i < 28; i += 4, j -= 4) { u32 temp; diff --git a/OpenCL/m12500.cl b/OpenCL/m12500.cl index b088b51c9..a8d4a5263 100644 --- a/OpenCL/m12500.cl +++ b/OpenCL/m12500.cl @@ -740,7 +740,7 @@ static void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca static void AES128_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4) { - for (u32 i = 0, j = 40; i < j; i += 4, j -= 4) + for (u32 i = 0, j = 40; i < 20; i += 4, j -= 4) { u32 temp; diff --git a/OpenCL/m12700.cl b/OpenCL/m12700.cl index 1b567f890..f38757d05 100644 --- a/OpenCL/m12700.cl +++ b/OpenCL/m12700.cl @@ -759,7 +759,7 @@ static void AES256_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca static void AES256_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4) { - for (u32 i = 0, j = 56; i < j; i += 4, j -= 4) + for (u32 i = 0, j = 56; i < 28; i += 4, j -= 4) { u32 temp; diff --git a/OpenCL/m13200.cl b/OpenCL/m13200.cl index 4f29c8873..fddb386d8 100644 --- a/OpenCL/m13200.cl +++ b/OpenCL/m13200.cl @@ -729,7 +729,7 @@ static void AES128_ExpandKey (u32 *userkey, u32 *rek, __local u32 *s_te0, __loca static void AES128_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, __local u32 *s_td2, __local u32 *s_td3, __local u32 *s_td4, __local u32 *s_te0, __local u32 *s_te1, __local u32 *s_te2, __local u32 *s_te3, __local u32 *s_te4) { - for (u32 i = 0, j = 40; i < j; i += 4, j -= 4) + for (u32 i = 0, j = 40; i < 20; i += 4, j -= 4) { u32 temp; diff --git 
a/OpenCL/m13400.cl b/OpenCL/m13400.cl index 569f7ebdd..a07d52f04 100644 --- a/OpenCL/m13400.cl +++ b/OpenCL/m13400.cl @@ -748,7 +748,7 @@ static void AES256_InvertKey (u32 *rdk, __local u32 *s_td0, __local u32 *s_td1, #ifdef _unroll #pragma unroll #endif - for (u32 i = 0, j = 56; i < j; i += 4, j -= 4) + for (u32 i = 0, j = 56; i < 28; i += 4, j -= 4) { u32 temp; diff --git a/docs/changes.txt b/docs/changes.txt index 0b41b7bc4..eaf89b1fc 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -1,3 +1,11 @@ +* changes v3.30 -> v3.xx: + +## +## Workarounds +## + +- Workaround added for AMDGPU-Pro OpenCL runtime: AES encrypt and decrypt InvertKey function was calculated incorrectly in certain cases + * changes v3.20 -> v3.30: ##