From fa4b521d484d7ced67376ade7f1b97ab93115ac7 Mon Sep 17 00:00:00 2001
From: Jens Steube
Date: Fri, 6 Mar 2020 13:31:32 +0100
Subject: [PATCH] Add unpack_v8x_from_v32 for vector datatypes, update -m 200

---
 OpenCL/inc_common.cl          | 188 +++++++++++++++++++++++
 OpenCL/inc_common.h           |   5 +
 OpenCL/m00200_a0-optimized.cl |  40 ++---
 OpenCL/m00200_a1-optimized.cl |  42 +++---
 OpenCL/m00200_a3-optimized.cl | 272 ++++++++++++++--------------
 5 files changed, 347 insertions(+), 200 deletions(-)

diff --git a/OpenCL/inc_common.cl b/OpenCL/inc_common.cl
index 6a7373867..407a24ef6 100644
--- a/OpenCL/inc_common.cl
+++ b/OpenCL/inc_common.cl
@@ -490,6 +490,194 @@ DECLSPEC u64 v64_from_v32ab_S (const u32 v32a, const u32 v32b)
 
 // unpack function are similar, but always return u32
 
+DECLSPEC u32x unpack_v8a_from_v32 (const u32x v32)
+{
+  u32x r = 0;
+
+  #if defined IS_NV && HAS_BFE == 1
+
+  #if VECT_SIZE == 1
+  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r) : "r"(v32));
+  #endif
+
+  #if VECT_SIZE >= 2
+  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s0) : "r"(v32.s0));
+  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s1) : "r"(v32.s1));
+  #endif
+
+  #if VECT_SIZE >= 4
+  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s2) : "r"(v32.s2));
+  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s3) : "r"(v32.s3));
+  #endif
+
+  #if VECT_SIZE >= 8
+  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s4) : "r"(v32.s4));
+  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s5) : "r"(v32.s5));
+  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s6) : "r"(v32.s6));
+  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s7) : "r"(v32.s7));
+  #endif
+
+  #if VECT_SIZE >= 16
+  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s8) : "r"(v32.s8));
+  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.s9) : "r"(v32.s9));
+  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.sa) : "r"(v32.sa));
+  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.sb) : "r"(v32.sb));
+  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.sc) : "r"(v32.sc));
+  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.sd) : "r"(v32.sd));
+  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.se) : "r"(v32.se));
+  asm volatile ("bfe.u32 %0, %1, 0, 8;" : "=r"(r.sf) : "r"(v32.sf));
+  #endif
+
+  //#elif defined IS_AMD && HAS_VBFE == 1
+  //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 0, 8;" : "=v"(r) : "v"(v32));
+  #else
+  r = (v32 >> 0) & 0xff;
+  #endif
+
+  return r;
+}
+
+DECLSPEC u32x unpack_v8b_from_v32 (const u32x v32)
+{
+  u32x r = 0;
+
+  #if defined IS_NV && HAS_BFE == 1
+
+  #if VECT_SIZE == 1
+  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r) : "r"(v32));
+  #endif
+
+  #if VECT_SIZE >= 2
+  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s0) : "r"(v32.s0));
+  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s1) : "r"(v32.s1));
+  #endif
+
+  #if VECT_SIZE >= 4
+  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s2) : "r"(v32.s2));
+  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s3) : "r"(v32.s3));
+  #endif
+
+  #if VECT_SIZE >= 8
+  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s4) : "r"(v32.s4));
+  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s5) : "r"(v32.s5));
+  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s6) : "r"(v32.s6));
+  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s7) : "r"(v32.s7));
+  #endif
+
+  #if VECT_SIZE >= 16
+  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s8) : "r"(v32.s8));
+  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.s9) : "r"(v32.s9));
+  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.sa) : "r"(v32.sa));
+  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.sb) : "r"(v32.sb));
+  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.sc) : "r"(v32.sc));
+  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.sd) : "r"(v32.sd));
+  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.se) : "r"(v32.se));
+  asm volatile ("bfe.u32 %0, %1, 8, 8;" : "=r"(r.sf) : "r"(v32.sf));
+  #endif
+
+  //#elif defined IS_AMD && HAS_VBFE == 1
+  //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 8, 8;" : "=v"(r) : "v"(v32));
+  #else
+  r = (v32 >> 8) & 0xff;
+  #endif
+
+  return r;
+}
+
+DECLSPEC u32x unpack_v8c_from_v32 (const u32x v32)
+{
+  u32x r = 0;
+
+  #if defined IS_NV && HAS_BFE == 1
+
+  #if VECT_SIZE == 1
+  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r) : "r"(v32));
+  #endif
+
+  #if VECT_SIZE >= 2
+  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s0) : "r"(v32.s0));
+  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s1) : "r"(v32.s1));
+  #endif
+
+  #if VECT_SIZE >= 4
+  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s2) : "r"(v32.s2));
+  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s3) : "r"(v32.s3));
+  #endif
+
+  #if VECT_SIZE >= 8
+  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s4) : "r"(v32.s4));
+  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s5) : "r"(v32.s5));
+  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s6) : "r"(v32.s6));
+  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s7) : "r"(v32.s7));
+  #endif
+
+  #if VECT_SIZE >= 16
+  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s8) : "r"(v32.s8));
+  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.s9) : "r"(v32.s9));
+  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.sa) : "r"(v32.sa));
+  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.sb) : "r"(v32.sb));
+  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.sc) : "r"(v32.sc));
+  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.sd) : "r"(v32.sd));
+  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.se) : "r"(v32.se));
+  asm volatile ("bfe.u32 %0, %1, 16, 8;" : "=r"(r.sf) : "r"(v32.sf));
+  #endif
+
+  //#elif defined IS_AMD && HAS_VBFE == 1
+  //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 16, 8;" : "=v"(r) : "v"(v32));
+  #else
+  r = (v32 >> 16) & 0xff;
+  #endif
+
+  return r;
+}
+
+DECLSPEC u32x unpack_v8d_from_v32 (const u32x v32)
+{
+  u32x r = 0;
+
+  #if defined IS_NV && HAS_BFE == 1
+
+  #if VECT_SIZE == 1
+  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r) : "r"(v32));
+  #endif
+
+  #if VECT_SIZE >= 2
+  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s0) : "r"(v32.s0));
+  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s1) : "r"(v32.s1));
+  #endif
+
+  #if VECT_SIZE >= 4
+  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s2) : "r"(v32.s2));
+  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s3) : "r"(v32.s3));
+  #endif
+
+  #if VECT_SIZE >= 8
+  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s4) : "r"(v32.s4));
+  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s5) : "r"(v32.s5));
+  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s6) : "r"(v32.s6));
+  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s7) : "r"(v32.s7));
+  #endif
+
+  #if VECT_SIZE >= 16
+  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s8) : "r"(v32.s8));
+  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.s9) : "r"(v32.s9));
+  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.sa) : "r"(v32.sa));
+  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.sb) : "r"(v32.sb));
+  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.sc) : "r"(v32.sc));
+  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.sd) : "r"(v32.sd));
+  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.se) : "r"(v32.se));
+  asm volatile ("bfe.u32 %0, %1, 24, 8;" : "=r"(r.sf) : "r"(v32.sf));
+  #endif
+
+  //#elif defined IS_AMD && HAS_VBFE == 1
+  //__asm__ __volatile__ ("V_BFE_U32 %0, %1, 24, 8;" : "=v"(r) : "v"(v32));
+  #else
+  r = (v32 >> 24) & 0xff;
+  #endif
+
+  return r;
+}
+
 DECLSPEC u32 unpack_v8a_from_v32_S (const u32 v32)
 {
   u32 r = 0;
diff --git a/OpenCL/inc_common.h b/OpenCL/inc_common.h
index 8715ae75e..07137297b 100644
--- a/OpenCL/inc_common.h
+++ b/OpenCL/inc_common.h
@@ -171,6 +171,11 @@ DECLSPEC u64 v64_from_v32ab_S (const u32 v32a, const u32 v32b);
 
 // inline asm packing
 
+DECLSPEC u32x unpack_v8a_from_v32 (const u32x v32);
+DECLSPEC u32x unpack_v8b_from_v32 (const u32x v32);
+DECLSPEC u32x unpack_v8c_from_v32 (const u32x v32);
+DECLSPEC u32x unpack_v8d_from_v32 (const u32x v32);
+
 DECLSPEC u32 unpack_v8a_from_v32_S (const u32 v32);
 DECLSPEC u32 unpack_v8b_from_v32_S (const u32 v32);
 DECLSPEC u32 unpack_v8c_from_v32_S (const u32 v32);
diff --git a/OpenCL/m00200_a0-optimized.cl b/OpenCL/m00200_a0-optimized.cl
index c5d334f36..ab9071cb0 100644
--- a/OpenCL/m00200_a0-optimized.cl
+++ b/OpenCL/m00200_a0-optimized.cl
@@ -99,10 +99,10 @@ KERNEL_FQ void m00200_m04 (KERN_ATTR_RULES ())
     {
       const u32x wj = w_t[j];
 
-      ROUND ((wj >> 0) & 0xff);
-      ROUND ((wj >> 8) & 0xff);
-      ROUND ((wj >> 16) & 0xff);
-      ROUND ((wj >> 24) & 0xff);
+      ROUND (unpack_v8a_from_v32 (wj));
+      ROUND (unpack_v8b_from_v32 (wj));
+      ROUND (unpack_v8c_from_v32 (wj));
+      ROUND (unpack_v8d_from_v32 (wj));
     }
 
     const u32x wj = w_t[j];
@@ -111,18 +111,18 @@ KERNEL_FQ void m00200_m04 (KERN_ATTR_RULES ())
 
     if (left == 3)
     {
-      ROUND ((wj >> 0) & 0xff);
-      ROUND ((wj >> 8) & 0xff);
-      ROUND ((wj >> 16) & 0xff);
+      ROUND (unpack_v8a_from_v32 (wj));
+      ROUND (unpack_v8b_from_v32 (wj));
+      ROUND (unpack_v8c_from_v32 (wj));
     }
     else if (left == 2)
     {
-      ROUND ((wj >> 0) & 0xff);
-      ROUND ((wj >> 8) & 0xff);
+      ROUND (unpack_v8a_from_v32 (wj));
+      ROUND (unpack_v8b_from_v32 (wj));
    }
     else if (left == 1)
     {
-      ROUND ((wj >> 0) & 0xff);
+      ROUND (unpack_v8a_from_v32 (wj));
     }
 
     a &= 0x7fffffff;
@@ -237,10 +237,10 @@ KERNEL_FQ void m00200_s04 (KERN_ATTR_RULES ())
     {
       const u32x wj = w_t[j];
 
-      ROUND ((wj >> 0) & 0xff);
-      ROUND ((wj >> 8) & 0xff);
-      ROUND ((wj >> 16) & 0xff);
-      ROUND ((wj >> 24) & 0xff);
+      ROUND (unpack_v8a_from_v32 (wj));
+      ROUND (unpack_v8b_from_v32 (wj));
+      ROUND (unpack_v8c_from_v32 (wj));
+      ROUND (unpack_v8d_from_v32 (wj));
     }
 
     const u32x wj = w_t[j];
@@ -249,18 +249,18 @@ KERNEL_FQ void m00200_s04 (KERN_ATTR_RULES ())
 
     if (left == 3)
     {
-      ROUND ((wj >> 0) & 0xff);
-      ROUND ((wj >> 8) & 0xff);
-      ROUND ((wj >> 16) & 0xff);
+      ROUND (unpack_v8a_from_v32 (wj));
+      ROUND (unpack_v8b_from_v32 (wj));
+      ROUND (unpack_v8c_from_v32 (wj));
     }
     else if (left == 2)
     {
-      ROUND ((wj >> 0) & 0xff);
-      ROUND ((wj >> 8) & 0xff);
+      ROUND (unpack_v8a_from_v32 (wj));
+      ROUND (unpack_v8b_from_v32 (wj));
     }
     else if (left == 1)
     {
-      ROUND ((wj >> 0) & 0xff);
+      ROUND (unpack_v8a_from_v32 (wj));
     }
 
     a &= 0x7fffffff;
diff --git a/OpenCL/m00200_a1-optimized.cl b/OpenCL/m00200_a1-optimized.cl
index 5589a386a..16b5054ab 100644
--- a/OpenCL/m00200_a1-optimized.cl
+++ b/OpenCL/m00200_a1-optimized.cl
@@ -142,6 +142,8 @@ KERNEL_FQ void m00200_m04 (KERN_ATTR_BASIC ())
 
     u32x a = MYSQL323_A;
     u32x b = MYSQL323_B;
+    u32x c = 0;
+    u32x d = 0;
 
     u32x add = 7;
 
@@ -159,10 +161,10 @@ KERNEL_FQ void m00200_m04 (KERN_ATTR_BASIC ())
     {
       const u32x wj = w_t[j];
 
-      ROUND ((wj >> 0) & 0xff);
-      ROUND ((wj >> 8) & 0xff);
-      ROUND ((wj >> 16) & 0xff);
-      ROUND ((wj >> 24) & 0xff);
+      ROUND (unpack_v8a_from_v32 (wj));
+      ROUND (unpack_v8b_from_v32 (wj));
+      ROUND (unpack_v8c_from_v32 (wj));
+      ROUND (unpack_v8d_from_v32 (wj));
     }
 
     const u32x wj = w_t[j];
@@ -171,18 +173,18 @@ KERNEL_FQ void m00200_m04 (KERN_ATTR_BASIC ())
 
     if (left == 3)
    {
-      ROUND ((wj >> 0) & 0xff);
-      ROUND ((wj >> 8) & 0xff);
-      ROUND ((wj >> 16) & 0xff);
+      ROUND (unpack_v8a_from_v32 (wj));
+      ROUND (unpack_v8b_from_v32 (wj));
+      ROUND (unpack_v8c_from_v32 (wj));
     }
     else if (left == 2)
     {
-      ROUND ((wj >> 0) & 0xff);
-      ROUND ((wj >> 8) & 0xff);
+      ROUND (unpack_v8a_from_v32 (wj));
+      ROUND (unpack_v8b_from_v32 (wj));
     }
     else if (left == 1)
     {
-      ROUND ((wj >> 0) & 0xff);
+      ROUND (unpack_v8a_from_v32 (wj));
     }
 
     a &= 0x7fffffff;
@@ -361,10 +363,10 @@ KERNEL_FQ void m00200_s04 (KERN_ATTR_BASIC ())
     {
       const u32x wj = w_t[j];
 
-      ROUND ((wj >> 0) & 0xff);
-      ROUND ((wj >> 8) & 0xff);
-      ROUND ((wj >> 16) & 0xff);
-      ROUND ((wj >> 24) & 0xff);
+      ROUND (unpack_v8a_from_v32 (wj));
+      ROUND (unpack_v8b_from_v32 (wj));
+      ROUND (unpack_v8c_from_v32 (wj));
+      ROUND (unpack_v8d_from_v32 (wj));
     }
 
     const u32x wj = w_t[j];
@@ -373,18 +375,18 @@ KERNEL_FQ void m00200_s04 (KERN_ATTR_BASIC ())
 
     if (left == 3)
     {
-      ROUND ((wj >> 0) & 0xff);
-      ROUND ((wj >> 8) & 0xff);
-      ROUND ((wj >> 16) & 0xff);
+      ROUND (unpack_v8a_from_v32 (wj));
+      ROUND (unpack_v8b_from_v32 (wj));
+      ROUND (unpack_v8c_from_v32 (wj));
     }
     else if (left == 2)
     {
-      ROUND ((wj >> 0) & 0xff);
-      ROUND ((wj >> 8) & 0xff);
+      ROUND (unpack_v8a_from_v32 (wj));
+      ROUND (unpack_v8b_from_v32 (wj));
     }
     else if (left == 1)
     {
-      ROUND ((wj >> 0) & 0xff);
+      ROUND (unpack_v8a_from_v32 (wj));
     }
 
     a &= 0x7fffffff;
diff --git a/OpenCL/m00200_a3-optimized.cl b/OpenCL/m00200_a3-optimized.cl
index 74a1c3234..1a9b72e39 100644
--- a/OpenCL/m00200_a3-optimized.cl
+++ b/OpenCL/m00200_a3-optimized.cl
@@ -42,10 +42,10 @@
   { \
     const u32 wj = w[j]; \
 \
-    ROUND ((wj >> 0) & 0xff); \
-    ROUND ((wj >> 8) & 0xff); \
-    ROUND ((wj >> 16) & 0xff); \
-    ROUND ((wj >> 24) & 0xff); \
+    ROUND (unpack_v8a_from_v32 (wj)); \
+    ROUND (unpack_v8b_from_v32 (wj)); \
+    ROUND (unpack_v8c_from_v32 (wj)); \
+    ROUND (unpack_v8d_from_v32 (wj)); \
   } \
 \
   const u32 wj = w[j]; \
@@ -54,18 +54,18 @@
 \
   if (left == 3) \
   { \
-    ROUND ((wj >> 0) & 0xff); \
-    ROUND ((wj >> 8) & 0xff); \
-    ROUND ((wj >> 16) & 0xff); \
+    ROUND (unpack_v8a_from_v32 (wj)); \
+    ROUND (unpack_v8b_from_v32 (wj)); \
+    ROUND (unpack_v8c_from_v32 (wj)); \
   } \
   else if (left == 2) \
   { \
-    ROUND ((wj >> 0) & 0xff); \
-    ROUND ((wj >> 8) & 0xff); \
+    ROUND (unpack_v8a_from_v32 (wj)); \
+    ROUND (unpack_v8b_from_v32 (wj)); \
  } \
   else if (left == 1) \
   { \
-    ROUND ((wj >> 0) & 0xff); \
+    ROUND (unpack_v8a_from_v32 (wj)); \
   }
 
 #define CODE_POST_M \
@@ -99,141 +99,123 @@ DECLSPEC void m00200m (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ())
   const u64 gid = get_global_id (0);
   const u64 lid = get_local_id (0);
 
+  /**
+   * digest
+   */
+
+  const u32 search[4] =
+  {
+    digests_buf[digests_offset].digest_buf[DGST_R0],
+    digests_buf[digests_offset].digest_buf[DGST_R1],
+    0,
+    0
+  };
+
   /**
    * loop
    */
 
   u32 w0l = w[0];
 
+  CODE_PRE;
+
   switch (pw_len)
   {
     case 1:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff);
-      CODE_POST_M;
+      ROUND (unpack_v8a_from_v32 ( w0));
       break;
 
     case 2:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff);
-      CODE_POST_M;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0));
      break;
 
    case 3:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff);
-      CODE_POST_M;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0));
       break;
 
     case 4:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      CODE_POST_M;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
       break;
 
     case 5:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff);
-      CODE_POST_M;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1]));
       break;
 
    case 6:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff);
-      CODE_POST_M;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1]));
       break;
 
    case 7:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff);
-      CODE_POST_M;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1]));
       break;
 
    case 8:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff);
-      CODE_POST_M;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1])); ROUND (unpack_v8d_from_v32 (w[1]));
       break;
 
    case 9:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff);
-      ROUND ((w[2] >> 0) & 0xff);
-      CODE_POST_M;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1])); ROUND (unpack_v8d_from_v32 (w[1]));
+      ROUND (unpack_v8a_from_v32 (w[2]));
       break;
 
    case 10:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff);
-      ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff);
-      CODE_POST_M;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1])); ROUND (unpack_v8d_from_v32 (w[1]));
+      ROUND (unpack_v8a_from_v32 (w[2])); ROUND (unpack_v8b_from_v32 (w[2]));
       break;
 
    case 11:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff);
-      ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff);
-      CODE_POST_M;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1])); ROUND (unpack_v8d_from_v32 (w[1]));
+      ROUND (unpack_v8a_from_v32 (w[2])); ROUND (unpack_v8b_from_v32 (w[2])); ROUND (unpack_v8c_from_v32 (w[2]));
       break;
 
    case 12:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff);
-      ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff);
-      CODE_POST_M;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1])); ROUND (unpack_v8d_from_v32 (w[1]));
+      ROUND (unpack_v8a_from_v32 (w[2])); ROUND (unpack_v8b_from_v32 (w[2])); ROUND (unpack_v8c_from_v32 (w[2])); ROUND (unpack_v8d_from_v32 (w[2]));
       break;
 
    case 13:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff);
-      ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff);
-      ROUND ((w[3] >> 0) & 0xff);
-      CODE_POST_M;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1])); ROUND (unpack_v8d_from_v32 (w[1]));
+      ROUND (unpack_v8a_from_v32 (w[2])); ROUND (unpack_v8b_from_v32 (w[2])); ROUND (unpack_v8c_from_v32 (w[2])); ROUND (unpack_v8d_from_v32 (w[2]));
+      ROUND (unpack_v8a_from_v32 (w[3]));
       break;
 
    case 14:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff);
-      ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff);
-      ROUND ((w[3] >> 0) & 0xff); ROUND ((w[3] >> 8) & 0xff);
-      CODE_POST_M;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1])); ROUND (unpack_v8d_from_v32 (w[1]));
+      ROUND (unpack_v8a_from_v32 (w[2])); ROUND (unpack_v8b_from_v32 (w[2])); ROUND (unpack_v8c_from_v32 (w[2])); ROUND (unpack_v8d_from_v32 (w[2]));
+      ROUND (unpack_v8a_from_v32 (w[3])); ROUND (unpack_v8b_from_v32 (w[3]));
       break;
 
    case 15:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff);
-      ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff);
-      ROUND ((w[3] >> 0) & 0xff); ROUND ((w[3] >> 8) & 0xff); ROUND ((w[3] >> 16) & 0xff);
-      CODE_POST_M;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1])); ROUND (unpack_v8d_from_v32 (w[1]));
+      ROUND (unpack_v8a_from_v32 (w[2])); ROUND (unpack_v8b_from_v32 (w[2])); ROUND (unpack_v8c_from_v32 (w[2])); ROUND (unpack_v8d_from_v32 (w[2]));
+      ROUND (unpack_v8a_from_v32 (w[3])); ROUND (unpack_v8b_from_v32 (w[3])); ROUND (unpack_v8c_from_v32 (w[3]));
       break;
 
    case 16:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff);
-      ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff);
-      ROUND ((w[3] >> 0) & 0xff); ROUND ((w[3] >> 8) & 0xff); ROUND ((w[3] >> 16) & 0xff); ROUND ((w[3] >> 24) & 0xff);
-      CODE_POST_M;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1])); ROUND (unpack_v8d_from_v32 (w[1]));
+      ROUND (unpack_v8a_from_v32 (w[2])); ROUND (unpack_v8b_from_v32 (w[2])); ROUND (unpack_v8c_from_v32 (w[2])); ROUND (unpack_v8d_from_v32 (w[2]));
+      ROUND (unpack_v8a_from_v32 (w[3])); ROUND (unpack_v8b_from_v32 (w[3])); ROUND (unpack_v8c_from_v32 (w[3])); ROUND (unpack_v8d_from_v32 (w[3]));
       break;
 
    default:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
       CODE_LOOP (pw_len - 4);
-      CODE_POST_M;
       break;
   }
+
+  CODE_POST_M;
 }
 
 DECLSPEC void m00200s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ())
@@ -263,135 +245,105 @@ DECLSPEC void m00200s (u32 *w, const u32 pw_len, KERN_ATTR_VECTOR ())
 
   u32 w0l = w[0];
 
+  CODE_PRE;
+
   switch (pw_len)
   {
    case 1:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff);
-      CODE_POST_S;
+      ROUND (unpack_v8a_from_v32 ( w0));
      break;
 
    case 2:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff);
-      CODE_POST_S;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0));
      break;
 
    case 3:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff);
-      CODE_POST_S;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0));
      break;
 
    case 4:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      CODE_POST_S;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
      break;
 
    case 5:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff);
-      CODE_POST_S;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1]));
      break;
 
    case 6:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff);
-      CODE_POST_S;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1]));
      break;
 
    case 7:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff);
-      CODE_POST_S;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1]));
      break;
 
    case 8:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff);
-      CODE_POST_S;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1])); ROUND (unpack_v8d_from_v32 (w[1]));
      break;
 
    case 9:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff);
-      ROUND ((w[2] >> 0) & 0xff);
-      CODE_POST_S;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1])); ROUND (unpack_v8d_from_v32 (w[1]));
+      ROUND (unpack_v8a_from_v32 (w[2]));
      break;
 
    case 10:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff);
-      ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff);
-      CODE_POST_S;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1])); ROUND (unpack_v8d_from_v32 (w[1]));
+      ROUND (unpack_v8a_from_v32 (w[2])); ROUND (unpack_v8b_from_v32 (w[2]));
      break;
 
    case 11:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff);
-      ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff);
-      CODE_POST_S;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1])); ROUND (unpack_v8d_from_v32 (w[1]));
+      ROUND (unpack_v8a_from_v32 (w[2])); ROUND (unpack_v8b_from_v32 (w[2])); ROUND (unpack_v8c_from_v32 (w[2]));
      break;
 
    case 12:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff);
-      ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff);
-      CODE_POST_S;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1])); ROUND (unpack_v8d_from_v32 (w[1]));
+      ROUND (unpack_v8a_from_v32 (w[2])); ROUND (unpack_v8b_from_v32 (w[2])); ROUND (unpack_v8c_from_v32 (w[2])); ROUND (unpack_v8d_from_v32 (w[2]));
      break;
 
    case 13:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff);
-      ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff);
-      ROUND ((w[3] >> 0) & 0xff);
-      CODE_POST_S;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1])); ROUND (unpack_v8d_from_v32 (w[1]));
+      ROUND (unpack_v8a_from_v32 (w[2])); ROUND (unpack_v8b_from_v32 (w[2])); ROUND (unpack_v8c_from_v32 (w[2])); ROUND (unpack_v8d_from_v32 (w[2]));
+      ROUND (unpack_v8a_from_v32 (w[3]));
      break;
 
    case 14:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff);
-      ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff);
-      ROUND ((w[3] >> 0) & 0xff); ROUND ((w[3] >> 8) & 0xff);
-      CODE_POST_S;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1])); ROUND (unpack_v8d_from_v32 (w[1]));
+      ROUND (unpack_v8a_from_v32 (w[2])); ROUND (unpack_v8b_from_v32 (w[2])); ROUND (unpack_v8c_from_v32 (w[2])); ROUND (unpack_v8d_from_v32 (w[2]));
+      ROUND (unpack_v8a_from_v32 (w[3])); ROUND (unpack_v8b_from_v32 (w[3]));
      break;
 
    case 15:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff);
-      ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff);
-      ROUND ((w[3] >> 0) & 0xff); ROUND ((w[3] >> 8) & 0xff); ROUND ((w[3] >> 16) & 0xff);
-      CODE_POST_S;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1])); ROUND (unpack_v8d_from_v32 (w[1]));
+      ROUND (unpack_v8a_from_v32 (w[2])); ROUND (unpack_v8b_from_v32 (w[2])); ROUND (unpack_v8c_from_v32 (w[2])); ROUND (unpack_v8d_from_v32 (w[2]));
+      ROUND (unpack_v8a_from_v32 (w[3])); ROUND (unpack_v8b_from_v32 (w[3])); ROUND (unpack_v8c_from_v32 (w[3]));
      break;
 
    case 16:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
-      ROUND ((w[1] >> 0) & 0xff); ROUND ((w[1] >> 8) & 0xff); ROUND ((w[1] >> 16) & 0xff); ROUND ((w[1] >> 24) & 0xff);
-      ROUND ((w[2] >> 0) & 0xff); ROUND ((w[2] >> 8) & 0xff); ROUND ((w[2] >> 16) & 0xff); ROUND ((w[2] >> 24) & 0xff);
-      ROUND ((w[3] >> 0) & 0xff); ROUND ((w[3] >> 8) & 0xff); ROUND ((w[3] >> 16) & 0xff); ROUND ((w[3] >> 24) & 0xff);
-      CODE_POST_S;
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
+      ROUND (unpack_v8a_from_v32 (w[1])); ROUND (unpack_v8b_from_v32 (w[1])); ROUND (unpack_v8c_from_v32 (w[1])); ROUND (unpack_v8d_from_v32 (w[1]));
+      ROUND (unpack_v8a_from_v32 (w[2])); ROUND (unpack_v8b_from_v32 (w[2])); ROUND (unpack_v8c_from_v32 (w[2])); ROUND (unpack_v8d_from_v32 (w[2]));
+      ROUND (unpack_v8a_from_v32 (w[3])); ROUND (unpack_v8b_from_v32 (w[3])); ROUND (unpack_v8c_from_v32 (w[3])); ROUND (unpack_v8d_from_v32 (w[3]));
      break;
 
    default:
-      CODE_PRE;
-      ROUND ((w0 >> 0) & 0xff); ROUND ((w0 >> 8) & 0xff); ROUND ((w0 >> 16) & 0xff); ROUND ((w0 >> 24) & 0xff);
+      ROUND (unpack_v8a_from_v32 ( w0)); ROUND (unpack_v8b_from_v32 ( w0)); ROUND (unpack_v8c_from_v32 ( w0)); ROUND (unpack_v8d_from_v32 ( w0));
       CODE_LOOP (pw_len - 4);
-      CODE_POST_S;
       break;
   }
+
+  CODE_POST_S;
 }
 
 KERNEL_FQ void m00200_m04 (KERN_ATTR_VECTOR ())
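
Note (editor's addition, not part of the patch): the four new vector helpers mirror the existing scalar unpack_v8{a,b,c,d}_from_v32_S functions. On NVIDIA with HAS_BFE == 1 each vector lane is extracted with a PTX bfe.u32 (bit-field extract) instruction; every other target takes the portable shift-and-mask fallback in the #else branches. A minimal host-side C sketch of that fallback semantics follows; the helper names and the test value are local to the sketch and purely illustrative.

  #include <assert.h>
  #include <stdint.h>

  /* Portable equivalents of the #else branches in the patch:
     unpack_v8a..d extract bytes 0..3 of a 32-bit word. */
  static uint32_t unpack_v8a (uint32_t v) { return (v >>  0) & 0xff; }
  static uint32_t unpack_v8b (uint32_t v) { return (v >>  8) & 0xff; }
  static uint32_t unpack_v8c (uint32_t v) { return (v >> 16) & 0xff; }
  static uint32_t unpack_v8d (uint32_t v) { return (v >> 24) & 0xff; }

  int main (void)
  {
    const uint32_t v = 0x44434241; /* bytes 0x41..0x44, lowest byte first */

    assert (unpack_v8a (v) == 0x41); /* least significant byte */
    assert (unpack_v8b (v) == 0x42);
    assert (unpack_v8c (v) == 0x43);
    assert (unpack_v8d (v) == 0x44); /* most significant byte */

    return 0;
  }

This is also why the -m 200 (MYSQL323) kernels can replace each (wj >> n) & 0xff with the corresponding unpack_v8x_from_v32 (wj) call without changing the computed digest: the byte fed to ROUND is identical, only the extraction instruction differs.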