# Patch summary (commentary only; placed before the first "diff --git" header).
#
# Files touched, all under tools/code_generators/:
#   - GEN_AMD_switch_buffer_by_offset_16x4_le_S.pl  (new file)
#   - GEN_AMD_switch_buffer_by_offset_le_S.pl -> ..._64x1_le_S.pl  (rename, similarity 68%)
#   - GEN_NV_switch_buffer_by_offset_16x4_le_S.pl   (new file)
#   - GEN_NV_switch_buffer_by_offset_le_S.pl  -> ..._64x1_le_S.pl  (rename, similarity 94%)
#
# Every script prints the text of `case N:` bodies to stdout. $i is the case
# number and $s is the word shift; both start at 0 and are incremented
# together, so $s always equals $i. For each case, $j walks the destination
# words from the highest index down to 0 and one of three statements is printed:
#   - both source words (j-s and j-s-1) exist: a two-word combine
#       AMD: amd_bytealign_S (w[j-s], w[j-s-1], offset_minus_4)
#       NV : __byte_perm_S   (w[j-s-1], w[j-s], selector)
#     (note the operand order differs between the two back ends);
#   - only w[j-s] exists: the same call with a literal 0 in place of w[j-s-1];
#   - neither exists: the destination word is assigned 0.
#
# The 64x1 variants address a flat w[0..63]; the 16x4 variants address
# w0..w3, each indexed [0..3], using $j / 4 for the array suffix and $j % 4
# for the element.
# NOTE(review): `$j / 4` is floating-point division in Perl; the output is
# correct only because the `%d` conversion truncates it. Source indices are
# also computed when (j-s) or (j-s-1) is negative, but those values are never
# printed because the branch conditions exclude them.
#
# AMD variants additionally print, after the per-word statements, an
# `if (offset_mod_4 == 0)` block that copies w[j+1] into w[j] for j from the
# case number up to the second-to-last word and then zeroes the last word
# (w3[3] / w[63]). The NV variants print no such block.
# NOTE(review): presumably this compensates for the bytealign result when the
# byte offset is word-aligned -- confirm against the kernel that consumes the
# generated text.
#
# In both rename hunks every line is shown as removed and re-added even where
# the text is unchanged -- presumably a line-ending or whitespace-only
# difference; TODO confirm. The visible content changes are the added
# `printf ("\n");` separators around `break;` and, for AMD, the
# offset_mod_4 block.
# NOTE(review): the NV 16x4 zero-fill printf has a trailing comma after its
# last argument; Perl accepts it and it does not change the output.
#
diff --git a/tools/code_generators/GEN_AMD_switch_buffer_by_offset_16x4_le_S.pl b/tools/code_generators/GEN_AMD_switch_buffer_by_offset_16x4_le_S.pl new file mode 100644 index 000000000..d9d897054 --- /dev/null +++ b/tools/code_generators/GEN_AMD_switch_buffer_by_offset_16x4_le_S.pl @@ -0,0 +1,53 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +for (my $i = 0, my $s = 0; $i < 16; $i++, $s++) +{ + printf (" case %2d:\n", $i); + for (my $j = 16 - 1; $j >= 0; $j--) + { + my $jd4 = $j / 4; + my $jm4 = $j % 4; + + my $js1d4 = ($j - $s - 1) / 4; + my $js1m4 = ($j - $s - 1) % 4; + + my $js0d4 = ($j - $s - 0) / 4; + my $js0m4 = ($j - $s - 0) % 4; + + if ((($j - $s - 1) >= 0) && (($j - $s - 0) >= 0)) + { + printf (" w%d[%d] = amd_bytealign_S (w%d[%d], w%d[%d], offset_minus_4);\n", $jd4, $jm4, $js0d4, $js0m4, $js1d4, $js1m4); + } + elsif (($j - $s - 0) >= 0) + { + printf (" w%d[%d] = amd_bytealign_S (w%d[%d], 0, offset_minus_4);\n", $jd4, $jm4, $js0d4, $js0m4); + } + else + { + printf (" w%d[%d] = 0;\n", $jd4, $jm4); + } + } + printf ("\n"); + + printf (" if (offset_mod_4 == 0)\n"); + printf (" {\n"); + for (my $j = $i; $j < 16 - 1; $j++) + { + my $jd4 = $j / 4; + my $jm4 = $j % 4; + + my $ja1d4 = ($j + 1) / 4; + my $ja1m4 = ($j + 1) % 4; + + printf (" w%d[%d] = w%d[%d];\n", $jd4, $jm4, $ja1d4, $ja1m4); + } + printf (" w3[3] = 0;\n"); + printf (" }\n"); + printf ("\n"); + + printf (" break;\n"); + printf ("\n"); +} diff --git a/tools/code_generators/GEN_AMD_switch_buffer_by_offset_le_S.pl b/tools/code_generators/GEN_AMD_switch_buffer_by_offset_64x1_le_S.pl similarity index 68% rename from tools/code_generators/GEN_AMD_switch_buffer_by_offset_le_S.pl rename to tools/code_generators/GEN_AMD_switch_buffer_by_offset_64x1_le_S.pl index 143342238..294b51f73 100644 --- a/tools/code_generators/GEN_AMD_switch_buffer_by_offset_le_S.pl +++ b/tools/code_generators/GEN_AMD_switch_buffer_by_offset_64x1_le_S.pl @@ -1,27 +1,38 @@ -#!/usr/bin/perl - -use strict; -use warnings; - -for (my 
$i = 0, my $s = 0; $i < 64; $i++, $s++) -{ - printf (" case %2d:\n", $i); - - for (my $j = 64 - 1; $j >= 0; $j--) - { - if ((($j - $s - 1) >= 0) && (($j - $s - 0) >= 0)) - { - printf (" w[%2d] = amd_bytealign_S (w[%2d], w[%2d], offset_minus_4);\n", $j, $j - $s - 0, $j - $s - 1); - } - elsif (($j - $s - 0) >= 0) - { - printf (" w[%2d] = amd_bytealign_S (w[%2d], 0, offset_minus_4);\n", $j, $j - $s - 0); - } - else - { - printf (" w[%2d] = 0;\n", $j); - } - } - - printf (" break;\n"); -} +#!/usr/bin/perl + +use strict; +use warnings; + +for (my $i = 0, my $s = 0; $i < 64; $i++, $s++) +{ + printf (" case %2d:\n", $i); + for (my $j = 64 - 1; $j >= 0; $j--) + { + if ((($j - $s - 1) >= 0) && (($j - $s - 0) >= 0)) + { + printf (" w[%2d] = amd_bytealign_S (w[%2d], w[%2d], offset_minus_4);\n", $j, $j - $s - 0, $j - $s - 1); + } + elsif (($j - $s - 0) >= 0) + { + printf (" w[%2d] = amd_bytealign_S (w[%2d], 0, offset_minus_4);\n", $j, $j - $s - 0); + } + else + { + printf (" w[%2d] = 0;\n", $j); + } + } + printf ("\n"); + + printf (" if (offset_mod_4 == 0)\n"); + printf (" {\n"); + for (my $j = $i; $j < 64 - 1; $j++) + { + printf (" w[%2d] = w[%2d];\n", $j, $j + 1); + } + printf (" w[63] = 0;\n"); + printf (" }\n"); + printf ("\n"); + + printf (" break;\n"); + printf ("\n"); +} diff --git a/tools/code_generators/GEN_NV_switch_buffer_by_offset_16x4_le_S.pl b/tools/code_generators/GEN_NV_switch_buffer_by_offset_16x4_le_S.pl new file mode 100644 index 000000000..bcdcb962b --- /dev/null +++ b/tools/code_generators/GEN_NV_switch_buffer_by_offset_16x4_le_S.pl @@ -0,0 +1,38 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +for (my $i = 0, my $s = 0; $i < 16; $i++, $s++) +{ + printf (" case %2d:\n", $i); + + for (my $j = 16 - 1; $j >= 0; $j--) + { + my $jd4 = $j / 4; + my $jm4 = $j % 4; + + my $js1d4 = ($j - $s - 1) / 4; + my $js1m4 = ($j - $s - 1) % 4; + + my $js0d4 = ($j - $s - 0) / 4; + my $js0m4 = ($j - $s - 0) % 4; + + if ((($j - $s - 1) >= 0) && (($j - $s - 0) >= 0)) + { + 
printf (" w%d[%d] = __byte_perm_S (w%d[%d], w%d[%d], selector);\n", $jd4, $jm4, $js1d4, $js1m4, $js0d4, $js0m4); + } + elsif (($j - $s - 0) >= 0) + { + printf (" w%d[%d] = __byte_perm_S ( 0, w%d[%d], selector);\n", $jd4, $jm4, $js0d4, $js0m4); + } + else + { + printf (" w%d[%d] = 0;\n", $jd4, $jm4, ); + } + } + printf ("\n"); + + printf (" break;\n"); + printf ("\n"); +} diff --git a/tools/code_generators/GEN_NV_switch_buffer_by_offset_le_S.pl b/tools/code_generators/GEN_NV_switch_buffer_by_offset_64x1_le_S.pl similarity index 94% rename from tools/code_generators/GEN_NV_switch_buffer_by_offset_le_S.pl rename to tools/code_generators/GEN_NV_switch_buffer_by_offset_64x1_le_S.pl index c9228ce0a..87e4ac527 100644 --- a/tools/code_generators/GEN_NV_switch_buffer_by_offset_le_S.pl +++ b/tools/code_generators/GEN_NV_switch_buffer_by_offset_64x1_le_S.pl @@ -1,27 +1,29 @@ -#!/usr/bin/perl - -use strict; -use warnings; - -for (my $i = 0, my $s = 0; $i < 64; $i++, $s++) -{ - printf (" case %2d:\n", $i); - - for (my $j = 64 - 1; $j >= 0; $j--) - { - if ((($j - $s - 1) >= 0) && (($j - $s - 0) >= 0)) - { - printf (" w[%2d] = __byte_perm_S (w[%2d], w[%2d], selector);\n", $j, $j - $s - 1, $j - $s - 0); - } - elsif (($j - $s - 0) >= 0) - { - printf (" w[%2d] = __byte_perm_S ( 0, w[%2d], selector);\n", $j, $j - $s - 0); - } - else - { - printf (" w[%2d] = 0;\n", $j); - } - } - - printf (" break;\n"); -} +#!/usr/bin/perl + +use strict; +use warnings; + +for (my $i = 0, my $s = 0; $i < 64; $i++, $s++) +{ + printf (" case %2d:\n", $i); + + for (my $j = 64 - 1; $j >= 0; $j--) + { + if ((($j - $s - 1) >= 0) && (($j - $s - 0) >= 0)) + { + printf (" w[%2d] = __byte_perm_S (w[%2d], w[%2d], selector);\n", $j, $j - $s - 1, $j - $s - 0); + } + elsif (($j - $s - 0) >= 0) + { + printf (" w[%2d] = __byte_perm_S ( 0, w[%2d], selector);\n", $j, $j - $s - 0); + } + else + { + printf (" w[%2d] = 0;\n", $j); + } + } + printf ("\n"); + + printf (" break;\n"); + printf ("\n"); +}