The Assimilation Bridge (Scrypt-Jane GPU-CPU hybrid plugin -m 70100)
This commit is contained in:
parent aad670a543
commit a2a9941475

OpenCL/m70100-pure.cl (new file, 206 lines)
@@ -0,0 +1,206 @@
/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */

#ifdef KERNEL_STATIC
#include M2S(INCLUDE_PATH/inc_vendor.h)
#include M2S(INCLUDE_PATH/inc_types.h)
#include M2S(INCLUDE_PATH/inc_platform.cl)
#include M2S(INCLUDE_PATH/inc_common.cl)
#include M2S(INCLUDE_PATH/inc_hash_sha256.cl)
#endif

#define COMPARE_S M2S(INCLUDE_PATH/inc_comp_single.cl)
#define COMPARE_M M2S(INCLUDE_PATH/inc_comp_multi.cl)

#define SCRYPT_R_MAX 16
#define SCRYPT_P_MAX 16

#define SCRYPT_TMP_SIZE  (128ULL * SCRYPT_R_MAX * SCRYPT_P_MAX)
#define SCRYPT_TMP_SIZE4 (SCRYPT_TMP_SIZE / 4)

typedef struct
{
  u32 P[SCRYPT_TMP_SIZE4];

} scrypt_tmp_t;

typedef struct
{
  u32 salt_buf[64];
  u32 salt_len;

  u32 digest_buf[64];
  u32 digest_len;

  u32 N;
  u32 r;
  u32 p;

} scrypt_t;

KERNEL_FQ void m70100_init (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, scrypt_t))
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);

  if (gid >= GID_CNT) return;

  sha256_hmac_ctx_t sha256_hmac_ctx;

  sha256_hmac_init_global_swap (&sha256_hmac_ctx, pws[gid].i, pws[gid].pw_len);

  sha256_hmac_update_global_swap (&sha256_hmac_ctx, esalt_bufs[DIGESTS_OFFSET_HOST].salt_buf, esalt_bufs[DIGESTS_OFFSET_HOST].salt_len);

  u32 r = esalt_bufs[DIGESTS_OFFSET_HOST].r;
  u32 p = esalt_bufs[DIGESTS_OFFSET_HOST].p;

  u32 chunk_bytes = 64 * r * 2;

  u32 x_bytes = chunk_bytes * p;

  // 1st PBKDF2 pass (c = 1): fill P with p * 128 * r bytes of PBKDF2-HMAC-SHA256 (pw, salt)
  for (u32 i = 0, j = 0, k = 1; i < x_bytes; i += 32, j += 8, k += 1)
  {
    sha256_hmac_ctx_t sha256_hmac_ctx2 = sha256_hmac_ctx;

    u32 w0[4];
    u32 w1[4];
    u32 w2[4];
    u32 w3[4];

    w0[0] = k;
    w0[1] = 0;
    w0[2] = 0;
    w0[3] = 0;
    w1[0] = 0;
    w1[1] = 0;
    w1[2] = 0;
    w1[3] = 0;
    w2[0] = 0;
    w2[1] = 0;
    w2[2] = 0;
    w2[3] = 0;
    w3[0] = 0;
    w3[1] = 0;
    w3[2] = 0;
    w3[3] = 0;

    sha256_hmac_update_64 (&sha256_hmac_ctx2, w0, w1, w2, w3, 4);

    sha256_hmac_final (&sha256_hmac_ctx2);

    u32 digest[8];

    digest[0] = sha256_hmac_ctx2.opad.h[0];
    digest[1] = sha256_hmac_ctx2.opad.h[1];
    digest[2] = sha256_hmac_ctx2.opad.h[2];
    digest[3] = sha256_hmac_ctx2.opad.h[3];
    digest[4] = sha256_hmac_ctx2.opad.h[4];
    digest[5] = sha256_hmac_ctx2.opad.h[5];
    digest[6] = sha256_hmac_ctx2.opad.h[6];
    digest[7] = sha256_hmac_ctx2.opad.h[7];

    tmps[gid].P[j + 0] = hc_swap32_S (digest[0]);
    tmps[gid].P[j + 1] = hc_swap32_S (digest[1]);
    tmps[gid].P[j + 2] = hc_swap32_S (digest[2]);
    tmps[gid].P[j + 3] = hc_swap32_S (digest[3]);
    tmps[gid].P[j + 4] = hc_swap32_S (digest[4]);
    tmps[gid].P[j + 5] = hc_swap32_S (digest[5]);
    tmps[gid].P[j + 6] = hc_swap32_S (digest[6]);
    tmps[gid].P[j + 7] = hc_swap32_S (digest[7]);
  }
}

KERNEL_FQ void m70100_loop (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, scrypt_t))
{
  // intentionally empty: the memory-hard scrypt ROMix step runs host-side
  // in the scrypt-jane CPU code via the assimilation bridge
}

KERNEL_FQ void m70100_comp (KERN_ATTR_TMPS_ESALT (scrypt_tmp_t, scrypt_t))
{
  /**
   * base
   */

  const u64 gid = get_global_id (0);
  const u64 lid = get_local_id (0);

  if (gid >= GID_CNT) return;

  /**
   * 2nd pbkdf2: derive the result from the buffer B mixed on the host
   */

  u32 w0[4];
  u32 w1[4];
  u32 w2[4];
  u32 w3[4];

  sha256_hmac_ctx_t ctx;

  sha256_hmac_init_global_swap (&ctx, pws[gid].i, pws[gid].pw_len);

  u32 r = esalt_bufs[DIGESTS_OFFSET_HOST].r;
  u32 p = esalt_bufs[DIGESTS_OFFSET_HOST].p;

  u32 chunk_bytes = 64 * r * 2;

  u32 x_bytes = chunk_bytes * p;

  for (u32 i = 0, j = 0; i < x_bytes; i += 64, j += 16)
  {
    w0[0] = hc_swap32_S (tmps[gid].P[j + 0]);
    w0[1] = hc_swap32_S (tmps[gid].P[j + 1]);
    w0[2] = hc_swap32_S (tmps[gid].P[j + 2]);
    w0[3] = hc_swap32_S (tmps[gid].P[j + 3]);
    w1[0] = hc_swap32_S (tmps[gid].P[j + 4]);
    w1[1] = hc_swap32_S (tmps[gid].P[j + 5]);
    w1[2] = hc_swap32_S (tmps[gid].P[j + 6]);
    w1[3] = hc_swap32_S (tmps[gid].P[j + 7]);
    w2[0] = hc_swap32_S (tmps[gid].P[j + 8]);
    w2[1] = hc_swap32_S (tmps[gid].P[j + 9]);
    w2[2] = hc_swap32_S (tmps[gid].P[j + 10]);
    w2[3] = hc_swap32_S (tmps[gid].P[j + 11]);
    w3[0] = hc_swap32_S (tmps[gid].P[j + 12]);
    w3[1] = hc_swap32_S (tmps[gid].P[j + 13]);
    w3[2] = hc_swap32_S (tmps[gid].P[j + 14]);
    w3[3] = hc_swap32_S (tmps[gid].P[j + 15]);

    sha256_hmac_update_64 (&ctx, w0, w1, w2, w3, 64);
  }

  w0[0] = 1;
  w0[1] = 0;
  w0[2] = 0;
  w0[3] = 0;
  w1[0] = 0;
  w1[1] = 0;
  w1[2] = 0;
  w1[3] = 0;
  w2[0] = 0;
  w2[1] = 0;
  w2[2] = 0;
  w2[3] = 0;
  w3[0] = 0;
  w3[1] = 0;
  w3[2] = 0;
  w3[3] = 0;

  sha256_hmac_update_64 (&ctx, w0, w1, w2, w3, 4);

  sha256_hmac_final (&ctx);

  const u32 r0 = hc_swap32_S (ctx.opad.h[DGST_R0]);
  const u32 r1 = hc_swap32_S (ctx.opad.h[DGST_R1]);
  const u32 r2 = hc_swap32_S (ctx.opad.h[DGST_R2]);
  const u32 r3 = hc_swap32_S (ctx.opad.h[DGST_R3]);

  #define il_pos 0

  #ifdef KERNEL_STATIC
  #include COMPARE_M
  #endif
}
deps/scrypt-jane-master/README.md (vendored, new file, 163 lines)
@@ -0,0 +1,163 @@
This project provides a performant, flexible implementation of Colin Percival's [scrypt](http://www.tarsnap.com/scrypt.html).

# Features

## Modular Design

The code uses a modular (compile-time, not runtime) layout to allow new mixing & hash functions to be added easily. The base components (HMAC, PBKDF2, and scrypt) are static and will immediately work with any conforming mix or hash function.
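As a concrete picture of what "conforming" means, here is a minimal sketch of the hash back-end interface, mirroring the fallback stub in code/scrypt-jane-hash.h (included later in this commit); the MyHash-256 name and the empty bodies are placeholders, not a real back-end:

    #include <stdint.h>
    #include <stddef.h>

    #define SCRYPT_HASH             "MyHash-256"   /* display name */
    #define SCRYPT_HASH_BLOCK_SIZE  64             /* bytes per compression block */
    #define SCRYPT_HASH_DIGEST_SIZE 32             /* bytes of output */

    typedef struct scrypt_hash_state_t {
        size_t dummy; /* a real back-end keeps its chaining state here */
    } scrypt_hash_state;

    typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];

    static void scrypt_hash_init(scrypt_hash_state *S) { (void)S; }
    static void scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) { (void)S; (void)in; (void)inlen; }
    static void scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) { (void)S; (void)hash; }

    /* known-good digest of the self-test message, checked by scrypt_test_hash() */
    static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {0};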
## Supported Mix Functions

* [Salsa20/8](http://cr.yp.to/salsa20.html)
* [ChaCha20/8](http://cr.yp.to/chacha.html)
* Salsa6420/8

I am not actually aware of any other candidates for a decent mix function. Salsa20/8 was nearly perfect, but its successor, ChaCha20/8, has better diffusion and is thus stronger, is potentially faster given advanced SIMD support (byte-level shuffles, or a 32-bit rotate), and is slightly cleaner to implement given that it requires no pre/post processing of data for SIMD implementations.

64-byte blocks are no longer assumed! Salsa6420/8 is a 'proof of concept' 64-bit version of Salsa20/8 with a 128-byte block, and rotation constants chosen to allow 32-bit word shuffles instead of rotations for two of the rotations, which puts it on par with ChaCha in terms of SSE implementation shortcuts.

## Supported Hash Functions

* SHA256/512
* [BLAKE256/512](https://www.131002.net/blake/)
* [Skein512](http://www.skein-hash.info/)
* [Keccak256/512](http://keccak.noekeon.org/) (SHA-3)

Hash function implementations, unlike mix functions, are not optimized. The PBKDF2 computations are relatively minor in the scrypt algorithm, so including CPU-specific versions, or vastly unrolling loops, would serve little purpose while bloating the code, both source and binary, and making it more confusing to implement correctly.

Most (now only two!) of the SHA-3 candidates fall into the "annoying to read/implement" category and have not been included yet. This will of course be moot once ~~BLAKE is chosen as SHA-3~~ Keccak is chosen as SHA-3. Well shit.

## CPU Adaptation

The mixing function specialization is selected at runtime based on what the CPU supports (well, x86/x86-64 for now, but theoretically any). On platforms where this is not needed, e.g. where packages are usually compiled from source, it can also select the most suitable implementation at compile time, cutting down on binary size.

For those who are familiar with the scrypt spec, the code specializes at the ROMix level, allowing all copy and xor calls to be inlined efficiently. ***Update***: This is actually not as important as I switched from specializing at the mix() level and letting the compiler somewhat inefficiently inline block_copy and block_xor to specializing at ChunkMix(), where they can be inlined properly. I thought about specializing at ROMix(), but it would increase the complexity per mix function even more and would not present many more opportunities than what is generated by the compiler presently.

MSVC uses SSE intrinsics as opposed to inline assembly for the mix functions to allow the compiler to fully inline properly. Also, Visual Studio is not smart enough to allow inline assembly in 64-bit code.

## Self Testing

On first use, scrypt() runs a small series of tests to make sure the hash function, mix functions, and scrypt() itself, are generating correct results. It will exit() (or call a user-defined fatal error function) should any of these tests fail.

Test vectors for individual mix and hash functions are generated from reference implementations. The only "official" test vectors for the full scrypt() are for SHA256 + Salsa20/8, of course; other combinations are generated from this code (once it works with all reference test vectors) and are subject to change if any implementation errors are discovered.

# Performance (on an E5200 2.5GHz)

Benchmarks are run _without_ allocating memory, i.e. enough memory is allocated before the trials are run. Different allocators can have different costs and non-deterministic effects, which is not the point of comparing implementations. The only hash function compared will be SHA-256, to be comparable to Colin's reference implementation; the hash function generally contributes only a fraction of a percent of noise to the overall result.

Three different scrypt settings are tested (the last two are from the scrypt paper):

* 'High Volume': N=4096, r=8, p=1, 4mb memory
* 'Interactive': N=16384, r=8, p=1, 16mb memory
* 'Non-Interactive': N=1048576, r=8, p=1, 1gb memory

__Note__: Benchmark settings are adjusted based on the underlying block size to keep memory usage consistent with default scrypt. This means Salsa64 has r=4 due to having a 128-byte block size. A 256-byte block size would have r=2, a 512-byte block would have r=1, etc. Additionally, this means Salsa6420/8 is doing half the rounds/byte of default scrypt, but has 64-bit word mixing vs 32-bit, and thus does somewhat less overall mixing; Salsa6420/~10-12 would be needed to maintain equivalent overall mixing.
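To make the adjustment concrete: a chunk holds 2 * r blocks, so default scrypt packs 2 * 8 * 64 = 1024 bytes per chunk, and Salsa6420/8's 128-byte blocks reach the same 2 * 4 * 128 = 1024 bytes with r=4.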
Cycle counts are in millions of cycles. All versions compiled with gcc 4.6.3, -O3. Sorted from fastest to slowest.

Scaling refers to how much more expensive 'Non-Interactive' is to compute than 'High Volume', normalized to "ideal" scaling (a 256x difficulty increase, since N grows from 4096 to 1048576). Under 100% means it becomes easier to process as N grows; over 100% means it becomes more difficult to process as N grows.
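A worked example from the first table below: Salsa6420/8 costs 18.2m cycles at 'High Volume' and 5120.0m at 'Non-Interactive', so its scaling is 5120.0 / (18.2 * 256) ≈ 1.10, i.e. the 110.0% shown.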
<table>
<thead><tr><th>Implementation</th><th>Algo</th><th>High Volume</th><th>Interactive</th><th>Non-Interactive</th><th>Scaling</th></tr></thead>
<tbody>

<tr><td>scrypt-jane SSSE3 64bit</td><td>Salsa6420/8 </td><td>18.2m</td><td> 75.6m</td><td>5120.0m</td><td>110.0%</td></tr>
<tr><td>scrypt-jane SSSE3 64bit</td><td>ChaCha20/8  </td><td>19.6m</td><td> 79.6m</td><td>5296.7m</td><td>105.6%</td></tr>
<tr><td>scrypt-jane SSSE3 32bit</td><td>ChaCha20/8  </td><td>19.8m</td><td> 80.3m</td><td>5346.1m</td><td>105.5%</td></tr>
<tr><td>scrypt-jane SSE2 64bit </td><td>Salsa6420/8 </td><td>19.8m</td><td> 82.1m</td><td>5529.2m</td><td>109.1%</td></tr>
<tr><td>scrypt-jane SSE2 64bit </td><td>Salsa20/8   </td><td>22.1m</td><td> 89.7m</td><td>5938.8m</td><td>105.0%</td></tr>
<tr><td>scrypt-jane SSE2 32bit </td><td>Salsa20/8   </td><td>22.3m</td><td> 90.6m</td><td>6011.0m</td><td>105.3%</td></tr>
<tr><td>scrypt-jane SSE2 64bit </td><td>ChaCha20/8  </td><td>23.9m</td><td> 96.8m</td><td>6399.7m</td><td>104.6%</td></tr>
<tr><td>scrypt-jane SSE2 32bit </td><td>ChaCha20/8  </td><td>24.2m</td><td> 98.3m</td><td>6500.7m</td><td>104.9%</td></tr>
<tr><td>*Reference SSE2 64bit* </td><td>Salsa20/8   </td><td>32.9m</td><td>135.2m</td><td>8881.6m</td><td>105.5%</td></tr>
<tr><td>*Reference SSE2 32bit* </td><td>Salsa20/8   </td><td>33.0m</td><td>134.4m</td><td>8885.2m</td><td>105.2%</td></tr>
</tbody>
</table>

* scrypt-jane Salsa6420/8-SSSE3 is ~1.80x faster than reference Salsa20/8-SSE2 for 'High Volume', but drops to 1.73x faster for 'Non-Interactive' instead of remaining constant
* scrypt-jane ChaCha20/8-SSSE3 is ~1.67x faster than reference Salsa20/8-SSE2
* scrypt-jane Salsa20/8-SSE2 is ~1.48x faster than reference Salsa20/8-SSE2

# Performance (on a slightly noisy E3-1270 3.4GHz)

All versions compiled with gcc 4.4.7, -O3. Sorted from fastest to slowest.

<table>
<thead><tr><th>Implementation</th><th>Algo</th><th>High Volume</th><th>Interactive</th><th>Non-Interactive</th><th>Scaling</th></tr></thead>
<tbody>
<tr><td>scrypt-jane AVX 64bit   </td><td>Salsa6420/8 </td><td>11.8m</td><td> 52.5m</td><td>3848.6m</td><td>127.4%</td></tr>
<tr><td>scrypt-jane SSSE3 64bit </td><td>Salsa6420/8 </td><td>13.3m</td><td> 57.9m</td><td>4176.6m</td><td>122.7%</td></tr>
<tr><td>scrypt-jane SSE2 64bit  </td><td>Salsa6420/8 </td><td>14.2m</td><td> 61.1m</td><td>4382.4m</td><td>120.6%</td></tr>
<tr><td>scrypt-jane AVX 64bit   </td><td>ChaCha20/8  </td><td>18.0m</td><td> 77.4m</td><td>5396.8m</td><td>117.1%</td></tr>
<tr><td>scrypt-jane AVX 32bit   </td><td>ChaCha20/8  </td><td>18.3m</td><td> 82.1m</td><td>5421.8m</td><td>115.7%</td></tr>
<tr><td>scrypt-jane SSSE3 64bit </td><td>ChaCha20/8  </td><td>19.0m</td><td> 81.3m</td><td>5600.7m</td><td>115.1%</td></tr>
<tr><td>scrypt-jane AVX 64bit   </td><td>Salsa20/8   </td><td>19.0m</td><td> 81.2m</td><td>5610.6m</td><td>115.3%</td></tr>
<tr><td>scrypt-jane AVX 32bit   </td><td>Salsa20/8   </td><td>19.0m</td><td> 81.3m</td><td>5621.6m</td><td>115.6%</td></tr>
<tr><td>scrypt-jane SSSE3 32bit </td><td>ChaCha20/8  </td><td>19.1m</td><td> 81.8m</td><td>5621.6m</td><td>115.0%</td></tr>
<tr><td>scrypt-jane SSE2 64bit  </td><td>Salsa20/8   </td><td>19.5m</td><td> 83.8m</td><td>5772.9m</td><td>115.6%</td></tr>
<tr><td>scrypt-jane SSE2 32bit  </td><td>Salsa20/8   </td><td>19.6m</td><td> 84.0m</td><td>5793.9m</td><td>115.5%</td></tr>
<tr><td>*Reference SSE2/AVX 64bit* </td><td>Salsa20/8 </td><td>21.5m</td><td> 90.4m</td><td>6147.1m</td><td>111.7%</td></tr>
<tr><td>*Reference SSE2/AVX 32bit* </td><td>Salsa20/8 </td><td>22.3m</td><td> 94.0m</td><td>6267.7m</td><td>110.0%</td></tr>
<tr><td>scrypt-jane SSE2 64bit  </td><td>ChaCha20/8  </td><td>23.1m</td><td> 97.7m</td><td>6670.0m</td><td>112.8%</td></tr>
<tr><td>scrypt-jane SSE2 32bit  </td><td>ChaCha20/8  </td><td>23.3m</td><td> 98.4m</td><td>6728.7m</td><td>112.8%</td></tr>
<tr><td>*Reference SSE2 64bit*  </td><td>Salsa20/8   </td><td>30.4m</td><td>125.6m</td><td>8139.4m</td><td>104.6%</td></tr>
<tr><td>*Reference SSE2 32bit*  </td><td>Salsa20/8   </td><td>30.0m</td><td>124.5m</td><td>8469.3m</td><td>110.3%</td></tr>
</tbody>
</table>

* scrypt-jane Salsa6420/8-AVX is 1.60x - 1.82x faster than reference Salsa20/8-SSE2/AVX
* scrypt-jane ChaCha20/8-AVX is 1.13x - 1.19x faster than reference Salsa20/8-SSE2/AVX
* scrypt-jane Salsa20/8-AVX is 1.09x - 1.13x faster than reference Salsa20/8-SSE2/AVX

# Building

    [gcc,icc,clang] scrypt-jane.c -O3 -[m32,m64] -DSCRYPT_MIX -DSCRYPT_HASH -c

where SCRYPT_MIX is one of

* SCRYPT_SALSA
* SCRYPT_SALSA64 (no optimized 32-bit implementation)
* SCRYPT_CHACHA

and SCRYPT_HASH is one of

* SCRYPT_SHA256
* SCRYPT_SHA512
* SCRYPT_BLAKE256
* SCRYPT_BLAKE512
* SCRYPT_SKEIN512
* SCRYPT_KECCAK256
* SCRYPT_KECCAK512

e.g.

    gcc scrypt-jane.c -O3 -DSCRYPT_CHACHA -DSCRYPT_BLAKE512 -c
    gcc example.c scrypt-jane.o -o example

clang *may* need "-no-integrated-as", as some versions don't support ".intel_syntax".

# Using

    #include "scrypt-jane.h"

    scrypt(password, password_len, salt, salt_len, Nfactor, pfactor, rfactor, out, want_bytes);

## scrypt parameters

* Nfactor: Increases CPU & Memory Hardness
* rfactor: Increases Memory Hardness
* pfactor: Increases CPU Hardness

In scrypt terms (a small conversion sketch follows below):

* N = (1 << (Nfactor + 1)), which controls how many times to mix each chunk, and how many temporary chunks are used. Increasing N increases both CPU time and memory used.
* r = (1 << rfactor), which controls how many blocks are in a chunk (i.e., 2 * r blocks are in a chunk). Increasing r increases how much memory is used.
* p = (1 << pfactor), which controls how many passes to perform over the set of N chunks. Increasing p increases CPU time used.

I chose to use the log2 of each parameter as it is the common way to communicate settings (e.g. 2^20, not 1048576).
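A small standalone sketch of that conversion (illustrative only, not part of the library API; the memory estimate assumes the default 64-byte block, i.e. 128 * r bytes per chunk):

    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        unsigned Nfactor = 13, rfactor = 3, pfactor = 0; /* the 'Interactive' setting */

        uint64_t N = 1ull << (Nfactor + 1);              /* 16384 */
        uint64_t r = 1ull << rfactor;                    /* 8     */
        uint64_t p = 1ull << pfactor;                    /* 1     */

        /* ROMix keeps N temporary chunks of 128 * r bytes each */
        uint64_t mem = 128ull * r * N;

        printf("N=%llu r=%llu p=%llu mem=%llu bytes\n",
               (unsigned long long)N, (unsigned long long)r,
               (unsigned long long)p, (unsigned long long)mem);
        return 0;
    }

With these example factors it prints N=16384 r=8 p=1 mem=16777216 bytes, matching the 'Interactive' benchmark setting above.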
# License

Public Domain, or MIT
deps/scrypt-jane-master/_hashcat/macos.patch (vendored, new file, 87 lines)
@@ -0,0 +1,87 @@
diff --git a/code/scrypt-jane-portable-x86.h b/code/scrypt-jane-portable-x86.h
index 396a7bd..e7858cd 100644
--- a/code/scrypt-jane-portable-x86.h
+++ b/code/scrypt-jane-portable-x86.h
@@ -459,4 +459,11 @@ get_top_cpuflag_desc(size_t flag) {
 #endif
 #endif

-#endif /* defined(CPU_X86) || defined(CPU_X86_64) */
\ No newline at end of file
+#else
+
+static size_t
+detect_cpu(void) {
+    return 0;
+}
+
+#endif /* defined(CPU_X86) || defined(CPU_X86_64) */
diff --git a/code/scrypt-jane-portable.h b/code/scrypt-jane-portable.h
index e83e314..26690ed 100644
--- a/code/scrypt-jane-portable.h
+++ b/code/scrypt-jane-portable.h
@@ -92,7 +92,7 @@
     #define STDCALL __stdcall
     #undef NAKED
     #define NAKED __declspec(naked)
-    #define ALIGN(n) __declspec(align(n))
+    #define JANE_ALIGN(n) __declspec(align(n))
 #endif
 #if defined(__ICC)
     #define COMPILER_INTEL
@@ -136,7 +136,7 @@
     #define CDECL __attribute__((cdecl))
     #undef STDCALL
     #define STDCALL __attribute__((stdcall))
-    #define ALIGN(n) __attribute__((aligned(n)))
+    #define JANE_ALIGN(n) __attribute__((aligned(n)))
     #include <stdint.h>
 #endif
 #if defined(__MINGW32__) || defined(__MINGW64__)
diff --git a/code/scrypt-jane-romix-basic.h b/code/scrypt-jane-romix-basic.h
index 57ba649..ddae500 100644
--- a/code/scrypt-jane-romix-basic.h
+++ b/code/scrypt-jane-romix-basic.h
@@ -35,9 +35,9 @@ scrypt_test_mix_instance(chunkmixfn mixfn, blockfixfn prefn, blockfixfn postfn,
     /* r = 2, (2 * r) = 4 blocks in a chunk, 4 * SCRYPT_BLOCK_WORDS total */
     const uint32_t r = 2, blocks = 2 * r, words = blocks * SCRYPT_BLOCK_WORDS;
 #if (defined(X86ASM_AVX2) || defined(X86_64ASM_AVX2) || defined(X86_INTRINSIC_AVX2))
-    scrypt_mix_word_t ALIGN(32) chunk[2][4 * SCRYPT_BLOCK_WORDS], v;
+    scrypt_mix_word_t JANE_ALIGN(32) chunk[2][4 * SCRYPT_BLOCK_WORDS], v;
 #else
-    scrypt_mix_word_t ALIGN(16) chunk[2][4 * SCRYPT_BLOCK_WORDS], v;
+    scrypt_mix_word_t JANE_ALIGN(16) chunk[2][4 * SCRYPT_BLOCK_WORDS], v;
 #endif
     uint8_t final[16];
     size_t i;
diff --git a/code/scrypt-jane-romix-template.h b/code/scrypt-jane-romix-template.h
index 6bbda62..77c0114 100644
--- a/code/scrypt-jane-romix-template.h
+++ b/code/scrypt-jane-romix-template.h
@@ -20,9 +20,9 @@
 static void asm_calling_convention
 SCRYPT_CHUNKMIX_FN(scrypt_mix_word_t *Bout/*[chunkWords]*/, scrypt_mix_word_t *Bin/*[chunkWords]*/, scrypt_mix_word_t *Bxor/*[chunkWords]*/, uint32_t r) {
 #if (defined(X86ASM_AVX2) || defined(X86_64ASM_AVX2) || defined(X86_INTRINSIC_AVX2))
-    scrypt_mix_word_t ALIGN(32) X[SCRYPT_BLOCK_WORDS], *block;
+    scrypt_mix_word_t JANE_ALIGN(32) X[SCRYPT_BLOCK_WORDS], *block;
 #else
-    scrypt_mix_word_t ALIGN(16) X[SCRYPT_BLOCK_WORDS], *block;
+    scrypt_mix_word_t JANE_ALIGN(16) X[SCRYPT_BLOCK_WORDS], *block;
 #endif
     uint32_t i, j, blocksPerChunk = r * 2, half = 0;

diff --git a/test.sh b/test.sh
old mode 100644
new mode 100755
index dc3d032..13be9eb
--- a/test.sh
+++ b/test.sh
@@ -2,7 +2,7 @@

 test() {
     sleep 0.25 # mingw is stupid and will occasionally not have permission to overwrite scrypt_test
-    gcc scrypt-jane-test.c -O3 -DSCRYPT_$1 -DSCRYPT_$2 $3 -o scrypt_test 2>/dev/null
+    gcc scrypt-jane-test.c -O3 -DSCRYPT_$1 -DSCRYPT_$2 $3 -o scrypt_test #2>/dev/null
     local RC=$?
     if [ $RC -ne 0 ]; then
         echo "$1/$2: failed to compile "
deps/scrypt-jane-master/code/scrypt-conf.h (vendored, new file, 28 lines)
@@ -0,0 +1,28 @@
/*
    pick the best algo at runtime or compile time?
    ----------------------------------------------
    SCRYPT_CHOOSE_COMPILETIME (gcc only!)
    SCRYPT_CHOOSE_RUNTIME
*/
#define SCRYPT_CHOOSE_RUNTIME

/*
    hash function to use
    -------------------------------
    SCRYPT_BLAKE256
    SCRYPT_BLAKE512
    SCRYPT_SHA256
    SCRYPT_SHA512
    SCRYPT_SKEIN512
*/
//#define SCRYPT_SHA256

/*
    block mixer to use
    -----------------------------
    SCRYPT_CHACHA
    SCRYPT_SALSA
*/
//#define SCRYPT_SALSA
deps/scrypt-jane-master/code/scrypt-jane-chacha.h (vendored, new file, 162 lines)
@@ -0,0 +1,162 @@
#define SCRYPT_MIX_BASE "ChaCha20/8"

typedef uint32_t scrypt_mix_word_t;

#define SCRYPT_WORDTO8_LE U32TO8_LE
#define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP

#define SCRYPT_BLOCK_BYTES 64
#define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t))

/* must have these here in case block bytes is ever != 64 */
#include "scrypt-jane-romix-basic.h"

#include "scrypt-jane-mix_chacha-xop.h"
#include "scrypt-jane-mix_chacha-avx.h"
#include "scrypt-jane-mix_chacha-ssse3.h"
#include "scrypt-jane-mix_chacha-sse2.h"
#include "scrypt-jane-mix_chacha.h"

#if defined(SCRYPT_CHACHA_XOP)
    #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_xop
    #define SCRYPT_ROMIX_FN scrypt_ROMix_xop
    #define SCRYPT_MIX_FN chacha_core_xop
    #define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_nop
    #define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_nop
    #include "scrypt-jane-romix-template.h"
#endif

#if defined(SCRYPT_CHACHA_AVX)
    #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_avx
    #define SCRYPT_ROMIX_FN scrypt_ROMix_avx
    #define SCRYPT_MIX_FN chacha_core_avx
    #define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_nop
    #define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_nop
    #include "scrypt-jane-romix-template.h"
#endif

#if defined(SCRYPT_CHACHA_SSSE3)
    #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_ssse3
    #define SCRYPT_ROMIX_FN scrypt_ROMix_ssse3
    #define SCRYPT_MIX_FN chacha_core_ssse3
    #define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_nop
    #define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_nop
    #include "scrypt-jane-romix-template.h"
#endif

#if defined(SCRYPT_CHACHA_SSE2)
    #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_sse2
    #define SCRYPT_ROMIX_FN scrypt_ROMix_sse2
    #define SCRYPT_MIX_FN chacha_core_sse2
    #define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_nop
    #define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_nop
    #include "scrypt-jane-romix-template.h"
#endif

/* cpu agnostic */
#define SCRYPT_ROMIX_FN scrypt_ROMix_basic
#define SCRYPT_MIX_FN chacha_core_basic
#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_convert_endian
#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_convert_endian
#include "scrypt-jane-romix-template.h"

#if !defined(SCRYPT_CHOOSE_COMPILETIME)
static scrypt_ROMixfn
scrypt_getROMix(void) {
    size_t cpuflags = detect_cpu();

#if defined(SCRYPT_CHACHA_XOP)
    if (cpuflags & cpu_xop)
        return scrypt_ROMix_xop;
    else
#endif

#if defined(SCRYPT_CHACHA_AVX)
    if (cpuflags & cpu_avx)
        return scrypt_ROMix_avx;
    else
#endif

#if defined(SCRYPT_CHACHA_SSSE3)
    if (cpuflags & cpu_ssse3)
        return scrypt_ROMix_ssse3;
    else
#endif

#if defined(SCRYPT_CHACHA_SSE2)
    if (cpuflags & cpu_sse2)
        return scrypt_ROMix_sse2;
    else
#endif

    return scrypt_ROMix_basic;
}
#endif

#if defined(SCRYPT_TEST_SPEED)
static size_t
available_implementations(void) {
    size_t cpuflags = detect_cpu();
    size_t flags = 0;

#if defined(SCRYPT_CHACHA_XOP)
    if (cpuflags & cpu_xop)
        flags |= cpu_xop;
#endif

#if defined(SCRYPT_CHACHA_AVX)
    if (cpuflags & cpu_avx)
        flags |= cpu_avx;
#endif

#if defined(SCRYPT_CHACHA_SSSE3)
    if (cpuflags & cpu_ssse3)
        flags |= cpu_ssse3;
#endif

#if defined(SCRYPT_CHACHA_SSE2)
    if (cpuflags & cpu_sse2)
        flags |= cpu_sse2;
#endif

    return flags;
}
#endif

static int
scrypt_test_mix(void) {
    static const uint8_t expected[16] = {
        0x48,0x2b,0x2d,0xb8,0xa1,0x33,0x22,0x73,0xcd,0x16,0xc4,0xb4,0xb0,0x7f,0xb1,0x8a,
    };

    int ret = 1;
    size_t cpuflags = detect_cpu();

#if defined(SCRYPT_CHACHA_XOP)
    if (cpuflags & cpu_xop)
        ret &= scrypt_test_mix_instance(scrypt_ChunkMix_xop, scrypt_romix_nop, scrypt_romix_nop, expected);
#endif

#if defined(SCRYPT_CHACHA_AVX)
    if (cpuflags & cpu_avx)
        ret &= scrypt_test_mix_instance(scrypt_ChunkMix_avx, scrypt_romix_nop, scrypt_romix_nop, expected);
#endif

#if defined(SCRYPT_CHACHA_SSSE3)
    if (cpuflags & cpu_ssse3)
        ret &= scrypt_test_mix_instance(scrypt_ChunkMix_ssse3, scrypt_romix_nop, scrypt_romix_nop, expected);
#endif

#if defined(SCRYPT_CHACHA_SSE2)
    if (cpuflags & cpu_sse2)
        ret &= scrypt_test_mix_instance(scrypt_ChunkMix_sse2, scrypt_romix_nop, scrypt_romix_nop, expected);
#endif

#if defined(SCRYPT_CHACHA_BASIC)
    ret &= scrypt_test_mix_instance(scrypt_ChunkMix_basic, scrypt_romix_convert_endian, scrypt_romix_convert_endian, expected);
#endif

    return ret;
}
deps/scrypt-jane-master/code/scrypt-jane-hash.h (vendored, new file, 48 lines)
@@ -0,0 +1,48 @@
#if defined(SCRYPT_BLAKE512)
#include "scrypt-jane-hash_blake512.h"
#elif defined(SCRYPT_BLAKE256)
#include "scrypt-jane-hash_blake256.h"
#elif defined(SCRYPT_SHA512)
#include "scrypt-jane-hash_sha512.h"
#elif defined(SCRYPT_SHA256)
#include "scrypt-jane-hash_sha256.h"
#elif defined(SCRYPT_SKEIN512)
#include "scrypt-jane-hash_skein512.h"
#elif defined(SCRYPT_KECCAK512) || defined(SCRYPT_KECCAK256)
#include "scrypt-jane-hash_keccak.h"
#else
    #define SCRYPT_HASH "ERROR"
    #define SCRYPT_HASH_BLOCK_SIZE 64
    #define SCRYPT_HASH_DIGEST_SIZE 64
    typedef struct scrypt_hash_state_t { size_t dummy; } scrypt_hash_state;
    typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];
    static void scrypt_hash_init(scrypt_hash_state *S) {}
    static void scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {}
    static void scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {}
    static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {0};
    #error must define a hash function!
#endif

#include "scrypt-jane-pbkdf2.h"

#define SCRYPT_TEST_HASH_LEN 257 /* (2 * largest block size) + 1 */

static int
scrypt_test_hash(void) {
    scrypt_hash_state st;
    scrypt_hash_digest hash, final;
    uint8_t msg[SCRYPT_TEST_HASH_LEN];
    size_t i;

    for (i = 0; i < SCRYPT_TEST_HASH_LEN; i++)
        msg[i] = (uint8_t)i;

    scrypt_hash_init(&st);
    for (i = 0; i < SCRYPT_TEST_HASH_LEN + 1; i++) {
        scrypt_hash(hash, msg, i);
        scrypt_hash_update(&st, hash, sizeof(hash));
    }
    scrypt_hash_finish(&st, final);
    return scrypt_verify(final, scrypt_test_hash_expected, SCRYPT_HASH_DIGEST_SIZE);
}
deps/scrypt-jane-master/code/scrypt-jane-hash_blake256.h (vendored, new file, 177 lines)
@@ -0,0 +1,177 @@
#define SCRYPT_HASH "BLAKE-256"
#define SCRYPT_HASH_BLOCK_SIZE 64
#define SCRYPT_HASH_DIGEST_SIZE 32

typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];

const uint8_t blake256_sigma[] = {
     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,
    14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3,
    11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4,
     7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8,
     9, 0, 5, 7, 2, 4,10,15,14, 1,11,12, 6, 8, 3,13,
     2,12, 6,10, 0,11, 8, 3, 4,13, 7, 5,15,14, 1, 9,
    12, 5, 1,15,14,13, 4,10, 0, 7, 6, 3, 9, 2, 8,11,
    13,11, 7,14,12, 1, 3, 9, 5, 0,15, 4, 8, 6, 2,10,
     6,15,14, 9,11, 3, 0, 8,12, 2,13, 7, 1, 4,10, 5,
    10, 2, 8, 4, 7, 6, 1, 5,15,11, 9,14, 3,12,13, 0,
};

const uint32_t blake256_constants[16] = {
    0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344, 0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89,
    0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c, 0xc0ac29b7, 0xc97c50dd, 0x3f84d5b5, 0xb5470917
};

typedef struct scrypt_hash_state_t {
    uint32_t H[8], T[2];
    uint32_t leftover;
    uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE];
} scrypt_hash_state;

static void
blake256_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks) {
    const uint8_t *sigma, *sigma_end = blake256_sigma + (10 * 16);
    uint32_t m[16], v[16], h[8], t[2];
    uint32_t i;

    for (i = 0; i < 8; i++) h[i] = S->H[i];
    for (i = 0; i < 2; i++) t[i] = S->T[i];

    while (blocks--) {
        t[0] += 512;
        t[1] += (t[0] < 512) ? 1 : 0;

        for (i = 0; i < 8; i++) v[i     ] = h[i];
        for (i = 0; i < 4; i++) v[i +  8] = blake256_constants[i];
        for (i = 0; i < 2; i++) v[i + 12] = blake256_constants[i+4] ^ t[0];
        for (i = 0; i < 2; i++) v[i + 14] = blake256_constants[i+6] ^ t[1];

        for (i = 0; i < 16; i++) m[i] = U8TO32_BE(&in[i * 4]);
        in += 64;

        #define G(a,b,c,d,e) \
            v[a] += (m[sigma[e+0]] ^ blake256_constants[sigma[e+1]]) + v[b]; \
            v[d] = ROTR32(v[d] ^ v[a],16); \
            v[c] += v[d]; \
            v[b] = ROTR32(v[b] ^ v[c],12); \
            v[a] += (m[sigma[e+1]] ^ blake256_constants[sigma[e+0]]) + v[b]; \
            v[d] = ROTR32(v[d] ^ v[a], 8); \
            v[c] += v[d]; \
            v[b] = ROTR32(v[b] ^ v[c], 7);

        for (i = 0, sigma = blake256_sigma; i < 14; i++) {
            G(0, 4, 8,12, 0);
            G(1, 5, 9,13, 2);
            G(2, 6,10,14, 4);
            G(3, 7,11,15, 6);

            G(0, 5,10,15, 8);
            G(1, 6,11,12,10);
            G(2, 7, 8,13,12);
            G(3, 4, 9,14,14);

            sigma += 16;
            if (sigma == sigma_end)
                sigma = blake256_sigma;
        }

        #undef G

        for (i = 0; i < 8; i++) h[i] ^= (v[i] ^ v[i + 8]);
    }

    for (i = 0; i < 8; i++) S->H[i] = h[i];
    for (i = 0; i < 2; i++) S->T[i] = t[i];
}

static void
scrypt_hash_init(scrypt_hash_state *S) {
    S->H[0] = 0x6a09e667ULL;
    S->H[1] = 0xbb67ae85ULL;
    S->H[2] = 0x3c6ef372ULL;
    S->H[3] = 0xa54ff53aULL;
    S->H[4] = 0x510e527fULL;
    S->H[5] = 0x9b05688cULL;
    S->H[6] = 0x1f83d9abULL;
    S->H[7] = 0x5be0cd19ULL;
    S->T[0] = 0;
    S->T[1] = 0;
    S->leftover = 0;
}

static void
scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {
    size_t blocks, want;

    /* handle the previous data */
    if (S->leftover) {
        want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover);
        want = (want < inlen) ? want : inlen;
        memcpy(S->buffer + S->leftover, in, want);
        S->leftover += (uint32_t)want;
        if (S->leftover < SCRYPT_HASH_BLOCK_SIZE)
            return;
        in += want;
        inlen -= want;
        blake256_blocks(S, S->buffer, 1);
    }

    /* handle the current data */
    blocks = (inlen & ~(SCRYPT_HASH_BLOCK_SIZE - 1));
    S->leftover = (uint32_t)(inlen - blocks);
    if (blocks) {
        blake256_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE);
        in += blocks;
    }

    /* handle leftover data */
    if (S->leftover)
        memcpy(S->buffer, in, S->leftover);
}

static void
scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {
    uint32_t th, tl, bits;

    bits = (S->leftover << 3);
    tl = S->T[0] + bits;
    th = S->T[1];
    if (S->leftover == 0) {
        S->T[0] = (uint32_t)0 - (uint32_t)512;
        S->T[1] = (uint32_t)0 - (uint32_t)1;
    } else if (S->T[0] == 0) {
        S->T[0] = ((uint32_t)0 - (uint32_t)512) + bits;
        S->T[1] = S->T[1] - 1;
    } else {
        S->T[0] -= (512 - bits);
    }

    S->buffer[S->leftover] = 0x80;
    if (S->leftover <= 55) {
        memset(S->buffer + S->leftover + 1, 0, 55 - S->leftover);
    } else {
        memset(S->buffer + S->leftover + 1, 0, 63 - S->leftover);
        blake256_blocks(S, S->buffer, 1);
        S->T[0] = (uint32_t)0 - (uint32_t)512;
        S->T[1] = (uint32_t)0 - (uint32_t)1;
        memset(S->buffer, 0, 56);
    }
    S->buffer[55] |= 1;
    U32TO8_BE(S->buffer + 56, th);
    U32TO8_BE(S->buffer + 60, tl);
    blake256_blocks(S, S->buffer, 1);

    U32TO8_BE(&hash[ 0], S->H[0]);
    U32TO8_BE(&hash[ 4], S->H[1]);
    U32TO8_BE(&hash[ 8], S->H[2]);
    U32TO8_BE(&hash[12], S->H[3]);
    U32TO8_BE(&hash[16], S->H[4]);
    U32TO8_BE(&hash[20], S->H[5]);
    U32TO8_BE(&hash[24], S->H[6]);
    U32TO8_BE(&hash[28], S->H[7]);
}

static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {
    0xcc,0xa9,0x1e,0xa9,0x20,0x97,0x37,0x40,0x17,0xc0,0xa0,0x52,0x87,0xfc,0x08,0x20,
    0x40,0xf5,0x81,0x86,0x62,0x75,0x78,0xb2,0x79,0xce,0xde,0x27,0x3c,0x7f,0x85,0xd8,
};
deps/scrypt-jane-master/code/scrypt-jane-hash_blake512.h (vendored, new file, 181 lines)
@@ -0,0 +1,181 @@
#define SCRYPT_HASH "BLAKE-512"
#define SCRYPT_HASH_BLOCK_SIZE 128
#define SCRYPT_HASH_DIGEST_SIZE 64

typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];

const uint8_t blake512_sigma[] = {
     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,
    14,10, 4, 8, 9,15,13, 6, 1,12, 0, 2,11, 7, 5, 3,
    11, 8,12, 0, 5, 2,15,13,10,14, 3, 6, 7, 1, 9, 4,
     7, 9, 3, 1,13,12,11,14, 2, 6, 5,10, 4, 0,15, 8,
     9, 0, 5, 7, 2, 4,10,15,14, 1,11,12, 6, 8, 3,13,
     2,12, 6,10, 0,11, 8, 3, 4,13, 7, 5,15,14, 1, 9,
    12, 5, 1,15,14,13, 4,10, 0, 7, 6, 3, 9, 2, 8,11,
    13,11, 7,14,12, 1, 3, 9, 5, 0,15, 4, 8, 6, 2,10,
     6,15,14, 9,11, 3, 0, 8,12, 2,13, 7, 1, 4,10, 5,
    10, 2, 8, 4, 7, 6, 1, 5,15,11, 9,14, 3,12,13, 0,
};

const uint64_t blake512_constants[16] = {
    0x243f6a8885a308d3ULL, 0x13198a2e03707344ULL, 0xa4093822299f31d0ULL, 0x082efa98ec4e6c89ULL,
    0x452821e638d01377ULL, 0xbe5466cf34e90c6cULL, 0xc0ac29b7c97c50ddULL, 0x3f84d5b5b5470917ULL,
    0x9216d5d98979fb1bULL, 0xd1310ba698dfb5acULL, 0x2ffd72dbd01adfb7ULL, 0xb8e1afed6a267e96ULL,
    0xba7c9045f12c7f99ULL, 0x24a19947b3916cf7ULL, 0x0801f2e2858efc16ULL, 0x636920d871574e69ULL
};

typedef struct scrypt_hash_state_t {
    uint64_t H[8], T[2];
    uint32_t leftover;
    uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE];
} scrypt_hash_state;

static void
blake512_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks) {
    const uint8_t *sigma, *sigma_end = blake512_sigma + (10 * 16);
    uint64_t m[16], v[16], h[8], t[2];
    uint32_t i;

    for (i = 0; i < 8; i++) h[i] = S->H[i];
    for (i = 0; i < 2; i++) t[i] = S->T[i];

    while (blocks--) {
        t[0] += 1024;
        t[1] += (t[0] < 1024) ? 1 : 0;

        for (i = 0; i < 8; i++) v[i     ] = h[i];
        for (i = 0; i < 4; i++) v[i +  8] = blake512_constants[i];
        for (i = 0; i < 2; i++) v[i + 12] = blake512_constants[i+4] ^ t[0];
        for (i = 0; i < 2; i++) v[i + 14] = blake512_constants[i+6] ^ t[1];

        for (i = 0; i < 16; i++) m[i] = U8TO64_BE(&in[i * 8]);
        in += 128;

        #define G(a,b,c,d,e) \
            v[a] += (m[sigma[e+0]] ^ blake512_constants[sigma[e+1]]) + v[b]; \
            v[d] = ROTR64(v[d] ^ v[a],32); \
            v[c] += v[d]; \
            v[b] = ROTR64(v[b] ^ v[c],25); \
            v[a] += (m[sigma[e+1]] ^ blake512_constants[sigma[e+0]]) + v[b]; \
            v[d] = ROTR64(v[d] ^ v[a],16); \
            v[c] += v[d]; \
            v[b] = ROTR64(v[b] ^ v[c],11);

        for (i = 0, sigma = blake512_sigma; i < 16; i++) {
            G(0, 4, 8,12, 0);
            G(1, 5, 9,13, 2);
            G(2, 6,10,14, 4);
            G(3, 7,11,15, 6);
            G(0, 5,10,15, 8);
            G(1, 6,11,12,10);
            G(2, 7, 8,13,12);
            G(3, 4, 9,14,14);

            sigma += 16;
            if (sigma == sigma_end)
                sigma = blake512_sigma;
        }

        #undef G

        for (i = 0; i < 8; i++) h[i] ^= (v[i] ^ v[i + 8]);
    }

    for (i = 0; i < 8; i++) S->H[i] = h[i];
    for (i = 0; i < 2; i++) S->T[i] = t[i];
}

static void
scrypt_hash_init(scrypt_hash_state *S) {
    S->H[0] = 0x6a09e667f3bcc908ULL;
    S->H[1] = 0xbb67ae8584caa73bULL;
    S->H[2] = 0x3c6ef372fe94f82bULL;
    S->H[3] = 0xa54ff53a5f1d36f1ULL;
    S->H[4] = 0x510e527fade682d1ULL;
    S->H[5] = 0x9b05688c2b3e6c1fULL;
    S->H[6] = 0x1f83d9abfb41bd6bULL;
    S->H[7] = 0x5be0cd19137e2179ULL;
    S->T[0] = 0;
    S->T[1] = 0;
    S->leftover = 0;
}

static void
scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {
    size_t blocks, want;

    /* handle the previous data */
    if (S->leftover) {
        want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover);
        want = (want < inlen) ? want : inlen;
        memcpy(S->buffer + S->leftover, in, want);
        S->leftover += (uint32_t)want;
        if (S->leftover < SCRYPT_HASH_BLOCK_SIZE)
            return;
        in += want;
        inlen -= want;
        blake512_blocks(S, S->buffer, 1);
    }

    /* handle the current data */
    blocks = (inlen & ~(SCRYPT_HASH_BLOCK_SIZE - 1));
    S->leftover = (uint32_t)(inlen - blocks);
    if (blocks) {
        blake512_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE);
        in += blocks;
    }

    /* handle leftover data */
    if (S->leftover)
        memcpy(S->buffer, in, S->leftover);
}

static void
scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {
    uint64_t th, tl;
    size_t bits;

    bits = (S->leftover << 3);
    tl = S->T[0] + bits;
    th = S->T[1];
    if (S->leftover == 0) {
        S->T[0] = (uint64_t)0 - (uint64_t)1024;
        S->T[1] = (uint64_t)0 - (uint64_t)1;
    } else if (S->T[0] == 0) {
        S->T[0] = ((uint64_t)0 - (uint64_t)1024) + bits;
        S->T[1] = S->T[1] - 1;
    } else {
        S->T[0] -= (1024 - bits);
    }

    S->buffer[S->leftover] = 0x80;
    if (S->leftover <= 111) {
        memset(S->buffer + S->leftover + 1, 0, 111 - S->leftover);
    } else {
        memset(S->buffer + S->leftover + 1, 0, 127 - S->leftover);
        blake512_blocks(S, S->buffer, 1);
        S->T[0] = (uint64_t)0 - (uint64_t)1024;
        S->T[1] = (uint64_t)0 - (uint64_t)1;
        memset(S->buffer, 0, 112);
    }
    S->buffer[111] |= 1;
    U64TO8_BE(S->buffer + 112, th);
    U64TO8_BE(S->buffer + 120, tl);
    blake512_blocks(S, S->buffer, 1);

    U64TO8_BE(&hash[ 0], S->H[0]);
    U64TO8_BE(&hash[ 8], S->H[1]);
    U64TO8_BE(&hash[16], S->H[2]);
    U64TO8_BE(&hash[24], S->H[3]);
    U64TO8_BE(&hash[32], S->H[4]);
    U64TO8_BE(&hash[40], S->H[5]);
    U64TO8_BE(&hash[48], S->H[6]);
    U64TO8_BE(&hash[56], S->H[7]);
}

static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {
    0x2f,0x9d,0x5b,0xbe,0x24,0x0d,0x63,0xd3,0xa0,0xac,0x4f,0xd3,0x01,0xc0,0x23,0x6f,
    0x6d,0xdf,0x6e,0xfb,0x60,0x6f,0xa0,0x74,0xdf,0x9f,0x25,0x65,0xb6,0x11,0x0a,0x83,
    0x23,0x96,0xba,0x91,0x68,0x4b,0x85,0x15,0x13,0x54,0xba,0x19,0xf3,0x2c,0x5a,0x4a,
    0x1f,0x78,0x31,0x02,0xc9,0x1e,0x56,0xc4,0x54,0xca,0xf9,0x8f,0x2c,0x7f,0x85,0xac
};
deps/scrypt-jane-master/code/scrypt-jane-hash_keccak.h (vendored, new file, 168 lines)
@@ -0,0 +1,168 @@
#if defined(SCRYPT_KECCAK256)
    #define SCRYPT_HASH "Keccak-256"
    #define SCRYPT_HASH_DIGEST_SIZE 32
#else
    #define SCRYPT_HASH "Keccak-512"
    #define SCRYPT_HASH_DIGEST_SIZE 64
#endif
#define SCRYPT_KECCAK_F 1600
#define SCRYPT_KECCAK_C (SCRYPT_HASH_DIGEST_SIZE * 8 * 2) /* 256=512, 512=1024 */
#define SCRYPT_KECCAK_R (SCRYPT_KECCAK_F - SCRYPT_KECCAK_C) /* 256=1088, 512=576 */
#define SCRYPT_HASH_BLOCK_SIZE (SCRYPT_KECCAK_R / 8)

typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];

typedef struct scrypt_hash_state_t {
    uint64_t state[SCRYPT_KECCAK_F / 64];
    uint32_t leftover;
    uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE];
} scrypt_hash_state;

static const uint64_t keccak_round_constants[24] = {
    0x0000000000000001ull, 0x0000000000008082ull,
    0x800000000000808aull, 0x8000000080008000ull,
    0x000000000000808bull, 0x0000000080000001ull,
    0x8000000080008081ull, 0x8000000000008009ull,
    0x000000000000008aull, 0x0000000000000088ull,
    0x0000000080008009ull, 0x000000008000000aull,
    0x000000008000808bull, 0x800000000000008bull,
    0x8000000000008089ull, 0x8000000000008003ull,
    0x8000000000008002ull, 0x8000000000000080ull,
    0x000000000000800aull, 0x800000008000000aull,
    0x8000000080008081ull, 0x8000000000008080ull,
    0x0000000080000001ull, 0x8000000080008008ull
};

static void
keccak_block(scrypt_hash_state *S, const uint8_t *in) {
    size_t i;
    uint64_t *s = S->state, t[5], u[5], v, w;

    /* absorb input */
    for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE / 8; i++, in += 8)
        s[i] ^= U8TO64_LE(in);

    for (i = 0; i < 24; i++) {
        /* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */
        t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20];
        t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21];
        t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22];
        t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23];
        t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24];

        /* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */
        u[0] = t[4] ^ ROTL64(t[1], 1);
        u[1] = t[0] ^ ROTL64(t[2], 1);
        u[2] = t[1] ^ ROTL64(t[3], 1);
        u[3] = t[2] ^ ROTL64(t[4], 1);
        u[4] = t[3] ^ ROTL64(t[0], 1);

        /* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */
        s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0];
        s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1];
        s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2];
        s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3];
        s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4];

        /* rho pi: b[..] = rotl(a[..], ..) */
        v = s[ 1];
        s[ 1] = ROTL64(s[ 6], 44);
        s[ 6] = ROTL64(s[ 9], 20);
        s[ 9] = ROTL64(s[22], 61);
        s[22] = ROTL64(s[14], 39);
        s[14] = ROTL64(s[20], 18);
        s[20] = ROTL64(s[ 2], 62);
        s[ 2] = ROTL64(s[12], 43);
        s[12] = ROTL64(s[13], 25);
        s[13] = ROTL64(s[19],  8);
        s[19] = ROTL64(s[23], 56);
        s[23] = ROTL64(s[15], 41);
        s[15] = ROTL64(s[ 4], 27);
        s[ 4] = ROTL64(s[24], 14);
        s[24] = ROTL64(s[21],  2);
        s[21] = ROTL64(s[ 8], 55);
        s[ 8] = ROTL64(s[16], 45);
        s[16] = ROTL64(s[ 5], 36);
        s[ 5] = ROTL64(s[ 3], 28);
        s[ 3] = ROTL64(s[18], 21);
        s[18] = ROTL64(s[17], 15);
        s[17] = ROTL64(s[11], 10);
        s[11] = ROTL64(s[ 7],  6);
        s[ 7] = ROTL64(s[10],  3);
        s[10] = ROTL64( v, 1);

        /* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */
        v = s[ 0]; w = s[ 1]; s[ 0] ^= (~w) & s[ 2]; s[ 1] ^= (~s[ 2]) & s[ 3]; s[ 2] ^= (~s[ 3]) & s[ 4]; s[ 3] ^= (~s[ 4]) & v; s[ 4] ^= (~v) & w;
        v = s[ 5]; w = s[ 6]; s[ 5] ^= (~w) & s[ 7]; s[ 6] ^= (~s[ 7]) & s[ 8]; s[ 7] ^= (~s[ 8]) & s[ 9]; s[ 8] ^= (~s[ 9]) & v; s[ 9] ^= (~v) & w;
        v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w;
        v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w;
        v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w;

        /* iota: a[0,0] ^= round constant */
        s[0] ^= keccak_round_constants[i];
    }
}

static void
scrypt_hash_init(scrypt_hash_state *S) {
    memset(S, 0, sizeof(*S));
}

static void
scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {
    size_t want;

    /* handle the previous data */
    if (S->leftover) {
        want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover);
        want = (want < inlen) ? want : inlen;
        memcpy(S->buffer + S->leftover, in, want);
        S->leftover += (uint32_t)want;
        if (S->leftover < SCRYPT_HASH_BLOCK_SIZE)
            return;
        in += want;
        inlen -= want;
        keccak_block(S, S->buffer);
    }

    /* handle the current data */
    while (inlen >= SCRYPT_HASH_BLOCK_SIZE) {
        keccak_block(S, in);
        in += SCRYPT_HASH_BLOCK_SIZE;
        inlen -= SCRYPT_HASH_BLOCK_SIZE;
    }

    /* handle leftover data */
    S->leftover = (uint32_t)inlen;
    if (S->leftover)
        memcpy(S->buffer, in, S->leftover);
}

static void
scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {
    size_t i;

    S->buffer[S->leftover] = 0x01;
    memset(S->buffer + (S->leftover + 1), 0, SCRYPT_HASH_BLOCK_SIZE - (S->leftover + 1));
    S->buffer[SCRYPT_HASH_BLOCK_SIZE - 1] |= 0x80;
    keccak_block(S, S->buffer);

    for (i = 0; i < SCRYPT_HASH_DIGEST_SIZE; i += 8) {
        U64TO8_LE(&hash[i], S->state[i / 8]);
    }
}

#if defined(SCRYPT_KECCAK256)
static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {
    0x26,0xb7,0x10,0xb3,0x66,0xb1,0xd1,0xb1,0x25,0xfc,0x3e,0xe3,0x1e,0x33,0x1d,0x19,
    0x94,0xaa,0x63,0x7a,0xd5,0x77,0x29,0xb4,0x27,0xe9,0xe0,0xf4,0x19,0xba,0x68,0xea,
};
#else
static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {
    0x17,0xc7,0x8c,0xa0,0xd9,0x08,0x1d,0xba,0x8a,0xc8,0x3e,0x07,0x90,0xda,0x91,0x88,
    0x25,0xbd,0xd3,0xf8,0x78,0x4a,0x8d,0x5e,0xe4,0x96,0x9c,0x01,0xf3,0xeb,0xdc,0x12,
    0xea,0x35,0x57,0xba,0x94,0xb8,0xe9,0xb9,0x27,0x45,0x0a,0x48,0x5c,0x3d,0x69,0xf0,
    0xdb,0x22,0x38,0xb5,0x52,0x22,0x29,0xea,0x7a,0xb2,0xe6,0x07,0xaa,0x37,0x4d,0xe6,
};
#endif
deps/scrypt-jane-master/code/scrypt-jane-hash_sha256.h (vendored, new file, 135 lines)
@@ -0,0 +1,135 @@
#define SCRYPT_HASH "SHA-2-256"
#define SCRYPT_HASH_BLOCK_SIZE 64
#define SCRYPT_HASH_DIGEST_SIZE 32

typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];

typedef struct scrypt_hash_state_t {
    uint32_t H[8];
    uint64_t T;
    uint32_t leftover;
    uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE];
} scrypt_hash_state;

static const uint32_t sha256_constants[64] = {
    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};

#define Ch(x,y,z)  (z ^ (x & (y ^ z)))
#define Maj(x,y,z) (((x | y) & z) | (x & y))
#define S0(x)      (ROTR32(x,  2) ^ ROTR32(x, 13) ^ ROTR32(x, 22))
#define S1(x)      (ROTR32(x,  6) ^ ROTR32(x, 11) ^ ROTR32(x, 25))
#define G0(x)      (ROTR32(x,  7) ^ ROTR32(x, 18) ^ (x >> 3))
#define G1(x)      (ROTR32(x, 17) ^ ROTR32(x, 19) ^ (x >> 10))
#define W0(in,i)   (U8TO32_BE(&in[i * 4]))
#define W1(i)      (G1(w[i - 2]) + w[i - 7] + G0(w[i - 15]) + w[i - 16])
#define STEP(i) \
    t1 = S0(r[0]) + Maj(r[0], r[1], r[2]); \
    t0 = r[7] + S1(r[4]) + Ch(r[4], r[5], r[6]) + sha256_constants[i] + w[i]; \
    r[7] = r[6]; \
    r[6] = r[5]; \
    r[5] = r[4]; \
    r[4] = r[3] + t0; \
    r[3] = r[2]; \
    r[2] = r[1]; \
    r[1] = r[0]; \
    r[0] = t0 + t1;

static void
sha256_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks) {
    uint32_t r[8], w[64], t0, t1;
    size_t i;

    for (i = 0; i < 8; i++) r[i] = S->H[i];

    while (blocks--) {
        for (i =  0; i < 16; i++) { w[i] = W0(in, i); }
        for (i = 16; i < 64; i++) { w[i] = W1(i); }
        for (i =  0; i < 64; i++) { STEP(i); }
        for (i =  0; i <  8; i++) { r[i] += S->H[i]; S->H[i] = r[i]; }
        S->T += SCRYPT_HASH_BLOCK_SIZE * 8;
        in += SCRYPT_HASH_BLOCK_SIZE;
    }
}

static void
scrypt_hash_init(scrypt_hash_state *S) {
    S->H[0] = 0x6a09e667;
    S->H[1] = 0xbb67ae85;
    S->H[2] = 0x3c6ef372;
    S->H[3] = 0xa54ff53a;
    S->H[4] = 0x510e527f;
    S->H[5] = 0x9b05688c;
    S->H[6] = 0x1f83d9ab;
    S->H[7] = 0x5be0cd19;
    S->T = 0;
    S->leftover = 0;
}

static void
scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {
    size_t blocks, want;

    /* handle the previous data */
    if (S->leftover) {
        want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover);
        want = (want < inlen) ? want : inlen;
        memcpy(S->buffer + S->leftover, in, want);
        S->leftover += (uint32_t)want;
        if (S->leftover < SCRYPT_HASH_BLOCK_SIZE)
            return;
        in += want;
        inlen -= want;
        sha256_blocks(S, S->buffer, 1);
    }

    /* handle the current data */
    blocks = (inlen & ~(SCRYPT_HASH_BLOCK_SIZE - 1));
    S->leftover = (uint32_t)(inlen - blocks);
    if (blocks) {
        sha256_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE);
        in += blocks;
    }

    /* handle leftover data */
    if (S->leftover)
        memcpy(S->buffer, in, S->leftover);
}

static void
scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {
    uint64_t t = S->T + (S->leftover * 8);

    S->buffer[S->leftover] = 0x80;
    if (S->leftover <= 55) {
        memset(S->buffer + S->leftover + 1, 0, 55 - S->leftover);
    } else {
        memset(S->buffer + S->leftover + 1, 0, 63 - S->leftover);
        sha256_blocks(S, S->buffer, 1);
        memset(S->buffer, 0, 56);
    }

    U64TO8_BE(S->buffer + 56, t);
    sha256_blocks(S, S->buffer, 1);

    U32TO8_BE(&hash[ 0], S->H[0]);
    U32TO8_BE(&hash[ 4], S->H[1]);
    U32TO8_BE(&hash[ 8], S->H[2]);
    U32TO8_BE(&hash[12], S->H[3]);
    U32TO8_BE(&hash[16], S->H[4]);
    U32TO8_BE(&hash[20], S->H[5]);
    U32TO8_BE(&hash[24], S->H[6]);
    U32TO8_BE(&hash[28], S->H[7]);
}

static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {
    0xee,0x36,0xae,0xa6,0x65,0xf0,0x28,0x7d,0xc9,0xde,0xd8,0xad,0x48,0x33,0x7d,0xbf,
    0xcb,0xc0,0x48,0xfa,0x5f,0x92,0xfd,0x0a,0x95,0x6f,0x34,0x8e,0x8c,0x1e,0x73,0xad,
};
152
deps/scrypt-jane-master/code/scrypt-jane-hash_sha512.h
vendored
Normal file
152
deps/scrypt-jane-master/code/scrypt-jane-hash_sha512.h
vendored
Normal file
@ -0,0 +1,152 @@
#define SCRYPT_HASH "SHA-2-512"
#define SCRYPT_HASH_BLOCK_SIZE 128
#define SCRYPT_HASH_DIGEST_SIZE 64

typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];

typedef struct scrypt_hash_state_t {
  uint64_t H[8];
  uint64_t T[2];
  uint32_t leftover;
  uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE];
} scrypt_hash_state;

static const uint64_t sha512_constants[80] = {
  0x428a2f98d728ae22ull, 0x7137449123ef65cdull, 0xb5c0fbcfec4d3b2full, 0xe9b5dba58189dbbcull,
  0x3956c25bf348b538ull, 0x59f111f1b605d019ull, 0x923f82a4af194f9bull, 0xab1c5ed5da6d8118ull,
  0xd807aa98a3030242ull, 0x12835b0145706fbeull, 0x243185be4ee4b28cull, 0x550c7dc3d5ffb4e2ull,
  0x72be5d74f27b896full, 0x80deb1fe3b1696b1ull, 0x9bdc06a725c71235ull, 0xc19bf174cf692694ull,
  0xe49b69c19ef14ad2ull, 0xefbe4786384f25e3ull, 0x0fc19dc68b8cd5b5ull, 0x240ca1cc77ac9c65ull,
  0x2de92c6f592b0275ull, 0x4a7484aa6ea6e483ull, 0x5cb0a9dcbd41fbd4ull, 0x76f988da831153b5ull,
  0x983e5152ee66dfabull, 0xa831c66d2db43210ull, 0xb00327c898fb213full, 0xbf597fc7beef0ee4ull,
  0xc6e00bf33da88fc2ull, 0xd5a79147930aa725ull, 0x06ca6351e003826full, 0x142929670a0e6e70ull,
  0x27b70a8546d22ffcull, 0x2e1b21385c26c926ull, 0x4d2c6dfc5ac42aedull, 0x53380d139d95b3dfull,
  0x650a73548baf63deull, 0x766a0abb3c77b2a8ull, 0x81c2c92e47edaee6ull, 0x92722c851482353bull,
  0xa2bfe8a14cf10364ull, 0xa81a664bbc423001ull, 0xc24b8b70d0f89791ull, 0xc76c51a30654be30ull,
  0xd192e819d6ef5218ull, 0xd69906245565a910ull, 0xf40e35855771202aull, 0x106aa07032bbd1b8ull,
  0x19a4c116b8d2d0c8ull, 0x1e376c085141ab53ull, 0x2748774cdf8eeb99ull, 0x34b0bcb5e19b48a8ull,
  0x391c0cb3c5c95a63ull, 0x4ed8aa4ae3418acbull, 0x5b9cca4f7763e373ull, 0x682e6ff3d6b2b8a3ull,
  0x748f82ee5defb2fcull, 0x78a5636f43172f60ull, 0x84c87814a1f0ab72ull, 0x8cc702081a6439ecull,
  0x90befffa23631e28ull, 0xa4506cebde82bde9ull, 0xbef9a3f7b2c67915ull, 0xc67178f2e372532bull,
  0xca273eceea26619cull, 0xd186b8c721c0c207ull, 0xeada7dd6cde0eb1eull, 0xf57d4f7fee6ed178ull,
  0x06f067aa72176fbaull, 0x0a637dc5a2c898a6ull, 0x113f9804bef90daeull, 0x1b710b35131c471bull,
  0x28db77f523047d84ull, 0x32caab7b40c72493ull, 0x3c9ebe0a15c9bebcull, 0x431d67c49c100d4cull,
  0x4cc5d4becb3e42b6ull, 0x597f299cfc657e2aull, 0x5fcb6fab3ad6faecull, 0x6c44198c4a475817ull
};

#define Ch(x,y,z)  (z ^ (x & (y ^ z)))
#define Maj(x,y,z) (((x | y) & z) | (x & y))
#define S0(x)      (ROTR64(x, 28) ^ ROTR64(x, 34) ^ ROTR64(x, 39))
#define S1(x)      (ROTR64(x, 14) ^ ROTR64(x, 18) ^ ROTR64(x, 41))
#define G0(x)      (ROTR64(x, 1) ^ ROTR64(x, 8) ^ (x >> 7))
#define G1(x)      (ROTR64(x, 19) ^ ROTR64(x, 61) ^ (x >> 6))
#define W0(in,i)   (U8TO64_BE(&in[i * 8]))
#define W1(i)      (G1(w[i - 2]) + w[i - 7] + G0(w[i - 15]) + w[i - 16])
#define STEP(i) \
  t1 = S0(r[0]) + Maj(r[0], r[1], r[2]); \
  t0 = r[7] + S1(r[4]) + Ch(r[4], r[5], r[6]) + sha512_constants[i] + w[i]; \
  r[7] = r[6]; \
  r[6] = r[5]; \
  r[5] = r[4]; \
  r[4] = r[3] + t0; \
  r[3] = r[2]; \
  r[2] = r[1]; \
  r[1] = r[0]; \
  r[0] = t0 + t1;

static void
sha512_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks) {
  uint64_t r[8], w[80], t0, t1;
  size_t i;

  for (i = 0; i < 8; i++) r[i] = S->H[i];

  while (blocks--) {
    for (i = 0; i < 16; i++) { w[i] = W0(in, i); }
    for (i = 16; i < 80; i++) { w[i] = W1(i); }
    for (i = 0; i < 80; i++) { STEP(i); }
    for (i = 0; i < 8; i++) { r[i] += S->H[i]; S->H[i] = r[i]; }

    S->T[0] += SCRYPT_HASH_BLOCK_SIZE * 8;
    S->T[1] += (!S->T[0]) ? 1 : 0;

    in += SCRYPT_HASH_BLOCK_SIZE;
  }
}

static void
scrypt_hash_init(scrypt_hash_state *S) {
  S->H[0] = 0x6a09e667f3bcc908ull;
  S->H[1] = 0xbb67ae8584caa73bull;
  S->H[2] = 0x3c6ef372fe94f82bull;
  S->H[3] = 0xa54ff53a5f1d36f1ull;
  S->H[4] = 0x510e527fade682d1ull;
  S->H[5] = 0x9b05688c2b3e6c1full;
  S->H[6] = 0x1f83d9abfb41bd6bull;
  S->H[7] = 0x5be0cd19137e2179ull;
  S->T[0] = 0;
  S->T[1] = 0;
  S->leftover = 0;
}

static void
scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {
  size_t blocks, want;

  /* handle the previous data */
  if (S->leftover) {
    want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover);
    want = (want < inlen) ? want : inlen;
    memcpy(S->buffer + S->leftover, in, want);
    S->leftover += (uint32_t)want;
    if (S->leftover < SCRYPT_HASH_BLOCK_SIZE)
      return;
    in += want;
    inlen -= want;
    sha512_blocks(S, S->buffer, 1);
  }

  /* handle the current data */
  blocks = (inlen & ~(SCRYPT_HASH_BLOCK_SIZE - 1));
  S->leftover = (uint32_t)(inlen - blocks);
  if (blocks) {
    sha512_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE);
    in += blocks;
  }

  /* handle leftover data */
  if (S->leftover)
    memcpy(S->buffer, in, S->leftover);
}

static void
scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {
  uint64_t t0 = S->T[0] + (S->leftover * 8), t1 = S->T[1];

  S->buffer[S->leftover] = 0x80;
  if (S->leftover <= 111) {
    memset(S->buffer + S->leftover + 1, 0, 111 - S->leftover);
  } else {
    memset(S->buffer + S->leftover + 1, 0, 127 - S->leftover);
    sha512_blocks(S, S->buffer, 1);
    memset(S->buffer, 0, 112);
  }

  U64TO8_BE(S->buffer + 112, t1);
  U64TO8_BE(S->buffer + 120, t0);
  sha512_blocks(S, S->buffer, 1);

  U64TO8_BE(&hash[ 0], S->H[0]);
  U64TO8_BE(&hash[ 8], S->H[1]);
  U64TO8_BE(&hash[16], S->H[2]);
  U64TO8_BE(&hash[24], S->H[3]);
  U64TO8_BE(&hash[32], S->H[4]);
  U64TO8_BE(&hash[40], S->H[5]);
  U64TO8_BE(&hash[48], S->H[6]);
  U64TO8_BE(&hash[56], S->H[7]);
}

static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {
  0xba,0xc3,0x80,0x2b,0x24,0x56,0x95,0x1f,0x19,0x7c,0xa2,0xd3,0x72,0x7c,0x9a,0x4d,
  0x1d,0x50,0x3a,0xa9,0x12,0x27,0xd8,0xe1,0xbe,0x76,0x53,0x87,0x5a,0x1e,0x82,0xec,
  0xc8,0xe1,0x6b,0x87,0xd0,0xb5,0x25,0x7e,0xe8,0x1e,0xd7,0x58,0xc6,0x2d,0xc2,0x9c,
  0x06,0x31,0x8f,0x5b,0x57,0x8e,0x76,0xba,0xd5,0xf6,0xec,0xfe,0x85,0x1f,0x34,0x0c,
};
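The SHA-512 variant widens the message-length counter to 128 bits, split across T[0]/T[1]. Because sha512_blocks only ever adds the fixed 1024 bits per block, T[0] can wrap only to exactly zero, so the cheap carry test (!S->T[0]) suffices. A general-purpose accumulator would need the usual wraparound compare; an illustrative sketch (the type and function names are ours):

#include <stdint.h>

/* sketch: generic 128-bit bit-length accumulation; the vendored code gets away
   with the cheaper (!lo) test because it only adds a fixed multiple of 1024 */
typedef struct { uint64_t lo, hi; } bitlen128_t;

static void
bitlen128_add(bitlen128_t *t, uint64_t bits) {
  t->lo += bits;
  if (t->lo < bits) t->hi++; /* carry into the high word on wraparound */
}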
188
deps/scrypt-jane-master/code/scrypt-jane-hash_skein512.h
vendored
Normal file
@@ -0,0 +1,188 @@
#define SCRYPT_HASH "Skein-512"
#define SCRYPT_HASH_BLOCK_SIZE 64
#define SCRYPT_HASH_DIGEST_SIZE 64

typedef uint8_t scrypt_hash_digest[SCRYPT_HASH_DIGEST_SIZE];

typedef struct scrypt_hash_state_t {
  uint64_t X[8], T[2];
  uint32_t leftover;
  uint8_t buffer[SCRYPT_HASH_BLOCK_SIZE];
} scrypt_hash_state;

#include <stdio.h>

static void
skein512_blocks(scrypt_hash_state *S, const uint8_t *in, size_t blocks, size_t add) {
  uint64_t X[8], key[8], Xt[9+18], T[3+1];
  size_t r;

  while (blocks--) {
    T[0] = S->T[0] + add;
    T[1] = S->T[1];
    T[2] = T[0] ^ T[1];
    key[0] = U8TO64_LE(in + 0);  Xt[0] = S->X[0]; X[0] = key[0] + Xt[0];
    key[1] = U8TO64_LE(in + 8);  Xt[1] = S->X[1]; X[1] = key[1] + Xt[1];
    key[2] = U8TO64_LE(in + 16); Xt[2] = S->X[2]; X[2] = key[2] + Xt[2];
    key[3] = U8TO64_LE(in + 24); Xt[3] = S->X[3]; X[3] = key[3] + Xt[3];
    key[4] = U8TO64_LE(in + 32); Xt[4] = S->X[4]; X[4] = key[4] + Xt[4];
    key[5] = U8TO64_LE(in + 40); Xt[5] = S->X[5]; X[5] = key[5] + Xt[5] + T[0];
    key[6] = U8TO64_LE(in + 48); Xt[6] = S->X[6]; X[6] = key[6] + Xt[6] + T[1];
    key[7] = U8TO64_LE(in + 56); Xt[7] = S->X[7]; X[7] = key[7] + Xt[7];
    Xt[8] = 0x1BD11BDAA9FC1A22ull ^ Xt[0] ^ Xt[1] ^ Xt[2] ^ Xt[3] ^ Xt[4] ^ Xt[5] ^ Xt[6] ^ Xt[7];
    in += SCRYPT_HASH_BLOCK_SIZE;

    for (r = 0; r < 18; r++)
      Xt[r + 9] = Xt[r + 0];

    for (r = 0; r < 18; r += 2) {
      X[0] += X[1]; X[1] = ROTL64(X[1], 46) ^ X[0];
      X[2] += X[3]; X[3] = ROTL64(X[3], 36) ^ X[2];
      X[4] += X[5]; X[5] = ROTL64(X[5], 19) ^ X[4];
      X[6] += X[7]; X[7] = ROTL64(X[7], 37) ^ X[6];
      X[2] += X[1]; X[1] = ROTL64(X[1], 33) ^ X[2];
      X[0] += X[3]; X[3] = ROTL64(X[3], 42) ^ X[0];
      X[6] += X[5]; X[5] = ROTL64(X[5], 14) ^ X[6];
      X[4] += X[7]; X[7] = ROTL64(X[7], 27) ^ X[4];
      X[4] += X[1]; X[1] = ROTL64(X[1], 17) ^ X[4];
      X[6] += X[3]; X[3] = ROTL64(X[3], 49) ^ X[6];
      X[0] += X[5]; X[5] = ROTL64(X[5], 36) ^ X[0];
      X[2] += X[7]; X[7] = ROTL64(X[7], 39) ^ X[2];
      X[6] += X[1]; X[1] = ROTL64(X[1], 44) ^ X[6];
      X[4] += X[3]; X[3] = ROTL64(X[3], 56) ^ X[4];
      X[2] += X[5]; X[5] = ROTL64(X[5], 54) ^ X[2];
      X[0] += X[7]; X[7] = ROTL64(X[7], 9) ^ X[0];

      X[0] += Xt[r + 1];
      X[1] += Xt[r + 2];
      X[2] += Xt[r + 3];
      X[3] += Xt[r + 4];
      X[4] += Xt[r + 5];
      X[5] += Xt[r + 6] + T[1];
      X[6] += Xt[r + 7] + T[2];
      X[7] += Xt[r + 8] + r + 1;

      T[3] = T[0];
      T[0] = T[1];
      T[1] = T[2];
      T[2] = T[3];

      X[0] += X[1]; X[1] = ROTL64(X[1], 39) ^ X[0];
      X[2] += X[3]; X[3] = ROTL64(X[3], 30) ^ X[2];
      X[4] += X[5]; X[5] = ROTL64(X[5], 34) ^ X[4];
      X[6] += X[7]; X[7] = ROTL64(X[7], 24) ^ X[6];
      X[2] += X[1]; X[1] = ROTL64(X[1], 13) ^ X[2];
      X[0] += X[3]; X[3] = ROTL64(X[3], 17) ^ X[0];
      X[6] += X[5]; X[5] = ROTL64(X[5], 10) ^ X[6];
      X[4] += X[7]; X[7] = ROTL64(X[7], 50) ^ X[4];
      X[4] += X[1]; X[1] = ROTL64(X[1], 25) ^ X[4];
      X[6] += X[3]; X[3] = ROTL64(X[3], 29) ^ X[6];
      X[0] += X[5]; X[5] = ROTL64(X[5], 39) ^ X[0];
      X[2] += X[7]; X[7] = ROTL64(X[7], 43) ^ X[2];
      X[6] += X[1]; X[1] = ROTL64(X[1], 8) ^ X[6];
      X[4] += X[3]; X[3] = ROTL64(X[3], 22) ^ X[4];
      X[2] += X[5]; X[5] = ROTL64(X[5], 56) ^ X[2];
      X[0] += X[7]; X[7] = ROTL64(X[7], 35) ^ X[0];

      X[0] += Xt[r + 2];
      X[1] += Xt[r + 3];
      X[2] += Xt[r + 4];
      X[3] += Xt[r + 5];
      X[4] += Xt[r + 6];
      X[5] += Xt[r + 7] + T[1];
      X[6] += Xt[r + 8] + T[2];
      X[7] += Xt[r + 9] + r + 2;

      T[3] = T[0];
      T[0] = T[1];
      T[1] = T[2];
      T[2] = T[3];
    }

    S->X[0] = key[0] ^ X[0];
    S->X[1] = key[1] ^ X[1];
    S->X[2] = key[2] ^ X[2];
    S->X[3] = key[3] ^ X[3];
    S->X[4] = key[4] ^ X[4];
    S->X[5] = key[5] ^ X[5];
    S->X[6] = key[6] ^ X[6];
    S->X[7] = key[7] ^ X[7];

    S->T[0] = T[0];
    S->T[1] = T[1] & ~0x4000000000000000ull;
  }
}

static void
scrypt_hash_init(scrypt_hash_state *S) {
  S->X[0] = 0x4903ADFF749C51CEull;
  S->X[1] = 0x0D95DE399746DF03ull;
  S->X[2] = 0x8FD1934127C79BCEull;
  S->X[3] = 0x9A255629FF352CB1ull;
  S->X[4] = 0x5DB62599DF6CA7B0ull;
  S->X[5] = 0xEABE394CA9D5C3F4ull;
  S->X[6] = 0x991112C71A75B523ull;
  S->X[7] = 0xAE18A40B660FCC33ull;
  S->T[0] = 0x0000000000000000ull;
  S->T[1] = 0x7000000000000000ull;
  S->leftover = 0;
}

static void
scrypt_hash_update(scrypt_hash_state *S, const uint8_t *in, size_t inlen) {
  size_t blocks, want;

  /* skein processes the final <=64 bytes raw, so we can only update if there are at least 64+1 bytes available */
  if ((S->leftover + inlen) > SCRYPT_HASH_BLOCK_SIZE) {
    /* handle the previous data, we know there is enough for at least one block */
    if (S->leftover) {
      want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover);
      memcpy(S->buffer + S->leftover, in, want);
      in += want;
      inlen -= want;
      S->leftover = 0;
      skein512_blocks(S, S->buffer, 1, SCRYPT_HASH_BLOCK_SIZE);
    }

    /* handle the current data if there's more than one block */
    if (inlen > SCRYPT_HASH_BLOCK_SIZE) {
      blocks = ((inlen - 1) & ~(SCRYPT_HASH_BLOCK_SIZE - 1));
      skein512_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE, SCRYPT_HASH_BLOCK_SIZE);
      inlen -= blocks;
      in += blocks;
    }
  }

  /* handle leftover data */
  memcpy(S->buffer + S->leftover, in, inlen);
  S->leftover += inlen;
}

static void
scrypt_hash_finish(scrypt_hash_state *S, uint8_t *hash) {
  memset(S->buffer + S->leftover, 0, SCRYPT_HASH_BLOCK_SIZE - S->leftover);
  S->T[1] |= 0x8000000000000000ull;
  skein512_blocks(S, S->buffer, 1, S->leftover);

  memset(S->buffer, 0, SCRYPT_HASH_BLOCK_SIZE);
  S->T[0] = 0;
  S->T[1] = 0xff00000000000000ull;
  skein512_blocks(S, S->buffer, 1, 8);

  U64TO8_LE(&hash[ 0], S->X[0]);
  U64TO8_LE(&hash[ 8], S->X[1]);
  U64TO8_LE(&hash[16], S->X[2]);
  U64TO8_LE(&hash[24], S->X[3]);
  U64TO8_LE(&hash[32], S->X[4]);
  U64TO8_LE(&hash[40], S->X[5]);
  U64TO8_LE(&hash[48], S->X[6]);
  U64TO8_LE(&hash[56], S->X[7]);
}

static const uint8_t scrypt_test_hash_expected[SCRYPT_HASH_DIGEST_SIZE] = {
  0x4d,0x52,0x29,0xff,0x10,0xbc,0xd2,0x62,0xd1,0x61,0x83,0xc8,0xe6,0xf0,0x83,0xc4,
  0x9f,0xf5,0x6a,0x42,0x75,0x2a,0x26,0x4e,0xf0,0x28,0x72,0x28,0x47,0xe8,0x23,0xdf,
  0x1e,0x64,0xf1,0x51,0x38,0x35,0x9d,0xc2,0x83,0xfc,0x35,0x4e,0xc0,0x52,0x5f,0x41,
  0x6a,0x0b,0x7d,0xf5,0xce,0x98,0xde,0x6f,0x36,0xd8,0x51,0x15,0x78,0x78,0x93,0x67,
};
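The tweak constants written into S->T[1] above decode as UBI tweak flags from the Skein specification: bit 62 marks the first block of a chain (cleared after every compression by the & ~0x40... mask), bit 63 marks the final block, and bits 56..61 carry the block type. A sketch with named constants (the names are ours; the values follow the spec):

/* sketch: Skein-512 UBI tweak flags as packed into T[1] */
#define UBI_T1_FLAG_FIRST 0x4000000000000000ull /* cleared after every block      */
#define UBI_T1_FLAG_FINAL 0x8000000000000000ull /* OR-ed in before the last block */
#define UBI_T1_TYPE_MSG   0x3000000000000000ull /* type 0x30: message             */
#define UBI_T1_TYPE_OUT   0x3f00000000000000ull /* type 0x3f: output transform    */

/* init:   T[1] = UBI_T1_FLAG_FIRST | UBI_T1_TYPE_MSG                     = 0x7000000000000000
   output: T[1] = UBI_T1_FLAG_FIRST | UBI_T1_FLAG_FINAL | UBI_T1_TYPE_OUT = 0xff00000000000000 */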
368
deps/scrypt-jane-master/code/scrypt-jane-mix_chacha-avx.h
vendored
Normal file
@@ -0,0 +1,368 @@
/* x86 */
#if defined(X86ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)

#define SCRYPT_CHACHA_AVX

asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_avx)
  a1(push ebx)
  a1(push edi)
  a1(push esi)
  a1(push ebp)
  a2(mov ebp,esp)
  a2(mov edi,[ebp+20])
  a2(mov esi,[ebp+24])
  a2(mov eax,[ebp+28])
  a2(mov ebx,[ebp+32])
  a2(sub esp,64)
  a2(and esp,~63)
  a2(lea edx,[ebx*2])
  a2(shl edx,6)
  a2(lea ecx,[edx-64])
  a2(and eax, eax)
  a2(mov ebx, 0x01000302)
  a2(vmovd xmm4, ebx)
  a2(mov ebx, 0x05040706)
  a2(vmovd xmm0, ebx)
  a2(mov ebx, 0x09080b0a)
  a2(vmovd xmm1, ebx)
  a2(mov ebx, 0x0d0c0f0e)
  a2(vmovd xmm2, ebx)
  a2(mov ebx, 0x02010003)
  a2(vmovd xmm5, ebx)
  a2(mov ebx, 0x06050407)
  a2(vmovd xmm3, ebx)
  a2(mov ebx, 0x0a09080b)
  a2(vmovd xmm6, ebx)
  a2(mov ebx, 0x0e0d0c0f)
  a2(vmovd xmm7, ebx)
  a3(vpunpckldq xmm4, xmm4, xmm0)
  a3(vpunpckldq xmm5, xmm5, xmm3)
  a3(vpunpckldq xmm1, xmm1, xmm2)
  a3(vpunpckldq xmm6, xmm6, xmm7)
  a3(vpunpcklqdq xmm4, xmm4, xmm1)
  a3(vpunpcklqdq xmm5, xmm5, xmm6)
  a2(vmovdqa xmm0,[ecx+esi+0])
  a2(vmovdqa xmm1,[ecx+esi+16])
  a2(vmovdqa xmm2,[ecx+esi+32])
  a2(vmovdqa xmm3,[ecx+esi+48])
  aj(jz scrypt_ChunkMix_avx_no_xor1)
  a3(vpxor xmm0,xmm0,[ecx+eax+0])
  a3(vpxor xmm1,xmm1,[ecx+eax+16])
  a3(vpxor xmm2,xmm2,[ecx+eax+32])
  a3(vpxor xmm3,xmm3,[ecx+eax+48])
  a1(scrypt_ChunkMix_avx_no_xor1:)
  a2(xor ecx,ecx)
  a2(xor ebx,ebx)
  a1(scrypt_ChunkMix_avx_loop:)
  a2(and eax, eax)
  a3(vpxor xmm0,xmm0,[esi+ecx+0])
  a3(vpxor xmm1,xmm1,[esi+ecx+16])
  a3(vpxor xmm2,xmm2,[esi+ecx+32])
  a3(vpxor xmm3,xmm3,[esi+ecx+48])
  aj(jz scrypt_ChunkMix_avx_no_xor2)
  a3(vpxor xmm0,xmm0,[eax+ecx+0])
  a3(vpxor xmm1,xmm1,[eax+ecx+16])
  a3(vpxor xmm2,xmm2,[eax+ecx+32])
  a3(vpxor xmm3,xmm3,[eax+ecx+48])
  a1(scrypt_ChunkMix_avx_no_xor2:)
  a2(vmovdqa [esp+0],xmm0)
  a2(vmovdqa [esp+16],xmm1)
  a2(vmovdqa [esp+32],xmm2)
  a2(vmovdqa [esp+48],xmm3)
  a2(mov eax,8)
  a1(scrypt_chacha_avx_loop: )
  a3(vpaddd xmm0,xmm0,xmm1)
  a3(vpxor xmm3,xmm3,xmm0)
  a3(vpshufb xmm3,xmm3,xmm4)
  a3(vpaddd xmm2,xmm2,xmm3)
  a3(vpxor xmm1,xmm1,xmm2)
  a3(vpsrld xmm6,xmm1,20)
  a3(vpslld xmm1,xmm1,12)
  a3(vpxor xmm1,xmm1,xmm6)
  a3(vpaddd xmm0,xmm0,xmm1)
  a3(vpxor xmm3,xmm3,xmm0)
  a3(vpshufb xmm3,xmm3,xmm5)
  a3(vpshufd xmm0,xmm0,0x93)
  a3(vpaddd xmm2,xmm2,xmm3)
  a3(vpshufd xmm3,xmm3,0x4e)
  a3(vpxor xmm1,xmm1,xmm2)
  a3(vpshufd xmm2,xmm2,0x39)
  a3(vpsrld xmm6,xmm1,25)
  a3(vpslld xmm1,xmm1,7)
  a3(vpxor xmm1,xmm1,xmm6)
  a3(vpaddd xmm0,xmm0,xmm1)
  a3(vpxor xmm3,xmm3,xmm0)
  a3(vpshufb xmm3,xmm3,xmm4)
  a3(vpaddd xmm2,xmm2,xmm3)
  a3(vpxor xmm1,xmm1,xmm2)
  a3(vpsrld xmm6,xmm1,20)
  a3(vpslld xmm1,xmm1,12)
  a3(vpxor xmm1,xmm1,xmm6)
  a3(vpaddd xmm0,xmm0,xmm1)
  a3(vpxor xmm3,xmm3,xmm0)
  a3(vpshufb xmm3,xmm3,xmm5)
  a3(vpshufd xmm0,xmm0,0x39)
  a3(vpaddd xmm2,xmm2,xmm3)
  a3(vpshufd xmm3,xmm3,0x4e)
  a3(vpxor xmm1,xmm1,xmm2)
  a3(vpshufd xmm2,xmm2,0x93)
  a3(vpsrld xmm6,xmm1,25)
  a3(vpslld xmm1,xmm1,7)
  a3(vpxor xmm1,xmm1,xmm6)
  a2(sub eax,2)
  aj(ja scrypt_chacha_avx_loop)
  a3(vpaddd xmm0,xmm0,[esp+0])
  a3(vpaddd xmm1,xmm1,[esp+16])
  a3(vpaddd xmm2,xmm2,[esp+32])
  a3(vpaddd xmm3,xmm3,[esp+48])
  a2(lea eax,[ebx+ecx])
  a2(xor ebx,edx)
  a2(and eax,~0x7f)
  a2(add ecx,64)
  a2(shr eax,1)
  a2(add eax, edi)
  a2(cmp ecx,edx)
  a2(vmovdqa [eax+0],xmm0)
  a2(vmovdqa [eax+16],xmm1)
  a2(vmovdqa [eax+32],xmm2)
  a2(vmovdqa [eax+48],xmm3)
  a2(mov eax,[ebp+28])
  aj(jne scrypt_ChunkMix_avx_loop)
  a2(mov esp,ebp)
  a1(pop ebp)
  a1(pop esi)
  a1(pop edi)
  a1(pop ebx)
  aret(16)
asm_naked_fn_end(scrypt_ChunkMix_avx)

#endif


/* x64 */
#if defined(X86_64ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)

#define SCRYPT_CHACHA_AVX

asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_avx)
  a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
  a2(shl rcx,6)
  a2(lea r9,[rcx-64])
  a2(lea rax,[rsi+r9])
  a2(lea r9,[rdx+r9])
  a2(and rdx, rdx)
  a2(vmovdqa xmm0,[rax+0])
  a2(vmovdqa xmm1,[rax+16])
  a2(vmovdqa xmm2,[rax+32])
  a2(vmovdqa xmm3,[rax+48])
  a2(mov r8, 0x0504070601000302)
  a2(mov rax, 0x0d0c0f0e09080b0a)
  a2(movd xmm4, r8)
  a2(movd xmm6, rax)
  a2(mov r8, 0x0605040702010003)
  a2(mov rax, 0x0e0d0c0f0a09080b)
  a2(movd xmm5, r8)
  a2(movd xmm7, rax)
  a3(vpunpcklqdq xmm4, xmm4, xmm6)
  a3(vpunpcklqdq xmm5, xmm5, xmm7)
  aj(jz scrypt_ChunkMix_avx_no_xor1)
  a3(vpxor xmm0,xmm0,[r9+0])
  a3(vpxor xmm1,xmm1,[r9+16])
  a3(vpxor xmm2,xmm2,[r9+32])
  a3(vpxor xmm3,xmm3,[r9+48])
  a1(scrypt_ChunkMix_avx_no_xor1:)
  a2(xor r8,r8)
  a2(xor r9,r9)
  a1(scrypt_ChunkMix_avx_loop:)
  a2(and rdx, rdx)
  a3(vpxor xmm0,xmm0,[rsi+r9+0])
  a3(vpxor xmm1,xmm1,[rsi+r9+16])
  a3(vpxor xmm2,xmm2,[rsi+r9+32])
  a3(vpxor xmm3,xmm3,[rsi+r9+48])
  aj(jz scrypt_ChunkMix_avx_no_xor2)
  a3(vpxor xmm0,xmm0,[rdx+r9+0])
  a3(vpxor xmm1,xmm1,[rdx+r9+16])
  a3(vpxor xmm2,xmm2,[rdx+r9+32])
  a3(vpxor xmm3,xmm3,[rdx+r9+48])
  a1(scrypt_ChunkMix_avx_no_xor2:)
  a2(vmovdqa xmm8,xmm0)
  a2(vmovdqa xmm9,xmm1)
  a2(vmovdqa xmm10,xmm2)
  a2(vmovdqa xmm11,xmm3)
  a2(mov rax,8)
  a1(scrypt_chacha_avx_loop: )
  a3(vpaddd xmm0,xmm0,xmm1)
  a3(vpxor xmm3,xmm3,xmm0)
  a3(vpshufb xmm3,xmm3,xmm4)
  a3(vpaddd xmm2,xmm2,xmm3)
  a3(vpxor xmm1,xmm1,xmm2)
  a3(vpsrld xmm12,xmm1,20)
  a3(vpslld xmm1,xmm1,12)
  a3(vpxor xmm1,xmm1,xmm12)
  a3(vpaddd xmm0,xmm0,xmm1)
  a3(vpxor xmm3,xmm3,xmm0)
  a3(vpshufb xmm3,xmm3,xmm5)
  a3(vpshufd xmm0,xmm0,0x93)
  a3(vpaddd xmm2,xmm2,xmm3)
  a3(vpshufd xmm3,xmm3,0x4e)
  a3(vpxor xmm1,xmm1,xmm2)
  a3(vpshufd xmm2,xmm2,0x39)
  a3(vpsrld xmm12,xmm1,25)
  a3(vpslld xmm1,xmm1,7)
  a3(vpxor xmm1,xmm1,xmm12)
  a3(vpaddd xmm0,xmm0,xmm1)
  a3(vpxor xmm3,xmm3,xmm0)
  a3(vpshufb xmm3,xmm3,xmm4)
  a3(vpaddd xmm2,xmm2,xmm3)
  a3(vpxor xmm1,xmm1,xmm2)
  a3(vpsrld xmm12,xmm1,20)
  a3(vpslld xmm1,xmm1,12)
  a3(vpxor xmm1,xmm1,xmm12)
  a3(vpaddd xmm0,xmm0,xmm1)
  a3(vpxor xmm3,xmm3,xmm0)
  a3(vpshufb xmm3,xmm3,xmm5)
  a3(vpshufd xmm0,xmm0,0x39)
  a3(vpaddd xmm2,xmm2,xmm3)
  a3(vpshufd xmm3,xmm3,0x4e)
  a3(vpxor xmm1,xmm1,xmm2)
  a3(vpshufd xmm2,xmm2,0x93)
  a3(vpsrld xmm12,xmm1,25)
  a3(vpslld xmm1,xmm1,7)
  a3(vpxor xmm1,xmm1,xmm12)
  a2(sub rax,2)
  aj(ja scrypt_chacha_avx_loop)
  a3(vpaddd xmm0,xmm0,xmm8)
  a3(vpaddd xmm1,xmm1,xmm9)
  a3(vpaddd xmm2,xmm2,xmm10)
  a3(vpaddd xmm3,xmm3,xmm11)
  a2(lea rax,[r8+r9])
  a2(xor r8,rcx)
  a2(and rax,~0x7f)
  a2(add r9,64)
  a2(shr rax,1)
  a2(add rax, rdi)
  a2(cmp r9,rcx)
  a2(vmovdqa [rax+0],xmm0)
  a2(vmovdqa [rax+16],xmm1)
  a2(vmovdqa [rax+32],xmm2)
  a2(vmovdqa [rax+48],xmm3)
  aj(jne scrypt_ChunkMix_avx_loop)
  a1(ret)
asm_naked_fn_end(scrypt_ChunkMix_avx)

#endif


/* intrinsic */
#if defined(X86_INTRINSIC_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED))

#define SCRYPT_CHACHA_AVX

static void asm_calling_convention NOINLINE
scrypt_ChunkMix_avx(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) {
  uint32_t i, blocksPerChunk = r * 2, half = 0;
  xmmi *xmmp,x0,x1,x2,x3,x6,t0,t1,t2,t3;
  const xmmi x4 = *(xmmi *)&ssse3_rotl16_32bit, x5 = *(xmmi *)&ssse3_rotl8_32bit;
  size_t rounds;

  /* 1: X = B_{2r - 1} */
  xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
  x0 = xmmp[0];
  x1 = xmmp[1];
  x2 = xmmp[2];
  x3 = xmmp[3];

  if (Bxor) {
    xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
    x0 = _mm_xor_si128(x0, xmmp[0]);
    x1 = _mm_xor_si128(x1, xmmp[1]);
    x2 = _mm_xor_si128(x2, xmmp[2]);
    x3 = _mm_xor_si128(x3, xmmp[3]);
  }

  /* 2: for i = 0 to 2r - 1 do */
  for (i = 0; i < blocksPerChunk; i++, half ^= r) {
    /* 3: X = H(X ^ B_i) */
    xmmp = (xmmi *)scrypt_block(Bin, i);
    x0 = _mm_xor_si128(x0, xmmp[0]);
    x1 = _mm_xor_si128(x1, xmmp[1]);
    x2 = _mm_xor_si128(x2, xmmp[2]);
    x3 = _mm_xor_si128(x3, xmmp[3]);

    if (Bxor) {
      xmmp = (xmmi *)scrypt_block(Bxor, i);
      x0 = _mm_xor_si128(x0, xmmp[0]);
      x1 = _mm_xor_si128(x1, xmmp[1]);
      x2 = _mm_xor_si128(x2, xmmp[2]);
      x3 = _mm_xor_si128(x3, xmmp[3]);
    }

    t0 = x0;
    t1 = x1;
    t2 = x2;
    t3 = x3;

    for (rounds = 8; rounds; rounds -= 2) {
      x0 = _mm_add_epi32(x0, x1);
      x3 = _mm_xor_si128(x3, x0);
      x3 = _mm_shuffle_epi8(x3, x4);
      x2 = _mm_add_epi32(x2, x3);
      x1 = _mm_xor_si128(x1, x2);
      x6 = x1;
      x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x6, 20));
      x0 = _mm_add_epi32(x0, x1);
      x3 = _mm_xor_si128(x3, x0);
      x3 = _mm_shuffle_epi8(x3, x5);
      x0 = _mm_shuffle_epi32(x0, 0x93);
      x2 = _mm_add_epi32(x2, x3);
      x3 = _mm_shuffle_epi32(x3, 0x4e);
      x1 = _mm_xor_si128(x1, x2);
      x2 = _mm_shuffle_epi32(x2, 0x39);
      x6 = x1;
      x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x6, 25));
      x0 = _mm_add_epi32(x0, x1);
      x3 = _mm_xor_si128(x3, x0);
      x3 = _mm_shuffle_epi8(x3, x4);
      x2 = _mm_add_epi32(x2, x3);
      x1 = _mm_xor_si128(x1, x2);
      x6 = x1;
      x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x6, 20));
      x0 = _mm_add_epi32(x0, x1);
      x3 = _mm_xor_si128(x3, x0);
      x3 = _mm_shuffle_epi8(x3, x5);
      x0 = _mm_shuffle_epi32(x0, 0x39);
      x2 = _mm_add_epi32(x2, x3);
      x3 = _mm_shuffle_epi32(x3, 0x4e);
      x1 = _mm_xor_si128(x1, x2);
      x2 = _mm_shuffle_epi32(x2, 0x93);
      x6 = x1;
      x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x6, 25));
    }

    x0 = _mm_add_epi32(x0, t0);
    x1 = _mm_add_epi32(x1, t1);
    x2 = _mm_add_epi32(x2, t2);
    x3 = _mm_add_epi32(x3, t3);

    /* 4: Y_i = X */
    /* 6: B'[0..r-1] = Y_even */
    /* 6: B'[r..2r-1] = Y_odd */
    xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
    xmmp[0] = x0;
    xmmp[1] = x1;
    xmmp[2] = x2;
    xmmp[3] = x3;
  }
}

#endif

#if defined(SCRYPT_CHACHA_AVX)
  #undef SCRYPT_MIX
  #define SCRYPT_MIX "ChaCha/8-AVX"
  #undef SCRYPT_CHACHA_INCLUDED
  #define SCRYPT_CHACHA_INCLUDED
#endif
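All of the scrypt_ChunkMix_* variants in these files store mixed block i at index (i / 2) + half, which is scrypt BlockMix's even/odd interleave: even blocks fill B'[0..r-1], odd blocks fill B'[r..2r-1]. A self-contained check that the running half ^= r toggle matches the explicit form from the scrypt paper (illustrative only, with an arbitrary r):

#include <assert.h>
#include <stdint.h>

int main(void) {
  uint32_t r = 8, half = 0; /* r is arbitrary here */
  for (uint32_t i = 0; i < 2 * r; i++, half ^= r) {
    uint32_t explicit_idx = (i / 2) + ((i & 1) ? r : 0); /* even -> first half, odd -> second */
    assert((i / 2) + half == explicit_idx);
  }
  return 0;
}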
363
deps/scrypt-jane-master/code/scrypt-jane-mix_chacha-sse2.h
vendored
Normal file
@@ -0,0 +1,363 @@
/* x86 */
#if defined(X86ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)

#define SCRYPT_CHACHA_SSE2

asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_sse2)
  a1(push ebx)
  a1(push edi)
  a1(push esi)
  a1(push ebp)
  a2(mov ebp,esp)
  a2(mov edi,[ebp+20])
  a2(mov esi,[ebp+24])
  a2(mov eax,[ebp+28])
  a2(mov ebx,[ebp+32])
  a2(sub esp,16)
  a2(and esp,~15)
  a2(lea edx,[ebx*2])
  a2(shl edx,6)
  a2(lea ecx,[edx-64])
  a2(and eax, eax)
  a2(movdqa xmm0,[ecx+esi+0])
  a2(movdqa xmm1,[ecx+esi+16])
  a2(movdqa xmm2,[ecx+esi+32])
  a2(movdqa xmm3,[ecx+esi+48])
  aj(jz scrypt_ChunkMix_sse2_no_xor1)
  a2(pxor xmm0,[ecx+eax+0])
  a2(pxor xmm1,[ecx+eax+16])
  a2(pxor xmm2,[ecx+eax+32])
  a2(pxor xmm3,[ecx+eax+48])
  a1(scrypt_ChunkMix_sse2_no_xor1:)
  a2(xor ecx,ecx)
  a2(xor ebx,ebx)
  a1(scrypt_ChunkMix_sse2_loop:)
  a2(and eax, eax)
  a2(pxor xmm0,[esi+ecx+0])
  a2(pxor xmm1,[esi+ecx+16])
  a2(pxor xmm2,[esi+ecx+32])
  a2(pxor xmm3,[esi+ecx+48])
  aj(jz scrypt_ChunkMix_sse2_no_xor2)
  a2(pxor xmm0,[eax+ecx+0])
  a2(pxor xmm1,[eax+ecx+16])
  a2(pxor xmm2,[eax+ecx+32])
  a2(pxor xmm3,[eax+ecx+48])
  a1(scrypt_ChunkMix_sse2_no_xor2:)
  a2(movdqa [esp+0],xmm0)
  a2(movdqa xmm4,xmm1)
  a2(movdqa xmm5,xmm2)
  a2(movdqa xmm7,xmm3)
  a2(mov eax,8)
  a1(scrypt_chacha_sse2_loop: )
  a2(paddd xmm0,xmm1)
  a2(pxor xmm3,xmm0)
  a3(pshuflw xmm3,xmm3,0xb1)
  a3(pshufhw xmm3,xmm3,0xb1)
  a2(paddd xmm2,xmm3)
  a2(pxor xmm1,xmm2)
  a2(movdqa xmm6,xmm1)
  a2(pslld xmm1,12)
  a2(psrld xmm6,20)
  a2(pxor xmm1,xmm6)
  a2(paddd xmm0,xmm1)
  a2(pxor xmm3,xmm0)
  a2(movdqa xmm6,xmm3)
  a2(pslld xmm3,8)
  a2(psrld xmm6,24)
  a2(pxor xmm3,xmm6)
  a3(pshufd xmm0,xmm0,0x93)
  a2(paddd xmm2,xmm3)
  a3(pshufd xmm3,xmm3,0x4e)
  a2(pxor xmm1,xmm2)
  a3(pshufd xmm2,xmm2,0x39)
  a2(movdqa xmm6,xmm1)
  a2(pslld xmm1,7)
  a2(psrld xmm6,25)
  a2(pxor xmm1,xmm6)
  a2(sub eax,2)
  a2(paddd xmm0,xmm1)
  a2(pxor xmm3,xmm0)
  a3(pshuflw xmm3,xmm3,0xb1)
  a3(pshufhw xmm3,xmm3,0xb1)
  a2(paddd xmm2,xmm3)
  a2(pxor xmm1,xmm2)
  a2(movdqa xmm6,xmm1)
  a2(pslld xmm1,12)
  a2(psrld xmm6,20)
  a2(pxor xmm1,xmm6)
  a2(paddd xmm0,xmm1)
  a2(pxor xmm3,xmm0)
  a2(movdqa xmm6,xmm3)
  a2(pslld xmm3,8)
  a2(psrld xmm6,24)
  a2(pxor xmm3,xmm6)
  a3(pshufd xmm0,xmm0,0x39)
  a2(paddd xmm2,xmm3)
  a3(pshufd xmm3,xmm3,0x4e)
  a2(pxor xmm1,xmm2)
  a3(pshufd xmm2,xmm2,0x93)
  a2(movdqa xmm6,xmm1)
  a2(pslld xmm1,7)
  a2(psrld xmm6,25)
  a2(pxor xmm1,xmm6)
  aj(ja scrypt_chacha_sse2_loop)
  a2(paddd xmm0,[esp+0])
  a2(paddd xmm1,xmm4)
  a2(paddd xmm2,xmm5)
  a2(paddd xmm3,xmm7)
  a2(lea eax,[ebx+ecx])
  a2(xor ebx,edx)
  a2(and eax,~0x7f)
  a2(add ecx,64)
  a2(shr eax,1)
  a2(add eax, edi)
  a2(cmp ecx,edx)
  a2(movdqa [eax+0],xmm0)
  a2(movdqa [eax+16],xmm1)
  a2(movdqa [eax+32],xmm2)
  a2(movdqa [eax+48],xmm3)
  a2(mov eax,[ebp+28])
  aj(jne scrypt_ChunkMix_sse2_loop)
  a2(mov esp,ebp)
  a1(pop ebp)
  a1(pop esi)
  a1(pop edi)
  a1(pop ebx)
  aret(16)
asm_naked_fn_end(scrypt_ChunkMix_sse2)

#endif


/* x64 */
#if defined(X86_64ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)

#define SCRYPT_CHACHA_SSE2

asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_sse2)
  a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
  a2(shl rcx,6)
  a2(lea r9,[rcx-64])
  a2(lea rax,[rsi+r9])
  a2(lea r9,[rdx+r9])
  a2(and rdx, rdx)
  a2(movdqa xmm0,[rax+0])
  a2(movdqa xmm1,[rax+16])
  a2(movdqa xmm2,[rax+32])
  a2(movdqa xmm3,[rax+48])
  aj(jz scrypt_ChunkMix_sse2_no_xor1)
  a2(pxor xmm0,[r9+0])
  a2(pxor xmm1,[r9+16])
  a2(pxor xmm2,[r9+32])
  a2(pxor xmm3,[r9+48])
  a1(scrypt_ChunkMix_sse2_no_xor1:)
  a2(xor r9,r9)
  a2(xor r8,r8)
  a1(scrypt_ChunkMix_sse2_loop:)
  a2(and rdx, rdx)
  a2(pxor xmm0,[rsi+r9+0])
  a2(pxor xmm1,[rsi+r9+16])
  a2(pxor xmm2,[rsi+r9+32])
  a2(pxor xmm3,[rsi+r9+48])
  aj(jz scrypt_ChunkMix_sse2_no_xor2)
  a2(pxor xmm0,[rdx+r9+0])
  a2(pxor xmm1,[rdx+r9+16])
  a2(pxor xmm2,[rdx+r9+32])
  a2(pxor xmm3,[rdx+r9+48])
  a1(scrypt_ChunkMix_sse2_no_xor2:)
  a2(movdqa xmm8,xmm0)
  a2(movdqa xmm9,xmm1)
  a2(movdqa xmm10,xmm2)
  a2(movdqa xmm11,xmm3)
  a2(mov rax,8)
  a1(scrypt_chacha_sse2_loop: )
  a2(paddd xmm0,xmm1)
  a2(pxor xmm3,xmm0)
  a3(pshuflw xmm3,xmm3,0xb1)
  a3(pshufhw xmm3,xmm3,0xb1)
  a2(paddd xmm2,xmm3)
  a2(pxor xmm1,xmm2)
  a2(movdqa xmm6,xmm1)
  a2(pslld xmm1,12)
  a2(psrld xmm6,20)
  a2(pxor xmm1,xmm6)
  a2(paddd xmm0,xmm1)
  a2(pxor xmm3,xmm0)
  a2(movdqa xmm6,xmm3)
  a2(pslld xmm3,8)
  a2(psrld xmm6,24)
  a2(pxor xmm3,xmm6)
  a3(pshufd xmm0,xmm0,0x93)
  a2(paddd xmm2,xmm3)
  a3(pshufd xmm3,xmm3,0x4e)
  a2(pxor xmm1,xmm2)
  a3(pshufd xmm2,xmm2,0x39)
  a2(movdqa xmm6,xmm1)
  a2(pslld xmm1,7)
  a2(psrld xmm6,25)
  a2(pxor xmm1,xmm6)
  a2(sub rax,2)
  a2(paddd xmm0,xmm1)
  a2(pxor xmm3,xmm0)
  a3(pshuflw xmm3,xmm3,0xb1)
  a3(pshufhw xmm3,xmm3,0xb1)
  a2(paddd xmm2,xmm3)
  a2(pxor xmm1,xmm2)
  a2(movdqa xmm6,xmm1)
  a2(pslld xmm1,12)
  a2(psrld xmm6,20)
  a2(pxor xmm1,xmm6)
  a2(paddd xmm0,xmm1)
  a2(pxor xmm3,xmm0)
  a2(movdqa xmm6,xmm3)
  a2(pslld xmm3,8)
  a2(psrld xmm6,24)
  a2(pxor xmm3,xmm6)
  a3(pshufd xmm0,xmm0,0x39)
  a2(paddd xmm2,xmm3)
  a3(pshufd xmm3,xmm3,0x4e)
  a2(pxor xmm1,xmm2)
  a3(pshufd xmm2,xmm2,0x93)
  a2(movdqa xmm6,xmm1)
  a2(pslld xmm1,7)
  a2(psrld xmm6,25)
  a2(pxor xmm1,xmm6)
  aj(ja scrypt_chacha_sse2_loop)
  a2(paddd xmm0,xmm8)
  a2(paddd xmm1,xmm9)
  a2(paddd xmm2,xmm10)
  a2(paddd xmm3,xmm11)
  a2(lea rax,[r8+r9])
  a2(xor r8,rcx)
  a2(and rax,~0x7f)
  a2(add r9,64)
  a2(shr rax,1)
  a2(add rax, rdi)
  a2(cmp r9,rcx)
  a2(movdqa [rax+0],xmm0)
  a2(movdqa [rax+16],xmm1)
  a2(movdqa [rax+32],xmm2)
  a2(movdqa [rax+48],xmm3)
  aj(jne scrypt_ChunkMix_sse2_loop)
  a1(ret)
asm_naked_fn_end(scrypt_ChunkMix_sse2)

#endif


/* intrinsic */
#if defined(X86_INTRINSIC_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED))

#define SCRYPT_CHACHA_SSE2

static void NOINLINE asm_calling_convention
scrypt_ChunkMix_sse2(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) {
  uint32_t i, blocksPerChunk = r * 2, half = 0;
  xmmi *xmmp,x0,x1,x2,x3,x4,t0,t1,t2,t3;
  size_t rounds;

  /* 1: X = B_{2r - 1} */
  xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
  x0 = xmmp[0];
  x1 = xmmp[1];
  x2 = xmmp[2];
  x3 = xmmp[3];

  if (Bxor) {
    xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
    x0 = _mm_xor_si128(x0, xmmp[0]);
    x1 = _mm_xor_si128(x1, xmmp[1]);
    x2 = _mm_xor_si128(x2, xmmp[2]);
    x3 = _mm_xor_si128(x3, xmmp[3]);
  }

  /* 2: for i = 0 to 2r - 1 do */
  for (i = 0; i < blocksPerChunk; i++, half ^= r) {
    /* 3: X = H(X ^ B_i) */
    xmmp = (xmmi *)scrypt_block(Bin, i);
    x0 = _mm_xor_si128(x0, xmmp[0]);
    x1 = _mm_xor_si128(x1, xmmp[1]);
    x2 = _mm_xor_si128(x2, xmmp[2]);
    x3 = _mm_xor_si128(x3, xmmp[3]);

    if (Bxor) {
      xmmp = (xmmi *)scrypt_block(Bxor, i);
      x0 = _mm_xor_si128(x0, xmmp[0]);
      x1 = _mm_xor_si128(x1, xmmp[1]);
      x2 = _mm_xor_si128(x2, xmmp[2]);
      x3 = _mm_xor_si128(x3, xmmp[3]);
    }

    t0 = x0;
    t1 = x1;
    t2 = x2;
    t3 = x3;

    for (rounds = 8; rounds; rounds -= 2) {
      x0 = _mm_add_epi32(x0, x1);
      x3 = _mm_xor_si128(x3, x0);
      x4 = x3;
      x3 = _mm_shufflehi_epi16(_mm_shufflelo_epi16(x3, 0xb1), 0xb1);
      x2 = _mm_add_epi32(x2, x3);
      x1 = _mm_xor_si128(x1, x2);
      x4 = x1;
      x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x4, 20));
      x0 = _mm_add_epi32(x0, x1);
      x3 = _mm_xor_si128(x3, x0);
      x4 = x3;
      x3 = _mm_or_si128(_mm_slli_epi32(x3, 8), _mm_srli_epi32(x4, 24));
      x0 = _mm_shuffle_epi32(x0, 0x93);
      x2 = _mm_add_epi32(x2, x3);
      x3 = _mm_shuffle_epi32(x3, 0x4e);
      x1 = _mm_xor_si128(x1, x2);
      x2 = _mm_shuffle_epi32(x2, 0x39);
      x4 = x1;
      x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x4, 25));
      x0 = _mm_add_epi32(x0, x1);
      x3 = _mm_xor_si128(x3, x0);
      x4 = x3;
      x3 = _mm_shufflehi_epi16(_mm_shufflelo_epi16(x3, 0xb1), 0xb1);
      x2 = _mm_add_epi32(x2, x3);
      x1 = _mm_xor_si128(x1, x2);
      x4 = x1;
      x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x4, 20));
      x0 = _mm_add_epi32(x0, x1);
      x3 = _mm_xor_si128(x3, x0);
      x4 = x3;
      x3 = _mm_or_si128(_mm_slli_epi32(x3, 8), _mm_srli_epi32(x4, 24));
      x0 = _mm_shuffle_epi32(x0, 0x39);
      x2 = _mm_add_epi32(x2, x3);
      x3 = _mm_shuffle_epi32(x3, 0x4e);
      x1 = _mm_xor_si128(x1, x2);
      x2 = _mm_shuffle_epi32(x2, 0x93);
      x4 = x1;
      x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x4, 25));
    }

    x0 = _mm_add_epi32(x0, t0);
    x1 = _mm_add_epi32(x1, t1);
    x2 = _mm_add_epi32(x2, t2);
    x3 = _mm_add_epi32(x3, t3);

    /* 4: Y_i = X */
    /* 6: B'[0..r-1] = Y_even */
    /* 6: B'[r..2r-1] = Y_odd */
    xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
    xmmp[0] = x0;
    xmmp[1] = x1;
    xmmp[2] = x2;
    xmmp[3] = x3;
  }
}

#endif

#if defined(SCRYPT_CHACHA_SSE2)
  #undef SCRYPT_MIX
  #define SCRYPT_MIX "ChaCha/8-SSE2"
  #undef SCRYPT_CHACHA_INCLUDED
  #define SCRYPT_CHACHA_INCLUDED
#endif
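Lacking pshufb, the SSE2 path builds its rotates two ways: the 16-bit rotate is a pure word swap (pshuflw/pshufhw with immediate 0xb1 swaps the two 16-bit halves of every dword), and the 8/12/7-bit rotates use shift, shift, combine (pslld/psrld/pxor, where XOR equals OR because the two shifted halves share no set bits). The scalar identities, as a quick self-contained check:

#include <assert.h>
#include <stdint.h>

static uint32_t rotl32(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }

int main(void) {
  uint32_t x = 0x11223344;
  /* rotl by 16 == swapping the 16-bit halves of the word */
  assert(rotl32(x, 16) == ((x << 16) | (x >> 16)));
  /* shift-shift-XOR form used for the other rotate amounts */
  assert(rotl32(x, 12) == ((x << 12) ^ (x >> 20)));
  assert(rotl32(x, 7) == ((x << 7) ^ (x >> 25)));
  return 0;
}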
376
deps/scrypt-jane-master/code/scrypt-jane-mix_chacha-ssse3.h
vendored
Normal file
@@ -0,0 +1,376 @@
/* x86 */
#if defined(X86ASM_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)

#define SCRYPT_CHACHA_SSSE3

asm_naked_fn_proto(void, scrypt_ChunkMix_ssse3)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_ssse3)
  a1(push ebx)
  a1(push edi)
  a1(push esi)
  a1(push ebp)
  a2(mov ebp,esp)
  a2(mov edi,[ebp+20])
  a2(mov esi,[ebp+24])
  a2(mov eax,[ebp+28])
  a2(mov ebx,[ebp+32])
  a2(sub esp,64)
  a2(and esp,~63)
  a2(lea edx,[ebx*2])
  a2(shl edx,6)
  a2(lea ecx,[edx-64])
  a2(and eax, eax)
  a2(mov ebx, 0x01000302)
  a2(movd xmm4, ebx)
  a2(mov ebx, 0x05040706)
  a2(movd xmm0, ebx)
  a2(mov ebx, 0x09080b0a)
  a2(movd xmm1, ebx)
  a2(mov ebx, 0x0d0c0f0e)
  a2(movd xmm2, ebx)
  a2(mov ebx, 0x02010003)
  a2(movd xmm5, ebx)
  a2(mov ebx, 0x06050407)
  a2(movd xmm3, ebx)
  a2(mov ebx, 0x0a09080b)
  a2(movd xmm6, ebx)
  a2(mov ebx, 0x0e0d0c0f)
  a2(movd xmm7, ebx)
  a2(punpckldq xmm4, xmm0)
  a2(punpckldq xmm5, xmm3)
  a2(punpckldq xmm1, xmm2)
  a2(punpckldq xmm6, xmm7)
  a2(punpcklqdq xmm4, xmm1)
  a2(punpcklqdq xmm5, xmm6)
  a2(movdqa xmm0,[ecx+esi+0])
  a2(movdqa xmm1,[ecx+esi+16])
  a2(movdqa xmm2,[ecx+esi+32])
  a2(movdqa xmm3,[ecx+esi+48])
  aj(jz scrypt_ChunkMix_ssse3_no_xor1)
  a2(pxor xmm0,[ecx+eax+0])
  a2(pxor xmm1,[ecx+eax+16])
  a2(pxor xmm2,[ecx+eax+32])
  a2(pxor xmm3,[ecx+eax+48])
  a1(scrypt_ChunkMix_ssse3_no_xor1:)
  a2(xor ecx,ecx)
  a2(xor ebx,ebx)
  a1(scrypt_ChunkMix_ssse3_loop:)
  a2(and eax, eax)
  a2(pxor xmm0,[esi+ecx+0])
  a2(pxor xmm1,[esi+ecx+16])
  a2(pxor xmm2,[esi+ecx+32])
  a2(pxor xmm3,[esi+ecx+48])
  aj(jz scrypt_ChunkMix_ssse3_no_xor2)
  a2(pxor xmm0,[eax+ecx+0])
  a2(pxor xmm1,[eax+ecx+16])
  a2(pxor xmm2,[eax+ecx+32])
  a2(pxor xmm3,[eax+ecx+48])
  a1(scrypt_ChunkMix_ssse3_no_xor2:)
  a2(movdqa [esp+0],xmm0)
  a2(movdqa [esp+16],xmm1)
  a2(movdqa [esp+32],xmm2)
  a2(movdqa xmm7,xmm3)
  a2(mov eax,8)
  a1(scrypt_chacha_ssse3_loop: )
  a2(paddd xmm0,xmm1)
  a2(pxor xmm3,xmm0)
  a2(pshufb xmm3,xmm4)
  a2(paddd xmm2,xmm3)
  a2(pxor xmm1,xmm2)
  a2(movdqa xmm6,xmm1)
  a2(pslld xmm1,12)
  a2(psrld xmm6,20)
  a2(pxor xmm1,xmm6)
  a2(paddd xmm0,xmm1)
  a2(pxor xmm3,xmm0)
  a2(pshufb xmm3,xmm5)
  a3(pshufd xmm0,xmm0,0x93)
  a2(paddd xmm2,xmm3)
  a3(pshufd xmm3,xmm3,0x4e)
  a2(pxor xmm1,xmm2)
  a3(pshufd xmm2,xmm2,0x39)
  a2(movdqa xmm6,xmm1)
  a2(pslld xmm1,7)
  a2(psrld xmm6,25)
  a2(pxor xmm1,xmm6)
  a2(sub eax,2)
  a2(paddd xmm0,xmm1)
  a2(pxor xmm3,xmm0)
  a2(pshufb xmm3,xmm4)
  a2(paddd xmm2,xmm3)
  a2(pxor xmm1,xmm2)
  a2(movdqa xmm6,xmm1)
  a2(pslld xmm1,12)
  a2(psrld xmm6,20)
  a2(pxor xmm1,xmm6)
  a2(paddd xmm0,xmm1)
  a2(pxor xmm3,xmm0)
  a2(pshufb xmm3,xmm5)
  a3(pshufd xmm0,xmm0,0x39)
  a2(paddd xmm2,xmm3)
  a3(pshufd xmm3,xmm3,0x4e)
  a2(pxor xmm1,xmm2)
  a3(pshufd xmm2,xmm2,0x93)
  a2(movdqa xmm6,xmm1)
  a2(pslld xmm1,7)
  a2(psrld xmm6,25)
  a2(pxor xmm1,xmm6)
  aj(ja scrypt_chacha_ssse3_loop)
  a2(paddd xmm0,[esp+0])
  a2(paddd xmm1,[esp+16])
  a2(paddd xmm2,[esp+32])
  a2(paddd xmm3,xmm7)
  a2(lea eax,[ebx+ecx])
  a2(xor ebx,edx)
  a2(and eax,~0x7f)
  a2(add ecx,64)
  a2(shr eax,1)
  a2(add eax, edi)
  a2(cmp ecx,edx)
  a2(movdqa [eax+0],xmm0)
  a2(movdqa [eax+16],xmm1)
  a2(movdqa [eax+32],xmm2)
  a2(movdqa [eax+48],xmm3)
  a2(mov eax,[ebp+28])
  aj(jne scrypt_ChunkMix_ssse3_loop)
  a2(mov esp,ebp)
  a1(pop ebp)
  a1(pop esi)
  a1(pop edi)
  a1(pop ebx)
  aret(16)
asm_naked_fn_end(scrypt_ChunkMix_ssse3)

#endif


/* x64 */
#if defined(X86_64ASM_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)

#define SCRYPT_CHACHA_SSSE3

asm_naked_fn_proto(void, scrypt_ChunkMix_ssse3)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_ssse3)
  a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
  a2(shl rcx,6)
  a2(lea r9,[rcx-64])
  a2(lea rax,[rsi+r9])
  a2(lea r9,[rdx+r9])
  a2(and rdx, rdx)
  a2(movdqa xmm0,[rax+0])
  a2(movdqa xmm1,[rax+16])
  a2(movdqa xmm2,[rax+32])
  a2(movdqa xmm3,[rax+48])
  a2(mov r8, 0x0504070601000302)
  a2(mov rax, 0x0d0c0f0e09080b0a)
  a2(movd xmm4, r8)
  a2(movd xmm6, rax)
  a2(mov r8, 0x0605040702010003)
  a2(mov rax, 0x0e0d0c0f0a09080b)
  a2(movd xmm5, r8)
  a2(movd xmm7, rax)
  a2(punpcklqdq xmm4, xmm6)
  a2(punpcklqdq xmm5, xmm7)
  aj(jz scrypt_ChunkMix_ssse3_no_xor1)
  a2(pxor xmm0,[r9+0])
  a2(pxor xmm1,[r9+16])
  a2(pxor xmm2,[r9+32])
  a2(pxor xmm3,[r9+48])
  a1(scrypt_ChunkMix_ssse3_no_xor1:)
  a2(xor r8,r8)
  a2(xor r9,r9)
  a1(scrypt_ChunkMix_ssse3_loop:)
  a2(and rdx, rdx)
  a2(pxor xmm0,[rsi+r9+0])
  a2(pxor xmm1,[rsi+r9+16])
  a2(pxor xmm2,[rsi+r9+32])
  a2(pxor xmm3,[rsi+r9+48])
  aj(jz scrypt_ChunkMix_ssse3_no_xor2)
  a2(pxor xmm0,[rdx+r9+0])
  a2(pxor xmm1,[rdx+r9+16])
  a2(pxor xmm2,[rdx+r9+32])
  a2(pxor xmm3,[rdx+r9+48])
  a1(scrypt_ChunkMix_ssse3_no_xor2:)
  a2(movdqa xmm8,xmm0)
  a2(movdqa xmm9,xmm1)
  a2(movdqa xmm10,xmm2)
  a2(movdqa xmm11,xmm3)
  a2(mov rax,8)
  a1(scrypt_chacha_ssse3_loop: )
  a2(paddd xmm0,xmm1)
  a2(pxor xmm3,xmm0)
  a2(pshufb xmm3,xmm4)
  a2(paddd xmm2,xmm3)
  a2(pxor xmm1,xmm2)
  a2(movdqa xmm12,xmm1)
  a2(pslld xmm1,12)
  a2(psrld xmm12,20)
  a2(pxor xmm1,xmm12)
  a2(paddd xmm0,xmm1)
  a2(pxor xmm3,xmm0)
  a2(pshufb xmm3,xmm5)
  a3(pshufd xmm0,xmm0,0x93)
  a2(paddd xmm2,xmm3)
  a3(pshufd xmm3,xmm3,0x4e)
  a2(pxor xmm1,xmm2)
  a3(pshufd xmm2,xmm2,0x39)
  a2(movdqa xmm12,xmm1)
  a2(pslld xmm1,7)
  a2(psrld xmm12,25)
  a2(pxor xmm1,xmm12)
  a2(sub rax,2)
  a2(paddd xmm0,xmm1)
  a2(pxor xmm3,xmm0)
  a2(pshufb xmm3,xmm4)
  a2(paddd xmm2,xmm3)
  a2(pxor xmm1,xmm2)
  a2(movdqa xmm12,xmm1)
  a2(pslld xmm1,12)
  a2(psrld xmm12,20)
  a2(pxor xmm1,xmm12)
  a2(paddd xmm0,xmm1)
  a2(pxor xmm3,xmm0)
  a2(pshufb xmm3,xmm5)
  a3(pshufd xmm0,xmm0,0x39)
  a2(paddd xmm2,xmm3)
  a3(pshufd xmm3,xmm3,0x4e)
  a2(pxor xmm1,xmm2)
  a3(pshufd xmm2,xmm2,0x93)
  a2(movdqa xmm12,xmm1)
  a2(pslld xmm1,7)
  a2(psrld xmm12,25)
  a2(pxor xmm1,xmm12)
  aj(ja scrypt_chacha_ssse3_loop)
  a2(paddd xmm0,xmm8)
  a2(paddd xmm1,xmm9)
  a2(paddd xmm2,xmm10)
  a2(paddd xmm3,xmm11)
  a2(lea rax,[r8+r9])
  a2(xor r8,rcx)
  a2(and rax,~0x7f)
  a2(add r9,64)
  a2(shr rax,1)
  a2(add rax, rdi)
  a2(cmp r9,rcx)
  a2(movdqa [rax+0],xmm0)
  a2(movdqa [rax+16],xmm1)
  a2(movdqa [rax+32],xmm2)
  a2(movdqa [rax+48],xmm3)
  aj(jne scrypt_ChunkMix_ssse3_loop)
  a1(ret)
asm_naked_fn_end(scrypt_ChunkMix_ssse3)

#endif


/* intrinsic */
#if defined(X86_INTRINSIC_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED))

#define SCRYPT_CHACHA_SSSE3

static void NOINLINE asm_calling_convention
scrypt_ChunkMix_ssse3(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) {
  uint32_t i, blocksPerChunk = r * 2, half = 0;
  xmmi *xmmp,x0,x1,x2,x3,x6,t0,t1,t2,t3;
  const xmmi x4 = *(xmmi *)&ssse3_rotl16_32bit, x5 = *(xmmi *)&ssse3_rotl8_32bit;
  size_t rounds;

  /* 1: X = B_{2r - 1} */
  xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
  x0 = xmmp[0];
  x1 = xmmp[1];
  x2 = xmmp[2];
  x3 = xmmp[3];

  if (Bxor) {
    xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
    x0 = _mm_xor_si128(x0, xmmp[0]);
    x1 = _mm_xor_si128(x1, xmmp[1]);
    x2 = _mm_xor_si128(x2, xmmp[2]);
    x3 = _mm_xor_si128(x3, xmmp[3]);
  }

  /* 2: for i = 0 to 2r - 1 do */
  for (i = 0; i < blocksPerChunk; i++, half ^= r) {
    /* 3: X = H(X ^ B_i) */
    xmmp = (xmmi *)scrypt_block(Bin, i);
    x0 = _mm_xor_si128(x0, xmmp[0]);
    x1 = _mm_xor_si128(x1, xmmp[1]);
    x2 = _mm_xor_si128(x2, xmmp[2]);
    x3 = _mm_xor_si128(x3, xmmp[3]);

    if (Bxor) {
      xmmp = (xmmi *)scrypt_block(Bxor, i);
      x0 = _mm_xor_si128(x0, xmmp[0]);
      x1 = _mm_xor_si128(x1, xmmp[1]);
      x2 = _mm_xor_si128(x2, xmmp[2]);
      x3 = _mm_xor_si128(x3, xmmp[3]);
    }

    t0 = x0;
    t1 = x1;
    t2 = x2;
    t3 = x3;

    for (rounds = 8; rounds; rounds -= 2) {
      x0 = _mm_add_epi32(x0, x1);
      x3 = _mm_xor_si128(x3, x0);
      x3 = _mm_shuffle_epi8(x3, x4);
      x2 = _mm_add_epi32(x2, x3);
      x1 = _mm_xor_si128(x1, x2);
      x6 = x1;
      x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x6, 20));
      x0 = _mm_add_epi32(x0, x1);
      x3 = _mm_xor_si128(x3, x0);
      x3 = _mm_shuffle_epi8(x3, x5);
      x0 = _mm_shuffle_epi32(x0, 0x93);
      x2 = _mm_add_epi32(x2, x3);
      x3 = _mm_shuffle_epi32(x3, 0x4e);
      x1 = _mm_xor_si128(x1, x2);
      x2 = _mm_shuffle_epi32(x2, 0x39);
      x6 = x1;
      x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x6, 25));
      x0 = _mm_add_epi32(x0, x1);
      x3 = _mm_xor_si128(x3, x0);
      x3 = _mm_shuffle_epi8(x3, x4);
      x2 = _mm_add_epi32(x2, x3);
      x1 = _mm_xor_si128(x1, x2);
      x6 = x1;
      x1 = _mm_or_si128(_mm_slli_epi32(x1, 12), _mm_srli_epi32(x6, 20));
      x0 = _mm_add_epi32(x0, x1);
      x3 = _mm_xor_si128(x3, x0);
      x3 = _mm_shuffle_epi8(x3, x5);
      x0 = _mm_shuffle_epi32(x0, 0x39);
      x2 = _mm_add_epi32(x2, x3);
      x3 = _mm_shuffle_epi32(x3, 0x4e);
      x1 = _mm_xor_si128(x1, x2);
      x2 = _mm_shuffle_epi32(x2, 0x93);
      x6 = x1;
      x1 = _mm_or_si128(_mm_slli_epi32(x1, 7), _mm_srli_epi32(x6, 25));
    }

    x0 = _mm_add_epi32(x0, t0);
    x1 = _mm_add_epi32(x1, t1);
    x2 = _mm_add_epi32(x2, t2);
    x3 = _mm_add_epi32(x3, t3);

    /* 4: Y_i = X */
    /* 6: B'[0..r-1] = Y_even */
    /* 6: B'[r..2r-1] = Y_odd */
    xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
    xmmp[0] = x0;
    xmmp[1] = x1;
    xmmp[2] = x2;
    xmmp[3] = x3;
  }
}

#endif

#if defined(SCRYPT_CHACHA_SSSE3)
  #undef SCRYPT_MIX
  #define SCRYPT_MIX "ChaCha/8-SSSE3"
  #undef SCRYPT_CHACHA_INCLUDED
  #define SCRYPT_CHACHA_INCLUDED
#endif
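The immediates loaded into xmm4/xmm5 above (0x01000302, 0x02010003, and so on) are pshufb control masks that turn the fixed rotl16/rotl8 of each little-endian 32-bit lane into a byte permutation: result byte j is sourced from input byte (j - rot/8) mod 4. A small generator for one lane's worth of mask (illustrative only; the function name is ours):

#include <stdint.h>
#include <stdio.h>

/* sketch: derive the per-lane pshufb control dword for a left-rotate by 'rot'
   bits (rot a multiple of 8) of little-endian 32-bit lanes */
static uint32_t rotl_mask_lane(int rot) {
  int shift = rot / 8;                                /* whole bytes rotated left   */
  uint32_t lane = 0;
  for (int j = 0; j < 4; j++)                         /* result byte j comes from   */
    lane |= (uint32_t)((j - shift) & 3) << (8 * j);   /* input byte (j - shift) & 3 */
  return lane;
}

int main(void) {
  printf("rotl16 lane mask: 0x%08x\n", rotl_mask_lane(16)); /* prints 0x01000302 */
  printf("rotl8  lane mask: 0x%08x\n", rotl_mask_lane(8));  /* prints 0x02010003 */
  return 0;
}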
315
deps/scrypt-jane-master/code/scrypt-jane-mix_chacha-xop.h
vendored
Normal file
@@ -0,0 +1,315 @@
|
||||
/* x86 */
|
||||
#if defined(X86ASM_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_CHACHA_XOP
|
||||
|
||||
asm_naked_fn_proto(void, scrypt_ChunkMix_xop)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_xop)
|
||||
a1(push ebx)
|
||||
a1(push edi)
|
||||
a1(push esi)
|
||||
a1(push ebp)
|
||||
a2(mov ebp,esp)
|
||||
a2(mov edi,[ebp+20])
|
||||
a2(mov esi,[ebp+24])
|
||||
a2(mov eax,[ebp+28])
|
||||
a2(mov ebx,[ebp+32])
|
||||
a2(sub esp,64)
|
||||
a2(and esp,~63)
|
||||
a2(lea edx,[ebx*2])
|
||||
a2(shl edx,6)
|
||||
a2(lea ecx,[edx-64])
|
||||
a2(and eax, eax)
|
||||
a2(vmovdqa xmm0,[ecx+esi+0])
|
||||
a2(vmovdqa xmm1,[ecx+esi+16])
|
||||
a2(vmovdqa xmm2,[ecx+esi+32])
|
||||
a2(vmovdqa xmm3,[ecx+esi+48])
|
||||
aj(jz scrypt_ChunkMix_xop_no_xor1)
|
||||
a3(vpxor xmm0,xmm0,[ecx+eax+0])
|
||||
a3(vpxor xmm1,xmm1,[ecx+eax+16])
|
||||
a3(vpxor xmm2,xmm2,[ecx+eax+32])
|
||||
a3(vpxor xmm3,xmm3,[ecx+eax+48])
|
||||
a1(scrypt_ChunkMix_xop_no_xor1:)
|
||||
a2(xor ecx,ecx)
|
||||
a2(xor ebx,ebx)
|
||||
a1(scrypt_ChunkMix_xop_loop:)
|
||||
a2(and eax, eax)
|
||||
a3(vpxor xmm0,xmm0,[esi+ecx+0])
|
||||
a3(vpxor xmm1,xmm1,[esi+ecx+16])
|
||||
a3(vpxor xmm2,xmm2,[esi+ecx+32])
|
||||
a3(vpxor xmm3,xmm3,[esi+ecx+48])
|
||||
aj(jz scrypt_ChunkMix_xop_no_xor2)
|
||||
a3(vpxor xmm0,xmm0,[eax+ecx+0])
|
||||
a3(vpxor xmm1,xmm1,[eax+ecx+16])
|
||||
a3(vpxor xmm2,xmm2,[eax+ecx+32])
|
||||
a3(vpxor xmm3,xmm3,[eax+ecx+48])
|
||||
a1(scrypt_ChunkMix_xop_no_xor2:)
|
||||
a2(vmovdqa xmm4,xmm0)
|
||||
a2(vmovdqa xmm5,xmm1)
|
||||
a2(vmovdqa xmm6,xmm2)
|
||||
a2(vmovdqa xmm7,xmm3)
|
||||
a2(mov eax,8)
|
||||
a1(scrypt_chacha_xop_loop: )
|
||||
a3(vpaddd xmm0,xmm0,xmm1)
|
||||
a3(vpxor xmm3,xmm3,xmm0)
|
||||
a3(vprotd xmm3,xmm3,16)
|
||||
a3(vpaddd xmm2,xmm2,xmm3)
|
||||
a3(vpxor xmm1,xmm1,xmm2)
|
||||
a3(vprotd xmm1,xmm1,12)
|
||||
a3(vpaddd xmm0,xmm0,xmm1)
|
||||
a3(vpxor xmm3,xmm3,xmm0)
|
||||
a3(vprotd xmm3,xmm3,8)
|
||||
a3(vpaddd xmm2,xmm2,xmm3)
|
||||
a3(vpshufd xmm0,xmm0,0x93)
|
||||
a3(vpxor xmm1,xmm1,xmm2)
|
||||
a3(vprotd xmm1,xmm1,7)
|
||||
a3(vpshufd xmm3,xmm3,0x4e)
|
||||
a3(vpaddd xmm0,xmm0,xmm1)
|
||||
a3(vpshufd xmm2,xmm2,0x39)
|
||||
a3(vpxor xmm3,xmm3,xmm0)
|
||||
a3(vprotd xmm3,xmm3,16)
|
||||
a3(vpaddd xmm2,xmm2,xmm3)
|
||||
a3(vpxor xmm1,xmm1,xmm2)
|
||||
a3(vprotd xmm1,xmm1,12)
|
||||
a3(vpaddd xmm0,xmm0,xmm1)
|
||||
a3(vpxor xmm3,xmm3,xmm0)
|
||||
a3(vprotd xmm3,xmm3,8)
|
||||
a3(vpaddd xmm2,xmm2,xmm3)
|
||||
a3(vpxor xmm1,xmm1,xmm2)
|
||||
a3(vpshufd xmm0,xmm0,0x39)
|
||||
a3(vprotd xmm1,xmm1,7)
|
||||
a3(pshufd xmm3,xmm3,0x4e)
|
||||
a3(pshufd xmm2,xmm2,0x93)
|
||||
a2(sub eax,2)
|
||||
aj(ja scrypt_chacha_xop_loop)
|
||||
a3(vpaddd xmm0,xmm0,xmm4)
|
||||
a3(vpaddd xmm1,xmm1,xmm5)
|
||||
a3(vpaddd xmm2,xmm2,xmm6)
|
||||
a3(vpaddd xmm3,xmm3,xmm7)
|
||||
a2(lea eax,[ebx+ecx])
|
||||
a2(xor ebx,edx)
|
||||
a2(and eax,~0x7f)
|
||||
a2(add ecx,64)
|
||||
a2(shr eax,1)
|
||||
a2(add eax, edi)
|
||||
a2(cmp ecx,edx)
|
||||
a2(vmovdqa [eax+0],xmm0)
|
||||
a2(vmovdqa [eax+16],xmm1)
|
||||
a2(vmovdqa [eax+32],xmm2)
|
||||
a2(vmovdqa [eax+48],xmm3)
|
||||
a2(mov eax,[ebp+28])
|
||||
aj(jne scrypt_ChunkMix_xop_loop)
|
||||
a2(mov esp,ebp)
|
||||
a1(pop ebp)
|
||||
a1(pop esi)
|
||||
a1(pop edi)
|
||||
a1(pop ebx)
|
||||
aret(16)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_xop)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* x64 */
|
||||
#if defined(X86_64ASM_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_CHACHA_XOP
|
||||
|
||||
asm_naked_fn_proto(void, scrypt_ChunkMix_xop)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_xop)
|
||||
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
a2(shl rcx,6)
|
||||
a2(lea r9,[rcx-64])
|
||||
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
a2(and rdx, rdx)
|
||||
a2(vmovdqa xmm0,[rax+0])
|
||||
a2(vmovdqa xmm1,[rax+16])
|
||||
a2(vmovdqa xmm2,[rax+32])
|
||||
a2(vmovdqa xmm3,[rax+48])
|
||||
aj(jz scrypt_ChunkMix_xop_no_xor1)
|
||||
a3(vpxor xmm0,xmm0,[r9+0])
|
||||
a3(vpxor xmm1,xmm1,[r9+16])
|
||||
a3(vpxor xmm2,xmm2,[r9+32])
|
||||
a3(vpxor xmm3,xmm3,[r9+48])
|
||||
a1(scrypt_ChunkMix_xop_no_xor1:)
|
||||
a2(xor r8,r8)
|
||||
a2(xor r9,r9)
|
||||
a1(scrypt_ChunkMix_xop_loop:)
|
||||
a2(and rdx, rdx)
|
||||
a3(vpxor xmm0,xmm0,[rsi+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rsi+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rsi+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rsi+r9+48])
|
||||
aj(jz scrypt_ChunkMix_xop_no_xor2)
|
||||
a3(vpxor xmm0,xmm0,[rdx+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rdx+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rdx+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rdx+r9+48])
|
||||
a1(scrypt_ChunkMix_xop_no_xor2:)
|
||||
a2(vmovdqa xmm4,xmm0)
|
||||
a2(vmovdqa xmm5,xmm1)
|
||||
a2(vmovdqa xmm6,xmm2)
|
||||
a2(vmovdqa xmm7,xmm3)
|
||||
a2(mov rax,8)
|
||||
a1(scrypt_chacha_xop_loop: )
|
||||
a3(vpaddd xmm0,xmm0,xmm1)
|
||||
a3(vpxor xmm3,xmm3,xmm0)
|
||||
a3(vprotd xmm3,xmm3,16)
|
||||
a3(vpaddd xmm2,xmm2,xmm3)
|
||||
a3(vpxor xmm1,xmm1,xmm2)
|
||||
a3(vprotd xmm1,xmm1,12)
|
||||
a3(vpaddd xmm0,xmm0,xmm1)
|
||||
a3(vpxor xmm3,xmm3,xmm0)
|
||||
a3(vprotd xmm3,xmm3,8)
|
||||
a3(vpaddd xmm2,xmm2,xmm3)
|
||||
a3(vpshufd xmm0,xmm0,0x93)
|
||||
a3(vpxor xmm1,xmm1,xmm2)
|
||||
a3(vprotd xmm1,xmm1,7)
|
||||
a3(vpshufd xmm3,xmm3,0x4e)
|
||||
a3(vpaddd xmm0,xmm0,xmm1)
|
||||
a3(vpshufd xmm2,xmm2,0x39)
|
||||
a3(vpxor xmm3,xmm3,xmm0)
|
||||
a3(vprotd xmm3,xmm3,16)
|
||||
a3(vpaddd xmm2,xmm2,xmm3)
|
||||
a3(vpxor xmm1,xmm1,xmm2)
|
||||
a3(vprotd xmm1,xmm1,12)
|
||||
a3(vpaddd xmm0,xmm0,xmm1)
|
||||
a3(vpxor xmm3,xmm3,xmm0)
|
||||
a3(vprotd xmm3,xmm3,8)
|
||||
a3(vpaddd xmm2,xmm2,xmm3)
|
||||
a3(vpxor xmm1,xmm1,xmm2)
|
||||
a3(vpshufd xmm0,xmm0,0x39)
|
||||
a3(vprotd xmm1,xmm1,7)
|
||||
a3(pshufd xmm3,xmm3,0x4e)
|
||||
a3(pshufd xmm2,xmm2,0x93)
|
||||
a2(sub rax,2)
|
||||
aj(ja scrypt_chacha_xop_loop)
|
||||
a3(vpaddd xmm0,xmm0,xmm4)
|
||||
a3(vpaddd xmm1,xmm1,xmm5)
|
||||
a3(vpaddd xmm2,xmm2,xmm6)
|
||||
a3(vpaddd xmm3,xmm3,xmm7)
|
||||
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0x7f)
|
||||
a2(add r9,64)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
a2(cmp r9,rcx)
|
||||
a2(vmovdqa [rax+0],xmm0)
|
||||
a2(vmovdqa [rax+16],xmm1)
|
||||
a2(vmovdqa [rax+32],xmm2)
|
||||
a2(vmovdqa [rax+48],xmm3)
|
||||
aj(jne scrypt_ChunkMix_xop_loop)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_xop)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED))
|
||||
|
||||
#define SCRYPT_CHACHA_XOP
|
||||
|
||||
static void asm_calling_convention NOINLINE
|
||||
scrypt_ChunkMix_xop(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) {
|
||||
uint32_t i, blocksPerChunk = r * 2, half = 0;
|
||||
xmmi *xmmp,x0,x1,x2,x3,x6,t0,t1,t2,t3;
|
||||
size_t rounds;
|
||||
|
||||
/* 1: X = B_{2r - 1} */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
|
||||
x0 = xmmp[0];
|
||||
x1 = xmmp[1];
|
||||
x2 = xmmp[2];
|
||||
x3 = xmmp[3];
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
}
|
||||
|
||||
/* 2: for i = 0 to 2r - 1 do */
|
||||
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
|
||||
/* 3: X = H(X ^ B_i) */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
}
|
||||
|
||||
t0 = x0;
|
||||
t1 = x1;
|
||||
t2 = x2;
|
||||
t3 = x3;
|
||||
|
||||
for (rounds = 8; rounds; rounds -= 2) {
|
||||
x0 = _mm_add_epi32(x0, x1);
|
||||
x3 = _mm_xor_si128(x3, x0);
|
||||
x3 = _mm_roti_epi32(x3, 16);
|
||||
x2 = _mm_add_epi32(x2, x3);
|
||||
x1 = _mm_xor_si128(x1, x2);
|
||||
x1 = _mm_roti_epi32(x1, 12);
|
||||
x0 = _mm_add_epi32(x0, x1);
|
||||
x3 = _mm_xor_si128(x3, x0);
|
||||
x3 = _mm_roti_epi32(x3, 8);
|
||||
x2 = _mm_add_epi32(x2, x3);
|
||||
x0 = _mm_shuffle_epi32(x0, 0x93);
|
||||
x1 = _mm_xor_si128(x1, x2);
|
||||
x1 = _mm_roti_epi32(x1, 7);
|
||||
x3 = _mm_shuffle_epi32(x3, 0x4e);
|
||||
x0 = _mm_add_epi32(x0, x1);
|
||||
x2 = _mm_shuffle_epi32(x2, 0x39);
|
||||
x3 = _mm_xor_si128(x3, x0);
|
||||
x3 = _mm_roti_epi32(x3, 16);
|
||||
x2 = _mm_add_epi32(x2, x3);
|
||||
x1 = _mm_xor_si128(x1, x2);
|
||||
x1 = _mm_roti_epi32(x1, 12);
|
||||
x0 = _mm_add_epi32(x0, x1);
|
||||
x3 = _mm_xor_si128(x3, x0);
|
||||
x3 = _mm_roti_epi32(x3, 8);
|
||||
x2 = _mm_add_epi32(x2, x3);
|
||||
x1 = _mm_xor_si128(x1, x2);
|
||||
x0 = _mm_shuffle_epi32(x0, 0x39);
|
||||
x1 = _mm_roti_epi32(x1, 7);
|
||||
x3 = _mm_shuffle_epi32(x3, 0x4e);
|
||||
x2 = _mm_shuffle_epi32(x2, 0x93);
|
||||
}
|
||||
|
||||
x0 = _mm_add_epi32(x0, t0);
|
||||
x1 = _mm_add_epi32(x1, t1);
|
||||
x2 = _mm_add_epi32(x2, t2);
|
||||
x3 = _mm_add_epi32(x3, t3);
|
||||
|
||||
/* 4: Y_i = X */
|
||||
/* 6: B'[0..r-1] = Y_even */
|
||||
/* 6: B'[r..2r-1] = Y_odd */
|
||||
xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
|
||||
xmmp[0] = x0;
|
||||
xmmp[1] = x1;
|
||||
xmmp[2] = x2;
|
||||
xmmp[3] = x3;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
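
/*
 * Sketch (editor addition, not upstream code): a scalar model of the
 * output permutation used by the ChunkMix functions in this file. With
 * half toggling between 0 and r each iteration, destination index
 * (i / 2) + half sends the even-numbered mixed blocks to B'[0..r-1] and
 * the odd-numbered ones to B'[r..2r-1], which is steps 4 and 6 of
 * scrypt's BlockMix. The helper name is hypothetical.
 */
static uint32_t
chunkmix_output_index(uint32_t i, uint32_t r) {
	uint32_t half = (i & 1) ? r : 0; /* equivalent to the running half ^= r */
	return (i / 2) + half;
}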

#if defined(SCRYPT_CHACHA_XOP)
	#undef SCRYPT_MIX
	#define SCRYPT_MIX "ChaCha/8-XOP"
	#undef SCRYPT_CHACHA_INCLUDED
	#define SCRYPT_CHACHA_INCLUDED
#endif
69
deps/scrypt-jane-master/code/scrypt-jane-mix_chacha.h
vendored
Normal file
@@ -0,0 +1,69 @@
#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_CHACHA_INCLUDED)

#undef SCRYPT_MIX
#define SCRYPT_MIX "ChaCha20/8 Ref"

#undef SCRYPT_CHACHA_INCLUDED
#define SCRYPT_CHACHA_INCLUDED
#define SCRYPT_CHACHA_BASIC

static void
chacha_core_basic(uint32_t state[16]) {
	size_t rounds = 8;
	uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,t;

	x0 = state[0];
	x1 = state[1];
	x2 = state[2];
	x3 = state[3];
	x4 = state[4];
	x5 = state[5];
	x6 = state[6];
	x7 = state[7];
	x8 = state[8];
	x9 = state[9];
	x10 = state[10];
	x11 = state[11];
	x12 = state[12];
	x13 = state[13];
	x14 = state[14];
	x15 = state[15];

	#define quarter(a,b,c,d) \
		a += b; t = d^a; d = ROTL32(t,16); \
		c += d; t = b^c; b = ROTL32(t,12); \
		a += b; t = d^a; d = ROTL32(t, 8); \
		c += d; t = b^c; b = ROTL32(t, 7);

	for (; rounds; rounds -= 2) {
		quarter( x0, x4, x8,x12)
		quarter( x1, x5, x9,x13)
		quarter( x2, x6,x10,x14)
		quarter( x3, x7,x11,x15)
		quarter( x0, x5,x10,x15)
		quarter( x1, x6,x11,x12)
		quarter( x2, x7, x8,x13)
		quarter( x3, x4, x9,x14)
	}

	state[0] += x0;
	state[1] += x1;
	state[2] += x2;
	state[3] += x3;
	state[4] += x4;
	state[5] += x5;
	state[6] += x6;
	state[7] += x7;
	state[8] += x8;
	state[9] += x9;
	state[10] += x10;
	state[11] += x11;
	state[12] += x12;
	state[13] += x13;
	state[14] += x14;
	state[15] += x15;

	#undef quarter
}

#endif
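
/*
 * Sketch (editor addition, not upstream code): how the reference core
 * above is applied inside BlockMix. The core permutes a 16-word block in
 * place and adds the input back in (state += permuted(state)); the basic
 * scrypt path uses it as the H() of step 3. ROTL32 is assumed to be the
 * usual 32-bit left-rotate defined elsewhere in scrypt-jane.
 */
#if 0 /* illustration only */
static void
blockmix_h_sketch(uint32_t X[16], const uint32_t Bi[16]) {
	size_t k;
	for (k = 0; k < 16; k++) X[k] ^= Bi[k]; /* 3: X = X ^ B_i */
	chacha_core_basic(X);                   /* 3: X = H(X)   */
}
#endif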
381
deps/scrypt-jane-master/code/scrypt-jane-mix_salsa-avx.h
vendored
Normal file
@@ -0,0 +1,381 @@
|
||||
/* x86 */
|
||||
#if defined(X86ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA_AVX
|
||||
|
||||
asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_avx)
|
||||
a1(push ebx)
|
||||
a1(push edi)
|
||||
a1(push esi)
|
||||
a1(push ebp)
|
||||
a2(mov ebp,esp)
|
||||
a2(mov edi,[ebp+20])
|
||||
a2(mov esi,[ebp+24])
|
||||
a2(mov eax,[ebp+28])
|
||||
a2(mov ebx,[ebp+32])
|
||||
a2(sub esp,32)
|
||||
a2(and esp,~63)
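/* carve out 64-byte-aligned stack scratch: 32-bit mode has only xmm0-xmm7, so xmm0/xmm1 get spilled to [esp] in the loop */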
|
||||
a2(lea edx,[ebx*2])
|
||||
a2(shl edx,6)
|
||||
a2(lea ecx,[edx-64])
|
||||
a2(and eax, eax)
|
||||
a2(movdqa xmm0,[ecx+esi+0])
|
||||
a2(movdqa xmm1,[ecx+esi+16])
|
||||
a2(movdqa xmm2,[ecx+esi+32])
|
||||
a2(movdqa xmm3,[ecx+esi+48])
|
||||
aj(jz scrypt_ChunkMix_avx_no_xor1)
|
||||
a3(vpxor xmm0,xmm0,[ecx+eax+0])
|
||||
a3(vpxor xmm1,xmm1,[ecx+eax+16])
|
||||
a3(vpxor xmm2,xmm2,[ecx+eax+32])
|
||||
a3(vpxor xmm3,xmm3,[ecx+eax+48])
|
||||
a1(scrypt_ChunkMix_avx_no_xor1:)
|
||||
a2(xor ecx,ecx)
|
||||
a2(xor ebx,ebx)
|
||||
a1(scrypt_ChunkMix_avx_loop:)
|
||||
a2(and eax, eax)
|
||||
a3(vpxor xmm0,xmm0,[esi+ecx+0])
|
||||
a3(vpxor xmm1,xmm1,[esi+ecx+16])
|
||||
a3(vpxor xmm2,xmm2,[esi+ecx+32])
|
||||
a3(vpxor xmm3,xmm3,[esi+ecx+48])
|
||||
aj(jz scrypt_ChunkMix_avx_no_xor2)
|
||||
a3(vpxor xmm0,xmm0,[eax+ecx+0])
|
||||
a3(vpxor xmm1,xmm1,[eax+ecx+16])
|
||||
a3(vpxor xmm2,xmm2,[eax+ecx+32])
|
||||
a3(vpxor xmm3,xmm3,[eax+ecx+48])
|
||||
a1(scrypt_ChunkMix_avx_no_xor2:)
|
||||
a2(vmovdqa [esp+0],xmm0)
|
||||
a2(vmovdqa [esp+16],xmm1)
|
||||
a2(vmovdqa xmm6,xmm2)
|
||||
a2(vmovdqa xmm7,xmm3)
|
||||
a2(mov eax,8)
|
||||
a1(scrypt_salsa_avx_loop: )
|
||||
a3(vpaddd xmm4, xmm1, xmm0)
|
||||
a3(vpsrld xmm5, xmm4, 25)
|
||||
a3(vpslld xmm4, xmm4, 7)
|
||||
a3(vpxor xmm3, xmm3, xmm5)
|
||||
a3(vpxor xmm3, xmm3, xmm4)
|
||||
a3(vpaddd xmm4, xmm0, xmm3)
|
||||
a3(vpsrld xmm5, xmm4, 23)
|
||||
a3(vpslld xmm4, xmm4, 9)
|
||||
a3(vpxor xmm2, xmm2, xmm5)
|
||||
a3(vpxor xmm2, xmm2, xmm4)
|
||||
a3(vpaddd xmm4, xmm3, xmm2)
|
||||
a3(vpsrld xmm5, xmm4, 19)
|
||||
a3(vpslld xmm4, xmm4, 13)
|
||||
a3(vpxor xmm1, xmm1, xmm5)
|
||||
a3(vpshufd xmm3, xmm3, 0x93)
|
||||
a3(vpxor xmm1, xmm1, xmm4)
|
||||
a3(vpaddd xmm4, xmm2, xmm1)
|
||||
a3(vpsrld xmm5, xmm4, 14)
|
||||
a3(vpslld xmm4, xmm4, 18)
|
||||
a3(vpxor xmm0, xmm0, xmm5)
|
||||
a3(vpshufd xmm2, xmm2, 0x4e)
|
||||
a3(vpxor xmm0, xmm0, xmm4)
|
||||
a3(vpaddd xmm4, xmm3, xmm0)
|
||||
a3(vpshufd xmm1, xmm1, 0x39)
|
||||
a3(vpsrld xmm5, xmm4, 25)
|
||||
a3(vpslld xmm4, xmm4, 7)
|
||||
a3(vpxor xmm1, xmm1, xmm5)
|
||||
a3(vpxor xmm1, xmm1, xmm4)
|
||||
a3(vpaddd xmm4, xmm0, xmm1)
|
||||
a3(vpsrld xmm5, xmm4, 23)
|
||||
a3(vpslld xmm4, xmm4, 9)
|
||||
a3(vpxor xmm2, xmm2, xmm5)
|
||||
a3(vpxor xmm2, xmm2, xmm4)
|
||||
a3(vpaddd xmm4, xmm1, xmm2)
|
||||
a3(vpsrld xmm5, xmm4, 19)
|
||||
a3(vpslld xmm4, xmm4, 13)
|
||||
a3(vpxor xmm3, xmm3, xmm5)
|
||||
a3(vpshufd xmm1, xmm1, 0x93)
|
||||
a3(vpxor xmm3, xmm3, xmm4)
|
||||
a3(vpaddd xmm4, xmm2, xmm3)
|
||||
a3(vpsrld xmm5, xmm4, 14)
|
||||
a3(vpslld xmm4, xmm4, 18)
|
||||
a3(vpxor xmm0, xmm0, xmm5)
|
||||
a3(vpshufd xmm2, xmm2, 0x4e)
|
||||
a3(vpxor xmm0, xmm0, xmm4)
|
||||
a3(vpshufd xmm3, xmm3, 0x39)
|
||||
a2(sub eax, 2)
|
||||
aj(ja scrypt_salsa_avx_loop)
|
||||
a3(vpaddd xmm0,xmm0,[esp+0])
|
||||
a3(vpaddd xmm1,xmm1,[esp+16])
|
||||
a3(vpaddd xmm2,xmm2,xmm6)
|
||||
a3(vpaddd xmm3,xmm3,xmm7)
|
||||
a2(lea eax,[ebx+ecx])
|
||||
a2(xor ebx,edx)
|
||||
a2(and eax,~0x7f)
|
||||
a2(add ecx,64)
|
||||
a2(shr eax,1)
|
||||
a2(add eax, edi)
|
||||
a2(cmp ecx,edx)
|
||||
a2(vmovdqa [eax+0],xmm0)
|
||||
a2(vmovdqa [eax+16],xmm1)
|
||||
a2(vmovdqa [eax+32],xmm2)
|
||||
a2(vmovdqa [eax+48],xmm3)
|
||||
a2(mov eax,[ebp+28])
|
||||
aj(jne scrypt_ChunkMix_avx_loop)
|
||||
a2(mov esp,ebp)
|
||||
a1(pop ebp)
|
||||
a1(pop esi)
|
||||
a1(pop edi)
|
||||
a1(pop ebx)
|
||||
aret(16)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_avx)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* x64 */
|
||||
#if defined(X86_64ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA_AVX
|
||||
|
||||
asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_avx)
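/* SysV x64 arguments as used below: rdi = Bout, rsi = Bin, rdx = Bxor (may be NULL), ecx = r */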
|
||||
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
a2(shl rcx,6)
|
||||
a2(lea r9,[rcx-64])
|
||||
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
a2(and rdx, rdx)
|
||||
a2(vmovdqa xmm0,[rax+0])
|
||||
a2(vmovdqa xmm1,[rax+16])
|
||||
a2(vmovdqa xmm2,[rax+32])
|
||||
a2(vmovdqa xmm3,[rax+48])
|
||||
aj(jz scrypt_ChunkMix_avx_no_xor1)
|
||||
a3(vpxor xmm0,xmm0,[r9+0])
|
||||
a3(vpxor xmm1,xmm1,[r9+16])
|
||||
a3(vpxor xmm2,xmm2,[r9+32])
|
||||
a3(vpxor xmm3,xmm3,[r9+48])
|
||||
a1(scrypt_ChunkMix_avx_no_xor1:)
|
||||
a2(xor r9,r9)
|
||||
a2(xor r8,r8)
|
||||
a1(scrypt_ChunkMix_avx_loop:)
|
||||
a2(and rdx, rdx)
|
||||
a3(vpxor xmm0,xmm0,[rsi+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rsi+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rsi+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rsi+r9+48])
|
||||
aj(jz scrypt_ChunkMix_avx_no_xor2)
|
||||
a3(vpxor xmm0,xmm0,[rdx+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rdx+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rdx+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rdx+r9+48])
|
||||
a1(scrypt_ChunkMix_avx_no_xor2:)
|
||||
a2(vmovdqa xmm8,xmm0)
|
||||
a2(vmovdqa xmm9,xmm1)
|
||||
a2(vmovdqa xmm10,xmm2)
|
||||
a2(vmovdqa xmm11,xmm3)
|
||||
a2(mov rax,8)
|
||||
a1(scrypt_salsa_avx_loop: )
|
||||
a3(vpaddd xmm4, xmm1, xmm0)
|
||||
a3(vpsrld xmm5, xmm4, 25)
|
||||
a3(vpslld xmm4, xmm4, 7)
|
||||
a3(vpxor xmm3, xmm3, xmm5)
|
||||
a3(vpxor xmm3, xmm3, xmm4)
|
||||
a3(vpaddd xmm4, xmm0, xmm3)
|
||||
a3(vpsrld xmm5, xmm4, 23)
|
||||
a3(vpslld xmm4, xmm4, 9)
|
||||
a3(vpxor xmm2, xmm2, xmm5)
|
||||
a3(vpxor xmm2, xmm2, xmm4)
|
||||
a3(vpaddd xmm4, xmm3, xmm2)
|
||||
a3(vpsrld xmm5, xmm4, 19)
|
||||
a3(vpslld xmm4, xmm4, 13)
|
||||
a3(vpxor xmm1, xmm1, xmm5)
|
||||
a3(vpshufd xmm3, xmm3, 0x93)
|
||||
a3(vpxor xmm1, xmm1, xmm4)
|
||||
a3(vpaddd xmm4, xmm2, xmm1)
|
||||
a3(vpsrld xmm5, xmm4, 14)
|
||||
a3(vpslld xmm4, xmm4, 18)
|
||||
a3(vpxor xmm0, xmm0, xmm5)
|
||||
a3(vpshufd xmm2, xmm2, 0x4e)
|
||||
a3(vpxor xmm0, xmm0, xmm4)
|
||||
a3(vpaddd xmm4, xmm3, xmm0)
|
||||
a3(vpshufd xmm1, xmm1, 0x39)
|
||||
a3(vpsrld xmm5, xmm4, 25)
|
||||
a3(vpslld xmm4, xmm4, 7)
|
||||
a3(vpxor xmm1, xmm1, xmm5)
|
||||
a3(vpxor xmm1, xmm1, xmm4)
|
||||
a3(vpaddd xmm4, xmm0, xmm1)
|
||||
a3(vpsrld xmm5, xmm4, 23)
|
||||
a3(vpslld xmm4, xmm4, 9)
|
||||
a3(vpxor xmm2, xmm2, xmm5)
|
||||
a3(vpxor xmm2, xmm2, xmm4)
|
||||
a3(vpaddd xmm4, xmm1, xmm2)
|
||||
a3(vpsrld xmm5, xmm4, 19)
|
||||
a3(vpslld xmm4, xmm4, 13)
|
||||
a3(vpxor xmm3, xmm3, xmm5)
|
||||
a3(vpshufd xmm1, xmm1, 0x93)
|
||||
a3(vpxor xmm3, xmm3, xmm4)
|
||||
a3(vpaddd xmm4, xmm2, xmm3)
|
||||
a3(vpsrld xmm5, xmm4, 14)
|
||||
a3(vpslld xmm4, xmm4, 18)
|
||||
a3(vpxor xmm0, xmm0, xmm5)
|
||||
a3(vpshufd xmm2, xmm2, 0x4e)
|
||||
a3(vpxor xmm0, xmm0, xmm4)
|
||||
a3(vpshufd xmm3, xmm3, 0x39)
|
||||
a2(sub rax, 2)
|
||||
aj(ja scrypt_salsa_avx_loop)
|
||||
a3(vpaddd xmm0,xmm0,xmm8)
|
||||
a3(vpaddd xmm1,xmm1,xmm9)
|
||||
a3(vpaddd xmm2,xmm2,xmm10)
|
||||
a3(vpaddd xmm3,xmm3,xmm11)
|
||||
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0x7f)
|
||||
a2(add r9,64)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
a2(cmp r9,rcx)
|
||||
a2(vmovdqa [rax+0],xmm0)
|
||||
a2(vmovdqa [rax+16],xmm1)
|
||||
a2(vmovdqa [rax+32],xmm2)
|
||||
a2(vmovdqa [rax+48],xmm3)
|
||||
aj(jne scrypt_ChunkMix_avx_loop)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_avx)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED))
|
||||
|
||||
#define SCRYPT_SALSA_AVX
|
||||
|
||||
static void asm_calling_convention NOINLINE
|
||||
scrypt_ChunkMix_avx(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) {
|
||||
uint32_t i, blocksPerChunk = r * 2, half = 0;
|
||||
xmmi *xmmp,x0,x1,x2,x3,x4,x5,t0,t1,t2,t3;
|
||||
size_t rounds;
|
||||
|
||||
/* 1: X = B_{2r - 1} */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
|
||||
x0 = xmmp[0];
|
||||
x1 = xmmp[1];
|
||||
x2 = xmmp[2];
|
||||
x3 = xmmp[3];
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
}
|
||||
|
||||
/* 2: for i = 0 to 2r - 1 do */
|
||||
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
|
||||
/* 3: X = H(X ^ B_i) */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
}
|
||||
|
||||
t0 = x0;
|
||||
t1 = x1;
|
||||
t2 = x2;
|
||||
t3 = x3;
|
||||
|
||||
for (rounds = 8; rounds; rounds -= 2) {
|
||||
x4 = x1;
|
||||
x4 = _mm_add_epi32(x4, x0);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 7);
|
||||
x5 = _mm_srli_epi32(x5, 25);
|
||||
x3 = _mm_xor_si128(x3, x4);
|
||||
x4 = x0;
|
||||
x3 = _mm_xor_si128(x3, x5);
|
||||
x4 = _mm_add_epi32(x4, x3);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 9);
|
||||
x5 = _mm_srli_epi32(x5, 23);
|
||||
x2 = _mm_xor_si128(x2, x4);
|
||||
x4 = x3;
|
||||
x2 = _mm_xor_si128(x2, x5);
|
||||
x3 = _mm_shuffle_epi32(x3, 0x93);
|
||||
x4 = _mm_add_epi32(x4, x2);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 13);
|
||||
x5 = _mm_srli_epi32(x5, 19);
|
||||
x1 = _mm_xor_si128(x1, x4);
|
||||
x4 = x2;
|
||||
x1 = _mm_xor_si128(x1, x5);
|
||||
x2 = _mm_shuffle_epi32(x2, 0x4e);
|
||||
x4 = _mm_add_epi32(x4, x1);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 18);
|
||||
x5 = _mm_srli_epi32(x5, 14);
|
||||
x0 = _mm_xor_si128(x0, x4);
|
||||
x4 = x3;
|
||||
x0 = _mm_xor_si128(x0, x5);
|
||||
x1 = _mm_shuffle_epi32(x1, 0x39);
|
||||
x4 = _mm_add_epi32(x4, x0);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 7);
|
||||
x5 = _mm_srli_epi32(x5, 25);
|
||||
x1 = _mm_xor_si128(x1, x4);
|
||||
x4 = x0;
|
||||
x1 = _mm_xor_si128(x1, x5);
|
||||
x4 = _mm_add_epi32(x4, x1);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 9);
|
||||
x5 = _mm_srli_epi32(x5, 23);
|
||||
x2 = _mm_xor_si128(x2, x4);
|
||||
x4 = x1;
|
||||
x2 = _mm_xor_si128(x2, x5);
|
||||
x1 = _mm_shuffle_epi32(x1, 0x93);
|
||||
x4 = _mm_add_epi32(x4, x2);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 13);
|
||||
x5 = _mm_srli_epi32(x5, 19);
|
||||
x3 = _mm_xor_si128(x3, x4);
|
||||
x4 = x2;
|
||||
x3 = _mm_xor_si128(x3, x5);
|
||||
x2 = _mm_shuffle_epi32(x2, 0x4e);
|
||||
x4 = _mm_add_epi32(x4, x3);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 18);
|
||||
x5 = _mm_srli_epi32(x5, 14);
|
||||
x0 = _mm_xor_si128(x0, x4);
|
||||
x3 = _mm_shuffle_epi32(x3, 0x39);
|
||||
x0 = _mm_xor_si128(x0, x5);
|
||||
}
|
||||
|
||||
x0 = _mm_add_epi32(x0, t0);
|
||||
x1 = _mm_add_epi32(x1, t1);
|
||||
x2 = _mm_add_epi32(x2, t2);
|
||||
x3 = _mm_add_epi32(x3, t3);
|
||||
|
||||
/* 4: Y_i = X */
|
||||
/* 6: B'[0..r-1] = Y_even */
|
||||
/* 6: B'[r..2r-1] = Y_odd */
|
||||
xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
|
||||
xmmp[0] = x0;
|
||||
xmmp[1] = x1;
|
||||
xmmp[2] = x2;
|
||||
xmmp[3] = x3;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|

#if defined(SCRYPT_SALSA_AVX)
	/* uses salsa_core_tangle_sse2 */

	#undef SCRYPT_MIX
	#define SCRYPT_MIX "Salsa/8-AVX"
	#undef SCRYPT_SALSA_INCLUDED
	#define SCRYPT_SALSA_INCLUDED
#endif
443
deps/scrypt-jane-master/code/scrypt-jane-mix_salsa-sse2.h
vendored
Normal file
@@ -0,0 +1,443 @@
|
||||
/* x86 */
|
||||
#if defined(X86ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA_SSE2
|
||||
|
||||
asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_sse2)
|
||||
a1(push ebx)
|
||||
a1(push edi)
|
||||
a1(push esi)
|
||||
a1(push ebp)
|
||||
a2(mov ebp,esp)
|
||||
a2(mov edi,[ebp+20])
|
||||
a2(mov esi,[ebp+24])
|
||||
a2(mov eax,[ebp+28])
|
||||
a2(mov ebx,[ebp+32])
|
||||
a2(sub esp,32)
|
||||
a2(and esp,~63)
|
||||
a2(lea edx,[ebx*2])
|
||||
a2(shl edx,6)
|
||||
a2(lea ecx,[edx-64])
|
||||
a2(and eax, eax)
|
||||
a2(movdqa xmm0,[ecx+esi+0])
|
||||
a2(movdqa xmm1,[ecx+esi+16])
|
||||
a2(movdqa xmm2,[ecx+esi+32])
|
||||
a2(movdqa xmm3,[ecx+esi+48])
|
||||
aj(jz scrypt_ChunkMix_sse2_no_xor1)
|
||||
a2(pxor xmm0,[ecx+eax+0])
|
||||
a2(pxor xmm1,[ecx+eax+16])
|
||||
a2(pxor xmm2,[ecx+eax+32])
|
||||
a2(pxor xmm3,[ecx+eax+48])
|
||||
a1(scrypt_ChunkMix_sse2_no_xor1:)
|
||||
a2(xor ecx,ecx)
|
||||
a2(xor ebx,ebx)
|
||||
a1(scrypt_ChunkMix_sse2_loop:)
|
||||
a2(and eax, eax)
|
||||
a2(pxor xmm0,[esi+ecx+0])
|
||||
a2(pxor xmm1,[esi+ecx+16])
|
||||
a2(pxor xmm2,[esi+ecx+32])
|
||||
a2(pxor xmm3,[esi+ecx+48])
|
||||
aj(jz scrypt_ChunkMix_sse2_no_xor2)
|
||||
a2(pxor xmm0,[eax+ecx+0])
|
||||
a2(pxor xmm1,[eax+ecx+16])
|
||||
a2(pxor xmm2,[eax+ecx+32])
|
||||
a2(pxor xmm3,[eax+ecx+48])
|
||||
a1(scrypt_ChunkMix_sse2_no_xor2:)
|
||||
a2(movdqa [esp+0],xmm0)
|
||||
a2(movdqa [esp+16],xmm1)
|
||||
a2(movdqa xmm6,xmm2)
|
||||
a2(movdqa xmm7,xmm3)
|
||||
a2(mov eax,8)
|
||||
a1(scrypt_salsa_sse2_loop: )
|
||||
a2(movdqa xmm4, xmm1)
|
||||
a2(paddd xmm4, xmm0)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 7)
|
||||
a2(psrld xmm5, 25)
|
||||
a2(pxor xmm3, xmm4)
|
||||
a2(movdqa xmm4, xmm0)
|
||||
a2(pxor xmm3, xmm5)
|
||||
a2(paddd xmm4, xmm3)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 9)
|
||||
a2(psrld xmm5, 23)
|
||||
a2(pxor xmm2, xmm4)
|
||||
a2(movdqa xmm4, xmm3)
|
||||
a2(pxor xmm2, xmm5)
|
||||
a3(pshufd xmm3, xmm3, 0x93)
|
||||
a2(paddd xmm4, xmm2)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 13)
|
||||
a2(psrld xmm5, 19)
|
||||
a2(pxor xmm1, xmm4)
|
||||
a2(movdqa xmm4, xmm2)
|
||||
a2(pxor xmm1, xmm5)
|
||||
a3(pshufd xmm2, xmm2, 0x4e)
|
||||
a2(paddd xmm4, xmm1)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 18)
|
||||
a2(psrld xmm5, 14)
|
||||
a2(pxor xmm0, xmm4)
|
||||
a2(movdqa xmm4, xmm3)
|
||||
a2(pxor xmm0, xmm5)
|
||||
a3(pshufd xmm1, xmm1, 0x39)
|
||||
a2(paddd xmm4, xmm0)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 7)
|
||||
a2(psrld xmm5, 25)
|
||||
a2(pxor xmm1, xmm4)
|
||||
a2(movdqa xmm4, xmm0)
|
||||
a2(pxor xmm1, xmm5)
|
||||
a2(paddd xmm4, xmm1)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 9)
|
||||
a2(psrld xmm5, 23)
|
||||
a2(pxor xmm2, xmm4)
|
||||
a2(movdqa xmm4, xmm1)
|
||||
a2(pxor xmm2, xmm5)
|
||||
a3(pshufd xmm1, xmm1, 0x93)
|
||||
a2(paddd xmm4, xmm2)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 13)
|
||||
a2(psrld xmm5, 19)
|
||||
a2(pxor xmm3, xmm4)
|
||||
a2(movdqa xmm4, xmm2)
|
||||
a2(pxor xmm3, xmm5)
|
||||
a3(pshufd xmm2, xmm2, 0x4e)
|
||||
a2(paddd xmm4, xmm3)
|
||||
a2(sub eax, 2)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 18)
|
||||
a2(psrld xmm5, 14)
|
||||
a2(pxor xmm0, xmm4)
|
||||
a3(pshufd xmm3, xmm3, 0x39)
|
||||
a2(pxor xmm0, xmm5)
|
||||
aj(ja scrypt_salsa_sse2_loop)
|
||||
a2(paddd xmm0,[esp+0])
|
||||
a2(paddd xmm1,[esp+16])
|
||||
a2(paddd xmm2,xmm6)
|
||||
a2(paddd xmm3,xmm7)
|
||||
a2(lea eax,[ebx+ecx])
|
||||
a2(xor ebx,edx)
|
||||
a2(and eax,~0x7f)
|
||||
a2(add ecx,64)
|
||||
a2(shr eax,1)
|
||||
a2(add eax, edi)
|
||||
a2(cmp ecx,edx)
|
||||
a2(movdqa [eax+0],xmm0)
|
||||
a2(movdqa [eax+16],xmm1)
|
||||
a2(movdqa [eax+32],xmm2)
|
||||
a2(movdqa [eax+48],xmm3)
|
||||
a2(mov eax,[ebp+28])
|
||||
aj(jne scrypt_ChunkMix_sse2_loop)
|
||||
a2(mov esp,ebp)
|
||||
a1(pop ebp)
|
||||
a1(pop esi)
|
||||
a1(pop edi)
|
||||
a1(pop ebx)
|
||||
aret(16)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_sse2)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* x64 */
|
||||
#if defined(X86_64ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA_SSE2
|
||||
|
||||
asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_sse2)
|
||||
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
a2(shl rcx,6)
|
||||
a2(lea r9,[rcx-64])
|
||||
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
a2(and rdx, rdx)
|
||||
a2(movdqa xmm0,[rax+0])
|
||||
a2(movdqa xmm1,[rax+16])
|
||||
a2(movdqa xmm2,[rax+32])
|
||||
a2(movdqa xmm3,[rax+48])
|
||||
aj(jz scrypt_ChunkMix_sse2_no_xor1)
|
||||
a2(pxor xmm0,[r9+0])
|
||||
a2(pxor xmm1,[r9+16])
|
||||
a2(pxor xmm2,[r9+32])
|
||||
a2(pxor xmm3,[r9+48])
|
||||
a1(scrypt_ChunkMix_sse2_no_xor1:)
|
||||
a2(xor r9,r9)
|
||||
a2(xor r8,r8)
|
||||
a1(scrypt_ChunkMix_sse2_loop:)
|
||||
a2(and rdx, rdx)
|
||||
a2(pxor xmm0,[rsi+r9+0])
|
||||
a2(pxor xmm1,[rsi+r9+16])
|
||||
a2(pxor xmm2,[rsi+r9+32])
|
||||
a2(pxor xmm3,[rsi+r9+48])
|
||||
aj(jz scrypt_ChunkMix_sse2_no_xor2)
|
||||
a2(pxor xmm0,[rdx+r9+0])
|
||||
a2(pxor xmm1,[rdx+r9+16])
|
||||
a2(pxor xmm2,[rdx+r9+32])
|
||||
a2(pxor xmm3,[rdx+r9+48])
|
||||
a1(scrypt_ChunkMix_sse2_no_xor2:)
|
||||
a2(movdqa xmm8,xmm0)
|
||||
a2(movdqa xmm9,xmm1)
|
||||
a2(movdqa xmm10,xmm2)
|
||||
a2(movdqa xmm11,xmm3)
|
||||
a2(mov rax,8)
|
||||
a1(scrypt_salsa_sse2_loop: )
|
||||
a2(movdqa xmm4, xmm1)
|
||||
a2(paddd xmm4, xmm0)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 7)
|
||||
a2(psrld xmm5, 25)
|
||||
a2(pxor xmm3, xmm4)
|
||||
a2(movdqa xmm4, xmm0)
|
||||
a2(pxor xmm3, xmm5)
|
||||
a2(paddd xmm4, xmm3)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 9)
|
||||
a2(psrld xmm5, 23)
|
||||
a2(pxor xmm2, xmm4)
|
||||
a2(movdqa xmm4, xmm3)
|
||||
a2(pxor xmm2, xmm5)
|
||||
a3(pshufd xmm3, xmm3, 0x93)
|
||||
a2(paddd xmm4, xmm2)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 13)
|
||||
a2(psrld xmm5, 19)
|
||||
a2(pxor xmm1, xmm4)
|
||||
a2(movdqa xmm4, xmm2)
|
||||
a2(pxor xmm1, xmm5)
|
||||
a3(pshufd xmm2, xmm2, 0x4e)
|
||||
a2(paddd xmm4, xmm1)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 18)
|
||||
a2(psrld xmm5, 14)
|
||||
a2(pxor xmm0, xmm4)
|
||||
a2(movdqa xmm4, xmm3)
|
||||
a2(pxor xmm0, xmm5)
|
||||
a3(pshufd xmm1, xmm1, 0x39)
|
||||
a2(paddd xmm4, xmm0)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 7)
|
||||
a2(psrld xmm5, 25)
|
||||
a2(pxor xmm1, xmm4)
|
||||
a2(movdqa xmm4, xmm0)
|
||||
a2(pxor xmm1, xmm5)
|
||||
a2(paddd xmm4, xmm1)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 9)
|
||||
a2(psrld xmm5, 23)
|
||||
a2(pxor xmm2, xmm4)
|
||||
a2(movdqa xmm4, xmm1)
|
||||
a2(pxor xmm2, xmm5)
|
||||
a3(pshufd xmm1, xmm1, 0x93)
|
||||
a2(paddd xmm4, xmm2)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 13)
|
||||
a2(psrld xmm5, 19)
|
||||
a2(pxor xmm3, xmm4)
|
||||
a2(movdqa xmm4, xmm2)
|
||||
a2(pxor xmm3, xmm5)
|
||||
a3(pshufd xmm2, xmm2, 0x4e)
|
||||
a2(paddd xmm4, xmm3)
|
||||
a2(sub rax, 2)
|
||||
a2(movdqa xmm5, xmm4)
|
||||
a2(pslld xmm4, 18)
|
||||
a2(psrld xmm5, 14)
|
||||
a2(pxor xmm0, xmm4)
|
||||
a3(pshufd xmm3, xmm3, 0x39)
|
||||
a2(pxor xmm0, xmm5)
|
||||
aj(ja scrypt_salsa_sse2_loop)
|
||||
a2(paddd xmm0,xmm8)
|
||||
a2(paddd xmm1,xmm9)
|
||||
a2(paddd xmm2,xmm10)
|
||||
a2(paddd xmm3,xmm11)
|
||||
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0x7f)
|
||||
a2(add r9,64)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
a2(cmp r9,rcx)
|
||||
a2(movdqa [rax+0],xmm0)
|
||||
a2(movdqa [rax+16],xmm1)
|
||||
a2(movdqa [rax+32],xmm2)
|
||||
a2(movdqa [rax+48],xmm3)
|
||||
aj(jne scrypt_ChunkMix_sse2_loop)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_sse2)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED))
|
||||
|
||||
#define SCRYPT_SALSA_SSE2
|
||||
|
||||
static void NOINLINE asm_calling_convention
|
||||
scrypt_ChunkMix_sse2(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) {
|
||||
uint32_t i, blocksPerChunk = r * 2, half = 0;
|
||||
xmmi *xmmp,x0,x1,x2,x3,x4,x5,t0,t1,t2,t3;
|
||||
size_t rounds;
|
||||
|
||||
/* 1: X = B_{2r - 1} */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
|
||||
x0 = xmmp[0];
|
||||
x1 = xmmp[1];
|
||||
x2 = xmmp[2];
|
||||
x3 = xmmp[3];
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
}
|
||||
|
||||
/* 2: for i = 0 to 2r - 1 do */
|
||||
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
|
||||
/* 3: X = H(X ^ B_i) */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
}
|
||||
|
||||
t0 = x0;
|
||||
t1 = x1;
|
||||
t2 = x2;
|
||||
t3 = x3;
|
||||
|
||||
for (rounds = 8; rounds; rounds -= 2) {
|
||||
x4 = x1;
|
||||
x4 = _mm_add_epi32(x4, x0);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 7);
|
||||
x5 = _mm_srli_epi32(x5, 25);
|
||||
x3 = _mm_xor_si128(x3, x4);
|
||||
x4 = x0;
|
||||
x3 = _mm_xor_si128(x3, x5);
|
||||
x4 = _mm_add_epi32(x4, x3);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 9);
|
||||
x5 = _mm_srli_epi32(x5, 23);
|
||||
x2 = _mm_xor_si128(x2, x4);
|
||||
x4 = x3;
|
||||
x2 = _mm_xor_si128(x2, x5);
|
||||
x3 = _mm_shuffle_epi32(x3, 0x93);
|
||||
x4 = _mm_add_epi32(x4, x2);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 13);
|
||||
x5 = _mm_srli_epi32(x5, 19);
|
||||
x1 = _mm_xor_si128(x1, x4);
|
||||
x4 = x2;
|
||||
x1 = _mm_xor_si128(x1, x5);
|
||||
x2 = _mm_shuffle_epi32(x2, 0x4e);
|
||||
x4 = _mm_add_epi32(x4, x1);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 18);
|
||||
x5 = _mm_srli_epi32(x5, 14);
|
||||
x0 = _mm_xor_si128(x0, x4);
|
||||
x4 = x3;
|
||||
x0 = _mm_xor_si128(x0, x5);
|
||||
x1 = _mm_shuffle_epi32(x1, 0x39);
|
||||
x4 = _mm_add_epi32(x4, x0);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 7);
|
||||
x5 = _mm_srli_epi32(x5, 25);
|
||||
x1 = _mm_xor_si128(x1, x4);
|
||||
x4 = x0;
|
||||
x1 = _mm_xor_si128(x1, x5);
|
||||
x4 = _mm_add_epi32(x4, x1);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 9);
|
||||
x5 = _mm_srli_epi32(x5, 23);
|
||||
x2 = _mm_xor_si128(x2, x4);
|
||||
x4 = x1;
|
||||
x2 = _mm_xor_si128(x2, x5);
|
||||
x1 = _mm_shuffle_epi32(x1, 0x93);
|
||||
x4 = _mm_add_epi32(x4, x2);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 13);
|
||||
x5 = _mm_srli_epi32(x5, 19);
|
||||
x3 = _mm_xor_si128(x3, x4);
|
||||
x4 = x2;
|
||||
x3 = _mm_xor_si128(x3, x5);
|
||||
x2 = _mm_shuffle_epi32(x2, 0x4e);
|
||||
x4 = _mm_add_epi32(x4, x3);
|
||||
x5 = x4;
|
||||
x4 = _mm_slli_epi32(x4, 18);
|
||||
x5 = _mm_srli_epi32(x5, 14);
|
||||
x0 = _mm_xor_si128(x0, x4);
|
||||
x3 = _mm_shuffle_epi32(x3, 0x39);
|
||||
x0 = _mm_xor_si128(x0, x5);
|
||||
}
|
||||
|
||||
x0 = _mm_add_epi32(x0, t0);
|
||||
x1 = _mm_add_epi32(x1, t1);
|
||||
x2 = _mm_add_epi32(x2, t2);
|
||||
x3 = _mm_add_epi32(x3, t3);
|
||||
|
||||
/* 4: Y_i = X */
|
||||
/* 6: B'[0..r-1] = Y_even */
|
||||
/* 6: B'[r..2r-1] = Y_odd */
|
||||
xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
|
||||
xmmp[0] = x0;
|
||||
xmmp[1] = x1;
|
||||
xmmp[2] = x2;
|
||||
xmmp[3] = x3;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|

#if defined(SCRYPT_SALSA_SSE2)
	#undef SCRYPT_MIX
	#define SCRYPT_MIX "Salsa/8-SSE2"
	#undef SCRYPT_SALSA_INCLUDED
	#define SCRYPT_SALSA_INCLUDED
#endif

/* used by avx,etc as well */
#if defined(SCRYPT_SALSA_INCLUDED)
	/*
		Default layout:
		 0  1  2  3
		 4  5  6  7
		 8  9 10 11
		12 13 14 15

		SSE2 layout:
		 0  5 10 15
		12  1  6 11
		 8 13  2  7
		 4  9 14  3
	*/

	static void asm_calling_convention
	salsa_core_tangle_sse2(uint32_t *blocks, size_t count) {
		uint32_t t;
		while (count--) {
			t = blocks[1]; blocks[1] = blocks[5]; blocks[5] = t;
			t = blocks[2]; blocks[2] = blocks[10]; blocks[10] = t;
			t = blocks[3]; blocks[3] = blocks[15]; blocks[15] = t;
			t = blocks[4]; blocks[4] = blocks[12]; blocks[12] = t;
			t = blocks[7]; blocks[7] = blocks[11]; blocks[11] = t;
			t = blocks[9]; blocks[9] = blocks[13]; blocks[13] = t;
			blocks += 16;
		}
	}
#endif
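
/*
 * Editor's note (not upstream code): the six exchanges above are plain
 * swaps, so the tangle is its own inverse; one call moves a block into
 * the diagonal SSE2 layout and a second call moves it back. A minimal
 * self-check sketch, assuming salsa_core_tangle_sse2 is visible in this
 * translation unit:
 */
#if 0 /* illustration only */
static int
salsa_tangle_roundtrip_ok(void) {
	uint32_t b[16];
	uint32_t i;
	for (i = 0; i < 16; i++) b[i] = i;
	salsa_core_tangle_sse2(b, 1); /* default -> SSE2 layout */
	salsa_core_tangle_sse2(b, 1); /* SSE2 -> default layout */
	for (i = 0; i < 16; i++) if (b[i] != i) return 0;
	return 1;
}
#endif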
317
deps/scrypt-jane-master/code/scrypt-jane-mix_salsa-xop.h
vendored
Normal file
@@ -0,0 +1,317 @@
|
||||
/* x86 */
|
||||
#if defined(X86ASM_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA_XOP
|
||||
|
||||
asm_naked_fn_proto(void, scrypt_ChunkMix_xop)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_xop)
|
||||
a1(push ebx)
|
||||
a1(push edi)
|
||||
a1(push esi)
|
||||
a1(push ebp)
|
||||
a2(mov ebp,esp)
|
||||
a2(mov edi,[ebp+20])
|
||||
a2(mov esi,[ebp+24])
|
||||
a2(mov eax,[ebp+28])
|
||||
a2(mov ebx,[ebp+32])
|
||||
a2(sub esp,32)
|
||||
a2(and esp,~63)
|
||||
a2(lea edx,[ebx*2])
|
||||
a2(shl edx,6)
|
||||
a2(lea ecx,[edx-64])
|
||||
a2(and eax, eax)
|
||||
a2(movdqa xmm0,[ecx+esi+0])
|
||||
a2(movdqa xmm1,[ecx+esi+16])
|
||||
a2(movdqa xmm2,[ecx+esi+32])
|
||||
a2(movdqa xmm3,[ecx+esi+48])
|
||||
aj(jz scrypt_ChunkMix_xop_no_xor1)
|
||||
a3(vpxor xmm0,xmm0,[ecx+eax+0])
|
||||
a3(vpxor xmm1,xmm1,[ecx+eax+16])
|
||||
a3(vpxor xmm2,xmm2,[ecx+eax+32])
|
||||
a3(vpxor xmm3,xmm3,[ecx+eax+48])
|
||||
a1(scrypt_ChunkMix_xop_no_xor1:)
|
||||
a2(xor ecx,ecx)
|
||||
a2(xor ebx,ebx)
|
||||
a1(scrypt_ChunkMix_xop_loop:)
|
||||
a2(and eax, eax)
|
||||
a3(vpxor xmm0,xmm0,[esi+ecx+0])
|
||||
a3(vpxor xmm1,xmm1,[esi+ecx+16])
|
||||
a3(vpxor xmm2,xmm2,[esi+ecx+32])
|
||||
a3(vpxor xmm3,xmm3,[esi+ecx+48])
|
||||
aj(jz scrypt_ChunkMix_xop_no_xor2)
|
||||
a3(vpxor xmm0,xmm0,[eax+ecx+0])
|
||||
a3(vpxor xmm1,xmm1,[eax+ecx+16])
|
||||
a3(vpxor xmm2,xmm2,[eax+ecx+32])
|
||||
a3(vpxor xmm3,xmm3,[eax+ecx+48])
|
||||
a1(scrypt_ChunkMix_xop_no_xor2:)
|
||||
a2(vmovdqa [esp+0],xmm0)
|
||||
a2(vmovdqa [esp+16],xmm1)
|
||||
a2(vmovdqa xmm6,xmm2)
|
||||
a2(vmovdqa xmm7,xmm3)
|
||||
a2(mov eax,8)
|
||||
a1(scrypt_salsa_xop_loop: )
|
||||
a3(vpaddd xmm4, xmm1, xmm0)
|
||||
a3(vprotd xmm4, xmm4, 7)
|
||||
a3(vpxor xmm3, xmm3, xmm4)
|
||||
a3(vpaddd xmm4, xmm0, xmm3)
|
||||
a3(vprotd xmm4, xmm4, 9)
|
||||
a3(vpxor xmm2, xmm2, xmm4)
|
||||
a3(vpaddd xmm4, xmm3, xmm2)
|
||||
a3(vprotd xmm4, xmm4, 13)
|
||||
a3(vpxor xmm1, xmm1, xmm4)
|
||||
a3(vpaddd xmm4, xmm2, xmm1)
|
||||
a3(pshufd xmm3, xmm3, 0x93)
|
||||
a3(vprotd xmm4, xmm4, 18)
|
||||
a3(pshufd xmm2, xmm2, 0x4e)
|
||||
a3(vpxor xmm0, xmm0, xmm4)
|
||||
a3(pshufd xmm1, xmm1, 0x39)
|
||||
a3(vpaddd xmm4, xmm3, xmm0)
|
||||
a3(vprotd xmm4, xmm4, 7)
|
||||
a3(vpxor xmm1, xmm1, xmm4)
|
||||
a3(vpaddd xmm4, xmm0, xmm1)
|
||||
a3(vprotd xmm4, xmm4, 9)
|
||||
a3(vpxor xmm2, xmm2, xmm4)
|
||||
a3(vpaddd xmm4, xmm1, xmm2)
|
||||
a3(vprotd xmm4, xmm4, 13)
|
||||
a3(vpxor xmm3, xmm3, xmm4)
|
||||
a3(pshufd xmm1, xmm1, 0x93)
|
||||
a3(vpaddd xmm4, xmm2, xmm3)
|
||||
a3(pshufd xmm2, xmm2, 0x4e)
|
||||
a3(vprotd xmm4, xmm4, 18)
|
||||
a3(pshufd xmm3, xmm3, 0x39)
|
||||
a3(vpxor xmm0, xmm0, xmm4)
|
||||
a2(sub eax, 2)
|
||||
aj(ja scrypt_salsa_xop_loop)
|
||||
a3(vpaddd xmm0,xmm0,[esp+0])
|
||||
a3(vpaddd xmm1,xmm1,[esp+16])
|
||||
a3(vpaddd xmm2,xmm2,xmm6)
|
||||
a3(vpaddd xmm3,xmm3,xmm7)
|
||||
a2(lea eax,[ebx+ecx])
|
||||
a2(xor ebx,edx)
|
||||
a2(and eax,~0x7f)
|
||||
a2(add ecx,64)
|
||||
a2(shr eax,1)
|
||||
a2(add eax, edi)
|
||||
a2(cmp ecx,edx)
|
||||
a2(vmovdqa [eax+0],xmm0)
|
||||
a2(vmovdqa [eax+16],xmm1)
|
||||
a2(vmovdqa [eax+32],xmm2)
|
||||
a2(vmovdqa [eax+48],xmm3)
|
||||
a2(mov eax,[ebp+28])
|
||||
aj(jne scrypt_ChunkMix_xop_loop)
|
||||
a2(mov esp,ebp)
|
||||
a1(pop ebp)
|
||||
a1(pop esi)
|
||||
a1(pop edi)
|
||||
a1(pop ebx)
|
||||
aret(16)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_xop)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/* x64 */
|
||||
#if defined(X86_64ASM_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA_XOP
|
||||
|
||||
asm_naked_fn_proto(void, scrypt_ChunkMix_xop)(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_xop)
|
||||
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
a2(shl rcx,6)
|
||||
a2(lea r9,[rcx-64])
|
||||
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
a2(and rdx, rdx)
|
||||
a2(vmovdqa xmm0,[rax+0])
|
||||
a2(vmovdqa xmm1,[rax+16])
|
||||
a2(vmovdqa xmm2,[rax+32])
|
||||
a2(vmovdqa xmm3,[rax+48])
|
||||
aj(jz scrypt_ChunkMix_xop_no_xor1)
|
||||
a3(vpxor xmm0,xmm0,[r9+0])
|
||||
a3(vpxor xmm1,xmm1,[r9+16])
|
||||
a3(vpxor xmm2,xmm2,[r9+32])
|
||||
a3(vpxor xmm3,xmm3,[r9+48])
|
||||
a1(scrypt_ChunkMix_xop_no_xor1:)
|
||||
a2(xor r9,r9)
|
||||
a2(xor r8,r8)
|
||||
a1(scrypt_ChunkMix_xop_loop:)
|
||||
a2(and rdx, rdx)
|
||||
a3(vpxor xmm0,xmm0,[rsi+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rsi+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rsi+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rsi+r9+48])
|
||||
aj(jz scrypt_ChunkMix_xop_no_xor2)
|
||||
a3(vpxor xmm0,xmm0,[rdx+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rdx+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rdx+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rdx+r9+48])
|
||||
a1(scrypt_ChunkMix_xop_no_xor2:)
|
||||
a2(vmovdqa xmm8,xmm0)
|
||||
a2(vmovdqa xmm9,xmm1)
|
||||
a2(vmovdqa xmm10,xmm2)
|
||||
a2(vmovdqa xmm11,xmm3)
|
||||
a2(mov rax,8)
|
||||
a1(scrypt_salsa_xop_loop: )
|
||||
a3(vpaddd xmm4, xmm1, xmm0)
|
||||
a3(vprotd xmm4, xmm4, 7)
|
||||
a3(vpxor xmm3, xmm3, xmm4)
|
||||
a3(vpaddd xmm4, xmm0, xmm3)
|
||||
a3(vprotd xmm4, xmm4, 9)
|
||||
a3(vpxor xmm2, xmm2, xmm4)
|
||||
a3(vpaddd xmm4, xmm3, xmm2)
|
||||
a3(vprotd xmm4, xmm4, 13)
|
||||
a3(vpxor xmm1, xmm1, xmm4)
|
||||
a3(vpaddd xmm4, xmm2, xmm1)
|
||||
a3(pshufd xmm3, xmm3, 0x93)
|
||||
a3(vprotd xmm4, xmm4, 18)
|
||||
a3(pshufd xmm2, xmm2, 0x4e)
|
||||
a3(vpxor xmm0, xmm0, xmm4)
|
||||
a3(pshufd xmm1, xmm1, 0x39)
|
||||
a3(vpaddd xmm4, xmm3, xmm0)
|
||||
a3(vprotd xmm4, xmm4, 7)
|
||||
a3(vpxor xmm1, xmm1, xmm4)
|
||||
a3(vpaddd xmm4, xmm0, xmm1)
|
||||
a3(vprotd xmm4, xmm4, 9)
|
||||
a3(vpxor xmm2, xmm2, xmm4)
|
||||
a3(vpaddd xmm4, xmm1, xmm2)
|
||||
a3(vprotd xmm4, xmm4, 13)
|
||||
a3(vpxor xmm3, xmm3, xmm4)
|
||||
a3(pshufd xmm1, xmm1, 0x93)
|
||||
a3(vpaddd xmm4, xmm2, xmm3)
|
||||
a3(pshufd xmm2, xmm2, 0x4e)
|
||||
a3(vprotd xmm4, xmm4, 18)
|
||||
a3(pshufd xmm3, xmm3, 0x39)
|
||||
a3(vpxor xmm0, xmm0, xmm4)
|
||||
a2(sub rax, 2)
|
||||
aj(ja scrypt_salsa_xop_loop)
|
||||
a3(vpaddd xmm0,xmm0,xmm8)
|
||||
a3(vpaddd xmm1,xmm1,xmm9)
|
||||
a3(vpaddd xmm2,xmm2,xmm10)
|
||||
a3(vpaddd xmm3,xmm3,xmm11)
|
||||
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0x7f)
|
||||
a2(add r9,64)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
a2(cmp r9,rcx)
|
||||
a2(vmovdqa [rax+0],xmm0)
|
||||
a2(vmovdqa [rax+16],xmm1)
|
||||
a2(vmovdqa [rax+32],xmm2)
|
||||
a2(vmovdqa [rax+48],xmm3)
|
||||
aj(jne scrypt_ChunkMix_xop_loop)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_xop)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED))
|
||||
|
||||
#define SCRYPT_SALSA_XOP
|
||||
|
||||
static void asm_calling_convention NOINLINE
|
||||
scrypt_ChunkMix_xop(uint32_t *Bout/*[chunkBytes]*/, uint32_t *Bin/*[chunkBytes]*/, uint32_t *Bxor/*[chunkBytes]*/, uint32_t r) {
|
||||
uint32_t i, blocksPerChunk = r * 2, half = 0;
|
||||
xmmi *xmmp,x0,x1,x2,x3,x4,x5,t0,t1,t2,t3;
|
||||
size_t rounds;
|
||||
|
||||
/* 1: X = B_{2r - 1} */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
|
||||
x0 = xmmp[0];
|
||||
x1 = xmmp[1];
|
||||
x2 = xmmp[2];
|
||||
x3 = xmmp[3];
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
}
|
||||
|
||||
/* 2: for i = 0 to 2r - 1 do */
|
||||
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
|
||||
/* 3: X = H(X ^ B_i) */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
}
|
||||
|
||||
t0 = x0;
|
||||
t1 = x1;
|
||||
t2 = x2;
|
||||
t3 = x3;
|
||||
|
||||
for (rounds = 8; rounds; rounds -= 2) {
|
||||
x4 = _mm_add_epi32(x1, x0);
|
||||
x4 = _mm_roti_epi32(x4, 7);
|
||||
x3 = _mm_xor_si128(x3, x4);
|
||||
x4 = _mm_add_epi32(x0, x3);
|
||||
x4 = _mm_roti_epi32(x4, 9);
|
||||
x2 = _mm_xor_si128(x2, x4);
|
||||
x4 = _mm_add_epi32(x3, x2);
|
||||
x4 = _mm_roti_epi32(x4, 13);
|
||||
x1 = _mm_xor_si128(x1, x4);
|
||||
x4 = _mm_add_epi32(x2, x1);
|
||||
x4 = _mm_roti_epi32(x4, 18);
|
||||
x0 = _mm_xor_si128(x0, x4);
|
||||
x3 = _mm_shuffle_epi32(x3, 0x93);
|
||||
x2 = _mm_shuffle_epi32(x2, 0x4e);
|
||||
x1 = _mm_shuffle_epi32(x1, 0x39);
|
||||
x4 = _mm_add_epi32(x3, x0);
|
||||
x4 = _mm_roti_epi32(x4, 7);
|
||||
x1 = _mm_xor_si128(x1, x4);
|
||||
x4 = _mm_add_epi32(x0, x1);
|
||||
x4 = _mm_roti_epi32(x4, 9);
|
||||
x2 = _mm_xor_si128(x2, x4);
|
||||
x4 = _mm_add_epi32(x1, x2);
|
||||
x4 = _mm_roti_epi32(x4, 13);
|
||||
x3 = _mm_xor_si128(x3, x4);
|
||||
x4 = _mm_add_epi32(x2, x3);
|
||||
x4 = _mm_roti_epi32(x4, 18);
|
||||
x0 = _mm_xor_si128(x0, x4);
|
||||
x1 = _mm_shuffle_epi32(x1, 0x93);
|
||||
x2 = _mm_shuffle_epi32(x2, 0x4e);
|
||||
x3 = _mm_shuffle_epi32(x3, 0x39);
|
||||
}
|
||||
|
||||
x0 = _mm_add_epi32(x0, t0);
|
||||
x1 = _mm_add_epi32(x1, t1);
|
||||
x2 = _mm_add_epi32(x2, t2);
|
||||
x3 = _mm_add_epi32(x3, t3);
|
||||
|
||||
/* 4: Y_i = X */
|
||||
/* 6: B'[0..r-1] = Y_even */
|
||||
/* 6: B'[r..2r-1] = Y_odd */
|
||||
xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
|
||||
xmmp[0] = x0;
|
||||
xmmp[1] = x1;
|
||||
xmmp[2] = x2;
|
||||
xmmp[3] = x3;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|

#if defined(SCRYPT_SALSA_XOP)
	/* uses salsa_core_tangle_sse2 */

	#undef SCRYPT_MIX
	#define SCRYPT_MIX "Salsa/8-XOP"
	#undef SCRYPT_SALSA_INCLUDED
	#define SCRYPT_SALSA_INCLUDED
#endif
70
deps/scrypt-jane-master/code/scrypt-jane-mix_salsa.h
vendored
Normal file
@@ -0,0 +1,70 @@
#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA_INCLUDED)

#undef SCRYPT_MIX
#define SCRYPT_MIX "Salsa20/8 Ref"

#undef SCRYPT_SALSA_INCLUDED
#define SCRYPT_SALSA_INCLUDED
#define SCRYPT_SALSA_BASIC

static void
salsa_core_basic(uint32_t state[16]) {
	size_t rounds = 8;
	uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,t;

	x0 = state[0];
	x1 = state[1];
	x2 = state[2];
	x3 = state[3];
	x4 = state[4];
	x5 = state[5];
	x6 = state[6];
	x7 = state[7];
	x8 = state[8];
	x9 = state[9];
	x10 = state[10];
	x11 = state[11];
	x12 = state[12];
	x13 = state[13];
	x14 = state[14];
	x15 = state[15];

	#define quarter(a,b,c,d) \
		t = a+d; t = ROTL32(t, 7); b ^= t; \
		t = b+a; t = ROTL32(t, 9); c ^= t; \
		t = c+b; t = ROTL32(t, 13); d ^= t; \
		t = d+c; t = ROTL32(t, 18); a ^= t;

	for (; rounds; rounds -= 2) {
		quarter( x0, x4, x8,x12)
		quarter( x5, x9,x13, x1)
		quarter(x10,x14, x2, x6)
		quarter(x15, x3, x7,x11)
		quarter( x0, x1, x2, x3)
		quarter( x5, x6, x7, x4)
		quarter(x10,x11, x8, x9)
		quarter(x15,x12,x13,x14)
	}

	state[0] += x0;
	state[1] += x1;
	state[2] += x2;
	state[3] += x3;
	state[4] += x4;
	state[5] += x5;
	state[6] += x6;
	state[7] += x7;
	state[8] += x8;
	state[9] += x9;
	state[10] += x10;
	state[11] += x11;
	state[12] += x12;
	state[13] += x13;
	state[14] += x14;
	state[15] += x15;

	#undef quarter
}

#endif
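
/*
 * Editor's note: each pass of the round loop in salsa_core_basic above is
 * one Salsa20 double round. The first four quarter() calls mix the state
 * columns and the second four mix the rows, with each argument list
 * starting at the diagonal element (x0, x5, x10, x15), as in the Salsa20
 * specification.
 */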
367
deps/scrypt-jane-master/code/scrypt-jane-mix_salsa64-avx.h
vendored
Normal file
@@ -0,0 +1,367 @@
|
||||
/* x64 */
|
||||
#if defined(X86_64ASM_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA64_AVX
|
||||
|
||||
asm_naked_fn_proto(void, scrypt_ChunkMix_avx)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_avx)
|
||||
a1(push rbp)
|
||||
a2(mov rbp, rsp)
|
||||
a2(and rsp, ~63)
|
||||
a2(sub rsp, 128)
|
||||
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
a2(shl rcx,7)
|
||||
a2(lea r9,[rcx-128])
|
||||
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
a2(and rdx, rdx)
|
||||
a2(vmovdqa xmm0,[rax+0])
|
||||
a2(vmovdqa xmm1,[rax+16])
|
||||
a2(vmovdqa xmm2,[rax+32])
|
||||
a2(vmovdqa xmm3,[rax+48])
|
||||
a2(vmovdqa xmm4,[rax+64])
|
||||
a2(vmovdqa xmm5,[rax+80])
|
||||
a2(vmovdqa xmm6,[rax+96])
|
||||
a2(vmovdqa xmm7,[rax+112])
|
||||
aj(jz scrypt_ChunkMix_avx_no_xor1)
|
||||
a3(vpxor xmm0,xmm0,[r9+0])
|
||||
a3(vpxor xmm1,xmm1,[r9+16])
|
||||
a3(vpxor xmm2,xmm2,[r9+32])
|
||||
a3(vpxor xmm3,xmm3,[r9+48])
|
||||
a3(vpxor xmm4,xmm4,[r9+64])
|
||||
a3(vpxor xmm5,xmm5,[r9+80])
|
||||
a3(vpxor xmm6,xmm6,[r9+96])
|
||||
a3(vpxor xmm7,xmm7,[r9+112])
|
||||
a1(scrypt_ChunkMix_avx_no_xor1:)
|
||||
a2(xor r9,r9)
|
||||
a2(xor r8,r8)
|
||||
a1(scrypt_ChunkMix_avx_loop:)
|
||||
a2(and rdx, rdx)
|
||||
a3(vpxor xmm0,xmm0,[rsi+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rsi+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rsi+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rsi+r9+48])
|
||||
a3(vpxor xmm4,xmm4,[rsi+r9+64])
|
||||
a3(vpxor xmm5,xmm5,[rsi+r9+80])
|
||||
a3(vpxor xmm6,xmm6,[rsi+r9+96])
|
||||
a3(vpxor xmm7,xmm7,[rsi+r9+112])
|
||||
aj(jz scrypt_ChunkMix_avx_no_xor2)
|
||||
a3(vpxor xmm0,xmm0,[rdx+r9+0])
|
||||
a3(vpxor xmm1,xmm1,[rdx+r9+16])
|
||||
a3(vpxor xmm2,xmm2,[rdx+r9+32])
|
||||
a3(vpxor xmm3,xmm3,[rdx+r9+48])
|
||||
a3(vpxor xmm4,xmm4,[rdx+r9+64])
|
||||
a3(vpxor xmm5,xmm5,[rdx+r9+80])
|
||||
a3(vpxor xmm6,xmm6,[rdx+r9+96])
|
||||
a3(vpxor xmm7,xmm7,[rdx+r9+112])
|
||||
a1(scrypt_ChunkMix_avx_no_xor2:)
|
||||
a2(vmovdqa [rsp+0],xmm0)
|
||||
a2(vmovdqa [rsp+16],xmm1)
|
||||
a2(vmovdqa [rsp+32],xmm2)
|
||||
a2(vmovdqa [rsp+48],xmm3)
|
||||
a2(vmovdqa [rsp+64],xmm4)
|
||||
a2(vmovdqa [rsp+80],xmm5)
|
||||
a2(vmovdqa [rsp+96],xmm6)
|
||||
a2(vmovdqa [rsp+112],xmm7)
|
||||
a2(mov rax,8)
|
||||
a1(scrypt_salsa64_avx_loop: )
|
||||
a3(vpaddq xmm8, xmm0, xmm2)
|
||||
a3(vpaddq xmm9, xmm1, xmm3)
|
||||
a3(vpshufd xmm8, xmm8, 0xb1)
|
||||
a3(vpshufd xmm9, xmm9, 0xb1)
|
||||
a3(vpxor xmm6, xmm6, xmm8)
|
||||
a3(vpxor xmm7, xmm7, xmm9)
|
||||
a3(vpaddq xmm10, xmm0, xmm6)
|
||||
a3(vpaddq xmm11, xmm1, xmm7)
|
||||
a3(vpsrlq xmm8, xmm10, 51)
|
||||
a3(vpsrlq xmm9, xmm11, 51)
|
||||
a3(vpsllq xmm10, xmm10, 13)
|
||||
a3(vpsllq xmm11, xmm11, 13)
|
||||
a3(vpxor xmm4, xmm4, xmm8)
|
||||
a3(vpxor xmm5, xmm5, xmm9)
|
||||
a3(vpxor xmm4, xmm4, xmm10)
|
||||
a3(vpxor xmm5, xmm5, xmm11)
|
||||
a3(vpaddq xmm8, xmm6, xmm4)
|
||||
a3(vpaddq xmm9, xmm7, xmm5)
|
||||
a3(vpsrlq xmm10, xmm8, 25)
|
||||
a3(vpsrlq xmm11, xmm9, 25)
|
||||
a3(vpsllq xmm8, xmm8, 39)
|
||||
a3(vpsllq xmm9, xmm9, 39)
|
||||
a3(vpxor xmm2, xmm2, xmm10)
|
||||
a3(vpxor xmm3, xmm3, xmm11)
|
||||
a3(vpxor xmm2, xmm2, xmm8)
|
||||
a3(vpxor xmm3, xmm3, xmm9)
|
||||
a3(vpaddq xmm10, xmm4, xmm2)
|
||||
a3(vpaddq xmm11, xmm5, xmm3)
|
||||
a3(vpshufd xmm10, xmm10, 0xb1)
|
||||
a3(vpshufd xmm11, xmm11, 0xb1)
|
||||
a3(vpxor xmm0, xmm0, xmm10)
|
||||
a3(vpxor xmm1, xmm1, xmm11)
|
||||
a2(vmovdqa xmm8, xmm2)
|
||||
a2(vmovdqa xmm9, xmm3)
|
||||
a4(vpalignr xmm2, xmm6, xmm7, 8)
|
||||
a4(vpalignr xmm3, xmm7, xmm6, 8)
|
||||
a4(vpalignr xmm6, xmm9, xmm8, 8)
|
||||
a4(vpalignr xmm7, xmm8, xmm9, 8)
|
||||
a3(vpaddq xmm10, xmm0, xmm2)
|
||||
a3(vpaddq xmm11, xmm1, xmm3)
|
||||
a3(vpshufd xmm10, xmm10, 0xb1)
|
||||
a3(vpshufd xmm11, xmm11, 0xb1)
|
||||
a3(vpxor xmm6, xmm6, xmm10)
|
||||
a3(vpxor xmm7, xmm7, xmm11)
|
||||
a3(vpaddq xmm8, xmm0, xmm6)
|
||||
a3(vpaddq xmm9, xmm1, xmm7)
|
||||
a3(vpsrlq xmm10, xmm8, 51)
|
||||
a3(vpsrlq xmm11, xmm9, 51)
|
||||
a3(vpsllq xmm8, xmm8, 13)
|
||||
a3(vpsllq xmm9, xmm9, 13)
|
||||
a3(vpxor xmm5, xmm5, xmm10)
|
||||
a3(vpxor xmm4, xmm4, xmm11)
|
||||
a3(vpxor xmm5, xmm5, xmm8)
|
||||
a3(vpxor xmm4, xmm4, xmm9)
|
||||
a3(vpaddq xmm10, xmm6, xmm5)
|
||||
a3(vpaddq xmm11, xmm7, xmm4)
|
||||
a3(vpsrlq xmm8, xmm10, 25)
|
||||
a3(vpsrlq xmm9, xmm11, 25)
|
||||
a3(vpsllq xmm10, xmm10, 39)
|
||||
a3(vpsllq xmm11, xmm11, 39)
|
||||
a3(vpxor xmm2, xmm2, xmm8)
|
||||
a3(vpxor xmm3, xmm3, xmm9)
|
||||
a3(vpxor xmm2, xmm2, xmm10)
|
||||
a3(vpxor xmm3, xmm3, xmm11)
|
||||
a3(vpaddq xmm8, xmm5, xmm2)
|
||||
a3(vpaddq xmm9, xmm4, xmm3)
|
||||
a3(vpshufd xmm8, xmm8, 0xb1)
|
||||
a3(vpshufd xmm9, xmm9, 0xb1)
|
||||
a3(vpxor xmm0, xmm0, xmm8)
|
||||
a3(vpxor xmm1, xmm1, xmm9)
|
||||
a2(vmovdqa xmm10, xmm2)
|
||||
a2(vmovdqa xmm11, xmm3)
|
||||
a4(vpalignr xmm2, xmm6, xmm7, 8)
|
||||
a4(vpalignr xmm3, xmm7, xmm6, 8)
|
||||
a4(vpalignr xmm6, xmm11, xmm10, 8)
|
||||
a4(vpalignr xmm7, xmm10, xmm11, 8)
|
||||
a2(sub rax, 2)
|
||||
aj(ja scrypt_salsa64_avx_loop)
|
||||
a3(vpaddq xmm0,xmm0,[rsp+0])
|
||||
a3(vpaddq xmm1,xmm1,[rsp+16])
|
||||
a3(vpaddq xmm2,xmm2,[rsp+32])
|
||||
a3(vpaddq xmm3,xmm3,[rsp+48])
|
||||
a3(vpaddq xmm4,xmm4,[rsp+64])
|
||||
a3(vpaddq xmm5,xmm5,[rsp+80])
|
||||
a3(vpaddq xmm6,xmm6,[rsp+96])
|
||||
a3(vpaddq xmm7,xmm7,[rsp+112])
|
||||
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0xff)
|
||||
a2(add r9,128)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
a2(cmp r9,rcx)
|
||||
a2(vmovdqa [rax+0],xmm0)
|
||||
a2(vmovdqa [rax+16],xmm1)
|
||||
a2(vmovdqa [rax+32],xmm2)
|
||||
a2(vmovdqa [rax+48],xmm3)
|
||||
a2(vmovdqa [rax+64],xmm4)
|
||||
a2(vmovdqa [rax+80],xmm5)
|
||||
a2(vmovdqa [rax+96],xmm6)
|
||||
a2(vmovdqa [rax+112],xmm7)
|
||||
aj(jne scrypt_ChunkMix_avx_loop)
|
||||
a2(mov rsp, rbp)
|
||||
a1(pop rbp)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_avx)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_AVX) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))
|
||||
|
||||
#define SCRYPT_SALSA64_AVX
|
||||
|
||||
static void asm_calling_convention
|
||||
scrypt_ChunkMix_avx(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
|
||||
uint32_t i, blocksPerChunk = r * 2, half = 0;
|
||||
xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1,z2,z3;
|
||||
size_t rounds;
|
||||
|
||||
/* 1: X = B_{2r - 1} */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
|
||||
x0 = xmmp[0];
|
||||
x1 = xmmp[1];
|
||||
x2 = xmmp[2];
|
||||
x3 = xmmp[3];
|
||||
x4 = xmmp[4];
|
||||
x5 = xmmp[5];
|
||||
x6 = xmmp[6];
|
||||
x7 = xmmp[7];
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
x4 = _mm_xor_si128(x4, xmmp[4]);
|
||||
x5 = _mm_xor_si128(x5, xmmp[5]);
|
||||
x6 = _mm_xor_si128(x6, xmmp[6]);
|
||||
x7 = _mm_xor_si128(x7, xmmp[7]);
|
||||
}
|
||||
|
||||
/* 2: for i = 0 to 2r - 1 do */
|
||||
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
|
||||
/* 3: X = H(X ^ B_i) */
|
||||
xmmp = (xmmi *)scrypt_block(Bin, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
x4 = _mm_xor_si128(x4, xmmp[4]);
|
||||
x5 = _mm_xor_si128(x5, xmmp[5]);
|
||||
x6 = _mm_xor_si128(x6, xmmp[6]);
|
||||
x7 = _mm_xor_si128(x7, xmmp[7]);
|
||||
|
||||
if (Bxor) {
|
||||
xmmp = (xmmi *)scrypt_block(Bxor, i);
|
||||
x0 = _mm_xor_si128(x0, xmmp[0]);
|
||||
x1 = _mm_xor_si128(x1, xmmp[1]);
|
||||
x2 = _mm_xor_si128(x2, xmmp[2]);
|
||||
x3 = _mm_xor_si128(x3, xmmp[3]);
|
||||
x4 = _mm_xor_si128(x4, xmmp[4]);
|
||||
x5 = _mm_xor_si128(x5, xmmp[5]);
|
||||
x6 = _mm_xor_si128(x6, xmmp[6]);
|
||||
x7 = _mm_xor_si128(x7, xmmp[7]);
|
||||
}
|
||||
|
||||
t0 = x0;
|
||||
t1 = x1;
|
||||
t2 = x2;
|
||||
t3 = x3;
|
||||
t4 = x4;
|
||||
t5 = x5;
|
||||
t6 = x6;
|
||||
t7 = x7;
|
||||
|
||||
for (rounds = 8; rounds; rounds -= 2) {
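/* _MM_SHUFFLE(2,3,0,1) swaps the 32-bit halves of each 64-bit lane, i.e. a 64-bit rotate by 32 */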
|
||||
z0 = _mm_add_epi64(x0, x2);
|
||||
z1 = _mm_add_epi64(x1, x3);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x6 = _mm_xor_si128(x6, z0);
|
||||
x7 = _mm_xor_si128(x7, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x6, x0);
|
||||
z1 = _mm_add_epi64(x7, x1);
|
||||
z2 = _mm_srli_epi64(z0, 64-13);
|
||||
z3 = _mm_srli_epi64(z1, 64-13);
|
||||
z0 = _mm_slli_epi64(z0, 13);
|
||||
z1 = _mm_slli_epi64(z1, 13);
|
||||
x4 = _mm_xor_si128(x4, z2);
|
||||
x5 = _mm_xor_si128(x5, z3);
|
||||
x4 = _mm_xor_si128(x4, z0);
|
||||
x5 = _mm_xor_si128(x5, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x4, x6);
|
||||
z1 = _mm_add_epi64(x5, x7);
|
||||
z2 = _mm_srli_epi64(z0, 64-39);
|
||||
z3 = _mm_srli_epi64(z1, 64-39);
|
||||
z0 = _mm_slli_epi64(z0, 39);
|
||||
z1 = _mm_slli_epi64(z1, 39);
|
||||
x2 = _mm_xor_si128(x2, z2);
|
||||
x3 = _mm_xor_si128(x3, z3);
|
||||
x2 = _mm_xor_si128(x2, z0);
|
||||
x3 = _mm_xor_si128(x3, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x2, x4);
|
||||
z1 = _mm_add_epi64(x3, x5);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x0 = _mm_xor_si128(x0, z0);
|
||||
x1 = _mm_xor_si128(x1, z1);
|
||||
|
||||
z0 = x2;
|
||||
z1 = x3;
|
||||
x2 = _mm_alignr_epi8(x6, x7, 8);
|
||||
x3 = _mm_alignr_epi8(x7, x6, 8);
|
||||
x6 = _mm_alignr_epi8(z1, z0, 8);
|
||||
x7 = _mm_alignr_epi8(z0, z1, 8);
|
||||
|
||||
z0 = _mm_add_epi64(x0, x2);
|
||||
z1 = _mm_add_epi64(x1, x3);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x6 = _mm_xor_si128(x6, z0);
|
||||
x7 = _mm_xor_si128(x7, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x6, x0);
|
||||
z1 = _mm_add_epi64(x7, x1);
|
||||
z2 = _mm_srli_epi64(z0, 64-13);
|
||||
z3 = _mm_srli_epi64(z1, 64-13);
|
||||
z0 = _mm_slli_epi64(z0, 13);
|
||||
z1 = _mm_slli_epi64(z1, 13);
|
||||
x5 = _mm_xor_si128(x5, z2);
|
||||
x4 = _mm_xor_si128(x4, z3);
|
||||
x5 = _mm_xor_si128(x5, z0);
|
||||
x4 = _mm_xor_si128(x4, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x5, x6);
|
||||
z1 = _mm_add_epi64(x4, x7);
|
||||
z2 = _mm_srli_epi64(z0, 64-39);
|
||||
z3 = _mm_srli_epi64(z1, 64-39);
|
||||
z0 = _mm_slli_epi64(z0, 39);
|
||||
z1 = _mm_slli_epi64(z1, 39);
|
||||
x2 = _mm_xor_si128(x2, z2);
|
||||
x3 = _mm_xor_si128(x3, z3);
|
||||
x2 = _mm_xor_si128(x2, z0);
|
||||
x3 = _mm_xor_si128(x3, z1);
|
||||
|
||||
z0 = _mm_add_epi64(x2, x5);
|
||||
z1 = _mm_add_epi64(x3, x4);
|
||||
z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
|
||||
x0 = _mm_xor_si128(x0, z0);
|
||||
x1 = _mm_xor_si128(x1, z1);
|
||||
|
||||
z0 = x2;
|
||||
z1 = x3;
|
||||
x2 = _mm_alignr_epi8(x6, x7, 8);
|
||||
x3 = _mm_alignr_epi8(x7, x6, 8);
|
||||
x6 = _mm_alignr_epi8(z1, z0, 8);
|
||||
x7 = _mm_alignr_epi8(z0, z1, 8);
|
||||
}
|
||||
|
||||
x0 = _mm_add_epi64(x0, t0);
|
||||
x1 = _mm_add_epi64(x1, t1);
|
||||
x2 = _mm_add_epi64(x2, t2);
|
||||
x3 = _mm_add_epi64(x3, t3);
|
||||
x4 = _mm_add_epi64(x4, t4);
|
||||
x5 = _mm_add_epi64(x5, t5);
|
||||
x6 = _mm_add_epi64(x6, t6);
|
||||
x7 = _mm_add_epi64(x7, t7);
|
||||
|
||||
/* 4: Y_i = X */
|
||||
/* 6: B'[0..r-1] = Y_even */
|
||||
/* 6: B'[r..2r-1] = Y_odd */
|
||||
xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
|
||||
xmmp[0] = x0;
|
||||
xmmp[1] = x1;
|
||||
xmmp[2] = x2;
|
||||
xmmp[3] = x3;
|
||||
xmmp[4] = x4;
|
||||
xmmp[5] = x5;
|
||||
xmmp[6] = x6;
|
||||
xmmp[7] = x7;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|

#if defined(SCRYPT_SALSA64_AVX)
	/* uses salsa64_core_tangle_sse2 */

	#undef SCRYPT_MIX
	#define SCRYPT_MIX "Salsa64/8-AVX"
	#undef SCRYPT_SALSA64_INCLUDED
	#define SCRYPT_SALSA64_INCLUDED
#endif
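
/*
 * Sketch (editor addition, not upstream code): the scalar quarter
 * operation that the 64-bit SIMD rounds above implement, with the
 * rotation amounts read off the instruction pairs in the code
 * (pshufd 0xb1 = rotate by 32, psllq 13/psrlq 51 = rotate by 13,
 * psllq 39/psrlq 25 = rotate by 39). ROTL64 is assumed to be the
 * usual 64-bit left-rotate.
 */
#ifndef ROTL64
#define ROTL64(x,n) (((x) << (n)) | ((x) >> (64 - (n))))
#endif

#define salsa64_quarter_sketch(a,b,c,d,t) do { \
	t = (a) + (d); (b) ^= ROTL64(t, 32);       \
	t = (b) + (a); (c) ^= ROTL64(t, 13);       \
	t = (c) + (b); (d) ^= ROTL64(t, 39);       \
	t = (d) + (c); (a) ^= ROTL64(t, 32);       \
} while (0)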
221
deps/scrypt-jane-master/code/scrypt-jane-mix_salsa64-avx2.h
vendored
Normal file
@@ -0,0 +1,221 @@
|
||||
/* x64 */
|
||||
#if defined(X86_64ASM_AVX2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)
|
||||
|
||||
#define SCRYPT_SALSA64_AVX2
|
||||
|
||||
asm_naked_fn_proto(void, scrypt_ChunkMix_avx2)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
|
||||
asm_naked_fn(scrypt_ChunkMix_avx2)
|
||||
a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
|
||||
a2(shl rcx,7)
|
||||
a2(lea r9,[rcx-128])
|
||||
a2(lea rax,[rsi+r9])
|
||||
a2(lea r9,[rdx+r9])
|
||||
a2(and rdx, rdx)
|
||||
a2(vmovdqa ymm0,[rax+0])
|
||||
a2(vmovdqa ymm1,[rax+32])
|
||||
a2(vmovdqa ymm2,[rax+64])
|
||||
a2(vmovdqa ymm3,[rax+96])
|
||||
aj(jz scrypt_ChunkMix_avx2_no_xor1)
|
||||
a3(vpxor ymm0,ymm0,[r9+0])
|
||||
a3(vpxor ymm1,ymm1,[r9+32])
|
||||
a3(vpxor ymm2,ymm2,[r9+64])
|
||||
a3(vpxor ymm3,ymm3,[r9+96])
|
||||
a1(scrypt_ChunkMix_avx2_no_xor1:)
|
||||
a2(xor r9,r9)
|
||||
a2(xor r8,r8)
|
||||
a1(scrypt_ChunkMix_avx2_loop:)
|
||||
a2(and rdx, rdx)
|
||||
a3(vpxor ymm0,ymm0,[rsi+r9+0])
|
||||
a3(vpxor ymm1,ymm1,[rsi+r9+32])
|
||||
a3(vpxor ymm2,ymm2,[rsi+r9+64])
|
||||
a3(vpxor ymm3,ymm3,[rsi+r9+96])
|
||||
aj(jz scrypt_ChunkMix_avx2_no_xor2)
|
||||
a3(vpxor ymm0,ymm0,[rdx+r9+0])
|
||||
a3(vpxor ymm1,ymm1,[rdx+r9+32])
|
||||
a3(vpxor ymm2,ymm2,[rdx+r9+64])
|
||||
a3(vpxor ymm3,ymm3,[rdx+r9+96])
|
||||
a1(scrypt_ChunkMix_avx2_no_xor2:)
|
||||
a2(vmovdqa ymm6,ymm0)
|
||||
a2(vmovdqa ymm7,ymm1)
|
||||
a2(vmovdqa ymm8,ymm2)
|
||||
a2(vmovdqa ymm9,ymm3)
|
||||
a2(mov rax,4)
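/* rax = 4 iterations, each pass covering one double round: 8 Salsa64 rounds in total */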
|
||||
a1(scrypt_salsa64_avx2_loop: )
|
||||
a3(vpaddq ymm4, ymm1, ymm0)
|
||||
a3(vpshufd ymm4, ymm4, 0xb1)
|
||||
a3(vpxor ymm3, ymm3, ymm4)
|
||||
a3(vpaddq ymm4, ymm0, ymm3)
|
||||
a3(vpsrlq ymm5, ymm4, 51)
|
||||
a3(vpxor ymm2, ymm2, ymm5)
|
||||
a3(vpsllq ymm4, ymm4, 13)
|
||||
a3(vpxor ymm2, ymm2, ymm4)
|
||||
a3(vpaddq ymm4, ymm3, ymm2)
|
||||
a3(vpsrlq ymm5, ymm4, 25)
|
||||
a3(vpxor ymm1, ymm1, ymm5)
|
||||
a3(vpsllq ymm4, ymm4, 39)
|
||||
a3(vpxor ymm1, ymm1, ymm4)
|
||||
a3(vpaddq ymm4, ymm2, ymm1)
|
||||
a3(vpshufd ymm4, ymm4, 0xb1)
|
||||
a3(vpermq ymm1, ymm1, 0x39)
|
||||
a3(vpermq ymm10, ymm2, 0x4e)
|
||||
a3(vpxor ymm0, ymm0, ymm4)
|
||||
a3(vpermq ymm3, ymm3, 0x93)
|
||||
a3(vpaddq ymm4, ymm3, ymm0)
|
||||
a3(vpshufd ymm4, ymm4, 0xb1)
|
||||
a3(vpxor ymm1, ymm1, ymm4)
|
||||
a3(vpaddq ymm4, ymm0, ymm1)
|
||||
a3(vpsrlq ymm5, ymm4, 51)
|
||||
a3(vpxor ymm10, ymm10, ymm5)
|
||||
a3(vpsllq ymm4, ymm4, 13)
|
||||
a3(vpxor ymm10, ymm10, ymm4)
|
||||
a3(vpaddq ymm4, ymm1, ymm10)
|
||||
a3(vpsrlq ymm5, ymm4, 25)
|
||||
a3(vpxor ymm3, ymm3, ymm5)
|
||||
a3(vpsllq ymm4, ymm4, 39)
|
||||
a3(vpermq ymm1, ymm1, 0x93)
|
||||
a3(vpxor ymm3, ymm3, ymm4)
|
||||
a3(vpermq ymm2, ymm10, 0x4e)
|
||||
a3(vpaddq ymm4, ymm10, ymm3)
|
||||
a3(vpshufd ymm4, ymm4, 0xb1)
|
||||
a3(vpermq ymm3, ymm3, 0x39)
|
||||
a3(vpxor ymm0, ymm0, ymm4)
|
||||
a1(dec rax)
|
||||
aj(jnz scrypt_salsa64_avx2_loop)
|
||||
a3(vpaddq ymm0,ymm0,ymm6)
|
||||
a3(vpaddq ymm1,ymm1,ymm7)
|
||||
a3(vpaddq ymm2,ymm2,ymm8)
|
||||
a3(vpaddq ymm3,ymm3,ymm9)
|
||||
a2(lea rax,[r8+r9])
|
||||
a2(xor r8,rcx)
|
||||
a2(and rax,~0xff)
|
||||
a2(add r9,128)
|
||||
a2(shr rax,1)
|
||||
a2(add rax, rdi)
|
||||
a2(cmp r9,rcx)
|
||||
a2(vmovdqa [rax+0],ymm0)
|
||||
a2(vmovdqa [rax+32],ymm1)
|
||||
a2(vmovdqa [rax+64],ymm2)
|
||||
a2(vmovdqa [rax+96],ymm3)
|
||||
aj(jne scrypt_ChunkMix_avx2_loop)
|
||||
a1(vzeroupper)
|
||||
a1(ret)
|
||||
asm_naked_fn_end(scrypt_ChunkMix_avx2)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* intrinsic */
|
||||
#if defined(X86_INTRINSIC_AVX2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))
|
||||
|
||||
#define SCRYPT_SALSA64_AVX2
|
||||
|
||||
static void asm_calling_convention
|
||||
scrypt_ChunkMix_avx2(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
|
||||
uint32_t i, blocksPerChunk = r * 2, half = 0;
|
||||
ymmi *ymmp,y0,y1,y2,y3,t0,t1,t2,t3,z0,z1;
|
||||
size_t rounds;
|
||||
|
||||
/* 1: X = B_{2r - 1} */
|
||||
ymmp = (ymmi *)scrypt_block(Bin, blocksPerChunk - 1);
|
||||
y0 = ymmp[0];
|
||||
y1 = ymmp[1];
|
||||
y2 = ymmp[2];
|
||||
y3 = ymmp[3];
|
||||
|
||||
if (Bxor) {
|
||||
ymmp = (ymmi *)scrypt_block(Bxor, blocksPerChunk - 1);
|
||||
y0 = _mm256_xor_si256(y0, ymmp[0]);
|
||||
y1 = _mm256_xor_si256(y1, ymmp[1]);
|
||||
y2 = _mm256_xor_si256(y2, ymmp[2]);
|
||||
y3 = _mm256_xor_si256(y3, ymmp[3]);
|
||||
}
|
||||
|
||||
/* 2: for i = 0 to 2r - 1 do */
|
||||
for (i = 0; i < blocksPerChunk; i++, half ^= r) {
|
||||
/* 3: X = H(X ^ B_i) */
|
||||
ymmp = (ymmi *)scrypt_block(Bin, i);
|
||||
y0 = _mm256_xor_si256(y0, ymmp[0]);
|
||||
y1 = _mm256_xor_si256(y1, ymmp[1]);
|
||||
y2 = _mm256_xor_si256(y2, ymmp[2]);
|
||||
y3 = _mm256_xor_si256(y3, ymmp[3]);
|
||||
|
||||
if (Bxor) {
|
||||
ymmp = (ymmi *)scrypt_block(Bxor, i);
|
||||
y0 = _mm256_xor_si256(y0, ymmp[0]);
|
||||
y1 = _mm256_xor_si256(y1, ymmp[1]);
|
||||
y2 = _mm256_xor_si256(y2, ymmp[2]);
|
||||
y3 = _mm256_xor_si256(y3, ymmp[3]);
|
||||
}
|
||||
|
||||
t0 = y0;
|
||||
t1 = y1;
|
||||
t2 = y2;
|
||||
t3 = y3;
|
||||
|
||||
for (rounds = 8; rounds; rounds -= 2) {
|
||||
z0 = _mm256_add_epi64(y0, y1);
|
||||
z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
y3 = _mm256_xor_si256(y3, z0);
|
||||
z0 = _mm256_add_epi64(y3, y0);
|
||||
z1 = _mm256_srli_epi64(z0, 64-13);
|
||||
y2 = _mm256_xor_si256(y2, z1);
|
||||
z0 = _mm256_slli_epi64(z0, 13);
|
||||
y2 = _mm256_xor_si256(y2, z0);
|
||||
z0 = _mm256_add_epi64(y2, y3);
|
||||
z1 = _mm256_srli_epi64(z0, 64-39);
|
||||
y1 = _mm256_xor_si256(y1, z1);
|
||||
z0 = _mm256_slli_epi64(z0, 39);
|
||||
y1 = _mm256_xor_si256(y1, z0);
|
||||
y1 = _mm256_permute4x64_epi64(y1, _MM_SHUFFLE(0,3,2,1));
|
||||
y2 = _mm256_permute4x64_epi64(y2, _MM_SHUFFLE(1,0,3,2));
|
||||
y3 = _mm256_permute4x64_epi64(y3, _MM_SHUFFLE(2,1,0,3));
|
||||
z0 = _mm256_add_epi64(y1, y2);
|
||||
z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
y0 = _mm256_xor_si256(y0, z0);
|
||||
z0 = _mm256_add_epi64(y0, y3);
|
||||
z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
y1 = _mm256_xor_si256(y1, z0);
|
||||
z0 = _mm256_add_epi64(y1, y0);
|
||||
z1 = _mm256_srli_epi64(z0, 64-13);
|
||||
y2 = _mm256_xor_si256(y2, z1);
|
||||
z0 = _mm256_slli_epi64(z0, 13);
|
||||
y2 = _mm256_xor_si256(y2, z0);
|
||||
z0 = _mm256_add_epi64(y2, y1);
|
||||
z1 = _mm256_srli_epi64(z0, 64-39);
|
||||
y3 = _mm256_xor_si256(y3, z1);
|
||||
z0 = _mm256_slli_epi64(z0, 39);
|
||||
y3 = _mm256_xor_si256(y3, z0);
|
||||
z0 = _mm256_add_epi64(y3, y2);
|
||||
z0 = _mm256_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
|
||||
y0 = _mm256_xor_si256(y0, z0);
|
||||
y1 = _mm256_permute4x64_epi64(y1, _MM_SHUFFLE(2,1,0,3));
|
||||
y2 = _mm256_permute4x64_epi64(y2, _MM_SHUFFLE(1,0,3,2));
|
||||
y3 = _mm256_permute4x64_epi64(y3, _MM_SHUFFLE(0,3,2,1));
|
||||
}
|
||||
|
||||
y0 = _mm256_add_epi64(y0, t0);
|
||||
y1 = _mm256_add_epi64(y1, t1);
|
||||
y2 = _mm256_add_epi64(y2, t2);
|
||||
y3 = _mm256_add_epi64(y3, t3);
|
||||
|
||||
/* 4: Y_i = X */
|
||||
/* 6: B'[0..r-1] = Y_even */
|
||||
/* 6: B'[r..2r-1] = Y_odd */
|
||||
ymmp = (ymmi *)scrypt_block(Bout, (i / 2) + half);
|
||||
ymmp[0] = y0;
|
||||
ymmp[1] = y1;
|
||||
ymmp[2] = y2;
|
||||
ymmp[3] = y3;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(SCRYPT_SALSA64_AVX2)
|
||||
/* uses salsa64_core_tangle_sse2 */
|
||||
|
||||
#undef SCRYPT_MIX
|
||||
#define SCRYPT_MIX "Salsa64/8-AVX2"
|
||||
#undef SCRYPT_SALSA64_INCLUDED
|
||||
#define SCRYPT_SALSA64_INCLUDED
|
||||
#endif
|
449
deps/scrypt-jane-master/code/scrypt-jane-mix_salsa64-sse2.h
vendored
Normal file
@@ -0,0 +1,449 @@
/* x64 */
#if defined(X86_64ASM_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)

#define SCRYPT_SALSA64_SSE2

asm_naked_fn_proto(void, scrypt_ChunkMix_sse2)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_sse2)
    a1(push rbp)
    a2(mov rbp, rsp)
    a2(and rsp, ~63)
    a2(sub rsp, 128)
    a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
    a2(shl rcx,7)
    a2(lea r9,[rcx-128])
    a2(lea rax,[rsi+r9])
    a2(lea r9,[rdx+r9])
    a2(and rdx, rdx)
    a2(movdqa xmm0,[rax+0])
    a2(movdqa xmm1,[rax+16])
    a2(movdqa xmm2,[rax+32])
    a2(movdqa xmm3,[rax+48])
    a2(movdqa xmm4,[rax+64])
    a2(movdqa xmm5,[rax+80])
    a2(movdqa xmm6,[rax+96])
    a2(movdqa xmm7,[rax+112])
    aj(jz scrypt_ChunkMix_sse2_no_xor1)
    a2(pxor xmm0,[r9+0])
    a2(pxor xmm1,[r9+16])
    a2(pxor xmm2,[r9+32])
    a2(pxor xmm3,[r9+48])
    a2(pxor xmm4,[r9+64])
    a2(pxor xmm5,[r9+80])
    a2(pxor xmm6,[r9+96])
    a2(pxor xmm7,[r9+112])
    a1(scrypt_ChunkMix_sse2_no_xor1:)
    a2(xor r9,r9)
    a2(xor r8,r8)
    a1(scrypt_ChunkMix_sse2_loop:)
    a2(and rdx, rdx)
    a2(pxor xmm0,[rsi+r9+0])
    a2(pxor xmm1,[rsi+r9+16])
    a2(pxor xmm2,[rsi+r9+32])
    a2(pxor xmm3,[rsi+r9+48])
    a2(pxor xmm4,[rsi+r9+64])
    a2(pxor xmm5,[rsi+r9+80])
    a2(pxor xmm6,[rsi+r9+96])
    a2(pxor xmm7,[rsi+r9+112])
    aj(jz scrypt_ChunkMix_sse2_no_xor2)
    a2(pxor xmm0,[rdx+r9+0])
    a2(pxor xmm1,[rdx+r9+16])
    a2(pxor xmm2,[rdx+r9+32])
    a2(pxor xmm3,[rdx+r9+48])
    a2(pxor xmm4,[rdx+r9+64])
    a2(pxor xmm5,[rdx+r9+80])
    a2(pxor xmm6,[rdx+r9+96])
    a2(pxor xmm7,[rdx+r9+112])
    a1(scrypt_ChunkMix_sse2_no_xor2:)
    a2(movdqa [rsp+0],xmm0)
    a2(movdqa [rsp+16],xmm1)
    a2(movdqa [rsp+32],xmm2)
    a2(movdqa [rsp+48],xmm3)
    a2(movdqa [rsp+64],xmm4)
    a2(movdqa [rsp+80],xmm5)
    a2(movdqa [rsp+96],xmm6)
    a2(movdqa [rsp+112],xmm7)
    a2(mov rax,8)
    a1(scrypt_salsa64_sse2_loop: )
    a2(movdqa xmm8, xmm0)
    a2(movdqa xmm9, xmm1)
    a2(paddq xmm8, xmm2)
    a2(paddq xmm9, xmm3)
    a3(pshufd xmm8, xmm8, 0xb1)
    a3(pshufd xmm9, xmm9, 0xb1)
    a2(pxor xmm6, xmm8)
    a2(pxor xmm7, xmm9)
    a2(movdqa xmm10, xmm0)
    a2(movdqa xmm11, xmm1)
    a2(paddq xmm10, xmm6)
    a2(paddq xmm11, xmm7)
    a2(movdqa xmm8, xmm10)
    a2(movdqa xmm9, xmm11)
    a2(psrlq xmm10, 51)
    a2(psrlq xmm11, 51)
    a2(psllq xmm8, 13)
    a2(psllq xmm9, 13)
    a2(pxor xmm4, xmm10)
    a2(pxor xmm5, xmm11)
    a2(pxor xmm4, xmm8)
    a2(pxor xmm5, xmm9)
    a2(movdqa xmm10, xmm6)
    a2(movdqa xmm11, xmm7)
    a2(paddq xmm10, xmm4)
    a2(paddq xmm11, xmm5)
    a2(movdqa xmm8, xmm10)
    a2(movdqa xmm9, xmm11)
    a2(psrlq xmm10, 25)
    a2(psrlq xmm11, 25)
    a2(psllq xmm8, 39)
    a2(psllq xmm9, 39)
    a2(pxor xmm2, xmm10)
    a2(pxor xmm3, xmm11)
    a2(pxor xmm2, xmm8)
    a2(pxor xmm3, xmm9)
    a2(movdqa xmm8, xmm4)
    a2(movdqa xmm9, xmm5)
    a2(paddq xmm8, xmm2)
    a2(paddq xmm9, xmm3)
    a3(pshufd xmm8, xmm8, 0xb1)
    a3(pshufd xmm9, xmm9, 0xb1)
    a2(pxor xmm0, xmm8)
    a2(pxor xmm1, xmm9)
    a2(movdqa xmm8, xmm2)
    a2(movdqa xmm9, xmm3)
    a2(movdqa xmm10, xmm6)
    a2(movdqa xmm11, xmm7)
    a2(movdqa xmm2, xmm7)
    a2(movdqa xmm3, xmm6)
    a2(punpcklqdq xmm10, xmm6)
    a2(punpcklqdq xmm11, xmm7)
    a2(movdqa xmm6, xmm8)
    a2(movdqa xmm7, xmm9)
    a2(punpcklqdq xmm9, xmm9)
    a2(punpcklqdq xmm8, xmm8)
    a2(punpckhqdq xmm2, xmm10)
    a2(punpckhqdq xmm3, xmm11)
    a2(punpckhqdq xmm6, xmm9)
    a2(punpckhqdq xmm7, xmm8)
    a2(sub rax, 2)
    a2(movdqa xmm8, xmm0)
    a2(movdqa xmm9, xmm1)
    a2(paddq xmm8, xmm2)
    a2(paddq xmm9, xmm3)
    a3(pshufd xmm8, xmm8, 0xb1)
    a3(pshufd xmm9, xmm9, 0xb1)
    a2(pxor xmm6, xmm8)
    a2(pxor xmm7, xmm9)
    a2(movdqa xmm10, xmm0)
    a2(movdqa xmm11, xmm1)
    a2(paddq xmm10, xmm6)
    a2(paddq xmm11, xmm7)
    a2(movdqa xmm8, xmm10)
    a2(movdqa xmm9, xmm11)
    a2(psrlq xmm10, 51)
    a2(psrlq xmm11, 51)
    a2(psllq xmm8, 13)
    a2(psllq xmm9, 13)
    a2(pxor xmm5, xmm10)
    a2(pxor xmm4, xmm11)
    a2(pxor xmm5, xmm8)
    a2(pxor xmm4, xmm9)
    a2(movdqa xmm10, xmm6)
    a2(movdqa xmm11, xmm7)
    a2(paddq xmm10, xmm5)
    a2(paddq xmm11, xmm4)
    a2(movdqa xmm8, xmm10)
    a2(movdqa xmm9, xmm11)
    a2(psrlq xmm10, 25)
    a2(psrlq xmm11, 25)
    a2(psllq xmm8, 39)
    a2(psllq xmm9, 39)
    a2(pxor xmm2, xmm10)
    a2(pxor xmm3, xmm11)
    a2(pxor xmm2, xmm8)
    a2(pxor xmm3, xmm9)
    a2(movdqa xmm8, xmm5)
    a2(movdqa xmm9, xmm4)
    a2(paddq xmm8, xmm2)
    a2(paddq xmm9, xmm3)
    a3(pshufd xmm8, xmm8, 0xb1)
    a3(pshufd xmm9, xmm9, 0xb1)
    a2(pxor xmm0, xmm8)
    a2(pxor xmm1, xmm9)
    a2(movdqa xmm8, xmm2)
    a2(movdqa xmm9, xmm3)
    a2(movdqa xmm10, xmm6)
    a2(movdqa xmm11, xmm7)
    a2(movdqa xmm2, xmm7)
    a2(movdqa xmm3, xmm6)
    a2(punpcklqdq xmm10, xmm6)
    a2(punpcklqdq xmm11, xmm7)
    a2(movdqa xmm6, xmm8)
    a2(movdqa xmm7, xmm9)
    a2(punpcklqdq xmm9, xmm9)
    a2(punpcklqdq xmm8, xmm8)
    a2(punpckhqdq xmm2, xmm10)
    a2(punpckhqdq xmm3, xmm11)
    a2(punpckhqdq xmm6, xmm9)
    a2(punpckhqdq xmm7, xmm8)
    aj(ja scrypt_salsa64_sse2_loop)
    a2(paddq xmm0,[rsp+0])
    a2(paddq xmm1,[rsp+16])
    a2(paddq xmm2,[rsp+32])
    a2(paddq xmm3,[rsp+48])
    a2(paddq xmm4,[rsp+64])
    a2(paddq xmm5,[rsp+80])
    a2(paddq xmm6,[rsp+96])
    a2(paddq xmm7,[rsp+112])
    a2(lea rax,[r8+r9])
    a2(xor r8,rcx)
    a2(and rax,~0xff)
    a2(add r9,128)
    a2(shr rax,1)
    a2(add rax, rdi)
    a2(cmp r9,rcx)
    a2(movdqa [rax+0],xmm0)
    a2(movdqa [rax+16],xmm1)
    a2(movdqa [rax+32],xmm2)
    a2(movdqa [rax+48],xmm3)
    a2(movdqa [rax+64],xmm4)
    a2(movdqa [rax+80],xmm5)
    a2(movdqa [rax+96],xmm6)
    a2(movdqa [rax+112],xmm7)
    aj(jne scrypt_ChunkMix_sse2_loop)
    a2(mov rsp, rbp)
    a1(pop rbp)
    a1(ret)
asm_naked_fn_end(scrypt_ChunkMix_sse2)

#endif


/* intrinsic */
#if defined(X86_INTRINSIC_SSE2) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))

#define SCRYPT_SALSA64_SSE2

static void asm_calling_convention
scrypt_ChunkMix_sse2(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
    uint32_t i, blocksPerChunk = r * 2, half = 0;
    xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1,z2,z3;
    size_t rounds;

    /* 1: X = B_{2r - 1} */
    xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
    x0 = xmmp[0];
    x1 = xmmp[1];
    x2 = xmmp[2];
    x3 = xmmp[3];
    x4 = xmmp[4];
    x5 = xmmp[5];
    x6 = xmmp[6];
    x7 = xmmp[7];

    if (Bxor) {
        xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
        x0 = _mm_xor_si128(x0, xmmp[0]);
        x1 = _mm_xor_si128(x1, xmmp[1]);
        x2 = _mm_xor_si128(x2, xmmp[2]);
        x3 = _mm_xor_si128(x3, xmmp[3]);
        x4 = _mm_xor_si128(x4, xmmp[4]);
        x5 = _mm_xor_si128(x5, xmmp[5]);
        x6 = _mm_xor_si128(x6, xmmp[6]);
        x7 = _mm_xor_si128(x7, xmmp[7]);
    }

    /* 2: for i = 0 to 2r - 1 do */
    for (i = 0; i < blocksPerChunk; i++, half ^= r) {
        /* 3: X = H(X ^ B_i) */
        xmmp = (xmmi *)scrypt_block(Bin, i);
        x0 = _mm_xor_si128(x0, xmmp[0]);
        x1 = _mm_xor_si128(x1, xmmp[1]);
        x2 = _mm_xor_si128(x2, xmmp[2]);
        x3 = _mm_xor_si128(x3, xmmp[3]);
        x4 = _mm_xor_si128(x4, xmmp[4]);
        x5 = _mm_xor_si128(x5, xmmp[5]);
        x6 = _mm_xor_si128(x6, xmmp[6]);
        x7 = _mm_xor_si128(x7, xmmp[7]);

        if (Bxor) {
            xmmp = (xmmi *)scrypt_block(Bxor, i);
            x0 = _mm_xor_si128(x0, xmmp[0]);
            x1 = _mm_xor_si128(x1, xmmp[1]);
            x2 = _mm_xor_si128(x2, xmmp[2]);
            x3 = _mm_xor_si128(x3, xmmp[3]);
            x4 = _mm_xor_si128(x4, xmmp[4]);
            x5 = _mm_xor_si128(x5, xmmp[5]);
            x6 = _mm_xor_si128(x6, xmmp[6]);
            x7 = _mm_xor_si128(x7, xmmp[7]);
        }

        t0 = x0;
        t1 = x1;
        t2 = x2;
        t3 = x3;
        t4 = x4;
        t5 = x5;
        t6 = x6;
        t7 = x7;

        for (rounds = 8; rounds; rounds -= 2) {
            z0 = _mm_add_epi64(x0, x2);
            z1 = _mm_add_epi64(x1, x3);
            z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
            z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
            x6 = _mm_xor_si128(x6, z0);
            x7 = _mm_xor_si128(x7, z1);

            z0 = _mm_add_epi64(x6, x0);
            z1 = _mm_add_epi64(x7, x1);
            z2 = _mm_srli_epi64(z0, 64-13);
            z3 = _mm_srli_epi64(z1, 64-13);
            z0 = _mm_slli_epi64(z0, 13);
            z1 = _mm_slli_epi64(z1, 13);
            x4 = _mm_xor_si128(x4, z2);
            x5 = _mm_xor_si128(x5, z3);
            x4 = _mm_xor_si128(x4, z0);
            x5 = _mm_xor_si128(x5, z1);

            z0 = _mm_add_epi64(x4, x6);
            z1 = _mm_add_epi64(x5, x7);
            z2 = _mm_srli_epi64(z0, 64-39);
            z3 = _mm_srli_epi64(z1, 64-39);
            z0 = _mm_slli_epi64(z0, 39);
            z1 = _mm_slli_epi64(z1, 39);
            x2 = _mm_xor_si128(x2, z2);
            x3 = _mm_xor_si128(x3, z3);
            x2 = _mm_xor_si128(x2, z0);
            x3 = _mm_xor_si128(x3, z1);

            z0 = _mm_add_epi64(x2, x4);
            z1 = _mm_add_epi64(x3, x5);
            z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
            z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
            x0 = _mm_xor_si128(x0, z0);
            x1 = _mm_xor_si128(x1, z1);

            z0 = x4;
            z1 = x5;
            z2 = x2;
            z3 = x3;
            x4 = z1;
            x5 = z0;
            x2 = _mm_unpackhi_epi64(x7, _mm_unpacklo_epi64(x6, x6));
            x3 = _mm_unpackhi_epi64(x6, _mm_unpacklo_epi64(x7, x7));
            x6 = _mm_unpackhi_epi64(z2, _mm_unpacklo_epi64(z3, z3));
            x7 = _mm_unpackhi_epi64(z3, _mm_unpacklo_epi64(z2, z2));

            z0 = _mm_add_epi64(x0, x2);
            z1 = _mm_add_epi64(x1, x3);
            z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
            z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
            x6 = _mm_xor_si128(x6, z0);
            x7 = _mm_xor_si128(x7, z1);

            z0 = _mm_add_epi64(x6, x0);
            z1 = _mm_add_epi64(x7, x1);
            z2 = _mm_srli_epi64(z0, 64-13);
            z3 = _mm_srli_epi64(z1, 64-13);
            z0 = _mm_slli_epi64(z0, 13);
            z1 = _mm_slli_epi64(z1, 13);
            x4 = _mm_xor_si128(x4, z2);
            x5 = _mm_xor_si128(x5, z3);
            x4 = _mm_xor_si128(x4, z0);
            x5 = _mm_xor_si128(x5, z1);

            z0 = _mm_add_epi64(x4, x6);
            z1 = _mm_add_epi64(x5, x7);
            z2 = _mm_srli_epi64(z0, 64-39);
            z3 = _mm_srli_epi64(z1, 64-39);
            z0 = _mm_slli_epi64(z0, 39);
            z1 = _mm_slli_epi64(z1, 39);
            x2 = _mm_xor_si128(x2, z2);
            x3 = _mm_xor_si128(x3, z3);
            x2 = _mm_xor_si128(x2, z0);
            x3 = _mm_xor_si128(x3, z1);

            z0 = _mm_add_epi64(x2, x4);
            z1 = _mm_add_epi64(x3, x5);
            z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
            z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
            x0 = _mm_xor_si128(x0, z0);
            x1 = _mm_xor_si128(x1, z1);

            z0 = x4;
            z1 = x5;
            z2 = x2;
            z3 = x3;
            x4 = z1;
            x5 = z0;
            x2 = _mm_unpackhi_epi64(x7, _mm_unpacklo_epi64(x6, x6));
            x3 = _mm_unpackhi_epi64(x6, _mm_unpacklo_epi64(x7, x7));
            x6 = _mm_unpackhi_epi64(z2, _mm_unpacklo_epi64(z3, z3));
            x7 = _mm_unpackhi_epi64(z3, _mm_unpacklo_epi64(z2, z2));
        }

        x0 = _mm_add_epi64(x0, t0);
        x1 = _mm_add_epi64(x1, t1);
        x2 = _mm_add_epi64(x2, t2);
        x3 = _mm_add_epi64(x3, t3);
        x4 = _mm_add_epi64(x4, t4);
        x5 = _mm_add_epi64(x5, t5);
        x6 = _mm_add_epi64(x6, t6);
        x7 = _mm_add_epi64(x7, t7);

        /* 4: Y_i = X */
        /* 6: B'[0..r-1] = Y_even */
        /* 6: B'[r..2r-1] = Y_odd */
        xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
        xmmp[0] = x0;
        xmmp[1] = x1;
        xmmp[2] = x2;
        xmmp[3] = x3;
        xmmp[4] = x4;
        xmmp[5] = x5;
        xmmp[6] = x6;
        xmmp[7] = x7;
    }
}

#endif

#if defined(SCRYPT_SALSA64_SSE2)
    #undef SCRYPT_MIX
    #define SCRYPT_MIX "Salsa64/8-SSE2"
    #undef SCRYPT_SALSA64_INCLUDED
    #define SCRYPT_SALSA64_INCLUDED
#endif

/* sse3/avx use this as well */
#if defined(SCRYPT_SALSA64_INCLUDED)
    /*
        Default layout:
         0  1  2  3
         4  5  6  7
         8  9 10 11
        12 13 14 15

        SSE2 layout:
         0  5 10 15
        12  1  6 11
         8 13  2  7
         4  9 14  3
    */

    static void asm_calling_convention
    salsa64_core_tangle_sse2(uint64_t *blocks, size_t count) {
        uint64_t t;
        while (count--) {
            t = blocks[1]; blocks[1] = blocks[5]; blocks[5] = t;
            t = blocks[2]; blocks[2] = blocks[10]; blocks[10] = t;
            t = blocks[3]; blocks[3] = blocks[15]; blocks[15] = t;
            t = blocks[4]; blocks[4] = blocks[12]; blocks[12] = t;
            t = blocks[7]; blocks[7] = blocks[11]; blocks[11] = t;
            t = blocks[9]; blocks[9] = blocks[13]; blocks[13] = t;
            blocks += 16;
        }
    }
#endif
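Because `salsa64_core_tangle_sse2` above is built purely from pairwise swaps, the same call converts in both directions between the default layout and the SSE2 layout shown in the comment; applying it twice is a no-op. A small standalone check of that property (hypothetical test harness, not part of scrypt-jane):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Same swap set as salsa64_core_tangle_sse2: six disjoint 2-cycles,
       so the permutation is its own inverse. */
    static void tangle(uint64_t b[16]) {
        uint64_t t;
        t = b[1]; b[1] = b[5];  b[5]  = t;
        t = b[2]; b[2] = b[10]; b[10] = t;
        t = b[3]; b[3] = b[15]; b[15] = t;
        t = b[4]; b[4] = b[12]; b[12] = t;
        t = b[7]; b[7] = b[11]; b[11] = t;
        t = b[9]; b[9] = b[13]; b[13] = t;
    }

    int main(void) {
        uint64_t a[16], b[16];
        for (size_t i = 0; i < 16; i++) a[i] = b[i] = (uint64_t)i * 0x9e3779b97f4a7c15ULL;
        tangle(b);
        tangle(b); /* untangle: the swap set is an involution */
        assert(memcmp(a, b, sizeof(a)) == 0);
        return 0;
    }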
399
deps/scrypt-jane-master/code/scrypt-jane-mix_salsa64-ssse3.h
vendored
Normal file
@@ -0,0 +1,399 @@
/* x64 */
#if defined(X86_64ASM_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)

#define SCRYPT_SALSA64_SSSE3

asm_naked_fn_proto(void, scrypt_ChunkMix_ssse3)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_ssse3)
    a1(push rbp)
    a2(mov rbp, rsp)
    a2(and rsp, ~63)
    a2(sub rsp, 128)
    a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
    a2(shl rcx,7)
    a2(lea r9,[rcx-128])
    a2(lea rax,[rsi+r9])
    a2(lea r9,[rdx+r9])
    a2(and rdx, rdx)
    a2(movdqa xmm0,[rax+0])
    a2(movdqa xmm1,[rax+16])
    a2(movdqa xmm2,[rax+32])
    a2(movdqa xmm3,[rax+48])
    a2(movdqa xmm4,[rax+64])
    a2(movdqa xmm5,[rax+80])
    a2(movdqa xmm6,[rax+96])
    a2(movdqa xmm7,[rax+112])
    aj(jz scrypt_ChunkMix_ssse3_no_xor1)
    a2(pxor xmm0,[r9+0])
    a2(pxor xmm1,[r9+16])
    a2(pxor xmm2,[r9+32])
    a2(pxor xmm3,[r9+48])
    a2(pxor xmm4,[r9+64])
    a2(pxor xmm5,[r9+80])
    a2(pxor xmm6,[r9+96])
    a2(pxor xmm7,[r9+112])
    a1(scrypt_ChunkMix_ssse3_no_xor1:)
    a2(xor r9,r9)
    a2(xor r8,r8)
    a1(scrypt_ChunkMix_ssse3_loop:)
    a2(and rdx, rdx)
    a2(pxor xmm0,[rsi+r9+0])
    a2(pxor xmm1,[rsi+r9+16])
    a2(pxor xmm2,[rsi+r9+32])
    a2(pxor xmm3,[rsi+r9+48])
    a2(pxor xmm4,[rsi+r9+64])
    a2(pxor xmm5,[rsi+r9+80])
    a2(pxor xmm6,[rsi+r9+96])
    a2(pxor xmm7,[rsi+r9+112])
    aj(jz scrypt_ChunkMix_ssse3_no_xor2)
    a2(pxor xmm0,[rdx+r9+0])
    a2(pxor xmm1,[rdx+r9+16])
    a2(pxor xmm2,[rdx+r9+32])
    a2(pxor xmm3,[rdx+r9+48])
    a2(pxor xmm4,[rdx+r9+64])
    a2(pxor xmm5,[rdx+r9+80])
    a2(pxor xmm6,[rdx+r9+96])
    a2(pxor xmm7,[rdx+r9+112])
    a1(scrypt_ChunkMix_ssse3_no_xor2:)
    a2(movdqa [rsp+0],xmm0)
    a2(movdqa [rsp+16],xmm1)
    a2(movdqa [rsp+32],xmm2)
    a2(movdqa [rsp+48],xmm3)
    a2(movdqa [rsp+64],xmm4)
    a2(movdqa [rsp+80],xmm5)
    a2(movdqa [rsp+96],xmm6)
    a2(movdqa [rsp+112],xmm7)
    a2(mov rax,8)
    a1(scrypt_salsa64_ssse3_loop: )
    a2(movdqa xmm8, xmm0)
    a2(movdqa xmm9, xmm1)
    a2(paddq xmm8, xmm2)
    a2(paddq xmm9, xmm3)
    a3(pshufd xmm8, xmm8, 0xb1)
    a3(pshufd xmm9, xmm9, 0xb1)
    a2(pxor xmm6, xmm8)
    a2(pxor xmm7, xmm9)
    a2(movdqa xmm10, xmm0)
    a2(movdqa xmm11, xmm1)
    a2(paddq xmm10, xmm6)
    a2(paddq xmm11, xmm7)
    a2(movdqa xmm8, xmm10)
    a2(movdqa xmm9, xmm11)
    a2(psrlq xmm10, 51)
    a2(psrlq xmm11, 51)
    a2(psllq xmm8, 13)
    a2(psllq xmm9, 13)
    a2(pxor xmm4, xmm10)
    a2(pxor xmm5, xmm11)
    a2(pxor xmm4, xmm8)
    a2(pxor xmm5, xmm9)
    a2(movdqa xmm10, xmm6)
    a2(movdqa xmm11, xmm7)
    a2(paddq xmm10, xmm4)
    a2(paddq xmm11, xmm5)
    a2(movdqa xmm8, xmm10)
    a2(movdqa xmm9, xmm11)
    a2(psrlq xmm10, 25)
    a2(psrlq xmm11, 25)
    a2(psllq xmm8, 39)
    a2(psllq xmm9, 39)
    a2(pxor xmm2, xmm10)
    a2(pxor xmm3, xmm11)
    a2(pxor xmm2, xmm8)
    a2(pxor xmm3, xmm9)
    a2(movdqa xmm8, xmm4)
    a2(movdqa xmm9, xmm5)
    a2(paddq xmm8, xmm2)
    a2(paddq xmm9, xmm3)
    a3(pshufd xmm8, xmm8, 0xb1)
    a3(pshufd xmm9, xmm9, 0xb1)
    a2(pxor xmm0, xmm8)
    a2(pxor xmm1, xmm9)
    a2(movdqa xmm10, xmm2)
    a2(movdqa xmm11, xmm3)
    a2(movdqa xmm2, xmm6)
    a2(movdqa xmm3, xmm7)
    a3(palignr xmm2, xmm7, 8)
    a3(palignr xmm3, xmm6, 8)
    a2(movdqa xmm6, xmm11)
    a2(movdqa xmm7, xmm10)
    a3(palignr xmm6, xmm10, 8)
    a3(palignr xmm7, xmm11, 8)
    a2(sub rax, 2)
    a2(movdqa xmm8, xmm0)
    a2(movdqa xmm9, xmm1)
    a2(paddq xmm8, xmm2)
    a2(paddq xmm9, xmm3)
    a3(pshufd xmm8, xmm8, 0xb1)
    a3(pshufd xmm9, xmm9, 0xb1)
    a2(pxor xmm6, xmm8)
    a2(pxor xmm7, xmm9)
    a2(movdqa xmm10, xmm0)
    a2(movdqa xmm11, xmm1)
    a2(paddq xmm10, xmm6)
    a2(paddq xmm11, xmm7)
    a2(movdqa xmm8, xmm10)
    a2(movdqa xmm9, xmm11)
    a2(psrlq xmm10, 51)
    a2(psrlq xmm11, 51)
    a2(psllq xmm8, 13)
    a2(psllq xmm9, 13)
    a2(pxor xmm5, xmm10)
    a2(pxor xmm4, xmm11)
    a2(pxor xmm5, xmm8)
    a2(pxor xmm4, xmm9)
    a2(movdqa xmm10, xmm6)
    a2(movdqa xmm11, xmm7)
    a2(paddq xmm10, xmm5)
    a2(paddq xmm11, xmm4)
    a2(movdqa xmm8, xmm10)
    a2(movdqa xmm9, xmm11)
    a2(psrlq xmm10, 25)
    a2(psrlq xmm11, 25)
    a2(psllq xmm8, 39)
    a2(psllq xmm9, 39)
    a2(pxor xmm2, xmm10)
    a2(pxor xmm3, xmm11)
    a2(pxor xmm2, xmm8)
    a2(pxor xmm3, xmm9)
    a2(movdqa xmm8, xmm5)
    a2(movdqa xmm9, xmm4)
    a2(paddq xmm8, xmm2)
    a2(paddq xmm9, xmm3)
    a3(pshufd xmm8, xmm8, 0xb1)
    a3(pshufd xmm9, xmm9, 0xb1)
    a2(pxor xmm0, xmm8)
    a2(pxor xmm1, xmm9)
    a2(movdqa xmm10, xmm2)
    a2(movdqa xmm11, xmm3)
    a2(movdqa xmm2, xmm6)
    a2(movdqa xmm3, xmm7)
    a3(palignr xmm2, xmm7, 8)
    a3(palignr xmm3, xmm6, 8)
    a2(movdqa xmm6, xmm11)
    a2(movdqa xmm7, xmm10)
    a3(palignr xmm6, xmm10, 8)
    a3(palignr xmm7, xmm11, 8)
    aj(ja scrypt_salsa64_ssse3_loop)
    a2(paddq xmm0,[rsp+0])
    a2(paddq xmm1,[rsp+16])
    a2(paddq xmm2,[rsp+32])
    a2(paddq xmm3,[rsp+48])
    a2(paddq xmm4,[rsp+64])
    a2(paddq xmm5,[rsp+80])
    a2(paddq xmm6,[rsp+96])
    a2(paddq xmm7,[rsp+112])
    a2(lea rax,[r8+r9])
    a2(xor r8,rcx)
    a2(and rax,~0xff)
    a2(add r9,128)
    a2(shr rax,1)
    a2(add rax, rdi)
    a2(cmp r9,rcx)
    a2(movdqa [rax+0],xmm0)
    a2(movdqa [rax+16],xmm1)
    a2(movdqa [rax+32],xmm2)
    a2(movdqa [rax+48],xmm3)
    a2(movdqa [rax+64],xmm4)
    a2(movdqa [rax+80],xmm5)
    a2(movdqa [rax+96],xmm6)
    a2(movdqa [rax+112],xmm7)
    aj(jne scrypt_ChunkMix_ssse3_loop)
    a2(mov rsp, rbp)
    a1(pop rbp)
    a1(ret)
asm_naked_fn_end(scrypt_ChunkMix_ssse3)

#endif


/* intrinsic */
#if defined(X86_INTRINSIC_SSSE3) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))

#define SCRYPT_SALSA64_SSSE3

static void asm_calling_convention
scrypt_ChunkMix_ssse3(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
    uint32_t i, blocksPerChunk = r * 2, half = 0;
    xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1,z2,z3;
    size_t rounds;

    /* 1: X = B_{2r - 1} */
    xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
    x0 = xmmp[0];
    x1 = xmmp[1];
    x2 = xmmp[2];
    x3 = xmmp[3];
    x4 = xmmp[4];
    x5 = xmmp[5];
    x6 = xmmp[6];
    x7 = xmmp[7];

    if (Bxor) {
        xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
        x0 = _mm_xor_si128(x0, xmmp[0]);
        x1 = _mm_xor_si128(x1, xmmp[1]);
        x2 = _mm_xor_si128(x2, xmmp[2]);
        x3 = _mm_xor_si128(x3, xmmp[3]);
        x4 = _mm_xor_si128(x4, xmmp[4]);
        x5 = _mm_xor_si128(x5, xmmp[5]);
        x6 = _mm_xor_si128(x6, xmmp[6]);
        x7 = _mm_xor_si128(x7, xmmp[7]);
    }

    /* 2: for i = 0 to 2r - 1 do */
    for (i = 0; i < blocksPerChunk; i++, half ^= r) {
        /* 3: X = H(X ^ B_i) */
        xmmp = (xmmi *)scrypt_block(Bin, i);
        x0 = _mm_xor_si128(x0, xmmp[0]);
        x1 = _mm_xor_si128(x1, xmmp[1]);
        x2 = _mm_xor_si128(x2, xmmp[2]);
        x3 = _mm_xor_si128(x3, xmmp[3]);
        x4 = _mm_xor_si128(x4, xmmp[4]);
        x5 = _mm_xor_si128(x5, xmmp[5]);
        x6 = _mm_xor_si128(x6, xmmp[6]);
        x7 = _mm_xor_si128(x7, xmmp[7]);

        if (Bxor) {
            xmmp = (xmmi *)scrypt_block(Bxor, i);
            x0 = _mm_xor_si128(x0, xmmp[0]);
            x1 = _mm_xor_si128(x1, xmmp[1]);
            x2 = _mm_xor_si128(x2, xmmp[2]);
            x3 = _mm_xor_si128(x3, xmmp[3]);
            x4 = _mm_xor_si128(x4, xmmp[4]);
            x5 = _mm_xor_si128(x5, xmmp[5]);
            x6 = _mm_xor_si128(x6, xmmp[6]);
            x7 = _mm_xor_si128(x7, xmmp[7]);
        }

        t0 = x0;
        t1 = x1;
        t2 = x2;
        t3 = x3;
        t4 = x4;
        t5 = x5;
        t6 = x6;
        t7 = x7;

        for (rounds = 8; rounds; rounds -= 2) {
            z0 = _mm_add_epi64(x0, x2);
            z1 = _mm_add_epi64(x1, x3);
            z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
            z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
            x6 = _mm_xor_si128(x6, z0);
            x7 = _mm_xor_si128(x7, z1);

            z0 = _mm_add_epi64(x6, x0);
            z1 = _mm_add_epi64(x7, x1);
            z2 = _mm_srli_epi64(z0, 64-13);
            z3 = _mm_srli_epi64(z1, 64-13);
            z0 = _mm_slli_epi64(z0, 13);
            z1 = _mm_slli_epi64(z1, 13);
            x4 = _mm_xor_si128(x4, z2);
            x5 = _mm_xor_si128(x5, z3);
            x4 = _mm_xor_si128(x4, z0);
            x5 = _mm_xor_si128(x5, z1);

            z0 = _mm_add_epi64(x4, x6);
            z1 = _mm_add_epi64(x5, x7);
            z2 = _mm_srli_epi64(z0, 64-39);
            z3 = _mm_srli_epi64(z1, 64-39);
            z0 = _mm_slli_epi64(z0, 39);
            z1 = _mm_slli_epi64(z1, 39);
            x2 = _mm_xor_si128(x2, z2);
            x3 = _mm_xor_si128(x3, z3);
            x2 = _mm_xor_si128(x2, z0);
            x3 = _mm_xor_si128(x3, z1);

            z0 = _mm_add_epi64(x2, x4);
            z1 = _mm_add_epi64(x3, x5);
            z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
            z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
            x0 = _mm_xor_si128(x0, z0);
            x1 = _mm_xor_si128(x1, z1);

            z0 = x2;
            z1 = x3;
            x2 = _mm_alignr_epi8(x6, x7, 8);
            x3 = _mm_alignr_epi8(x7, x6, 8);
            x6 = _mm_alignr_epi8(z1, z0, 8);
            x7 = _mm_alignr_epi8(z0, z1, 8);

            z0 = _mm_add_epi64(x0, x2);
            z1 = _mm_add_epi64(x1, x3);
            z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
            z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
            x6 = _mm_xor_si128(x6, z0);
            x7 = _mm_xor_si128(x7, z1);

            z0 = _mm_add_epi64(x6, x0);
            z1 = _mm_add_epi64(x7, x1);
            z2 = _mm_srli_epi64(z0, 64-13);
            z3 = _mm_srli_epi64(z1, 64-13);
            z0 = _mm_slli_epi64(z0, 13);
            z1 = _mm_slli_epi64(z1, 13);
            x5 = _mm_xor_si128(x5, z2);
            x4 = _mm_xor_si128(x4, z3);
            x5 = _mm_xor_si128(x5, z0);
            x4 = _mm_xor_si128(x4, z1);

            z0 = _mm_add_epi64(x5, x6);
            z1 = _mm_add_epi64(x4, x7);
            z2 = _mm_srli_epi64(z0, 64-39);
            z3 = _mm_srli_epi64(z1, 64-39);
            z0 = _mm_slli_epi64(z0, 39);
            z1 = _mm_slli_epi64(z1, 39);
            x2 = _mm_xor_si128(x2, z2);
            x3 = _mm_xor_si128(x3, z3);
            x2 = _mm_xor_si128(x2, z0);
            x3 = _mm_xor_si128(x3, z1);

            z0 = _mm_add_epi64(x2, x5);
            z1 = _mm_add_epi64(x3, x4);
            z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
            z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
            x0 = _mm_xor_si128(x0, z0);
            x1 = _mm_xor_si128(x1, z1);

            z0 = x2;
            z1 = x3;
            x2 = _mm_alignr_epi8(x6, x7, 8);
            x3 = _mm_alignr_epi8(x7, x6, 8);
            x6 = _mm_alignr_epi8(z1, z0, 8);
            x7 = _mm_alignr_epi8(z0, z1, 8);
        }

        x0 = _mm_add_epi64(x0, t0);
        x1 = _mm_add_epi64(x1, t1);
        x2 = _mm_add_epi64(x2, t2);
        x3 = _mm_add_epi64(x3, t3);
        x4 = _mm_add_epi64(x4, t4);
        x5 = _mm_add_epi64(x5, t5);
        x6 = _mm_add_epi64(x6, t6);
        x7 = _mm_add_epi64(x7, t7);

        /* 4: Y_i = X */
        /* 6: B'[0..r-1] = Y_even */
        /* 6: B'[r..2r-1] = Y_odd */
        xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
        xmmp[0] = x0;
        xmmp[1] = x1;
        xmmp[2] = x2;
        xmmp[3] = x3;
        xmmp[4] = x4;
        xmmp[5] = x5;
        xmmp[6] = x6;
        xmmp[7] = x7;
    }
}

#endif

#if defined(SCRYPT_SALSA64_SSSE3)
    /* uses salsa64_core_tangle_sse2 */

    #undef SCRYPT_MIX
    #define SCRYPT_MIX "Salsa64/8-SSSE3"
    #undef SCRYPT_SALSA64_INCLUDED
    #define SCRYPT_SALSA64_INCLUDED
#endif
335
deps/scrypt-jane-master/code/scrypt-jane-mix_salsa64-xop.h
vendored
Normal file
@@ -0,0 +1,335 @@
/* x64 */
#if defined(X86_64ASM_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)) && !defined(CPU_X86_FORCE_INTRINSICS)

#define SCRYPT_SALSA64_XOP

asm_naked_fn_proto(void, scrypt_ChunkMix_xop)(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r)
asm_naked_fn(scrypt_ChunkMix_xop)
    a1(push rbp)
    a2(mov rbp, rsp)
    a2(and rsp, ~63)
    a2(sub rsp, 128)
    a2(lea rcx,[ecx*2]) /* zero extend uint32_t by using ecx, win64 can leave garbage in the top half */
    a2(shl rcx,7)
    a2(lea r9,[rcx-128])
    a2(lea rax,[rsi+r9])
    a2(lea r9,[rdx+r9])
    a2(and rdx, rdx)
    a2(vmovdqa xmm0,[rax+0])
    a2(vmovdqa xmm1,[rax+16])
    a2(vmovdqa xmm2,[rax+32])
    a2(vmovdqa xmm3,[rax+48])
    a2(vmovdqa xmm4,[rax+64])
    a2(vmovdqa xmm5,[rax+80])
    a2(vmovdqa xmm6,[rax+96])
    a2(vmovdqa xmm7,[rax+112])
    aj(jz scrypt_ChunkMix_xop_no_xor1)
    a3(vpxor xmm0,xmm0,[r9+0])
    a3(vpxor xmm1,xmm1,[r9+16])
    a3(vpxor xmm2,xmm2,[r9+32])
    a3(vpxor xmm3,xmm3,[r9+48])
    a3(vpxor xmm4,xmm4,[r9+64])
    a3(vpxor xmm5,xmm5,[r9+80])
    a3(vpxor xmm6,xmm6,[r9+96])
    a3(vpxor xmm7,xmm7,[r9+112])
    a1(scrypt_ChunkMix_xop_no_xor1:)
    a2(xor r9,r9)
    a2(xor r8,r8)
    a1(scrypt_ChunkMix_xop_loop:)
    a2(and rdx, rdx)
    a3(vpxor xmm0,xmm0,[rsi+r9+0])
    a3(vpxor xmm1,xmm1,[rsi+r9+16])
    a3(vpxor xmm2,xmm2,[rsi+r9+32])
    a3(vpxor xmm3,xmm3,[rsi+r9+48])
    a3(vpxor xmm4,xmm4,[rsi+r9+64])
    a3(vpxor xmm5,xmm5,[rsi+r9+80])
    a3(vpxor xmm6,xmm6,[rsi+r9+96])
    a3(vpxor xmm7,xmm7,[rsi+r9+112])
    aj(jz scrypt_ChunkMix_xop_no_xor2)
    a3(vpxor xmm0,xmm0,[rdx+r9+0])
    a3(vpxor xmm1,xmm1,[rdx+r9+16])
    a3(vpxor xmm2,xmm2,[rdx+r9+32])
    a3(vpxor xmm3,xmm3,[rdx+r9+48])
    a3(vpxor xmm4,xmm4,[rdx+r9+64])
    a3(vpxor xmm5,xmm5,[rdx+r9+80])
    a3(vpxor xmm6,xmm6,[rdx+r9+96])
    a3(vpxor xmm7,xmm7,[rdx+r9+112])
    a1(scrypt_ChunkMix_xop_no_xor2:)
    a2(vmovdqa [rsp+0],xmm0)
    a2(vmovdqa [rsp+16],xmm1)
    a2(vmovdqa [rsp+32],xmm2)
    a2(vmovdqa [rsp+48],xmm3)
    a2(vmovdqa [rsp+64],xmm4)
    a2(vmovdqa [rsp+80],xmm5)
    a2(vmovdqa [rsp+96],xmm6)
    a2(vmovdqa [rsp+112],xmm7)
    a2(mov rax,8)
    a1(scrypt_salsa64_xop_loop: )
    a3(vpaddq xmm8, xmm0, xmm2)
    a3(vpaddq xmm9, xmm1, xmm3)
    a3(vpshufd xmm8, xmm8, 0xb1)
    a3(vpshufd xmm9, xmm9, 0xb1)
    a3(vpxor xmm6, xmm6, xmm8)
    a3(vpxor xmm7, xmm7, xmm9)
    a3(vpaddq xmm10, xmm0, xmm6)
    a3(vpaddq xmm11, xmm1, xmm7)
    a3(vprotq xmm10, xmm10, 13)
    a3(vprotq xmm11, xmm11, 13)
    a3(vpxor xmm4, xmm4, xmm10)
    a3(vpxor xmm5, xmm5, xmm11)
    a3(vpaddq xmm8, xmm6, xmm4)
    a3(vpaddq xmm9, xmm7, xmm5)
    a3(vprotq xmm8, xmm8, 39)
    a3(vprotq xmm9, xmm9, 39)
    a3(vpxor xmm2, xmm2, xmm8)
    a3(vpxor xmm3, xmm3, xmm9)
    a3(vpaddq xmm10, xmm4, xmm2)
    a3(vpaddq xmm11, xmm5, xmm3)
    a3(vpshufd xmm10, xmm10, 0xb1)
    a3(vpshufd xmm11, xmm11, 0xb1)
    a3(vpxor xmm0, xmm0, xmm10)
    a3(vpxor xmm1, xmm1, xmm11)
    a2(vmovdqa xmm8, xmm2)
    a2(vmovdqa xmm9, xmm3)
    a4(vpalignr xmm2, xmm6, xmm7, 8)
    a4(vpalignr xmm3, xmm7, xmm6, 8)
    a4(vpalignr xmm6, xmm9, xmm8, 8)
    a4(vpalignr xmm7, xmm8, xmm9, 8)
    a3(vpaddq xmm10, xmm0, xmm2)
    a3(vpaddq xmm11, xmm1, xmm3)
    a3(vpshufd xmm10, xmm10, 0xb1)
    a3(vpshufd xmm11, xmm11, 0xb1)
    a3(vpxor xmm6, xmm6, xmm10)
    a3(vpxor xmm7, xmm7, xmm11)
    a3(vpaddq xmm8, xmm0, xmm6)
    a3(vpaddq xmm9, xmm1, xmm7)
    a3(vprotq xmm8, xmm8, 13)
    a3(vprotq xmm9, xmm9, 13)
    a3(vpxor xmm5, xmm5, xmm8)
    a3(vpxor xmm4, xmm4, xmm9)
    a3(vpaddq xmm10, xmm6, xmm5)
    a3(vpaddq xmm11, xmm7, xmm4)
    a3(vprotq xmm10, xmm10, 39)
    a3(vprotq xmm11, xmm11, 39)
    a3(vpxor xmm2, xmm2, xmm10)
    a3(vpxor xmm3, xmm3, xmm11)
    a3(vpaddq xmm8, xmm5, xmm2)
    a3(vpaddq xmm9, xmm4, xmm3)
    a3(vpshufd xmm8, xmm8, 0xb1)
    a3(vpshufd xmm9, xmm9, 0xb1)
    a3(vpxor xmm0, xmm0, xmm8)
    a3(vpxor xmm1, xmm1, xmm9)
    a2(vmovdqa xmm10, xmm2)
    a2(vmovdqa xmm11, xmm3)
    a4(vpalignr xmm2, xmm6, xmm7, 8)
    a4(vpalignr xmm3, xmm7, xmm6, 8)
    a4(vpalignr xmm6, xmm11, xmm10, 8)
    a4(vpalignr xmm7, xmm10, xmm11, 8)
    a2(sub rax, 2)
    aj(ja scrypt_salsa64_xop_loop)
    a3(vpaddq xmm0,xmm0,[rsp+0])
    a3(vpaddq xmm1,xmm1,[rsp+16])
    a3(vpaddq xmm2,xmm2,[rsp+32])
    a3(vpaddq xmm3,xmm3,[rsp+48])
    a3(vpaddq xmm4,xmm4,[rsp+64])
    a3(vpaddq xmm5,xmm5,[rsp+80])
    a3(vpaddq xmm6,xmm6,[rsp+96])
    a3(vpaddq xmm7,xmm7,[rsp+112])
    a2(lea rax,[r8+r9])
    a2(xor r8,rcx)
    a2(and rax,~0xff)
    a2(add r9,128)
    a2(shr rax,1)
    a2(add rax, rdi)
    a2(cmp r9,rcx)
    a2(vmovdqa [rax+0],xmm0)
    a2(vmovdqa [rax+16],xmm1)
    a2(vmovdqa [rax+32],xmm2)
    a2(vmovdqa [rax+48],xmm3)
    a2(vmovdqa [rax+64],xmm4)
    a2(vmovdqa [rax+80],xmm5)
    a2(vmovdqa [rax+96],xmm6)
    a2(vmovdqa [rax+112],xmm7)
    aj(jne scrypt_ChunkMix_xop_loop)
    a2(mov rsp, rbp)
    a1(pop rbp)
    a1(ret)
asm_naked_fn_end(scrypt_ChunkMix_xop)

#endif


/* intrinsic */
#if defined(X86_INTRINSIC_XOP) && (!defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED))

#define SCRYPT_SALSA64_XOP

static void asm_calling_convention
scrypt_ChunkMix_xop(uint64_t *Bout/*[chunkBytes]*/, uint64_t *Bin/*[chunkBytes]*/, uint64_t *Bxor/*[chunkBytes]*/, uint32_t r) {
    uint32_t i, blocksPerChunk = r * 2, half = 0;
    xmmi *xmmp,x0,x1,x2,x3,x4,x5,x6,x7,t0,t1,t2,t3,t4,t5,t6,t7,z0,z1,z2,z3;
    size_t rounds;

    /* 1: X = B_{2r - 1} */
    xmmp = (xmmi *)scrypt_block(Bin, blocksPerChunk - 1);
    x0 = xmmp[0];
    x1 = xmmp[1];
    x2 = xmmp[2];
    x3 = xmmp[3];
    x4 = xmmp[4];
    x5 = xmmp[5];
    x6 = xmmp[6];
    x7 = xmmp[7];

    if (Bxor) {
        xmmp = (xmmi *)scrypt_block(Bxor, blocksPerChunk - 1);
        x0 = _mm_xor_si128(x0, xmmp[0]);
        x1 = _mm_xor_si128(x1, xmmp[1]);
        x2 = _mm_xor_si128(x2, xmmp[2]);
        x3 = _mm_xor_si128(x3, xmmp[3]);
        x4 = _mm_xor_si128(x4, xmmp[4]);
        x5 = _mm_xor_si128(x5, xmmp[5]);
        x6 = _mm_xor_si128(x6, xmmp[6]);
        x7 = _mm_xor_si128(x7, xmmp[7]);
    }

    /* 2: for i = 0 to 2r - 1 do */
    for (i = 0; i < blocksPerChunk; i++, half ^= r) {
        /* 3: X = H(X ^ B_i) */
        xmmp = (xmmi *)scrypt_block(Bin, i);
        x0 = _mm_xor_si128(x0, xmmp[0]);
        x1 = _mm_xor_si128(x1, xmmp[1]);
        x2 = _mm_xor_si128(x2, xmmp[2]);
        x3 = _mm_xor_si128(x3, xmmp[3]);
        x4 = _mm_xor_si128(x4, xmmp[4]);
        x5 = _mm_xor_si128(x5, xmmp[5]);
        x6 = _mm_xor_si128(x6, xmmp[6]);
        x7 = _mm_xor_si128(x7, xmmp[7]);

        if (Bxor) {
            xmmp = (xmmi *)scrypt_block(Bxor, i);
            x0 = _mm_xor_si128(x0, xmmp[0]);
            x1 = _mm_xor_si128(x1, xmmp[1]);
            x2 = _mm_xor_si128(x2, xmmp[2]);
            x3 = _mm_xor_si128(x3, xmmp[3]);
            x4 = _mm_xor_si128(x4, xmmp[4]);
            x5 = _mm_xor_si128(x5, xmmp[5]);
            x6 = _mm_xor_si128(x6, xmmp[6]);
            x7 = _mm_xor_si128(x7, xmmp[7]);
        }

        t0 = x0;
        t1 = x1;
        t2 = x2;
        t3 = x3;
        t4 = x4;
        t5 = x5;
        t6 = x6;
        t7 = x7;

        for (rounds = 8; rounds; rounds -= 2) {
            z0 = _mm_add_epi64(x0, x2);
            z1 = _mm_add_epi64(x1, x3);
            z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
            z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
            x6 = _mm_xor_si128(x6, z0);
            x7 = _mm_xor_si128(x7, z1);

            z0 = _mm_add_epi64(x6, x0);
            z1 = _mm_add_epi64(x7, x1);
            z0 = _mm_roti_epi64(z0, 13);
            z1 = _mm_roti_epi64(z1, 13);
            x4 = _mm_xor_si128(x4, z0);
            x5 = _mm_xor_si128(x5, z1);

            z0 = _mm_add_epi64(x4, x6);
            z1 = _mm_add_epi64(x5, x7);
            z0 = _mm_roti_epi64(z0, 39);
            z1 = _mm_roti_epi64(z1, 39);
            x2 = _mm_xor_si128(x2, z0);
            x3 = _mm_xor_si128(x3, z1);

            z0 = _mm_add_epi64(x2, x4);
            z1 = _mm_add_epi64(x3, x5);
            z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
            z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
            x0 = _mm_xor_si128(x0, z0);
            x1 = _mm_xor_si128(x1, z1);

            z0 = x2;
            z1 = x3;
            x2 = _mm_alignr_epi8(x6, x7, 8);
            x3 = _mm_alignr_epi8(x7, x6, 8);
            x6 = _mm_alignr_epi8(z1, z0, 8);
            x7 = _mm_alignr_epi8(z0, z1, 8);

            z0 = _mm_add_epi64(x0, x2);
            z1 = _mm_add_epi64(x1, x3);
            z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
            z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
            x6 = _mm_xor_si128(x6, z0);
            x7 = _mm_xor_si128(x7, z1);

            z0 = _mm_add_epi64(x6, x0);
            z1 = _mm_add_epi64(x7, x1);
            z0 = _mm_roti_epi64(z0, 13);
            z1 = _mm_roti_epi64(z1, 13);
            x5 = _mm_xor_si128(x5, z0);
            x4 = _mm_xor_si128(x4, z1);

            z0 = _mm_add_epi64(x5, x6);
            z1 = _mm_add_epi64(x4, x7);
            z0 = _mm_roti_epi64(z0, 39);
            z1 = _mm_roti_epi64(z1, 39);
            x2 = _mm_xor_si128(x2, z0);
            x3 = _mm_xor_si128(x3, z1);

            z0 = _mm_add_epi64(x2, x5);
            z1 = _mm_add_epi64(x3, x4);
            z0 = _mm_shuffle_epi32(z0, _MM_SHUFFLE(2,3,0,1));
            z1 = _mm_shuffle_epi32(z1, _MM_SHUFFLE(2,3,0,1));
            x0 = _mm_xor_si128(x0, z0);
            x1 = _mm_xor_si128(x1, z1);

            z0 = x2;
            z1 = x3;
            x2 = _mm_alignr_epi8(x6, x7, 8);
            x3 = _mm_alignr_epi8(x7, x6, 8);
            x6 = _mm_alignr_epi8(z1, z0, 8);
            x7 = _mm_alignr_epi8(z0, z1, 8);
        }

        x0 = _mm_add_epi64(x0, t0);
        x1 = _mm_add_epi64(x1, t1);
        x2 = _mm_add_epi64(x2, t2);
        x3 = _mm_add_epi64(x3, t3);
        x4 = _mm_add_epi64(x4, t4);
        x5 = _mm_add_epi64(x5, t5);
        x6 = _mm_add_epi64(x6, t6);
        x7 = _mm_add_epi64(x7, t7);

        /* 4: Y_i = X */
        /* 6: B'[0..r-1] = Y_even */
        /* 6: B'[r..2r-1] = Y_odd */
        xmmp = (xmmi *)scrypt_block(Bout, (i / 2) + half);
        xmmp[0] = x0;
        xmmp[1] = x1;
        xmmp[2] = x2;
        xmmp[3] = x3;
        xmmp[4] = x4;
        xmmp[5] = x5;
        xmmp[6] = x6;
        xmmp[7] = x7;
    }
}

#endif

#if defined(SCRYPT_SALSA64_XOP)
    /* uses salsa64_core_tangle_sse2 */

    #undef SCRYPT_MIX
    #define SCRYPT_MIX "Salsa64/8-XOP"
    #undef SCRYPT_SALSA64_INCLUDED
    #define SCRYPT_SALSA64_INCLUDED
#endif
41
deps/scrypt-jane-master/code/scrypt-jane-mix_salsa64.h
vendored
Normal file
@@ -0,0 +1,41 @@
#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_SALSA64_INCLUDED)

#undef SCRYPT_MIX
#define SCRYPT_MIX "Salsa64/8 Ref"

#undef SCRYPT_SALSA64_INCLUDED
#define SCRYPT_SALSA64_INCLUDED
#define SCRYPT_SALSA64_BASIC

static void
salsa64_core_basic(uint64_t state[16]) {
    const size_t rounds = 8;
    uint64_t v[16], t;
    size_t i;

    for (i = 0; i < 16; i++) v[i] = state[i];

    #define G(a,b,c,d) \
        t = v[a]+v[d]; t = ROTL64(t, 32); v[b] ^= t; \
        t = v[b]+v[a]; t = ROTL64(t, 13); v[c] ^= t; \
        t = v[c]+v[b]; t = ROTL64(t, 39); v[d] ^= t; \
        t = v[d]+v[c]; t = ROTL64(t, 32); v[a] ^= t;

    for (i = 0; i < rounds; i += 2) {
        G( 0, 4, 8,12);
        G( 5, 9,13, 1);
        G(10,14, 2, 6);
        G(15, 3, 7,11);
        G( 0, 1, 2, 3);
        G( 5, 6, 7, 4);
        G(10,11, 8, 9);
        G(15,12,13,14);
    }

    for (i = 0; i < 16; i++) state[i] += v[i];

    #undef G
}

#endif
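Every `scrypt_ChunkMix_*` variant in the files above implements the same scrypt BlockMix loop (the numbered comments in them refer to its steps), only with the Salsa64/8 core and the block copies vectorized. A plain-C sketch of that loop using `salsa64_core_basic` from above, with the same interleaved even/odd output placement; illustrative only, and `BLOCK_WORDS` plus the function name are assumed names rather than scrypt-jane's:

    #include <stdint.h>
    #include <string.h>

    #define BLOCK_WORDS 16 /* one 128-byte Salsa64 block = 16 x uint64_t */

    /* Reference BlockMix: steps 1-6 from the comments above, scalar version. */
    static void chunkmix_basic(uint64_t *Bout, uint64_t *Bin, uint64_t *Bxor, uint32_t r) {
        uint64_t X[BLOCK_WORDS];
        uint32_t i, j, blocks = r * 2, half = 0;

        /* 1: X = B_{2r - 1} */
        memcpy(X, Bin + (blocks - 1) * BLOCK_WORDS, sizeof(X));
        if (Bxor)
            for (j = 0; j < BLOCK_WORDS; j++) X[j] ^= Bxor[(blocks - 1) * BLOCK_WORDS + j];

        /* 2: for i = 0 to 2r - 1 do */
        for (i = 0; i < blocks; i++, half ^= r) {
            /* 3: X = H(X ^ B_i) */
            for (j = 0; j < BLOCK_WORDS; j++) X[j] ^= Bin[i * BLOCK_WORDS + j];
            if (Bxor)
                for (j = 0; j < BLOCK_WORDS; j++) X[j] ^= Bxor[i * BLOCK_WORDS + j];
            salsa64_core_basic(X);

            /* 4 & 6: Y_even -> B'[0..r-1], Y_odd -> B'[r..2r-1] */
            memcpy(Bout + ((i / 2) + half) * BLOCK_WORDS, X, sizeof(X));
        }
    }

The `half ^= r` toggle is the same trick the SIMD versions use: even-numbered blocks land at index i/2, odd-numbered blocks at r + i/2.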
112
deps/scrypt-jane-master/code/scrypt-jane-pbkdf2.h
vendored
Normal file
@@ -0,0 +1,112 @@
typedef struct scrypt_hmac_state_t {
    scrypt_hash_state inner, outer;
} scrypt_hmac_state;


static void
scrypt_hash(scrypt_hash_digest hash, const uint8_t *m, size_t mlen) {
    scrypt_hash_state st;
    scrypt_hash_init(&st);
    scrypt_hash_update(&st, m, mlen);
    scrypt_hash_finish(&st, hash);
}

/* hmac */
static void
scrypt_hmac_init(scrypt_hmac_state *st, const uint8_t *key, size_t keylen) {
    uint8_t pad[SCRYPT_HASH_BLOCK_SIZE] = {0};
    size_t i;

    scrypt_hash_init(&st->inner);
    scrypt_hash_init(&st->outer);

    if (keylen <= SCRYPT_HASH_BLOCK_SIZE) {
        /* use the key directly if it's <= blocksize bytes */
        memcpy(pad, key, keylen);
    } else {
        /* if it's > blocksize bytes, hash it */
        scrypt_hash(pad, key, keylen);
    }

    /* inner = (key ^ 0x36) */
    /* h(inner || ...) */
    for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++)
        pad[i] ^= 0x36;
    scrypt_hash_update(&st->inner, pad, SCRYPT_HASH_BLOCK_SIZE);

    /* outer = (key ^ 0x5c) */
    /* h(outer || ...) */
    for (i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++)
        pad[i] ^= (0x5c ^ 0x36);
    scrypt_hash_update(&st->outer, pad, SCRYPT_HASH_BLOCK_SIZE);

    scrypt_ensure_zero(pad, sizeof(pad));
}

static void
scrypt_hmac_update(scrypt_hmac_state *st, const uint8_t *m, size_t mlen) {
    /* h(inner || m...) */
    scrypt_hash_update(&st->inner, m, mlen);
}

static void
scrypt_hmac_finish(scrypt_hmac_state *st, scrypt_hash_digest mac) {
    /* h(inner || m) */
    scrypt_hash_digest innerhash;
    scrypt_hash_finish(&st->inner, innerhash);

    /* h(outer || h(inner || m)) */
    scrypt_hash_update(&st->outer, innerhash, sizeof(innerhash));
    scrypt_hash_finish(&st->outer, mac);

    scrypt_ensure_zero(st, sizeof(*st));
}

static void
scrypt_pbkdf2(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len, uint64_t N, uint8_t *out, size_t bytes) {
    scrypt_hmac_state hmac_pw, hmac_pw_salt, work;
    scrypt_hash_digest ti, u;
    uint8_t be[4];
    uint32_t i, j, blocks;
    uint64_t c;

    /* bytes must be <= (0xffffffff - (SCRYPT_HASH_DIGEST_SIZE - 1)), which they will always be under scrypt */

    /* hmac(password, ...) */
    scrypt_hmac_init(&hmac_pw, password, password_len);

    /* hmac(password, salt...) */
    hmac_pw_salt = hmac_pw;
    scrypt_hmac_update(&hmac_pw_salt, salt, salt_len);

    blocks = ((uint32_t)bytes + (SCRYPT_HASH_DIGEST_SIZE - 1)) / SCRYPT_HASH_DIGEST_SIZE;
    for (i = 1; i <= blocks; i++) {
        /* U1 = hmac(password, salt || be(i)) */
        U32TO8_BE(be, i);
        work = hmac_pw_salt;
        scrypt_hmac_update(&work, be, 4);
        scrypt_hmac_finish(&work, ti);
        memcpy(u, ti, sizeof(u));

        /* T[i] = U1 ^ U2 ^ U3... */
        for (c = 0; c < N - 1; c++) {
            /* UX = hmac(password, U{X-1}) */
            work = hmac_pw;
            scrypt_hmac_update(&work, u, SCRYPT_HASH_DIGEST_SIZE);
            scrypt_hmac_finish(&work, u);

            /* T[i] ^= UX */
            for (j = 0; j < sizeof(u); j++)
                ti[j] ^= u[j];
        }

        memcpy(out, ti, (bytes > SCRYPT_HASH_DIGEST_SIZE) ? SCRYPT_HASH_DIGEST_SIZE : bytes);
        out += SCRYPT_HASH_DIGEST_SIZE;
        bytes -= SCRYPT_HASH_DIGEST_SIZE;
    }

    scrypt_ensure_zero(ti, sizeof(ti));
    scrypt_ensure_zero(u, sizeof(u));
    scrypt_ensure_zero(&hmac_pw, sizeof(hmac_pw));
    scrypt_ensure_zero(&hmac_pw_salt, sizeof(hmac_pw_salt));
}
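Worth noting for context: in scrypt itself the `N` parameter of `scrypt_pbkdf2` above (the PBKDF2 iteration count, unrelated to scrypt's memory-cost N) is always 1, so the inner `for (c ...)` loop never runs. It is called once to expand password and salt into the 128*r*p-byte working buffer, and once more over the mixed buffer to produce the final key. A hedged sketch of that call pattern, with buffer and wrapper names that are illustrative, not scrypt-jane's:

    /* Illustrative wrapper (names assumed): how scrypt uses the PBKDF2 above. */
    static void scrypt_kdf_sketch(const uint8_t *password, size_t password_len,
                                  const uint8_t *salt, size_t salt_len,
                                  uint32_t r, uint32_t p,
                                  uint8_t *B /* 128*r*p bytes */,
                                  uint8_t *out, size_t bytes) {
        /* B = PBKDF2(password, salt, 1, 128*r*p): expand into the chunk buffer */
        scrypt_pbkdf2(password, password_len, salt, salt_len, 1, B, (size_t)128 * r * p);

        /* ... the ROMix / ChunkMix passes over B would happen here ... */

        /* out = PBKDF2(password, B, 1, bytes): final key derivation */
        scrypt_pbkdf2(password, password_len, B, (size_t)128 * r * p, 1, out, bytes);
    }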
469
deps/scrypt-jane-master/code/scrypt-jane-portable-x86.h
vendored
Normal file
@@ -0,0 +1,469 @@
#if defined(CPU_X86) && (defined(COMPILER_MSVC) || defined(COMPILER_GCC))
    #define X86ASM

    /* gcc 2.95 royally screws up stack alignments on variables */
    #if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS6PP)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 30000)))
        #define X86ASM_SSE
        #define X86ASM_SSE2
    #endif
    #if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS2005)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 40102)))
        #define X86ASM_SSSE3
    #endif
    #if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS2010SP1)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 40400)))
        #define X86ASM_AVX
        #define X86ASM_XOP
    #endif
    #if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= COMPILER_MSVC_VS2012)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 40700)))
        #define X86ASM_AVX2
    #endif
#endif

#if defined(CPU_X86_64) && defined(COMPILER_GCC)
    #define X86_64ASM
    #define X86_64ASM_SSE2
    #if (COMPILER_GCC >= 40102)
        #define X86_64ASM_SSSE3
    #endif
    #if (COMPILER_GCC >= 40400)
        #define X86_64ASM_AVX
        #define X86_64ASM_XOP
    #endif
    #if (COMPILER_GCC >= 40700)
        #define X86_64ASM_AVX2
    #endif
#endif

#if defined(COMPILER_MSVC) && (defined(CPU_X86_FORCE_INTRINSICS) || defined(CPU_X86_64))
    #define X86_INTRINSIC
    #if defined(CPU_X86_64) || defined(X86ASM_SSE)
        #define X86_INTRINSIC_SSE
    #endif
    #if defined(CPU_X86_64) || defined(X86ASM_SSE2)
        #define X86_INTRINSIC_SSE2
    #endif
    #if (COMPILER_MSVC >= COMPILER_MSVC_VS2005)
        #define X86_INTRINSIC_SSSE3
    #endif
    #if (COMPILER_MSVC >= COMPILER_MSVC_VS2010SP1)
        #define X86_INTRINSIC_AVX
        #define X86_INTRINSIC_XOP
    #endif
    #if (COMPILER_MSVC >= COMPILER_MSVC_VS2012)
        #define X86_INTRINSIC_AVX2
    #endif
#endif

#if defined(COMPILER_GCC) && defined(CPU_X86_FORCE_INTRINSICS)
    #define X86_INTRINSIC
    #if defined(__SSE__)
        #define X86_INTRINSIC_SSE
    #endif
    #if defined(__SSE2__)
        #define X86_INTRINSIC_SSE2
    #endif
    #if defined(__SSSE3__)
        #define X86_INTRINSIC_SSSE3
    #endif
    #if defined(__AVX__)
        #define X86_INTRINSIC_AVX
    #endif
    #if defined(__XOP__)
        #define X86_INTRINSIC_XOP
    #endif
    #if defined(__AVX2__)
        #define X86_INTRINSIC_AVX2
    #endif
#endif

/* only use simd on windows (or SSE2 on gcc)! */
#if defined(CPU_X86_FORCE_INTRINSICS) || defined(X86_INTRINSIC)
    #if defined(X86_INTRINSIC_SSE)
        #include <mmintrin.h>
        #include <xmmintrin.h>
        typedef __m64 qmm;
        typedef __m128 xmm;
        typedef __m128d xmmd;
    #endif
    #if defined(X86_INTRINSIC_SSE2)
        #include <emmintrin.h>
        typedef __m128i xmmi;
    #endif
    #if defined(X86_INTRINSIC_SSSE3)
        #include <tmmintrin.h>
    #endif
    #if defined(X86_INTRINSIC_AVX)
        #include <immintrin.h>
    #endif
    #if defined(X86_INTRINSIC_XOP)
        #if defined(COMPILER_MSVC)
            #include <intrin.h>
        #else
            #include <x86intrin.h>
        #endif
    #endif
    #if defined(X86_INTRINSIC_AVX2)
        typedef __m256i ymmi;
    #endif
#endif

#if defined(X86_INTRINSIC_SSE2)
    typedef union packedelem8_t {
        uint8_t u[16];
        xmmi v;
    } packedelem8;

    typedef union packedelem32_t {
        uint32_t u[4];
        xmmi v;
    } packedelem32;

    typedef union packedelem64_t {
        uint64_t u[2];
        xmmi v;
    } packedelem64;
#else
    typedef union packedelem8_t {
        uint8_t u[16];
        uint32_t dw[4];
    } packedelem8;

    typedef union packedelem32_t {
        uint32_t u[4];
        uint8_t b[16];
    } packedelem32;

    typedef union packedelem64_t {
        uint64_t u[2];
        uint8_t b[16];
    } packedelem64;
#endif

#if defined(X86_INTRINSIC_SSSE3)
    static const packedelem8 ALIGN(16) ssse3_rotl16_32bit = {{2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13}};
    static const packedelem8 ALIGN(16) ssse3_rotl8_32bit  = {{3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14}};
#endif

/*
    x86 inline asm for gcc/msvc. usage:

    asm_naked_fn_proto(return_type, name) (type parm1, type parm2..)
    asm_naked_fn(name)
        a1(..)
        a2(.., ..)
        a3(.., .., ..)
        64bit OR 0 parameters: a1(ret)
        32bit AND n parameters: aret(4n), e.g. aret(16) for 4 parameters
    asm_naked_fn_end(name)
*/

#if defined(X86ASM) || defined(X86_64ASM)

#if defined(COMPILER_MSVC)
    #pragma warning(disable : 4731) /* frame pointer modified by inline assembly */
    #define a1(x) __asm {x}
    #define a2(x, y) __asm {x, y}
    #define a3(x, y, z) __asm {x, y, z}
    #define a4(x, y, z, w) __asm {x, y, z, w}
    #define aj(x) __asm {x}
    #define asm_align8 a1(ALIGN 8)
    #define asm_align16 a1(ALIGN 16)

    #define asm_calling_convention STDCALL
    #define aret(n) a1(ret n)
    #define asm_naked_fn_proto(type, fn) static NAKED type asm_calling_convention fn
    #define asm_naked_fn(fn) {
    #define asm_naked_fn_end(fn) }
#elif defined(COMPILER_GCC)
    #define GNU_AS1(x) #x ";\n"
    #define GNU_AS2(x, y) #x ", " #y ";\n"
    #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";\n"
    #define GNU_AS4(x, y, z, w) #x ", " #y ", " #z ", " #w ";\n"
    #define GNU_ASFN(x) "\n_" #x ":\n" #x ":\n"
    #define GNU_ASJ(x) ".att_syntax prefix\n" #x "\n.intel_syntax noprefix\n"

    #define a1(x) GNU_AS1(x)
    #define a2(x, y) GNU_AS2(x, y)
    #define a3(x, y, z) GNU_AS3(x, y, z)
    #define a4(x, y, z, w) GNU_AS4(x, y, z, w)
    #define aj(x) GNU_ASJ(x)
    #define asm_align8 ".p2align 3,,7"
    #define asm_align16 ".p2align 4,,15"

    #if defined(OS_WINDOWS)
        #define asm_calling_convention CDECL
        #define aret(n) a1(ret)

        #if defined(X86_64ASM)
            #define asm_naked_fn(fn) ; __asm__ ( \
                ".text\n" \
                asm_align16 GNU_ASFN(fn) \
                "subq $136, %rsp;" \
                "movdqa %xmm6, 0(%rsp);" \
                "movdqa %xmm7, 16(%rsp);" \
                "movdqa %xmm8, 32(%rsp);" \
                "movdqa %xmm9, 48(%rsp);" \
                "movdqa %xmm10, 64(%rsp);" \
                "movdqa %xmm11, 80(%rsp);" \
                "movdqa %xmm12, 96(%rsp);" \
                "movq %rdi, 112(%rsp);" \
                "movq %rsi, 120(%rsp);" \
                "movq %rcx, %rdi;" \
                "movq %rdx, %rsi;" \
                "movq %r8, %rdx;" \
                "movq %r9, %rcx;" \
                "call 1f;" \
                "movdqa 0(%rsp), %xmm6;" \
                "movdqa 16(%rsp), %xmm7;" \
                "movdqa 32(%rsp), %xmm8;" \
                "movdqa 48(%rsp), %xmm9;" \
                "movdqa 64(%rsp), %xmm10;" \
                "movdqa 80(%rsp), %xmm11;" \
                "movdqa 96(%rsp), %xmm12;" \
                "movq 112(%rsp), %rdi;" \
                "movq 120(%rsp), %rsi;" \
                "addq $136, %rsp;" \
                "ret;" \
                ".intel_syntax noprefix;" \
                ".p2align 4,,15;" \
                "1:;"
        #else
            #define asm_naked_fn(fn) ; __asm__ (".intel_syntax noprefix;\n.text\n" asm_align16 GNU_ASFN(fn)
        #endif
    #else
        #define asm_calling_convention STDCALL
        #define aret(n) a1(ret n)
        #define asm_naked_fn(fn) ; __asm__ (".intel_syntax noprefix;\n.text\n" asm_align16 GNU_ASFN(fn)
    #endif

    #define asm_naked_fn_proto(type, fn) extern type asm_calling_convention fn
    #define asm_naked_fn_end(fn) ".att_syntax prefix;\n" );

    #define asm_gcc() __asm__ __volatile__(".intel_syntax noprefix;\n"
    #define asm_gcc_parms() ".att_syntax prefix;"
    #define asm_gcc_trashed() __asm__ __volatile__("" :::
    #define asm_gcc_end() );
#else
    need x86 asm
#endif

#endif /* X86ASM || X86_64ASM */


#if defined(CPU_X86) || defined(CPU_X86_64)

typedef enum cpu_flags_x86_t {
    cpu_mmx    = 1 << 0,
    cpu_sse    = 1 << 1,
    cpu_sse2   = 1 << 2,
    cpu_sse3   = 1 << 3,
    cpu_ssse3  = 1 << 4,
    cpu_sse4_1 = 1 << 5,
    cpu_sse4_2 = 1 << 6,
    cpu_avx    = 1 << 7,
    cpu_xop    = 1 << 8,
    cpu_avx2   = 1 << 9
} cpu_flags_x86;

typedef enum cpu_vendors_x86_t {
    cpu_nobody,
    cpu_intel,
    cpu_amd
} cpu_vendors_x86;

typedef struct x86_regs_t {
    uint32_t eax, ebx, ecx, edx;
} x86_regs;

#if defined(X86ASM)
asm_naked_fn_proto(int, has_cpuid)(void)
asm_naked_fn(has_cpuid)
    a1(pushfd)
    a1(pop eax)
    a2(mov ecx, eax)
    a2(xor eax, 0x200000)
    a1(push eax)
    a1(popfd)
    a1(pushfd)
    a1(pop eax)
    a2(xor eax, ecx)
    a2(shr eax, 21)
    a2(and eax, 1)
    a1(push ecx)
    a1(popfd)
    a1(ret)
asm_naked_fn_end(has_cpuid)
#endif /* X86ASM */

static void NOINLINE
get_cpuid(x86_regs *regs, uint32_t flags) {
#if defined(COMPILER_MSVC)
    __cpuid((int *)regs, (int)flags);
#else
    #if defined(CPU_X86_64)
        #define cpuid_bx rbx
    #else
        #define cpuid_bx ebx
    #endif

    asm_gcc()
        a1(push cpuid_bx)
        a2(xor ecx, ecx)
        a1(cpuid)
        a2(mov [%1 + 0], eax)
        a2(mov [%1 + 4], ebx)
        a2(mov [%1 + 8], ecx)
        a2(mov [%1 + 12], edx)
        a1(pop cpuid_bx)
    asm_gcc_parms() : "+a"(flags) : "S"(regs) : "%ecx", "%edx", "cc"
    asm_gcc_end()
#endif
}

#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
static uint64_t NOINLINE
get_xgetbv(uint32_t flags) {
#if defined(COMPILER_MSVC)
    return _xgetbv(flags);
#else
    uint32_t lo, hi;
    asm_gcc()
        a1(xgetbv)
    asm_gcc_parms() : "+c"(flags), "=a" (lo), "=d" (hi)
    asm_gcc_end()
    return ((uint64_t)lo | ((uint64_t)hi << 32));
#endif
}
#endif /* AVX support */

#if defined(SCRYPT_TEST_SPEED)
size_t cpu_detect_mask = (size_t)-1;
#endif

static size_t
detect_cpu(void) {
    union { uint8_t s[12]; uint32_t i[3]; } vendor_string;
    cpu_vendors_x86 vendor = cpu_nobody;
    x86_regs regs;
    uint32_t max_level, max_ext_level;
    size_t cpu_flags = 0;
#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
    uint64_t xgetbv_flags;
#endif

#if defined(CPU_X86)
    if (!has_cpuid())
        return cpu_flags;
#endif

    get_cpuid(&regs, 0);
    max_level = regs.eax;
    vendor_string.i[0] = regs.ebx;
    vendor_string.i[1] = regs.edx;
    vendor_string.i[2] = regs.ecx;

    if (scrypt_verify(vendor_string.s, (const uint8_t *)"GenuineIntel", 12))
        vendor = cpu_intel;
    else if (scrypt_verify(vendor_string.s, (const uint8_t *)"AuthenticAMD", 12))
        vendor = cpu_amd;

    if (max_level & 0x00000500) {
        /* "Intel P5 pre-B0" */
        cpu_flags |= cpu_mmx;
        return cpu_flags;
    }

    if (max_level < 1)
        return cpu_flags;

    get_cpuid(&regs, 1);
#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
    /* xsave/xrestore: AVX needs both XMM and YMM state enabled in XCR0 */
    if (regs.ecx & (1 << 27)) {
        xgetbv_flags = get_xgetbv(0);
        if ((regs.ecx & (1 << 28)) && ((xgetbv_flags & 0x6) == 0x6)) cpu_flags |= cpu_avx;
    }
#endif
    if (regs.ecx & (1 << 20)) cpu_flags |= cpu_sse4_2;
    if (regs.ecx & (1 << 19)) cpu_flags |= cpu_sse4_1;
    if (regs.ecx & (1 <<  9)) cpu_flags |= cpu_ssse3;
    if (regs.ecx & (1      )) cpu_flags |= cpu_sse3;
    if (regs.edx & (1 << 26)) cpu_flags |= cpu_sse2;
    if (regs.edx & (1 << 25)) cpu_flags |= cpu_sse;
    if (regs.edx & (1 << 23)) cpu_flags |= cpu_mmx;

    if (cpu_flags & cpu_avx) {
        if (max_level >= 7) {
            get_cpuid(&regs, 7);
            if (regs.ebx & (1 << 5)) cpu_flags |= cpu_avx2;
        }

        get_cpuid(&regs, 0x80000000);
        max_ext_level = regs.eax;
        if (max_ext_level >= 0x80000001) {
            get_cpuid(&regs, 0x80000001);
            if (regs.ecx & (1 << 11)) cpu_flags |= cpu_xop;
        }
    }

#if defined(SCRYPT_TEST_SPEED)
    cpu_flags &= cpu_detect_mask;
#endif

    return cpu_flags;
}

#if defined(SCRYPT_TEST_SPEED)
static const char *
get_top_cpuflag_desc(size_t flag) {
    if      (flag & cpu_avx2)   return "AVX2";
    else if (flag & cpu_xop)    return "XOP";
    else if (flag & cpu_avx)    return "AVX";
    else if (flag & cpu_sse4_2) return "SSE4.2";
    else if (flag & cpu_sse4_1) return "SSE4.1";
    else if (flag & cpu_ssse3)  return "SSSE3";
    else if (flag & cpu_sse2)   return "SSE2";
    else if (flag & cpu_sse)    return "SSE";
    else if (flag & cpu_mmx)    return "MMX";
    else                        return "Basic";
}
#endif

/* enable the highest system-wide option */
#if defined(SCRYPT_CHOOSE_COMPILETIME)
    #if !defined(__AVX2__)
        #undef X86_64ASM_AVX2
        #undef X86ASM_AVX2
        #undef X86_INTRINSIC_AVX2
    #endif
    #if !defined(__XOP__)
        #undef X86_64ASM_XOP
        #undef X86ASM_XOP
        #undef X86_INTRINSIC_XOP
    #endif
    #if !defined(__AVX__)
        #undef X86_64ASM_AVX
        #undef X86ASM_AVX
        #undef X86_INTRINSIC_AVX
    #endif
    #if !defined(__SSSE3__)
        #undef X86_64ASM_SSSE3
        #undef X86ASM_SSSE3
        #undef X86_INTRINSIC_SSSE3
    #endif
    #if !defined(__SSE2__)
        #undef X86_64ASM_SSE2
        #undef X86ASM_SSE2
        #undef X86_INTRINSIC_SSE2
    #endif
#endif

#else

static size_t
detect_cpu(void) {
    return 0;
}

#endif /* defined(CPU_X86) || defined(CPU_X86_64) */

307  deps/scrypt-jane-master/code/scrypt-jane-portable.h  vendored  Normal file
@@ -0,0 +1,307 @@
/* determine os */
#if defined(_WIN32) || defined(_WIN64) || defined(__TOS_WIN__) || defined(__WINDOWS__)
    #include <windows.h>
    #include <wincrypt.h>
    #define OS_WINDOWS
#elif defined(sun) || defined(__sun) || defined(__SVR4) || defined(__svr4__)
    #include <sys/mman.h>
    #include <sys/time.h>
    #include <fcntl.h>

    #define OS_SOLARIS
#else
    #include <sys/mman.h>
    #include <sys/time.h>
    #include <sys/param.h> /* need this to define BSD */
    #include <unistd.h>
    #include <fcntl.h>

    #define OS_NIX
    #if defined(__linux__)
        #include <endian.h>
        #define OS_LINUX
    #elif defined(BSD)
        #define OS_BSD

        #if defined(MACOS_X) || (defined(__APPLE__) & defined(__MACH__))
            #define OS_OSX
        #elif defined(macintosh) || defined(Macintosh)
            #define OS_MAC
        #elif defined(__OpenBSD__)
            #define OS_OPENBSD
        #endif
    #endif
#endif


/* determine compiler */
#if defined(_MSC_VER)
    #define COMPILER_MSVC_VS6       120000000
    #define COMPILER_MSVC_VS6PP     121000000
    #define COMPILER_MSVC_VS2002    130000000
    #define COMPILER_MSVC_VS2003    131000000
    #define COMPILER_MSVC_VS2005    140050727
    #define COMPILER_MSVC_VS2008    150000000
    #define COMPILER_MSVC_VS2008SP1 150030729
    #define COMPILER_MSVC_VS2010    160000000
    #define COMPILER_MSVC_VS2010SP1 160040219
    #define COMPILER_MSVC_VS2012RC  170000000
    #define COMPILER_MSVC_VS2012    170050727

    #if _MSC_FULL_VER > 100000000
        #define COMPILER_MSVC (_MSC_FULL_VER)
    #else
        #define COMPILER_MSVC (_MSC_FULL_VER * 10)
    #endif

    #if ((_MSC_VER == 1200) && defined(_mm_free))
        #undef COMPILER_MSVC
        #define COMPILER_MSVC COMPILER_MSVC_VS6PP
    #endif

    #pragma warning(disable : 4127) /* conditional expression is constant */
    #pragma warning(disable : 4100) /* unreferenced formal parameter */

    #define _CRT_SECURE_NO_WARNINGS
    #include <float.h>
    #include <stdlib.h> /* _rotl */
    #include <intrin.h>

    typedef unsigned char uint8_t;
    typedef unsigned short uint16_t;
    typedef unsigned int uint32_t;
    typedef signed int int32_t;
    typedef unsigned __int64 uint64_t;
    typedef signed __int64 int64_t;

    #define ROTL32(a,b) _rotl(a,b)
    #define ROTR32(a,b) _rotr(a,b)
    #define ROTL64(a,b) _rotl64(a,b)
    #define ROTR64(a,b) _rotr64(a,b)
    #undef NOINLINE
    #define NOINLINE __declspec(noinline)
    #undef NORETURN
    #define NORETURN
    #undef INLINE
    #define INLINE __forceinline
    #undef FASTCALL
    #define FASTCALL __fastcall
    #undef CDECL
    #define CDECL __cdecl
    #undef STDCALL
    #define STDCALL __stdcall
    #undef NAKED
    #define NAKED __declspec(naked)
    #define JANE_ALIGN(n) __declspec(align(n))
#endif
#if defined(__ICC)
    #define COMPILER_INTEL
#endif
#if defined(__GNUC__)
    #if (__GNUC__ >= 3)
        #define COMPILER_GCC_PATCHLEVEL __GNUC_PATCHLEVEL__
    #else
        #define COMPILER_GCC_PATCHLEVEL 0
    #endif
    #define COMPILER_GCC (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + COMPILER_GCC_PATCHLEVEL)
    #define ROTL32(a,b) (((a) << (b)) | ((a) >> (32 - b)))
    #define ROTR32(a,b) (((a) >> (b)) | ((a) << (32 - b)))
    #define ROTL64(a,b) (((a) << (b)) | ((a) >> (64 - b)))
    #define ROTR64(a,b) (((a) >> (b)) | ((a) << (64 - b)))
    #undef NOINLINE
    #if (COMPILER_GCC >= 30000)
        #define NOINLINE __attribute__((noinline))
    #else
        #define NOINLINE
    #endif
    #undef NORETURN
    #if (COMPILER_GCC >= 30000)
        #define NORETURN __attribute__((noreturn))
    #else
        #define NORETURN
    #endif
    #undef INLINE
    #if (COMPILER_GCC >= 30000)
        #define INLINE __attribute__((always_inline))
    #else
        #define INLINE inline
    #endif
    #undef FASTCALL
    #if (COMPILER_GCC >= 30400)
        #define FASTCALL __attribute__((fastcall))
    #else
        #define FASTCALL
    #endif
    #undef CDECL
    #define CDECL __attribute__((cdecl))
    #undef STDCALL
    #define STDCALL __attribute__((stdcall))
    #define JANE_ALIGN(n) __attribute__((aligned(n)))
    #include <stdint.h>
#endif
#if defined(__MINGW32__) || defined(__MINGW64__)
    #define COMPILER_MINGW
#endif
#if defined(__PATHCC__)
    #define COMPILER_PATHCC
#endif

#define OPTIONAL_INLINE
#if defined(OPTIONAL_INLINE)
    #undef OPTIONAL_INLINE
    #define OPTIONAL_INLINE INLINE
#else
    #define OPTIONAL_INLINE
#endif

#define CRYPTO_FN NOINLINE STDCALL

/* determine cpu */
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__ ) || defined(_M_X64)
    #define CPU_X86_64
#elif defined(__i586__) || defined(__i686__) || (defined(_M_IX86) && (_M_IX86 >= 500))
    #define CPU_X86 500
#elif defined(__i486__) || (defined(_M_IX86) && (_M_IX86 >= 400))
    #define CPU_X86 400
#elif defined(__i386__) || (defined(_M_IX86) && (_M_IX86 >= 300)) || defined(__X86__) || defined(_X86_) || defined(__I86__)
    #define CPU_X86 300
#elif defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(_M_IA64) || defined(__ia64)
    #define CPU_IA64
#endif

#if defined(__sparc__) || defined(__sparc) || defined(__sparcv9)
    #define CPU_SPARC
    #if defined(__sparcv9)
        #define CPU_SPARC64
    #endif
#endif

#if defined(CPU_X86_64) || defined(CPU_IA64) || defined(CPU_SPARC64) || defined(__64BIT__) || defined(__LP64__) || defined(_LP64) || (defined(_MIPS_SZLONG) && (_MIPS_SZLONG == 64))
    #define CPU_64BITS
    #undef FASTCALL
    #define FASTCALL
    #undef CDECL
    #define CDECL
    #undef STDCALL
    #define STDCALL
#endif

#if defined(powerpc) || defined(__PPC__) || defined(__ppc__) || defined(_ARCH_PPC) || defined(__powerpc__) || defined(__powerpc) || defined(POWERPC) || defined(_M_PPC)
    #define CPU_PPC
    #if defined(_ARCH_PWR7)
        #define CPU_POWER7
    #elif defined(__64BIT__)
        #define CPU_PPC64
    #else
        #define CPU_PPC32
    #endif
#endif

#if defined(__hppa__) || defined(__hppa)
    #define CPU_HPPA
#endif

#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA)
    #define CPU_ALPHA
#endif

/* endian */

#if ((defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN)) || \
     (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)) || \
     (defined(CPU_X86) || defined(CPU_X86_64)) || \
     (defined(vax) || defined(MIPSEL) || defined(_MIPSEL)))
    #define CPU_LE
#elif ((defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN)) || \
       (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)) || \
       (defined(CPU_SPARC) || defined(CPU_PPC) || defined(mc68000) || defined(sel)) || defined(_MIPSEB))
    #define CPU_BE
#else
    /* unknown endian! */
#endif


#define U8TO32_BE(p) \
    (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \
     ((uint32_t)((p)[2]) <<  8) | ((uint32_t)((p)[3])      ))

#define U8TO32_LE(p) \
    (((uint32_t)((p)[0])      ) | ((uint32_t)((p)[1]) <<  8) | \
     ((uint32_t)((p)[2]) << 16) | ((uint32_t)((p)[3]) << 24))

#define U32TO8_BE(p, v) \
    (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \
    (p)[2] = (uint8_t)((v) >>  8); (p)[3] = (uint8_t)((v)      );

#define U32TO8_LE(p, v) \
    (p)[0] = (uint8_t)((v)      ); (p)[1] = (uint8_t)((v) >>  8); \
    (p)[2] = (uint8_t)((v) >> 16); (p)[3] = (uint8_t)((v) >> 24);

#define U8TO64_BE(p) \
    (((uint64_t)U8TO32_BE(p) << 32) | (uint64_t)U8TO32_BE((p) + 4))

#define U8TO64_LE(p) \
    (((uint64_t)U8TO32_LE(p)) | ((uint64_t)U8TO32_LE((p) + 4) << 32))

#define U64TO8_BE(p, v) \
    U32TO8_BE((p), (uint32_t)((v) >> 32)); \
    U32TO8_BE((p) + 4, (uint32_t)((v)      ));

#define U64TO8_LE(p, v) \
    U32TO8_LE((p), (uint32_t)((v)      )); \
    U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));

#define U32_SWAP(v) { \
    (v) = (((v) << 8) & 0xFF00FF00) | (((v) >> 8) & 0x00FF00FF); \
    (v) = ((v) << 16) | ((v) >> 16); \
}

#define U64_SWAP(v) { \
    (v) = (((v) <<  8) & 0xFF00FF00FF00FF00ull) | (((v) >>  8) & 0x00FF00FF00FF00FFull); \
    (v) = (((v) << 16) & 0xFFFF0000FFFF0000ull) | (((v) >> 16) & 0x0000FFFF0000FFFFull); \
    (v) = ((v) << 32) | ((v) >> 32); \
}
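
/* worked example (editor's sketch, not in the vendored file):
   v = 0x11223344
   after the first line:  0x22114433  (bytes swapped within each 16-bit half)
   after the second line: 0x44332211  (the halves exchanged by the 16-bit rotate) */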

/* constant-time comparison: returns 1 if the two buffers match, 0 otherwise,
   touching every byte regardless of where the first difference occurs */
static int
scrypt_verify(const uint8_t *x, const uint8_t *y, size_t len) {
    uint32_t differentbits = 0;
    while (len--)
        differentbits |= (*x++ ^ *y++);
    return (1 & ((differentbits - 1) >> 8));
}
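
Because the return value is 1 on a match, call sites can use it directly as a boolean; a minimal sketch (buffer names hypothetical):

    uint8_t computed[64], expected[64];
    /* ... fill both buffers ... */
    if (!scrypt_verify(computed, expected, 64)) {
        /* mismatch: reject without leaking how many leading bytes agreed */
    }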

static void
scrypt_ensure_zero(void *p, size_t len) {
#if ((defined(CPU_X86) || defined(CPU_X86_64)) && defined(COMPILER_MSVC))
    __stosb((unsigned char *)p, 0, len);
#elif (defined(CPU_X86) && defined(COMPILER_GCC))
    __asm__ __volatile__(
        "pushl %%edi;\n"
        "pushl %%ecx;\n"
        "rep stosb;\n"
        "popl %%ecx;\n"
        "popl %%edi;\n"
        :: "a"(0), "D"(p), "c"(len) : "cc", "memory"
    );
#elif (defined(CPU_X86_64) && defined(COMPILER_GCC))
    __asm__ __volatile__(
        "pushq %%rdi;\n"
        "pushq %%rcx;\n"
        "rep stosb;\n"
        "popq %%rcx;\n"
        "popq %%rdi;\n"
        :: "a"(0), "D"(p), "c"(len) : "cc", "memory"
    );
#else
    volatile uint8_t *b = (volatile uint8_t *)p;
    size_t i;
    for (i = 0; i < len; i++)
        b[i] = 0;
#endif
}

#include "scrypt-jane-portable-x86.h"

#if !defined(asm_calling_convention)
    #define asm_calling_convention
#endif
74  deps/scrypt-jane-master/code/scrypt-jane-romix-basic.h  vendored  Normal file
@@ -0,0 +1,74 @@
#if !defined(SCRYPT_CHOOSE_COMPILETIME)
/* function type returned by scrypt_getROMix, used with cpu detection */
typedef void (FASTCALL *scrypt_ROMixfn)(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[chunkWords * N]*/, uint32_t N, uint32_t r);
#endif

/* romix pre/post nop function */
static void asm_calling_convention
scrypt_romix_nop(scrypt_mix_word_t *blocks, size_t nblocks) {
    (void)blocks; (void)nblocks;
}

/* romix pre/post endian conversion function */
static void asm_calling_convention
scrypt_romix_convert_endian(scrypt_mix_word_t *blocks, size_t nblocks) {
#if !defined(CPU_LE)
    static const union { uint8_t b[2]; uint16_t w; } endian_test = {{1,0}};
    size_t i;
    if (endian_test.w == 0x100) {
        nblocks *= SCRYPT_BLOCK_WORDS;
        for (i = 0; i < nblocks; i++) {
            SCRYPT_WORD_ENDIAN_SWAP(blocks[i]);
        }
    }
#else
    (void)blocks; (void)nblocks;
#endif
}

/* chunkmix test function */
typedef void (asm_calling_convention *chunkmixfn)(scrypt_mix_word_t *Bout/*[chunkWords]*/, scrypt_mix_word_t *Bin/*[chunkWords]*/, scrypt_mix_word_t *Bxor/*[chunkWords]*/, uint32_t r);
typedef void (asm_calling_convention *blockfixfn)(scrypt_mix_word_t *blocks, size_t nblocks);

static int
scrypt_test_mix_instance(chunkmixfn mixfn, blockfixfn prefn, blockfixfn postfn, const uint8_t expected[16]) {
    /* r = 2, (2 * r) = 4 blocks in a chunk, 4 * SCRYPT_BLOCK_WORDS total */
    const uint32_t r = 2, blocks = 2 * r, words = blocks * SCRYPT_BLOCK_WORDS;
#if (defined(X86ASM_AVX2) || defined(X86_64ASM_AVX2) || defined(X86_INTRINSIC_AVX2))
    scrypt_mix_word_t JANE_ALIGN(32) chunk[2][4 * SCRYPT_BLOCK_WORDS], v;
#else
    scrypt_mix_word_t JANE_ALIGN(16) chunk[2][4 * SCRYPT_BLOCK_WORDS], v;
#endif
    uint8_t final[16];
    size_t i;

    for (i = 0; i < words; i++) {
        v = (scrypt_mix_word_t)i;
        v = (v << 8) | v;
        v = (v << 16) | v;
        chunk[0][i] = v;
    }

    prefn(chunk[0], blocks);
    mixfn(chunk[1], chunk[0], NULL, r);
    postfn(chunk[1], blocks);

    /* grab the last 16 bytes of the final block */
    for (i = 0; i < 16; i += sizeof(scrypt_mix_word_t)) {
        SCRYPT_WORDTO8_LE(final + i, chunk[1][words - (16 / sizeof(scrypt_mix_word_t)) + (i / sizeof(scrypt_mix_word_t))]);
    }

    return scrypt_verify(expected, final, 16);
}

/* returns a pointer to item i, where item is len scrypt_mix_word_t's long */
static scrypt_mix_word_t *
scrypt_item(scrypt_mix_word_t *base, scrypt_mix_word_t i, scrypt_mix_word_t len) {
    return base + (i * len);
}

/* returns a pointer to block i */
static scrypt_mix_word_t *
scrypt_block(scrypt_mix_word_t *base, scrypt_mix_word_t i) {
    return base + (i * SCRYPT_BLOCK_WORDS);
}
122  deps/scrypt-jane-master/code/scrypt-jane-romix-template.h  vendored  Normal file
@@ -0,0 +1,122 @@
#if !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_HAVE_ROMIX)

#if defined(SCRYPT_CHOOSE_COMPILETIME)
#undef SCRYPT_ROMIX_FN
#define SCRYPT_ROMIX_FN scrypt_ROMix
#endif

#undef SCRYPT_HAVE_ROMIX
#define SCRYPT_HAVE_ROMIX

#if !defined(SCRYPT_CHUNKMIX_FN)

#define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_basic

/*
    Bout = ChunkMix(Bin)

    2*r: number of blocks in the chunk
*/
static void asm_calling_convention
SCRYPT_CHUNKMIX_FN(scrypt_mix_word_t *Bout/*[chunkWords]*/, scrypt_mix_word_t *Bin/*[chunkWords]*/, scrypt_mix_word_t *Bxor/*[chunkWords]*/, uint32_t r) {
#if (defined(X86ASM_AVX2) || defined(X86_64ASM_AVX2) || defined(X86_INTRINSIC_AVX2))
    scrypt_mix_word_t JANE_ALIGN(32) X[SCRYPT_BLOCK_WORDS], *block;
#else
    scrypt_mix_word_t JANE_ALIGN(16) X[SCRYPT_BLOCK_WORDS], *block;
#endif
    uint32_t i, j, blocksPerChunk = r * 2, half = 0;

    /* 1: X = B_{2r - 1} */
    block = scrypt_block(Bin, blocksPerChunk - 1);
    for (i = 0; i < SCRYPT_BLOCK_WORDS; i++)
        X[i] = block[i];

    if (Bxor) {
        block = scrypt_block(Bxor, blocksPerChunk - 1);
        for (i = 0; i < SCRYPT_BLOCK_WORDS; i++)
            X[i] ^= block[i];
    }

    /* 2: for i = 0 to 2r - 1 do */
    for (i = 0; i < blocksPerChunk; i++, half ^= r) {
        /* 3: X = H(X ^ B_i) */
        block = scrypt_block(Bin, i);
        for (j = 0; j < SCRYPT_BLOCK_WORDS; j++)
            X[j] ^= block[j];

        if (Bxor) {
            block = scrypt_block(Bxor, i);
            for (j = 0; j < SCRYPT_BLOCK_WORDS; j++)
                X[j] ^= block[j];
        }
        SCRYPT_MIX_FN(X);

        /* 4: Y_i = X */
        /* 6: B'[0..r-1] = Y_even */
        /* 6: B'[r..2r-1] = Y_odd */
        block = scrypt_block(Bout, (i / 2) + half);
        for (j = 0; j < SCRYPT_BLOCK_WORDS; j++)
            block[j] = X[j];
    }
}
#endif

/*
    X = ROMix(X)

    X: chunk to mix
    Y: scratch chunk
    N: number of rounds
    V[N]: array of chunks to randomly index in to
    2*r: number of blocks in a chunk
*/

static void NOINLINE FASTCALL
SCRYPT_ROMIX_FN(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[N * chunkWords]*/, uint32_t N, uint32_t r) {
    uint32_t i, j, chunkWords = (uint32_t)(SCRYPT_BLOCK_WORDS * r * 2);
    scrypt_mix_word_t *block = V;

    SCRYPT_ROMIX_TANGLE_FN(X, r * 2);

    /* 1: X = B */
    /* implicit */

    /* 2: for i = 0 to N - 1 do */
    memcpy(block, X, chunkWords * sizeof(scrypt_mix_word_t));
    for (i = 0; i < N - 1; i++, block += chunkWords) {
        /* 3: V_i = X */
        /* 4: X = H(X) */
        SCRYPT_CHUNKMIX_FN(block + chunkWords, block, NULL, r);
    }
    SCRYPT_CHUNKMIX_FN(X, block, NULL, r);

    /* 6: for i = 0 to N - 1 do */
    for (i = 0; i < N; i += 2) {
        /* 7: j = Integerify(X) % N */
        j = X[chunkWords - SCRYPT_BLOCK_WORDS] & (N - 1);

        /* 8: Y = H(X ^ V_j) */
        SCRYPT_CHUNKMIX_FN(Y, X, scrypt_item(V, j, chunkWords), r);

        /* 7: j = Integerify(Y) % N */
        j = Y[chunkWords - SCRYPT_BLOCK_WORDS] & (N - 1);

        /* 8: X = H(Y ^ V_j) */
        SCRYPT_CHUNKMIX_FN(X, Y, scrypt_item(V, j, chunkWords), r);
    }

    /* 10: B' = X */
    /* implicit */

    SCRYPT_ROMIX_UNTANGLE_FN(X, r * 2);
}

#endif /* !defined(SCRYPT_CHOOSE_COMPILETIME) || !defined(SCRYPT_HAVE_ROMIX) */


#undef SCRYPT_CHUNKMIX_FN
#undef SCRYPT_ROMIX_FN
#undef SCRYPT_MIX_FN
#undef SCRYPT_ROMIX_TANGLE_FN
#undef SCRYPT_ROMIX_UNTANGLE_FN
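
Each SIMD backend in the headers below plugs into this template by defining the hook macros before including it; schematically (names illustrative, the real instantiations follow in the salsa headers):

/* #define SCRYPT_CHUNKMIX_FN        scrypt_ChunkMix_myvariant
   #define SCRYPT_ROMIX_FN           scrypt_ROMix_myvariant
   #define SCRYPT_MIX_FN             myvariant_core
   #define SCRYPT_ROMIX_TANGLE_FN    myvariant_tangle
   #define SCRYPT_ROMIX_UNTANGLE_FN  myvariant_untangle
   #include "scrypt-jane-romix-template.h" */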
27  deps/scrypt-jane-master/code/scrypt-jane-romix.h  vendored  Normal file
@@ -0,0 +1,27 @@
#if defined(SCRYPT_CHACHA)
#include "scrypt-jane-chacha.h"
#elif defined(SCRYPT_SALSA)
#include "scrypt-jane-salsa.h"
#elif defined(SCRYPT_SALSA64)
#include "scrypt-jane-salsa64.h"
#else
    #define SCRYPT_MIX_BASE "ERROR"
    typedef uint32_t scrypt_mix_word_t;
    #define SCRYPT_WORDTO8_LE U32TO8_LE
    #define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP
    #define SCRYPT_BLOCK_BYTES 64
    #define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t))
    #if !defined(SCRYPT_CHOOSE_COMPILETIME)
    static void FASTCALL scrypt_ROMix_error(scrypt_mix_word_t *X/*[chunkWords]*/, scrypt_mix_word_t *Y/*[chunkWords]*/, scrypt_mix_word_t *V/*[chunkWords * N]*/, uint32_t N, uint32_t r) {}
    static scrypt_ROMixfn scrypt_getROMix(void) { return scrypt_ROMix_error; }
    #else
    static void FASTCALL scrypt_ROMix(scrypt_mix_word_t *X, scrypt_mix_word_t *Y, scrypt_mix_word_t *V, uint32_t N, uint32_t r) {}
    #endif
    static int scrypt_test_mix(void) { return 0; }
    #error must define a mix function!
#endif

#if !defined(SCRYPT_CHOOSE_COMPILETIME)
#undef SCRYPT_MIX
#define SCRYPT_MIX SCRYPT_MIX_BASE
#endif
134  deps/scrypt-jane-master/code/scrypt-jane-salsa.h  vendored  Normal file
@@ -0,0 +1,134 @@
#define SCRYPT_MIX_BASE "Salsa20/8"

typedef uint32_t scrypt_mix_word_t;

#define SCRYPT_WORDTO8_LE U32TO8_LE
#define SCRYPT_WORD_ENDIAN_SWAP U32_SWAP

#define SCRYPT_BLOCK_BYTES 64
#define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t))

/* must have these here in case block bytes is ever != 64 */
#include "scrypt-jane-romix-basic.h"

#include "scrypt-jane-mix_salsa-xop.h"
#include "scrypt-jane-mix_salsa-avx.h"
#include "scrypt-jane-mix_salsa-sse2.h"
#include "scrypt-jane-mix_salsa.h"

#if defined(SCRYPT_SALSA_XOP)
    #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_xop
    #define SCRYPT_ROMIX_FN scrypt_ROMix_xop
    #define SCRYPT_ROMIX_TANGLE_FN salsa_core_tangle_sse2
    #define SCRYPT_ROMIX_UNTANGLE_FN salsa_core_tangle_sse2
    #include "scrypt-jane-romix-template.h"
#endif

#if defined(SCRYPT_SALSA_AVX)
    #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_avx
    #define SCRYPT_ROMIX_FN scrypt_ROMix_avx
    #define SCRYPT_ROMIX_TANGLE_FN salsa_core_tangle_sse2
    #define SCRYPT_ROMIX_UNTANGLE_FN salsa_core_tangle_sse2
    #include "scrypt-jane-romix-template.h"
#endif

#if defined(SCRYPT_SALSA_SSE2)
    #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_sse2
    #define SCRYPT_ROMIX_FN scrypt_ROMix_sse2
    #define SCRYPT_MIX_FN salsa_core_sse2
    #define SCRYPT_ROMIX_TANGLE_FN salsa_core_tangle_sse2
    #define SCRYPT_ROMIX_UNTANGLE_FN salsa_core_tangle_sse2
    #include "scrypt-jane-romix-template.h"
#endif

/* cpu agnostic */
#define SCRYPT_ROMIX_FN scrypt_ROMix_basic
#define SCRYPT_MIX_FN salsa_core_basic
#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_convert_endian
#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_convert_endian
#include "scrypt-jane-romix-template.h"

#if !defined(SCRYPT_CHOOSE_COMPILETIME)
static scrypt_ROMixfn
scrypt_getROMix(void) {
    size_t cpuflags = detect_cpu();

#if defined(SCRYPT_SALSA_XOP)
    if (cpuflags & cpu_xop)
        return scrypt_ROMix_xop;
    else
#endif

#if defined(SCRYPT_SALSA_AVX)
    if (cpuflags & cpu_avx)
        return scrypt_ROMix_avx;
    else
#endif

#if defined(SCRYPT_SALSA_SSE2)
    if (cpuflags & cpu_sse2)
        return scrypt_ROMix_sse2;
    else
#endif

    return scrypt_ROMix_basic;
}
#endif


#if defined(SCRYPT_TEST_SPEED)
static size_t
available_implementations(void) {
    size_t cpuflags = detect_cpu();
    size_t flags = 0;

#if defined(SCRYPT_SALSA_XOP)
    if (cpuflags & cpu_xop)
        flags |= cpu_xop;
#endif

#if defined(SCRYPT_SALSA_AVX)
    if (cpuflags & cpu_avx)
        flags |= cpu_avx;
#endif

#if defined(SCRYPT_SALSA_SSE2)
    if (cpuflags & cpu_sse2)
        flags |= cpu_sse2;
#endif

    return flags;
}
#endif


static int
scrypt_test_mix(void) {
    static const uint8_t expected[16] = {
        0x41,0x1f,0x2e,0xa3,0xab,0xa3,0x1a,0x34,0x87,0x1d,0x8a,0x1c,0x76,0xa0,0x27,0x66,
    };

    int ret = 1;
    size_t cpuflags = detect_cpu();

#if defined(SCRYPT_SALSA_XOP)
    if (cpuflags & cpu_xop)
        ret &= scrypt_test_mix_instance(scrypt_ChunkMix_xop, salsa_core_tangle_sse2, salsa_core_tangle_sse2, expected);
#endif

#if defined(SCRYPT_SALSA_AVX)
    if (cpuflags & cpu_avx)
        ret &= scrypt_test_mix_instance(scrypt_ChunkMix_avx, salsa_core_tangle_sse2, salsa_core_tangle_sse2, expected);
#endif

#if defined(SCRYPT_SALSA_SSE2)
    if (cpuflags & cpu_sse2)
        ret &= scrypt_test_mix_instance(scrypt_ChunkMix_sse2, salsa_core_tangle_sse2, salsa_core_tangle_sse2, expected);
#endif

#if defined(SCRYPT_SALSA_BASIC)
    ret &= scrypt_test_mix_instance(scrypt_ChunkMix_basic, scrypt_romix_convert_endian, scrypt_romix_convert_endian, expected);
#endif

    return ret;
}
183  deps/scrypt-jane-master/code/scrypt-jane-salsa64.h  vendored  Normal file
@@ -0,0 +1,183 @@
#define SCRYPT_MIX_BASE "Salsa64/8"

typedef uint64_t scrypt_mix_word_t;

#define SCRYPT_WORDTO8_LE U64TO8_LE
#define SCRYPT_WORD_ENDIAN_SWAP U64_SWAP

#define SCRYPT_BLOCK_BYTES 128
#define SCRYPT_BLOCK_WORDS (SCRYPT_BLOCK_BYTES / sizeof(scrypt_mix_word_t))

/* must have these here in case block bytes is ever != 64 */
#include "scrypt-jane-romix-basic.h"

#include "scrypt-jane-mix_salsa64-avx2.h"
#include "scrypt-jane-mix_salsa64-xop.h"
#include "scrypt-jane-mix_salsa64-avx.h"
#include "scrypt-jane-mix_salsa64-ssse3.h"
#include "scrypt-jane-mix_salsa64-sse2.h"
#include "scrypt-jane-mix_salsa64.h"

#if defined(SCRYPT_SALSA64_AVX2)
    #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_avx2
    #define SCRYPT_ROMIX_FN scrypt_ROMix_avx2
    #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2
    #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2
    #include "scrypt-jane-romix-template.h"
#endif

#if defined(SCRYPT_SALSA64_XOP)
    #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_xop
    #define SCRYPT_ROMIX_FN scrypt_ROMix_xop
    #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2
    #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2
    #include "scrypt-jane-romix-template.h"
#endif

#if defined(SCRYPT_SALSA64_AVX)
    #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_avx
    #define SCRYPT_ROMIX_FN scrypt_ROMix_avx
    #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2
    #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2
    #include "scrypt-jane-romix-template.h"
#endif

#if defined(SCRYPT_SALSA64_SSSE3)
    #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_ssse3
    #define SCRYPT_ROMIX_FN scrypt_ROMix_ssse3
    #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2
    #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2
    #include "scrypt-jane-romix-template.h"
#endif

#if defined(SCRYPT_SALSA64_SSE2)
    #define SCRYPT_CHUNKMIX_FN scrypt_ChunkMix_sse2
    #define SCRYPT_ROMIX_FN scrypt_ROMix_sse2
    #define SCRYPT_ROMIX_TANGLE_FN salsa64_core_tangle_sse2
    #define SCRYPT_ROMIX_UNTANGLE_FN salsa64_core_tangle_sse2
    #include "scrypt-jane-romix-template.h"
#endif

/* cpu agnostic */
#define SCRYPT_ROMIX_FN scrypt_ROMix_basic
#define SCRYPT_MIX_FN salsa64_core_basic
#define SCRYPT_ROMIX_TANGLE_FN scrypt_romix_convert_endian
#define SCRYPT_ROMIX_UNTANGLE_FN scrypt_romix_convert_endian
#include "scrypt-jane-romix-template.h"

#if !defined(SCRYPT_CHOOSE_COMPILETIME)
static scrypt_ROMixfn
scrypt_getROMix(void) {
    size_t cpuflags = detect_cpu();

#if defined(SCRYPT_SALSA64_AVX2)
    if (cpuflags & cpu_avx2)
        return scrypt_ROMix_avx2;
    else
#endif

#if defined(SCRYPT_SALSA64_XOP)
    if (cpuflags & cpu_xop)
        return scrypt_ROMix_xop;
    else
#endif

#if defined(SCRYPT_SALSA64_AVX)
    if (cpuflags & cpu_avx)
        return scrypt_ROMix_avx;
    else
#endif

#if defined(SCRYPT_SALSA64_SSSE3)
    if (cpuflags & cpu_ssse3)
        return scrypt_ROMix_ssse3;
    else
#endif

#if defined(SCRYPT_SALSA64_SSE2)
    if (cpuflags & cpu_sse2)
        return scrypt_ROMix_sse2;
    else
#endif

    return scrypt_ROMix_basic;
}
#endif


#if defined(SCRYPT_TEST_SPEED)
static size_t
available_implementations(void) {
    size_t cpuflags = detect_cpu();
    size_t flags = 0;

#if defined(SCRYPT_SALSA64_AVX2)
    if (cpuflags & cpu_avx2)
        flags |= cpu_avx2;
#endif

#if defined(SCRYPT_SALSA64_XOP)
    if (cpuflags & cpu_xop)
        flags |= cpu_xop;
#endif

#if defined(SCRYPT_SALSA64_AVX)
    if (cpuflags & cpu_avx)
        flags |= cpu_avx;
#endif

#if defined(SCRYPT_SALSA64_SSSE3)
    if (cpuflags & cpu_ssse3)
        flags |= cpu_ssse3;
#endif

#if defined(SCRYPT_SALSA64_SSE2)
    if (cpuflags & cpu_sse2)
        flags |= cpu_sse2;
#endif

    return flags;
}
#endif

static int
scrypt_test_mix(void) {
    static const uint8_t expected[16] = {
        0xf8,0x92,0x9b,0xf8,0xcc,0x1d,0xce,0x2e,0x13,0x82,0xac,0x96,0xb2,0x6c,0xee,0x2c,
    };

    int ret = 1;
    size_t cpuflags = detect_cpu();

#if defined(SCRYPT_SALSA64_AVX2)
    if (cpuflags & cpu_avx2)
        ret &= scrypt_test_mix_instance(scrypt_ChunkMix_avx2, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected);
#endif

#if defined(SCRYPT_SALSA64_XOP)
    if (cpuflags & cpu_xop)
        ret &= scrypt_test_mix_instance(scrypt_ChunkMix_xop, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected);
#endif

#if defined(SCRYPT_SALSA64_AVX)
    if (cpuflags & cpu_avx)
        ret &= scrypt_test_mix_instance(scrypt_ChunkMix_avx, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected);
#endif

#if defined(SCRYPT_SALSA64_SSSE3)
    if (cpuflags & cpu_ssse3)
        ret &= scrypt_test_mix_instance(scrypt_ChunkMix_ssse3, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected);
#endif

#if defined(SCRYPT_SALSA64_SSE2)
    if (cpuflags & cpu_sse2)
        ret &= scrypt_test_mix_instance(scrypt_ChunkMix_sse2, salsa64_core_tangle_sse2, salsa64_core_tangle_sse2, expected);
#endif

#if defined(SCRYPT_SALSA64_BASIC)
    ret &= scrypt_test_mix_instance(scrypt_ChunkMix_basic, scrypt_romix_convert_endian, scrypt_romix_convert_endian, expected);
#endif

    return ret;
}
261  deps/scrypt-jane-master/code/scrypt-jane-test-vectors.h  vendored  Normal file
@@ -0,0 +1,261 @@
typedef struct scrypt_test_setting_t {
    const char *pw, *salt;
    uint8_t Nfactor, rfactor, pfactor;
} scrypt_test_setting;

static const scrypt_test_setting post_settings[] = {
    {"", "", 3, 0, 0},
    {"password", "NaCl", 9, 3, 4},
    {0, 0, 0, 0, 0}
};
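
/* Editor's note (derived, not in the vendored file): the factors are log2-style
   exponents, with N = 1 << (Nfactor + 1), r = 1 << rfactor, p = 1 << pfactor;
   so {"password", "NaCl", 9, 3, 4} is the classic N = 1024, r = 8, p = 16
   vector from the scrypt paper. */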

#if defined(SCRYPT_SHA256)
#if defined(SCRYPT_SALSA)
/* sha256 + salsa20/8, the only 'official' test vectors! */
static const uint8_t post_vectors[][64] = {
    {0x77,0xd6,0x57,0x62,0x38,0x65,0x7b,0x20,0x3b,0x19,0xca,0x42,0xc1,0x8a,0x04,0x97,
     0xf1,0x6b,0x48,0x44,0xe3,0x07,0x4a,0xe8,0xdf,0xdf,0xfa,0x3f,0xed,0xe2,0x14,0x42,
     0xfc,0xd0,0x06,0x9d,0xed,0x09,0x48,0xf8,0x32,0x6a,0x75,0x3a,0x0f,0xc8,0x1f,0x17,
     0xe8,0xd3,0xe0,0xfb,0x2e,0x0d,0x36,0x28,0xcf,0x35,0xe2,0x0c,0x38,0xd1,0x89,0x06},
    {0xfd,0xba,0xbe,0x1c,0x9d,0x34,0x72,0x00,0x78,0x56,0xe7,0x19,0x0d,0x01,0xe9,0xfe,
     0x7c,0x6a,0xd7,0xcb,0xc8,0x23,0x78,0x30,0xe7,0x73,0x76,0x63,0x4b,0x37,0x31,0x62,
     0x2e,0xaf,0x30,0xd9,0x2e,0x22,0xa3,0x88,0x6f,0xf1,0x09,0x27,0x9d,0x98,0x30,0xda,
     0xc7,0x27,0xaf,0xb9,0x4a,0x83,0xee,0x6d,0x83,0x60,0xcb,0xdf,0xa2,0xcc,0x06,0x40}
};
#elif defined(SCRYPT_CHACHA)
static const uint8_t post_vectors[][64] = {
    {0xef,0x8f,0x44,0x8f,0xc3,0xef,0x78,0x13,0xb2,0x26,0xa7,0x2a,0x40,0xa1,0x98,0x7f,
     0xc8,0x7f,0x0d,0x5f,0x40,0x66,0xa2,0x05,0x07,0x4f,0xc7,0xac,0x3b,0x47,0x07,0x0c,
     0xf5,0x20,0x46,0x76,0x20,0x7b,0xee,0x51,0x6d,0x5f,0xfa,0x9c,0x27,0xac,0xa9,0x36,
     0x62,0xbd,0xde,0x0b,0xa3,0xc0,0x66,0x84,0xde,0x82,0xd0,0x1a,0xb4,0xd1,0xb5,0xfe},
    {0xf1,0x94,0xf7,0x5f,0x15,0x12,0x10,0x4d,0x6e,0xfb,0x04,0x8c,0x35,0xc4,0x51,0xb6,
     0x11,0x04,0xa7,0x9b,0xb0,0x46,0xaf,0x7b,0x47,0x39,0xf0,0xac,0xb2,0x8a,0xfa,0x45,
     0x09,0x86,0x8f,0x10,0x4b,0xc6,0xee,0x00,0x11,0x38,0x73,0x7a,0x6a,0xd8,0x25,0x67,
     0x85,0xa4,0x10,0x4e,0xa9,0x2f,0x15,0xfe,0xcf,0x63,0xe1,0xe8,0xcf,0xab,0xe8,0xbd}
};
#elif defined(SCRYPT_SALSA64)
static const uint8_t post_vectors[][64] = {
    {0xf4,0x87,0x29,0xf4,0xc3,0x31,0x8c,0xe8,0xdf,0xe5,0xd8,0x73,0xff,0xca,0x32,0xcf,
     0xd8,0xac,0xe7,0xf7,0x15,0xda,0x84,0x41,0x60,0x23,0x26,0x4a,0xc8,0x3e,0xee,0xa6,
     0xa5,0x6e,0x52,0xd6,0x64,0x55,0x16,0x31,0x3e,0x66,0x7b,0x65,0xd5,0xe2,0xc9,0x95,
     0x1b,0xf0,0x81,0x40,0xb7,0x2f,0xff,0xa6,0xe6,0x02,0xcc,0x63,0x08,0x4a,0x74,0x31},
    {0x7a,0xd8,0xad,0x02,0x9c,0xa5,0xf4,0x42,0x6a,0x29,0xd2,0xb5,0x53,0xf1,0x6d,0x1d,
     0x25,0xc8,0x70,0x48,0x80,0xb9,0xa3,0xf6,0x94,0xf8,0xfa,0xb8,0x52,0x42,0xcd,0x14,
     0x26,0x46,0x28,0x06,0xc7,0xf6,0x1f,0xa7,0x89,0x6d,0xc5,0xa0,0x36,0xcc,0xde,0xcb,
     0x73,0x0b,0xa4,0xe2,0xd3,0xd1,0x44,0x06,0x35,0x08,0xe0,0x35,0x5b,0xf8,0xd7,0xe7}
};
#endif
#elif defined(SCRYPT_SHA512)
#if defined(SCRYPT_SALSA)
static const uint8_t post_vectors[][64] = {
    {0xae,0x54,0xe7,0x74,0xe4,0x51,0x6b,0x0f,0xe1,0xe7,0x28,0x03,0x17,0xe4,0x8c,0xfa,
     0x2f,0x66,0x55,0x7f,0xdc,0x3b,0x40,0xab,0x47,0x84,0xc9,0x63,0x36,0x07,0x9d,0xe5,
     0x86,0x43,0x95,0x89,0xb6,0xc0,0x6c,0x72,0x64,0x00,0xc1,0x2a,0xd7,0x69,0x21,0x92,
     0x8e,0xba,0xa4,0x59,0x9f,0x00,0x14,0x3a,0x7c,0x12,0x58,0x91,0x09,0xa0,0x32,0xfe},
    {0xc5,0xb3,0xd6,0xea,0x0a,0x4b,0x1e,0xcc,0x40,0x00,0xe5,0x98,0x5c,0xdc,0x06,0x06,
     0x78,0x34,0x92,0x16,0xcf,0xe4,0x9f,0x03,0x96,0x2d,0x41,0x35,0x00,0x9b,0xff,0x74,
     0x60,0x19,0x6e,0xe6,0xa6,0x46,0xf7,0x37,0xcb,0xfa,0xd0,0x9f,0x80,0x72,0x2e,0x85,
     0x13,0x3e,0x1a,0x91,0x90,0x53,0xa1,0x33,0x85,0x51,0xdc,0x62,0x1c,0x0e,0x4d,0x30}
};
#elif defined(SCRYPT_CHACHA)
static const uint8_t post_vectors[][64] = {
    {0xe2,0x05,0x7c,0x44,0xf9,0x55,0x9f,0x64,0xbe,0xd5,0x7f,0x85,0x69,0xc7,0x8c,0x7f,
     0x2b,0x91,0xd6,0x9a,0x6c,0xf8,0x57,0x55,0x61,0x25,0x3d,0xee,0xb8,0xd5,0x8c,0xdc,
     0x2d,0xd5,0x53,0x84,0x8c,0x06,0xaa,0x37,0x77,0xa6,0xf0,0xf1,0x35,0xfe,0xb5,0xcb,
     0x61,0xd7,0x2c,0x67,0xf3,0x7e,0x8a,0x1b,0x04,0xa3,0xa3,0x43,0xa2,0xb2,0x29,0xf2},
    {0x82,0xda,0x29,0xb2,0x08,0x27,0xfc,0x78,0x22,0xc4,0xb8,0x7e,0xbc,0x36,0xcf,0xcd,
     0x17,0x4b,0xa1,0x30,0x16,0x4a,0x25,0x70,0xc7,0xcb,0xe0,0x2b,0x56,0xd3,0x16,0x4e,
     0x85,0xb6,0x84,0xe7,0x9b,0x7f,0x8b,0xb5,0x94,0x33,0xcf,0x33,0x44,0x65,0xc8,0xa1,
     0x46,0xf9,0xf5,0xfc,0x74,0x29,0x7e,0xd5,0x46,0xec,0xbd,0x95,0xc1,0x80,0x24,0xe4}
};
#elif defined(SCRYPT_SALSA64)
static const uint8_t post_vectors[][64] = {
    {0xa6,0xcb,0x77,0x9a,0x64,0x1f,0x95,0x02,0x53,0xe7,0x5c,0x78,0xdb,0xa3,0x43,0xff,
     0xbe,0x10,0x4c,0x7b,0xe4,0xe1,0x91,0xcf,0x67,0x69,0x5a,0x2c,0x12,0xd6,0x99,0x49,
     0x92,0xfd,0x5a,0xaa,0x12,0x4c,0x2e,0xf6,0x95,0x46,0x8f,0x5e,0x77,0x62,0x16,0x29,
     0xdb,0xe7,0xab,0x02,0x2b,0x9c,0x35,0x03,0xf8,0xd4,0x04,0x7d,0x2d,0x73,0x85,0xf1},
    {0x54,0xb7,0xca,0xbb,0xaf,0x0f,0xb0,0x5f,0xb7,0x10,0x63,0x48,0xb3,0x15,0xd8,0xb5,
     0x62,0x64,0x89,0x6a,0x59,0xc6,0x0f,0x86,0x96,0x38,0xf0,0xcf,0xd4,0x62,0x90,0x61,
     0x7d,0xce,0xd6,0x13,0x85,0x67,0x4a,0xf5,0x32,0x03,0x74,0x30,0x0b,0x5a,0x2f,0x86,
     0x82,0x6e,0x0c,0x3e,0x40,0x7a,0xde,0xbe,0x42,0x6e,0x80,0x2b,0xaf,0xdb,0xcc,0x94}
};
#endif
#elif defined(SCRYPT_BLAKE512)
#if defined(SCRYPT_SALSA)
static const uint8_t post_vectors[][64] = {
    {0x4a,0x48,0xb3,0xfa,0xdc,0xb0,0xb8,0xdb,0x54,0xee,0xf3,0x5c,0x27,0x65,0x6c,0x20,
     0xab,0x61,0x9a,0x5b,0xd5,0x1d,0xd9,0x95,0xab,0x88,0x0e,0x4d,0x1e,0x71,0x2f,0x11,
     0x43,0x2e,0xef,0x23,0xca,0x8a,0x49,0x3b,0x11,0x38,0xa5,0x28,0x61,0x2f,0xb7,0x89,
     0x5d,0xef,0x42,0x4c,0xc1,0x74,0xea,0x8a,0x56,0xbe,0x4a,0x82,0x76,0x15,0x1a,0x87},
    {0x96,0x24,0xbf,0x40,0xeb,0x03,0x8e,0xfe,0xc0,0xd5,0xa4,0x81,0x85,0x7b,0x09,0x88,
     0x52,0xb5,0xcb,0xc4,0x48,0xe1,0xb9,0x1d,0x3f,0x8b,0x3a,0xc6,0x38,0x32,0xc7,0x55,
     0x30,0x28,0x7a,0x42,0xa9,0x5d,0x54,0x33,0x62,0xf3,0xd9,0x3c,0x96,0x40,0xd1,0x80,
     0xe4,0x0e,0x7e,0xf0,0x64,0x53,0xfe,0x7b,0xd7,0x15,0xba,0xad,0x16,0x80,0x01,0xb5}
};
#elif defined(SCRYPT_CHACHA)
static const uint8_t post_vectors[][64] = {
    {0x45,0x42,0x22,0x31,0x26,0x13,0x5f,0x94,0xa4,0x00,0x04,0x47,0xe8,0x50,0x6d,0xd6,
     0xdd,0xd5,0x08,0xd4,0x90,0x64,0xe0,0x59,0x70,0x46,0xff,0xfc,0x29,0xb3,0x6a,0xc9,
     0x4d,0x45,0x97,0x95,0xa8,0xf0,0x53,0xe7,0xee,0x4b,0x6b,0x5d,0x1e,0xa5,0xb2,0x58,
     0x4b,0x93,0xc9,0x89,0x4c,0xa8,0xab,0x03,0x74,0x38,0xbd,0x54,0x97,0x6b,0xab,0x4a},
    {0x4b,0x4a,0x63,0x96,0x73,0x34,0x9f,0x39,0x64,0x51,0x0e,0x2e,0x3b,0x07,0xd5,0x1c,
     0xd2,0xf7,0xce,0x60,0xab,0xac,0x89,0xa4,0x16,0x0c,0x58,0x82,0xb3,0xd3,0x25,0x5b,
     0xd5,0x62,0x32,0xf4,0x86,0x5d,0xb2,0x4b,0xbf,0x8e,0xc6,0xc0,0xac,0x40,0x48,0xb4,
     0x69,0x08,0xba,0x40,0x4b,0x07,0x2a,0x13,0x9c,0x98,0x3b,0x8b,0x20,0x0c,0xac,0x9e}
};
#elif defined(SCRYPT_SALSA64)
static const uint8_t post_vectors[][64] = {
    {0xcb,0x4b,0xc2,0xd1,0xf4,0x77,0x32,0x3c,0x42,0x9d,0xf7,0x7d,0x1f,0x22,0x64,0xa4,
     0xe2,0x88,0x30,0x2d,0x54,0x9d,0xb6,0x26,0x89,0x25,0x30,0xc3,0x3d,0xdb,0xba,0x99,
     0xe9,0x8e,0x1e,0x5e,0x57,0x66,0x75,0x7c,0x24,0xda,0x00,0x6f,0x79,0xf7,0x47,0xf5,
     0xea,0x40,0x70,0x37,0xd2,0x91,0xc7,0x4d,0xdf,0x46,0xb6,0x3e,0x95,0x7d,0xcb,0xc1},
    {0x25,0xc2,0xcb,0x7f,0xc8,0x50,0xb7,0x0b,0x11,0x9e,0x1d,0x10,0xb2,0xa8,0x35,0x23,
     0x91,0x39,0xfb,0x45,0xf2,0xbf,0xe4,0xd0,0x84,0xec,0x72,0x33,0x6d,0x09,0xed,0x41,
     0x9a,0x7e,0x4f,0x10,0x73,0x97,0x22,0x76,0x58,0x93,0x39,0x24,0xdf,0xd2,0xaa,0x2f,
     0x6b,0x2b,0x64,0x48,0xa5,0xb7,0xf5,0x56,0x77,0x02,0xa7,0x71,0x46,0xe5,0x0e,0x8d},
};
#endif
#elif defined(SCRYPT_BLAKE256)
#if defined(SCRYPT_SALSA)
static const uint8_t post_vectors[][64] = {
    {0xf1,0xf1,0x91,0x1a,0x81,0xe6,0x9f,0xc1,0xce,0x43,0xab,0xb1,0x1a,0x02,0x1e,0x16,
     0x08,0xc6,0xf9,0x00,0x50,0x1b,0x6d,0xf1,0x31,0x06,0x95,0x48,0x5d,0xf7,0x6c,0x00,
     0xa2,0x4c,0xb1,0x0e,0x52,0x66,0x94,0x7e,0x84,0xfc,0xa5,0x34,0xfd,0xf0,0xe9,0x57,
     0x85,0x2d,0x8c,0x05,0x5c,0x0f,0x04,0xd4,0x8d,0x3e,0x13,0x52,0x3d,0x90,0x2d,0x2c},
    {0xd5,0x42,0xd2,0x7b,0x06,0xae,0x63,0x90,0x9e,0x30,0x00,0x0e,0xd8,0xa4,0x3a,0x0b,
     0xee,0x4a,0xef,0xb2,0xc4,0x95,0x0d,0x72,0x07,0x70,0xcc,0xa3,0xf9,0x1e,0xc2,0x75,
     0xcf,0xaf,0xe1,0x44,0x1c,0x8c,0xe2,0x3e,0x0c,0x81,0xf3,0x92,0xe1,0x13,0xe6,0x4f,
     0x2d,0x27,0xc3,0x87,0xe5,0xb6,0xf9,0xd7,0x02,0x04,0x37,0x64,0x78,0x36,0x6e,0xb3}
};
#elif defined(SCRYPT_CHACHA)
static const uint8_t post_vectors[][64] = {
    {0xad,0x1b,0x4b,0xca,0xe3,0x26,0x1a,0xfd,0xb7,0x77,0x8c,0xde,0x8d,0x26,0x14,0xe1,
     0x54,0x38,0x42,0xf3,0xb3,0x66,0x29,0xf9,0x90,0x04,0xf1,0x82,0x7c,0x5a,0x6f,0xa8,
     0x7d,0xd6,0x08,0x0d,0x8b,0x78,0x04,0xad,0x31,0xea,0xd4,0x87,0x2d,0xf7,0x74,0x9a,
     0xe5,0xce,0x97,0xef,0xa3,0xbb,0x90,0x46,0x7c,0xf4,0x51,0x38,0xc7,0x60,0x53,0x21},
    {0x39,0xbb,0x56,0x3d,0x0d,0x7b,0x74,0x82,0xfe,0x5a,0x78,0x3d,0x66,0xe8,0x3a,0xdf,
     0x51,0x6f,0x3e,0xf4,0x86,0x20,0x8d,0xe1,0x81,0x22,0x02,0xf7,0x0d,0xb5,0x1a,0x0f,
     0xfc,0x59,0xb6,0x60,0xc9,0xdb,0x38,0x0b,0x5b,0x95,0xa5,0x94,0xda,0x42,0x2d,0x90,
     0x47,0xeb,0x73,0x31,0x9f,0x20,0xf6,0x81,0xc2,0xef,0x33,0x77,0x51,0xd8,0x2c,0xe4}
};
#elif defined(SCRYPT_SALSA64)
static const uint8_t post_vectors[][64] = {
    {0x9e,0xf2,0x60,0x7c,0xbd,0x7c,0x19,0x5c,0x79,0xc6,0x1b,0x7e,0xb0,0x65,0x1b,0xc3,
     0x70,0x0d,0x89,0xfc,0x72,0xb2,0x03,0x72,0x15,0xcb,0x8e,0x8c,0x49,0x50,0x4c,0x27,
     0x99,0xda,0x47,0x32,0x5e,0xb4,0xa2,0x07,0x83,0x51,0x6b,0x06,0x37,0x60,0x42,0xc4,
     0x59,0x49,0x99,0xdd,0xc0,0xd2,0x08,0x94,0x7f,0xe3,0x9e,0x4e,0x43,0x8e,0x5b,0xba},
    {0x86,0x6f,0x3b,0x11,0xb8,0xca,0x4b,0x6e,0xa7,0x6f,0xc2,0xc9,0x33,0xb7,0x8b,0x9f,
     0xa3,0xb9,0xf5,0xb5,0x62,0xa6,0x17,0x66,0xe4,0xc3,0x9d,0x9b,0xca,0x51,0xb0,0x2f,
     0xda,0x09,0xc1,0x77,0xed,0x8b,0x89,0xc2,0x69,0x5a,0x34,0x05,0x4a,0x1f,0x4d,0x76,
     0xcb,0xd5,0xa4,0x78,0xfa,0x1b,0xb9,0x5b,0xbc,0x3d,0xce,0x04,0x63,0x99,0xad,0x54}
};
#endif
#elif defined(SCRYPT_SKEIN512)
#if defined(SCRYPT_SALSA)
static const uint8_t post_vectors[][64] = {
    {0xe4,0x36,0xa0,0x9a,0xdb,0xf0,0xd1,0x45,0x56,0xda,0x25,0x53,0x00,0xf9,0x2c,0x69,
     0xa4,0xc2,0xa5,0x8e,0x1a,0x85,0xfa,0x53,0xbd,0x55,0x3d,0x11,0x2a,0x44,0x13,0x87,
     0x8f,0x81,0x88,0x13,0x1e,0x49,0xa8,0xc4,0xc5,0xcd,0x1f,0xe1,0x5f,0xf5,0xcb,0x2f,
     0x8b,0xab,0x57,0x38,0x59,0xeb,0x6b,0xac,0x3b,0x73,0x10,0xa6,0xe1,0xfe,0x17,0x3e},
    {0x6d,0x61,0xde,0x43,0xa9,0x38,0x53,0x5f,0xd8,0xf2,0x6d,0xf3,0xe4,0xd6,0xd8,0x5e,
     0x81,0x89,0xd0,0x0b,0x86,0x16,0xb1,0x91,0x65,0x76,0xd8,0xc1,0xf7,0x3b,0xca,0x8b,
     0x35,0x07,0x58,0xba,0x77,0xdf,0x11,0x6c,0xbc,0x58,0xee,0x11,0x59,0xf2,0xfe,0xcb,
     0x51,0xdc,0xcd,0x35,0x2e,0x46,0x22,0xa0,0xaa,0x55,0x60,0x7c,0x91,0x15,0xb8,0x00}
};
#elif defined(SCRYPT_CHACHA)
static const uint8_t post_vectors[][64] = {
    {0xd1,0x12,0x6d,0x64,0x10,0x0e,0x98,0x6c,0xbe,0x70,0x21,0xd9,0xc6,0x04,0x62,0xa4,
     0x29,0x13,0x9a,0x3c,0xf8,0xe9,0x1e,0x87,0x9f,0x88,0xf4,0x98,0x01,0x41,0x8e,0xce,
     0x60,0xf7,0xbe,0x17,0x0a,0xec,0xd6,0x30,0x80,0xcf,0x6b,0x1e,0xcf,0x95,0xa0,0x4d,
     0x37,0xed,0x3a,0x09,0xd1,0xeb,0x0c,0x80,0x82,0x22,0x8e,0xd3,0xb1,0x7f,0xd6,0xa8},
    {0x5c,0x5c,0x05,0xe2,0x75,0xa5,0xa4,0xec,0x81,0x97,0x9c,0x5b,0xd7,0x26,0xb3,0x16,
     0xb4,0x02,0x8c,0x56,0xe6,0x32,0x57,0x33,0x47,0x19,0x06,0x6c,0xde,0x68,0x41,0x37,
     0x5b,0x7d,0xa7,0xb3,0x73,0xeb,0x82,0xca,0x0f,0x86,0x2e,0x6b,0x47,0xa2,0x70,0x39,
     0x35,0xfd,0x2d,0x2e,0x7b,0xc3,0x68,0xbb,0x52,0x42,0x19,0x3b,0x78,0x96,0xe7,0xc8}
};
#elif defined(SCRYPT_SALSA64)
static const uint8_t post_vectors[][64] = {
    {0xd2,0xad,0x32,0x05,0xee,0x80,0xe3,0x44,0x70,0xc6,0x34,0xde,0x05,0xb6,0xcf,0x60,
     0x89,0x98,0x70,0xc0,0xb8,0xf5,0x54,0xf1,0xa6,0xb2,0xc8,0x76,0x34,0xec,0xc4,0x59,
     0x8e,0x64,0x42,0xd0,0xa9,0xed,0xe7,0x19,0xb2,0x8a,0x11,0xc6,0xa6,0xbf,0xa7,0xa9,
     0x4e,0x44,0x32,0x7e,0x12,0x91,0x9d,0xfe,0x52,0x48,0xa8,0x27,0xb3,0xfc,0xb1,0x89},
    {0xd6,0x67,0xd2,0x3e,0x30,0x1e,0x9d,0xe2,0x55,0x68,0x17,0x3d,0x2b,0x75,0x5a,0xe5,
     0x04,0xfb,0x3d,0x0e,0x86,0xe0,0xaa,0x1d,0xd4,0x72,0xda,0xb0,0x79,0x41,0xb7,0x99,
     0x68,0xe5,0xd9,0x55,0x79,0x7d,0xc3,0xd1,0xa6,0x56,0xc1,0xbe,0x0b,0x6c,0x62,0x23,
     0x66,0x67,0x91,0x47,0x99,0x13,0x6b,0xe3,0xda,0x59,0x55,0x18,0x67,0x8f,0x2e,0x3b}
};
#endif
#elif defined(SCRYPT_KECCAK512)
#if defined(SCRYPT_SALSA)
static const uint8_t post_vectors[][64] = {
    {0xc2,0x7b,0xbe,0x1d,0xf1,0x99,0xd8,0xe7,0x1b,0xac,0xe0,0x9d,0xeb,0x5a,0xfe,0x21,
     0x71,0xff,0x41,0x51,0x4f,0xbe,0x41,0x01,0x15,0xe2,0xb7,0xb9,0x55,0x15,0x25,0xa1,
     0x40,0x4c,0x66,0x29,0x32,0xb7,0xc9,0x62,0x60,0x88,0xe0,0x99,0x39,0xae,0xce,0x25,
     0x3c,0x11,0x89,0xdd,0xc6,0x14,0xd7,0x3e,0xa3,0x6d,0x07,0x2e,0x56,0xa0,0xff,0x97},
    {0x3c,0x91,0x12,0x4a,0x37,0x7d,0xd6,0x96,0xd2,0x9b,0x5d,0xea,0xb8,0xb9,0x82,0x4e,
     0x4f,0x6b,0x60,0x4c,0x59,0x01,0xe5,0x73,0xfd,0xf6,0xb8,0x9a,0x5a,0xd3,0x7c,0x7a,
     0xd2,0x4f,0x8e,0x74,0xc1,0x90,0x88,0xa0,0x3f,0x55,0x75,0x79,0x10,0xd0,0x09,0x79,
     0x0f,0x6c,0x74,0x0c,0x05,0x08,0x3c,0x8c,0x94,0x7b,0x30,0x56,0xca,0xdf,0xdf,0x34}
};
#elif defined(SCRYPT_CHACHA)
static const uint8_t post_vectors[][64] = {
    {0x77,0xcb,0x70,0xbf,0xae,0xd4,0x4c,0x5b,0xbc,0xd3,0xec,0x8a,0x82,0x43,0x8d,0xb3,
     0x7f,0x1f,0xfb,0x70,0x36,0x32,0x4d,0xa6,0xb7,0x13,0x37,0x77,0x30,0x0c,0x3c,0xfb,
     0x2c,0x20,0x8f,0x2a,0xf4,0x47,0x4d,0x69,0x8e,0xae,0x2d,0xad,0xba,0x35,0xe9,0x2f,
     0xe6,0x99,0x7a,0xf8,0xcf,0x70,0x78,0xbb,0x0c,0x72,0x64,0x95,0x8b,0x36,0x77,0x3d},
    {0xc6,0x43,0x17,0x16,0x87,0x09,0x5f,0x12,0xed,0x21,0xe2,0xb4,0xad,0x55,0xa1,0xa1,
     0x49,0x50,0x90,0x70,0xab,0x81,0x83,0x7a,0xcd,0xdf,0x23,0x52,0x19,0xc0,0xa2,0xd8,
     0x8e,0x98,0xeb,0xf0,0x37,0xab,0xad,0xfd,0x1c,0x04,0x97,0x18,0x42,0x85,0xf7,0x4b,
     0x18,0x2c,0x55,0xd3,0xa9,0xe6,0x89,0xfb,0x58,0x0a,0xb2,0x37,0xb9,0xf8,0xfb,0xc5}
};
#elif defined(SCRYPT_SALSA64)
static const uint8_t post_vectors[][64] = {
    {0xc7,0x34,0x95,0x02,0x5e,0x31,0x0d,0x1f,0x10,0x38,0x9c,0x3f,0x04,0x53,0xed,0x05,
     0x27,0x38,0xc1,0x3f,0x6a,0x0f,0xc5,0xa3,0x9b,0x73,0x8a,0x28,0x7e,0x5d,0x3c,0xdc,
     0x9d,0x5a,0x09,0xbf,0x8c,0x0a,0xad,0xe4,0x73,0x52,0xe3,0x6d,0xaa,0xd1,0x8b,0xbf,
     0xa3,0xb7,0xf0,0x58,0xad,0x22,0x24,0xc9,0xaa,0x96,0xb7,0x5d,0xfc,0x5f,0xb0,0xcf},
    {0x76,0x22,0xfd,0xe8,0xa2,0x79,0x8e,0x9d,0x43,0x8c,0x7a,0xba,0x78,0xb7,0x84,0xf1,
     0xc8,0xee,0x3b,0xae,0x31,0x89,0xbf,0x7e,0xd0,0x4b,0xc1,0x2d,0x58,0x5d,0x84,0x6b,
     0xec,0x86,0x56,0xe0,0x87,0x94,0x7f,0xbc,0xf9,0x48,0x92,0xef,0x54,0x7f,0x23,0x8d,
     0x4f,0x8b,0x0a,0x75,0xa7,0x39,0x0e,0x46,0x6e,0xee,0x58,0xc8,0xfa,0xea,0x90,0x53}
};
#endif
#elif defined(SCRYPT_KECCAK256)
#if defined(SCRYPT_SALSA)
static const uint8_t post_vectors[][64] = {
    {0x2e,0x96,0xd8,0x87,0x45,0xcd,0xd6,0xc8,0xf6,0xd2,0x87,0x33,0x50,0xc7,0x04,0xe5,
     0x3c,0x4b,0x48,0x44,0x57,0xc1,0x74,0x09,0x76,0x02,0xaa,0xd3,0x7b,0xf3,0xbf,0xed,
     0x4b,0x72,0xd7,0x1b,0x49,0x6b,0xe0,0x44,0x83,0xee,0x8f,0xaf,0xa1,0xb5,0x33,0xa9,
     0x9e,0x86,0xab,0xe2,0x9f,0xcf,0x68,0x6e,0x7e,0xbd,0xf5,0x7a,0x83,0x4b,0x1c,0x10},
    {0x42,0x7e,0xf9,0x4b,0x72,0x61,0xda,0x2d,0xb3,0x27,0x0e,0xe1,0xd9,0xde,0x5f,0x3e,
     0x64,0x2f,0xd6,0xda,0x90,0x59,0xce,0xbf,0x02,0x5b,0x32,0xf7,0x6d,0x94,0x51,0x7b,
     0xb6,0xa6,0x0d,0x99,0x3e,0x7f,0x39,0xbe,0x1b,0x1d,0x6c,0x97,0x12,0xd8,0xb7,0xfd,
     0x5b,0xb5,0xf3,0x73,0x5a,0x89,0xb2,0xdd,0xcc,0x3d,0x74,0x2e,0x3d,0x9e,0x3c,0x22}
};
#elif defined(SCRYPT_CHACHA)
static const uint8_t post_vectors[][64] = {
    {0x76,0x1d,0x5b,0x8f,0xa9,0xe1,0xa6,0x01,0xcb,0xc5,0x7a,0x5f,0x02,0x23,0xb6,0x82,
     0x57,0x79,0x60,0x2f,0x05,0x7f,0xb8,0x0a,0xcb,0x5e,0x54,0x11,0x49,0x2e,0xdd,0x85,
     0x83,0x30,0x67,0xb3,0x24,0x5c,0xce,0xfc,0x32,0xcf,0x12,0xc3,0xff,0xe0,0x79,0x36,
     0x74,0x17,0xa6,0x3e,0xcd,0xa0,0x7e,0xcb,0x37,0xeb,0xcb,0xb6,0xe1,0xb9,0xf5,0x15},
    {0xf5,0x66,0xa7,0x4c,0xe4,0xdc,0x18,0x56,0x2f,0x3e,0x86,0x4d,0x92,0xa5,0x5c,0x5a,
     0x8f,0xc3,0x6b,0x32,0xdb,0xe5,0x72,0x50,0x84,0xfc,0x6e,0x5d,0x15,0x77,0x3d,0xca,
     0xc5,0x2b,0x20,0x3c,0x78,0x37,0x80,0x78,0x23,0x56,0x91,0xa0,0xce,0xa4,0x06,0x5a,
     0x7f,0xe3,0xbf,0xab,0x51,0x57,0x32,0x2c,0x0a,0xf0,0xc5,0x6f,0xf4,0xcb,0xff,0x42}
};
#elif defined(SCRYPT_SALSA64)
static const uint8_t post_vectors[][64] = {
    {0xb0,0xb7,0x10,0xb5,0x1f,0x2b,0x7f,0xaf,0x9d,0x95,0x5f,0x4c,0x2d,0x98,0x7c,0xc1,
     0xbc,0x37,0x2f,0x50,0x8d,0xb2,0x9f,0xfd,0x48,0x0d,0xe0,0x44,0x19,0xdf,0x28,0x6c,
     0xab,0xbf,0x1e,0x17,0x26,0xcc,0x57,0x95,0x18,0x17,0x83,0x4c,0x12,0x48,0xd9,0xee,
     0x4b,0x00,0x29,0x06,0x31,0x01,0x6b,0x8c,0x26,0x39,0xbf,0xe4,0xe4,0xd4,0x6a,0x26},
    {0xa0,0x40,0xb2,0xf2,0x11,0xb6,0x5f,0x3d,0x4c,0x1e,0xef,0x59,0xd4,0x98,0xdb,0x14,
     0x01,0xff,0xe3,0x34,0xd7,0x19,0xcd,0xeb,0xde,0x52,0x1c,0xf4,0x86,0x43,0xc9,0xe2,
     0xfb,0xf9,0x4f,0x0a,0xbb,0x1f,0x5c,0x6a,0xdf,0xb9,0x28,0xfa,0xac,0xc4,0x48,0xed,
     0xcc,0xd2,0x2e,0x25,0x5f,0xf3,0x56,0x1d,0x2d,0x23,0x22,0xc1,0xbc,0xff,0x78,0x80}
};
#endif
#else
static const uint8_t post_vectors[][64] = {{0}};
#endif
13  deps/scrypt-jane-master/example.c  vendored  Normal file
@@ -0,0 +1,13 @@
#include <stdio.h>
#include "scrypt-jane.h"

int main(void) {
    unsigned char digest[16];
    int i;
    scrypt("pw", 2, "salt", 4, 0, 0, 0, digest, 16);
    for (i = 0; i < sizeof(digest); i++)
        printf("%02x, ", digest[i]);
    printf("\n");
    return 0;
}
121  deps/scrypt-jane-master/scrypt-jane-speed.c  vendored  Normal file
@@ -0,0 +1,121 @@
#define SCRYPT_TEST_SPEED
|
||||
#include "scrypt-jane.c"
|
||||
|
||||
/* ticks - not tested on anything other than x86 */
|
||||
static uint64_t
|
||||
get_ticks(void) {
|
||||
#if defined(CPU_X86) || defined(CPU_X86_64)
|
||||
#if defined(COMPILER_INTEL)
|
||||
return _rdtsc();
|
||||
#elif defined(COMPILER_MSVC)
|
||||
return __rdtsc();
|
||||
#elif defined(COMPILER_GCC)
|
||||
uint32_t lo, hi;
|
||||
__asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
|
||||
return ((uint64_t)lo | ((uint64_t)hi << 32));
|
||||
#else
|
||||
need rdtsc for this compiler
|
||||
#endif
|
||||
#elif defined(OS_SOLARIS)
|
||||
return (uint64_t)gethrtime();
|
||||
#elif defined(CPU_SPARC) && !defined(OS_OPENBSD)
|
||||
uint64_t t;
|
||||
__asm__ __volatile__("rd %%tick, %0" : "=r" (t));
|
||||
return t;
|
||||
#elif defined(CPU_PPC)
|
||||
uint32_t lo = 0, hi = 0;
|
||||
__asm__ __volatile__("mftbu %0; mftb %1" : "=r" (hi), "=r" (lo));
|
||||
return ((uint64_t)lo | ((uint64_t)hi << 32));
|
||||
#elif defined(CPU_IA64)
|
||||
uint64_t t;
|
||||
__asm__ __volatile__("mov %0=ar.itc" : "=r" (t));
|
||||
return t;
|
||||
#elif defined(OS_NIX)
	struct timeval t2;
	uint64_t t;
	gettimeofday(&t2, NULL);
	t = ((uint64_t)t2.tv_usec << 32) | (uint64_t)t2.tv_sec;
	return t;
#else
	need ticks for this platform
#endif
}

#define timeit(x,minvar) { \
	ticks = get_ticks(); \
	x; \
	ticks = get_ticks() - ticks; \
	if (ticks < minvar) \
		minvar = ticks; \
}

#define maxticks 0xffffffffffffffffull

typedef struct scrypt_speed_settings_t {
	const char *desc;
	uint8_t Nfactor, rfactor, pfactor;
} scrypt_speed_settings;

/* scrypt_r_32kb is set to a 32kb chunk, so (1 << (scrypt_r_32kb - 5)) = 1kb chunk */
static const scrypt_speed_settings settings[] = {
	{"scrypt high volume ( ~4mb)", 11, scrypt_r_32kb - 5, 0},
	{"scrypt interactive (~16mb)", 13, scrypt_r_32kb - 5, 0},
	{"scrypt non-interactive (~ 1gb)", 19, scrypt_r_32kb - 5, 0},
	{0}
};
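
/* A quick sanity check of the labels above (a sketch, using the ~Memory
   formula from scrypt-jane.h): "interactive" is Nfactor 13 -> N = 1 << 14 =
   16384, and with the 1kb chunk that is roughly (N + 2) * 1kb ~= 16mb;
   likewise Nfactor 19 gives ~1gb. */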

int main(void) {
	const scrypt_speed_settings *s;
	uint8_t password[64], salt[24], digest[64];
	uint64_t minticks, ticks;
	size_t i, passes;
	size_t cpuflags, topbit;

	for (i = 0; i < sizeof(password); i++)
		password[i] = (uint8_t)i;
	for (i = 0; i < sizeof(salt); i++)
		salt[i] = 255 - (uint8_t)i;

	/* warm up a little */
	scrypt(password, sizeof(password), salt, sizeof(salt), 15, 3, 4, digest, sizeof(digest));

	cpuflags = available_implementations();
	topbit = 0;
	for (i = cpuflags; i != 0; i >>= 1)
		topbit++;
	topbit = ((size_t)1 << topbit);

	while (1) {
#if defined(SCRYPT_CHOOSE_COMPILETIME)
		printf("speed test for scrypt[%s,%s]\n", SCRYPT_HASH, SCRYPT_MIX);
#else
		printf("speed test for scrypt[%s,%s,%s]\n", SCRYPT_HASH, SCRYPT_MIX, get_top_cpuflag_desc(cpuflags));
#endif

		cpu_detect_mask = cpuflags;
		for (i = 0; settings[i].desc; i++) {
			s = &settings[i];
			minticks = maxticks;
			for (passes = 0; passes < 16; passes++)
				timeit(scrypt(password, sizeof(password), salt, sizeof(salt), s->Nfactor, s->rfactor, s->pfactor, digest, sizeof(digest)), minticks)

			printf("%s, %.0f ticks\n", s->desc, (double)minticks);
		}

#if defined(SCRYPT_CHOOSE_COMPILETIME)
		break;
#else
		while (topbit && ((cpuflags & topbit) == 0))
			topbit >>= 1;
		cpuflags &= ~topbit;

		/* (cpuflags == 0) is the basic/portable version, don't bother timing it */
		if (!cpuflags)
			break;
#endif
	}

	printf("\n\n");

	return 0;
}
12
deps/scrypt-jane-master/scrypt-jane-test.c
vendored
Normal file
@ -0,0 +1,12 @@

#define SCRYPT_TEST
#include "scrypt-jane.c"

int main(void) {
	int res = scrypt_power_on_self_test();

	printf("%s: test %s\n", SCRYPT_MIX, (res & 1) ? "ok" : "FAILED");
	printf("%s: test %s\n", SCRYPT_HASH, (res & 2) ? "ok" : "FAILED");
	printf("scrypt: test vectors %s\n", (res & 4) ? "ok" : "FAILED");

	return ((res & 7) == 7) ? 0 : 1;
}
182
deps/scrypt-jane-master/scrypt-jane.c
vendored
Normal file
@ -0,0 +1,182 @@

/*
	scrypt-jane by Andrew M, https://github.com/floodyberry/scrypt-jane

	Public Domain or MIT License, whichever is easier
*/

#include <string.h>

#include "scrypt-jane.h"
#include "code/scrypt-jane-portable.h"
#include "code/scrypt-jane-hash.h"
#include "code/scrypt-jane-romix.h"
#include "code/scrypt-jane-test-vectors.h"

#define scrypt_maxNfactor 30  /* (1 << (30 + 1)) = ~2 billion */
#if (SCRYPT_BLOCK_BYTES == 64)
#define scrypt_r_32kb 8 /* (1 << 8) = 256 * 2 blocks in a chunk * 64 bytes = Max of 32kb in a chunk */
#elif (SCRYPT_BLOCK_BYTES == 128)
#define scrypt_r_32kb 7 /* (1 << 7) = 128 * 2 blocks in a chunk * 128 bytes = Max of 32kb in a chunk */
#elif (SCRYPT_BLOCK_BYTES == 256)
#define scrypt_r_32kb 6 /* (1 << 6) = 64 * 2 blocks in a chunk * 256 bytes = Max of 32kb in a chunk */
#elif (SCRYPT_BLOCK_BYTES == 512)
#define scrypt_r_32kb 5 /* (1 << 5) = 32 * 2 blocks in a chunk * 512 bytes = Max of 32kb in a chunk */
#endif
#define scrypt_maxrfactor scrypt_r_32kb /* 32kb */
#define scrypt_maxpfactor 25 /* (1 << 25) = ~33 million */
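
/* A quick check of the arithmetic above: with 64-byte blocks, rfactor at its
   cap of scrypt_r_32kb = 8 gives r = 1 << 8 = 256, so a chunk is
   2 * 256 * 64 = 32768 bytes = 32kb, which is where the cap gets its name. */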

#include <stdio.h>
//#include <malloc.h>

static void NORETURN
scrypt_fatal_error_default(const char *msg) {
	fprintf(stderr, "%s\n", msg);
	exit(1);
}

static scrypt_fatal_errorfn scrypt_fatal_error = scrypt_fatal_error_default;

void
scrypt_set_fatal_error(scrypt_fatal_errorfn fn) {
	scrypt_fatal_error = fn;
}

static int
scrypt_power_on_self_test(void) {
	const scrypt_test_setting *t;
	uint8_t test_digest[64];
	uint32_t i;
	int res = 7, scrypt_valid;

	if (!scrypt_test_mix()) {
#if !defined(SCRYPT_TEST)
		scrypt_fatal_error("scrypt: mix function power-on-self-test failed");
#endif
		res &= ~1;
	}

	if (!scrypt_test_hash()) {
#if !defined(SCRYPT_TEST)
		scrypt_fatal_error("scrypt: hash function power-on-self-test failed");
#endif
		res &= ~2;
	}

	for (i = 0, scrypt_valid = 1; post_settings[i].pw; i++) {
		t = post_settings + i;
		scrypt((uint8_t *)t->pw, strlen(t->pw), (uint8_t *)t->salt, strlen(t->salt), t->Nfactor, t->rfactor, t->pfactor, test_digest, sizeof(test_digest));
		scrypt_valid &= scrypt_verify(post_vectors[i], test_digest, sizeof(test_digest));
	}

	if (!scrypt_valid) {
#if !defined(SCRYPT_TEST)
		scrypt_fatal_error("scrypt: scrypt power-on-self-test failed");
#endif
		res &= ~4;
	}

	return res;
}

typedef struct scrypt_aligned_alloc_t {
	uint8_t *mem, *ptr;
} scrypt_aligned_alloc;

#if defined(SCRYPT_TEST_SPEED)
static uint8_t *mem_base = (uint8_t *)0;
static size_t mem_bump = 0;

/* allocations are assumed to be multiples of 64 bytes and total allocations not to exceed ~1.01gb */
static scrypt_aligned_alloc
scrypt_alloc(uint64_t size) {
	scrypt_aligned_alloc aa;
	if (!mem_base) {
		mem_base = (uint8_t *)malloc((1024 * 1024 * 1024) + (1024 * 1024) + (SCRYPT_BLOCK_BYTES - 1));
		if (!mem_base)
			scrypt_fatal_error("scrypt: out of memory");
		mem_base = (uint8_t *)(((size_t)mem_base + (SCRYPT_BLOCK_BYTES - 1)) & ~(SCRYPT_BLOCK_BYTES - 1));
	}
	aa.mem = mem_base + mem_bump;
	aa.ptr = aa.mem;
	mem_bump += (size_t)size;
	return aa;
}

static void
scrypt_free(scrypt_aligned_alloc *aa) {
	mem_bump = 0;
}
#else
static scrypt_aligned_alloc
scrypt_alloc(uint64_t size) {
	static const size_t max_alloc = (size_t)-1;
	scrypt_aligned_alloc aa;
	size += (SCRYPT_BLOCK_BYTES - 1);
	if (size > max_alloc)
		scrypt_fatal_error("scrypt: not enough address space on this CPU to allocate required memory");
	aa.mem = (uint8_t *)malloc((size_t)size);
	aa.ptr = (uint8_t *)(((size_t)aa.mem + (SCRYPT_BLOCK_BYTES - 1)) & ~(SCRYPT_BLOCK_BYTES - 1));
	if (!aa.mem)
		scrypt_fatal_error("scrypt: out of memory");
	return aa;
}

static void
scrypt_free(scrypt_aligned_alloc *aa) {
	free(aa->mem);
}
#endif

void
scrypt(const uint8_t *password, size_t password_len, const uint8_t *salt, size_t salt_len, uint8_t Nfactor, uint8_t rfactor, uint8_t pfactor, uint8_t *out, size_t bytes) {
	scrypt_aligned_alloc YX, V;
	uint8_t *X, *Y;
	uint32_t N, r, p, chunk_bytes, i;

#if !defined(SCRYPT_CHOOSE_COMPILETIME)
	scrypt_ROMixfn scrypt_ROMix = scrypt_getROMix();
#endif

#if !defined(SCRYPT_TEST)
	static int power_on_self_test = 0;
	if (!power_on_self_test) {
		power_on_self_test = 1;
		if (!scrypt_power_on_self_test())
			scrypt_fatal_error("scrypt: power on self test failed");
	}
#endif

	if (Nfactor > scrypt_maxNfactor)
		scrypt_fatal_error("scrypt: N out of range");
	if (rfactor > scrypt_maxrfactor)
		scrypt_fatal_error("scrypt: r out of range");
	if (pfactor > scrypt_maxpfactor)
		scrypt_fatal_error("scrypt: p out of range");

	N = (1 << (Nfactor + 1));
	r = (1 << rfactor);
	p = (1 << pfactor);

	chunk_bytes = SCRYPT_BLOCK_BYTES * r * 2;
	V = scrypt_alloc((uint64_t)N * chunk_bytes);
	YX = scrypt_alloc((p + 1) * chunk_bytes);

	/* 1: X = PBKDF2(password, salt) */
	Y = YX.ptr;
	X = Y + chunk_bytes;
	scrypt_pbkdf2(password, password_len, salt, salt_len, 1, X, chunk_bytes * p);

	/* 2: X = ROMix(X) */
	for (i = 0; i < p; i++)
		scrypt_ROMix((scrypt_mix_word_t *)(X + (chunk_bytes * i)), (scrypt_mix_word_t *)Y, (scrypt_mix_word_t *)V.ptr, N, r);

	/* 3: Out = PBKDF2(password, X) */
	scrypt_pbkdf2(password, password_len, X, chunk_bytes * p, 1, out, bytes);

	scrypt_ensure_zero(YX.ptr, (p + 1) * chunk_bytes);

	scrypt_free(&V);
	scrypt_free(&YX);
}
27
deps/scrypt-jane-master/scrypt-jane.h
vendored
Normal file
@ -0,0 +1,27 @@

#ifndef SCRYPT_JANE_H
#define SCRYPT_JANE_H

/*
	Nfactor: Increases CPU & Memory Hardness
	N = (1 << (Nfactor + 1)): How many times to mix a chunk and how many temporary chunks are used

	rfactor: Increases Memory Hardness
	r = (1 << rfactor): How large a chunk is

	pfactor: Increases CPU Hardness
	p = (1 << pfactor): Number of times to mix the main chunk

	A block is the basic mixing unit (salsa/chacha block = 64 bytes)
	A chunk is (2 * r) blocks

	~Memory used = (N + 2) * ((2 * r) * block size)
*/
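
/* A worked instance of the formula above (a sketch, assuming a 64-byte
   salsa/chacha block): Nfactor = 13, rfactor = 3, pfactor = 1 gives N = 16384,
   r = 8, p = 2, so memory used ~= (16384 + 2) * (2 * 8 * 64) ~= 16mb. */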

#include <stdlib.h>

typedef void (*scrypt_fatal_errorfn)(const char *msg);
void scrypt_set_fatal_error(scrypt_fatal_errorfn fn);

void scrypt(const unsigned char *password, size_t password_len, const unsigned char *salt, size_t salt_len, unsigned char Nfactor, unsigned char rfactor, unsigned char pfactor, unsigned char *out, size_t bytes);

#endif /* SCRYPT_JANE_H */
38
deps/scrypt-jane-master/test-speed.sh
vendored
Normal file
@ -0,0 +1,38 @@

#!/bin/sh
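# usage: ./test-speed.sh [32|64] - the optional word size is forwarded below as -m32/-m64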

test() {
	sleep 0.25 # mingw is stupid and will occasionally not have permission to overwrite scrypt_speed
	gcc scrypt-jane-speed.c -O3 -DSCRYPT_$1 -DSCRYPT_$2 $3 -o scrypt_speed 2>/dev/null
	local RC=$?
	if [ $RC -ne 0 ]; then
		echo "$1/$2: failed to compile "
		return
	fi
	./scrypt_speed
}

testhash() {
	test $1 SALSA $2
	test $1 CHACHA $2
	test $1 SALSA64 $2
}

testhashes() {
	testhash SHA256 $1
	testhash SHA512 $1
	testhash BLAKE256 $1
	testhash BLAKE512 $1
	testhash SKEIN512 $1
	testhash KECCAK256 $1
	testhash KECCAK512 $1
}

if [ -z $1 ]; then
	testhashes
elif [ $1 -eq 32 ]; then
	testhashes -m32
elif [ $1 -eq 64 ]; then
	testhashes -m64
fi

rm -f scrypt_speed
44
deps/scrypt-jane-master/test.sh
vendored
Normal file
@ -0,0 +1,44 @@

#!/bin/sh
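# usage: ./test.sh [32|64] - the optional word size is forwarded below as -m32/-m64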

test() {
	sleep 0.25 # mingw is stupid and will occasionally not have permission to overwrite scrypt_test
	gcc scrypt-jane-test.c -O3 -DSCRYPT_$1 -DSCRYPT_$2 $3 -o scrypt_test #2>/dev/null
	local RC=$?
	if [ $RC -ne 0 ]; then
		echo "$1/$2: failed to compile "
		return
	fi
	./scrypt_test >/dev/null
	local RC=$?
	if [ $RC -ne 0 ]; then
		echo "$1/$2: validation failed"
		return
	fi
	echo "$1/$2: OK"
}

testhash() {
	test $1 SALSA $2
	test $1 CHACHA $2
	test $1 SALSA64 $2
}

testhashes() {
	testhash SHA256 $1
	testhash SHA512 $1
	testhash BLAKE256 $1
	testhash BLAKE512 $1
	testhash SKEIN512 $1
	testhash KECCAK256 $1
	testhash KECCAK512 $1
}

if [ -z $1 ]; then
	testhashes
elif [ $1 -eq 32 ]; then
	testhashes -m32
elif [ $1 -eq 64 ]; then
	testhashes -m64
fi

rm -f scrypt_test
292
src/bridges/bridge_scrypt_jane.c
Normal file
@ -0,0 +1,292 @@

/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */

#include "common.h"
#include "types.h"
#include "bridges.h"
#include "memory.h"

#include "code/scrypt-jane-portable.h"
#include "code/scrypt-jane-hash.h"
#include "code/scrypt-jane-romix.h"

// good: we can use this multiplier to reduce copy overhead and increase the guessing speed.
// bad: but we also increase the password candidate batch size.
// slow hashes which make use of this bridge are probably used with smaller wordlists,
// and therefore it's easier for hashcat to parallelize if this multiplier is low.
// in the end, it's a trade-off.

#define N_ACCEL 8
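
// rough illustration of the trade-off (a sketch, assuming hashcat dispatches
// get_workitem_count () candidates per unit and invocation): with N_ACCEL 8 on
// a 16-core host, one bridge pass covers 16 * 8 = 128 candidates, so each
// GPU<->CPU copy is amortized over 8 candidates per unit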

#define SCRYPT_R_MAX 16
#define SCRYPT_P_MAX 16

#define SCRYPT_TMP_SIZE  (128ULL * SCRYPT_R_MAX * SCRYPT_P_MAX)
#define SCRYPT_TMP_SIZE4 (SCRYPT_TMP_SIZE / 4)

typedef struct
{
  u32 P[SCRYPT_TMP_SIZE4];

} scrypt_tmp_t;

typedef struct
{
  u32 salt_buf[64];
  u32 salt_len;

  u32 digest_buf[64];
  u32 digest_len;

  u32 N;
  u32 r;
  u32 p;

} scrypt_t;

typedef struct
{
  void *V;
  //void *X;
  void *Y;

  // implementation specific

  char unit_info_buf[1024];
  int  unit_info_len;

  u64    workitem_count;
  size_t workitem_size;

} unit_t;

typedef struct
{
  unit_t *units_buf;
  int     units_cnt;

} bridge_scrypt_jane_t;
static bool units_init (bridge_scrypt_jane_t *bridge_scrypt_jane)
{
  #if defined (_WIN)

  SYSTEM_INFO sysinfo;

  GetSystemInfo (&sysinfo);

  int num_devices = sysinfo.dwNumberOfProcessors;

  #else

  int num_devices = sysconf (_SC_NPROCESSORS_ONLN);

  #endif

  unit_t *units_buf = (unit_t *) hccalloc (num_devices, sizeof (unit_t));

  int units_cnt = 0;

  for (int i = 0; i < num_devices; i++)
  {
    unit_t *unit_buf = &units_buf[i];

    unit_buf->unit_info_len = snprintf (unit_buf->unit_info_buf, sizeof (unit_buf->unit_info_buf) - 1,
      "%s",
      "Scrypt-Jane ROMix");

    unit_buf->unit_info_buf[unit_buf->unit_info_len] = 0;

    unit_buf->workitem_count = N_ACCEL;

    units_cnt++;
  }

  bridge_scrypt_jane->units_buf = units_buf;
  bridge_scrypt_jane->units_cnt = units_cnt;

  return true;
}

static void units_term (bridge_scrypt_jane_t *bridge_scrypt_jane)
{
  if (bridge_scrypt_jane)
  {
    hcfree (bridge_scrypt_jane->units_buf);
  }
}
void *platform_init ()
{
  // bridge_scrypt_jane_t will be our platform context

  bridge_scrypt_jane_t *bridge_scrypt_jane = (bridge_scrypt_jane_t *) hcmalloc (sizeof (bridge_scrypt_jane_t));

  if (units_init (bridge_scrypt_jane) == false)
  {
    hcfree (bridge_scrypt_jane);

    return NULL;
  }

  return bridge_scrypt_jane;
}

void platform_term (void *platform_context)
{
  bridge_scrypt_jane_t *bridge_scrypt_jane = platform_context;

  if (bridge_scrypt_jane)
  {
    units_term (bridge_scrypt_jane);

    hcfree (bridge_scrypt_jane);
  }
}

int get_unit_count (void *platform_context)
{
  bridge_scrypt_jane_t *bridge_scrypt_jane = platform_context;

  return bridge_scrypt_jane->units_cnt;
}

// we support units of mixed speed, that's why the workitem count is unit specific

int get_workitem_count (void *platform_context, const int unit_idx)
{
  bridge_scrypt_jane_t *bridge_scrypt_jane = platform_context;

  unit_t *unit_buf = &bridge_scrypt_jane->units_buf[unit_idx];

  return unit_buf->workitem_count;
}

char *get_unit_info (void *platform_context, const int unit_idx)
{
  bridge_scrypt_jane_t *bridge_scrypt_jane = platform_context;

  unit_t *unit_buf = &bridge_scrypt_jane->units_buf[unit_idx];

  return unit_buf->unit_info_buf;
}
bool salt_prepare (void *platform_context, MAYBE_UNUSED hashconfig_t *hashconfig, MAYBE_UNUSED hashes_t *hashes)
{
  // selftest hash

  scrypt_t *scrypt_st = (scrypt_t *) hashes->st_esalts_buf;

  const size_t chunk_bytes = 64 * 2 * scrypt_st->r;

  size_t largest_V = chunk_bytes * scrypt_st->N;
  //size_t largest_X = chunk_bytes * scrypt_st->p;
  size_t largest_Y = chunk_bytes;

  // from here regular hashes

  scrypt_t *scrypt = (scrypt_t *) hashes->esalts_buf;

  for (u32 salt_idx = 0; salt_idx < hashes->salts_cnt; salt_idx++, scrypt++)
  {
    const size_t chunk_bytes = 64 * 2 * scrypt->r;

    const size_t sz_V = chunk_bytes * scrypt->N;
    //const size_t sz_X = chunk_bytes * scrypt->p;
    const size_t sz_Y = chunk_bytes;

    if (sz_V > largest_V) largest_V = sz_V;
    //if (sz_X > largest_X) largest_X = sz_X;
    if (sz_Y > largest_Y) largest_Y = sz_Y;
  }
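
  // e.g. for the self-test hash (N = 16384, r = 8): chunk_bytes = 1024, so the
  // V scratch allocated below comes out to 16384 * 1024 = 16mb per unit, plus
  // a 1kb Y buffer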

  bridge_scrypt_jane_t *bridge_scrypt_jane = platform_context;

  for (int unit_idx = 0; unit_idx < bridge_scrypt_jane->units_cnt; unit_idx++)
  {
    unit_t *unit_buf = &bridge_scrypt_jane->units_buf[unit_idx];

    unit_buf->V = hcmalloc_aligned (largest_V, 64);
    //unit_buf->X = hcmalloc_aligned (largest_X, 64);
    unit_buf->Y = hcmalloc_aligned (largest_Y, 64);
  }

  return true;
}

void salt_destroy (void *platform_context, MAYBE_UNUSED hashconfig_t *hashconfig, MAYBE_UNUSED hashes_t *hashes)
{
  bridge_scrypt_jane_t *bridge_scrypt_jane = platform_context;

  for (int unit_idx = 0; unit_idx < bridge_scrypt_jane->units_cnt; unit_idx++)
  {
    unit_t *unit_buf = &bridge_scrypt_jane->units_buf[unit_idx];

    hcfree_aligned (unit_buf->V);
    //hcfree_aligned (unit_buf->X);
    hcfree_aligned (unit_buf->Y);
  }
}
bool launch_loop (MAYBE_UNUSED void *platform_context, MAYBE_UNUSED hc_device_param_t *device_param, MAYBE_UNUSED hashconfig_t *hashconfig, MAYBE_UNUSED hashes_t *hashes, MAYBE_UNUSED const u32 salt_pos, MAYBE_UNUSED const u64 pws_cnt)
{
  bridge_scrypt_jane_t *bridge_scrypt_jane = platform_context;

  const int unit_idx = device_param->bridge_link_device;

  unit_t *unit_buf = &bridge_scrypt_jane->units_buf[unit_idx];

  scrypt_t *esalts_buf = (scrypt_t *) hashes->esalts_buf;

  scrypt_t *esalt_buf = &esalts_buf[salt_pos];

  scrypt_tmp_t *scrypt_tmp = (scrypt_tmp_t *) device_param->h_tmps;

  scrypt_mix_word_t *V = unit_buf->V;
  //scrypt_mix_word_t *X = unit_buf->X;
  scrypt_mix_word_t *Y = unit_buf->Y;

  const u32 N = esalt_buf->N;
  const u32 r = esalt_buf->r;
  const u32 p = esalt_buf->p;

  const size_t chunk_bytes = 64 * 2 * r;

  // hashcat guarantees h_tmps[] is 64 byte aligned

  for (u64 pw_cnt = 0; pw_cnt < pws_cnt; pw_cnt++)
  {
    u8 *X = (u8 *) scrypt_tmp->P;

    for (u32 i = 0; i < p; i++)
    {
      scrypt_ROMix ((scrypt_mix_word_t *) (X + (chunk_bytes * i)), (scrypt_mix_word_t *) Y, (scrypt_mix_word_t *) V, N, r);
    }

    scrypt_tmp++;
  }

  return true;
}
void bridge_init (bridge_ctx_t *bridge_ctx)
{
  bridge_ctx->bridge_context_size      = BRIDGE_CONTEXT_SIZE_CURRENT;
  bridge_ctx->bridge_interface_version = BRIDGE_INTERFACE_VERSION_CURRENT;

  bridge_ctx->platform_init      = platform_init;
  bridge_ctx->platform_term      = platform_term;
  bridge_ctx->get_unit_count     = get_unit_count;
  bridge_ctx->get_unit_info      = get_unit_info;
  bridge_ctx->get_workitem_count = get_workitem_count;
  bridge_ctx->thread_init        = BRIDGE_DEFAULT;
  bridge_ctx->thread_term        = BRIDGE_DEFAULT;
  bridge_ctx->salt_prepare       = salt_prepare;
  bridge_ctx->salt_destroy       = salt_destroy;
  bridge_ctx->launch_loop        = launch_loop;
  bridge_ctx->launch_loop2       = BRIDGE_DEFAULT;
  bridge_ctx->st_update_hash     = BRIDGE_DEFAULT;
  bridge_ctx->st_update_pass     = BRIDGE_DEFAULT;
}
30
src/bridges/bridge_scrypt_jane.mk
Normal file
@ -0,0 +1,30 @@

SCRYPT_JANE := deps/scrypt-jane-master
SCRYPT_JANE_CFLAGS := -I$(SCRYPT_JANE)/ -DSCRYPT_SHA256 -DSCRYPT_SALSA -DSCRYPT_CHOOSE_COMPILETIME -Wno-unused-function -Wno-unused-but-set-variable

ifeq ($(MAKECMDGOALS),binaries)
SCRYPT_JANE_CFLAGS += -mavx2
else
ifeq ($(UNAME),Darwin)
ifeq ($(IS_APPLE_SILICON),0)
SCRYPT_JANE_CFLAGS += -mavx2
endif
else
SCRYPT_JANE_CFLAGS += -march=native
endif
endif
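
# e.g. a cross-compiled "make binaries" build pins the bridge to -mavx2, while a
# native Linux "make" falls back to -march=native (per the conditionals above)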

ifeq ($(MAKECMDGOALS),binaries)
bridges/bridge_scrypt_jane.so: src/bridges/bridge_scrypt_jane.c obj/combined.LINUX.a
	$(CC_LINUX) $(CCFLAGS) $(CFLAGS_CROSS_LINUX) $^ -o $@ $(LFLAGS_CROSS_LINUX) -shared -fPIC -D BRIDGE_INTERFACE_VERSION_CURRENT=$(BRIDGE_INTERFACE_VERSION) $(SCRYPT_JANE_CFLAGS)
bridges/bridge_scrypt_jane.dll: src/bridges/bridge_scrypt_jane.c obj/combined.WIN.a
	$(CC_WIN) $(CCFLAGS) $(CFLAGS_CROSS_WIN) $^ -o $@ $(LFLAGS_CROSS_WIN) -shared -fPIC -D BRIDGE_INTERFACE_VERSION_CURRENT=$(BRIDGE_INTERFACE_VERSION) $(SCRYPT_JANE_CFLAGS)
else
ifeq ($(SHARED),1)
bridges/bridge_scrypt_jane.$(BRIDGE_SUFFIX): src/bridges/bridge_scrypt_jane.c $(HASHCAT_LIBRARY)
	$(CC) $(CCFLAGS) $(CFLAGS_NATIVE) $^ -o $@ $(LFLAGS_NATIVE) -shared -fPIC -D BRIDGE_INTERFACE_VERSION_CURRENT=$(BRIDGE_INTERFACE_VERSION) $(SCRYPT_JANE_CFLAGS)
else
bridges/bridge_scrypt_jane.$(BRIDGE_SUFFIX): src/bridges/bridge_scrypt_jane.c obj/combined.NATIVE.a
	$(CC) $(CCFLAGS) $(CFLAGS_NATIVE) $^ -o $@ $(LFLAGS_NATIVE) -shared -fPIC -D BRIDGE_INTERFACE_VERSION_CURRENT=$(BRIDGE_INTERFACE_VERSION) $(SCRYPT_JANE_CFLAGS)
endif
endif
298
src/modules/module_70100.c
Normal file
@ -0,0 +1,298 @@

/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */

#include "common.h"
#include "types.h"
#include "modules.h"
#include "bitops.h"
#include "convert.h"
#include "shared.h"

static const u32   ATTACK_EXEC   = ATTACK_EXEC_OUTSIDE_KERNEL;
static const u32   DGST_POS0     = 0;
static const u32   DGST_POS1     = 1;
static const u32   DGST_POS2     = 2;
static const u32   DGST_POS3     = 3;
static const u32   DGST_SIZE     = DGST_SIZE_4_4;
static const u32   HASH_CATEGORY = HASH_CATEGORY_GENERIC_KDF;
static const char *HASH_NAME     = "scrypt [Bridged: Scrypt-Jane ROMix]";
static const u64   KERN_TYPE     = 70100;
static const u32   OPTI_TYPE     = OPTI_TYPE_ZERO_BYTE;
static const u64   OPTS_TYPE     = OPTS_TYPE_STOCK_MODULE
                                 | OPTS_TYPE_PT_GENERATE_LE
                                 | OPTS_TYPE_NATIVE_THREADS
                                 | OPTS_TYPE_MP_MULTI_DISABLE;
static const u32   SALT_TYPE     = SALT_TYPE_EMBEDDED;
static const u64   BRIDGE_TYPE   = BRIDGE_TYPE_MATCH_TUNINGS // optional - improves performance
                                 | BRIDGE_TYPE_LAUNCH_LOOP;
static const char *BRIDGE_NAME   = "scrypt_jane";
static const char *ST_PASS       = "hashcat";
static const char *ST_HASH       = "SCRYPT:16384:8:2:ODEzMTA2Mw==:NuOcXzv+MOqXmwTXnH6bbUEjN/vjlDG28IM7WXaUkk0=";

u32         module_attack_exec   (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC;   }
u32         module_dgst_pos0     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0;     }
u32         module_dgst_pos1     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1;     }
u32         module_dgst_pos2     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2;     }
u32         module_dgst_pos3     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3;     }
u32         module_dgst_size     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE;     }
u32         module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY; }
const char *module_hash_name     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME;     }
u64         module_kern_type     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE;     }
u32         module_opti_type     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE;     }
u64         module_opts_type     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE;     }
u32         module_salt_type     (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE;     }
const char *module_st_hash       (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_HASH;       }
const char *module_st_pass       (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS;       }
const char *module_bridge_name   (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return BRIDGE_NAME;   }
u64         module_bridge_type   (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return BRIDGE_TYPE;   }

static const char *SIGNATURE_SCRYPT = "SCRYPT";

typedef struct
{
  u32 salt_buf[64];
  u32 salt_len;

  u32 digest_buf[64];
  u32 digest_len;

  u32 N;
  u32 r;
  u32 p;

} scrypt_t;
u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  // this overrides the reductions of PW_MAX in case optimized kernel is selected
  // IOW, even in optimized kernel mode it supports length 256

  const u32 pw_max = PW_MAX;

  return pw_max;
}

u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  const u64 esalt_size = (const u64) sizeof (scrypt_t);

  return esalt_size;
}

u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra)
{
  #define SCRYPT_R_MAX 16
  #define SCRYPT_P_MAX 16

  const u64 tmp_size = 128ULL * SCRYPT_R_MAX * SCRYPT_P_MAX;

  return tmp_size;
}
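
// at the maxima above this is 128 * 16 * 16 = 32768 bytes of tmps[] state per
// candidate, matching SCRYPT_TMP_SIZE in the bridge and the OpenCL kernel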

int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len)
{
  u32 *digest = (u32 *) digest_buf;

  scrypt_t *scrypt = (scrypt_t *) esalt_buf;

  hc_token_t token;

  token.token_cnt = 6;

  token.signatures_cnt    = 1;
  token.signatures_buf[0] = SIGNATURE_SCRYPT;

  token.len_min[0] = 6;
  token.len_max[0] = 6;
  token.sep[0]     = ':';
  token.attr[0]    = TOKEN_ATTR_VERIFY_LENGTH
                   | TOKEN_ATTR_VERIFY_SIGNATURE;

  token.len_min[1] = 1;
  token.len_max[1] = 6;
  token.sep[1]     = ':';
  token.attr[1]    = TOKEN_ATTR_VERIFY_LENGTH;

  token.len_min[2] = 1;
  token.len_max[2] = 6;
  token.sep[2]     = ':';
  token.attr[2]    = TOKEN_ATTR_VERIFY_LENGTH;

  token.len_min[3] = 1;
  token.len_max[3] = 6;
  token.sep[3]     = ':';
  token.attr[3]    = TOKEN_ATTR_VERIFY_LENGTH;

  token.len_min[4] = 0;
  token.len_max[4] = 45;
  token.sep[4]     = ':';
  token.attr[4]    = TOKEN_ATTR_VERIFY_LENGTH
                   | TOKEN_ATTR_VERIFY_BASE64A;

  token.len_min[5] = 44;
  token.len_max[5] = 88;
  token.sep[5]     = ':';
  token.attr[5]    = TOKEN_ATTR_VERIFY_LENGTH
                   | TOKEN_ATTR_VERIFY_BASE64A;

  const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token);

  if (rc_tokenizer != PARSER_OK) return (rc_tokenizer);
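
  // e.g. the self-test hash above tokenizes as: [0] signature "SCRYPT",
  // [1] N "16384", [2] r "8", [3] p "2", [4] base64 salt, [5] base64 digest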

  // scrypt settings

  const u8 *N_pos = token.buf[1];
  const u8 *r_pos = token.buf[2];
  const u8 *p_pos = token.buf[3];

  scrypt->N = hc_strtoul ((const char *) N_pos, NULL, 10);
  scrypt->r = hc_strtoul ((const char *) r_pos, NULL, 10);
  scrypt->p = hc_strtoul ((const char *) p_pos, NULL, 10);

  if (scrypt->r > 16) return (PARSER_SALT_VALUE);
  if (scrypt->p > 16) return (PARSER_SALT_VALUE);

  // salt

  const u8 *salt_pos = token.buf[4];
  const int salt_len = token.len[4];

  scrypt->salt_len = base64_decode (base64_to_int, (const u8 *) salt_pos, salt_len, (u8 *) scrypt->salt_buf);

  // digest - base64 decode

  const u8 *hash_pos = token.buf[5];
  const int hash_len = token.len[5];

  scrypt->digest_len = base64_decode (base64_to_int, (const u8 *) hash_pos, hash_len, (u8 *) scrypt->digest_buf);

  // comparison digest

  digest[0] = scrypt->digest_buf[0];
  digest[1] = scrypt->digest_buf[1];
  digest[2] = scrypt->digest_buf[2];
  digest[3] = scrypt->digest_buf[3];

  // fake salt, we just need to make this unique

  salt->salt_buf[0] = digest[0];
  salt->salt_buf[1] = digest[1];
  salt->salt_buf[2] = digest[2];
  salt->salt_buf[3] = digest[3];
  salt->salt_buf[4] = scrypt->N;
  salt->salt_buf[5] = scrypt->r;
  salt->salt_buf[6] = scrypt->p;
  salt->salt_buf[7] = 0;

  salt->salt_len  = 32;
  salt->salt_iter = 1;

  return (PARSER_OK);
}
int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size)
{
  const scrypt_t *scrypt = (const scrypt_t *) esalt_buf;

  char base64_salt[64] = { 0 };

  base64_encode (int_to_base64, (const u8 *) scrypt->salt_buf, scrypt->salt_len, (u8 *) base64_salt);

  char base64_digest[128] = { 0 };

  base64_encode (int_to_base64, (const u8 *) scrypt->digest_buf, scrypt->digest_len, (u8 *) base64_digest);

  const int line_len = snprintf (line_buf, line_size, "%s:%u:%u:%u:%s:%s",
    SIGNATURE_SCRYPT,
    scrypt->N,
    scrypt->r,
    scrypt->p,
    base64_salt,
    base64_digest);

  return line_len;
}
void module_init (module_ctx_t *module_ctx)
{
  module_ctx->module_context_size             = MODULE_CONTEXT_SIZE_CURRENT;
  module_ctx->module_interface_version        = MODULE_INTERFACE_VERSION_CURRENT;

  module_ctx->module_attack_exec              = module_attack_exec;
  module_ctx->module_benchmark_esalt          = MODULE_DEFAULT;
  module_ctx->module_benchmark_hook_salt      = MODULE_DEFAULT;
  module_ctx->module_benchmark_mask           = MODULE_DEFAULT;
  module_ctx->module_benchmark_charset        = MODULE_DEFAULT;
  module_ctx->module_benchmark_salt           = MODULE_DEFAULT;
  module_ctx->module_bridge_name              = module_bridge_name;
  module_ctx->module_bridge_type              = module_bridge_type;
  module_ctx->module_build_plain_postprocess  = MODULE_DEFAULT;
  module_ctx->module_deep_comp_kernel         = MODULE_DEFAULT;
  module_ctx->module_deprecated_notice        = MODULE_DEFAULT;
  module_ctx->module_dgst_pos0                = module_dgst_pos0;
  module_ctx->module_dgst_pos1                = module_dgst_pos1;
  module_ctx->module_dgst_pos2                = module_dgst_pos2;
  module_ctx->module_dgst_pos3                = module_dgst_pos3;
  module_ctx->module_dgst_size                = module_dgst_size;
  module_ctx->module_dictstat_disable         = MODULE_DEFAULT;
  module_ctx->module_esalt_size               = module_esalt_size;
  module_ctx->module_extra_buffer_size        = MODULE_DEFAULT;
  module_ctx->module_extra_tmp_size           = MODULE_DEFAULT;
  module_ctx->module_extra_tuningdb_block     = MODULE_DEFAULT;
  module_ctx->module_forced_outfile_format    = MODULE_DEFAULT;
  module_ctx->module_hash_binary_count        = MODULE_DEFAULT;
  module_ctx->module_hash_binary_parse        = MODULE_DEFAULT;
  module_ctx->module_hash_binary_save         = MODULE_DEFAULT;
  module_ctx->module_hash_decode_postprocess  = MODULE_DEFAULT;
  module_ctx->module_hash_decode_potfile      = MODULE_DEFAULT;
  module_ctx->module_hash_decode_zero_hash    = MODULE_DEFAULT;
  module_ctx->module_hash_decode              = module_hash_decode;
  module_ctx->module_hash_encode_status       = MODULE_DEFAULT;
  module_ctx->module_hash_encode_potfile      = MODULE_DEFAULT;
  module_ctx->module_hash_encode              = module_hash_encode;
  module_ctx->module_hash_init_selftest       = MODULE_DEFAULT;
  module_ctx->module_hash_mode                = MODULE_DEFAULT;
  module_ctx->module_hash_category            = module_hash_category;
  module_ctx->module_hash_name                = module_hash_name;
  module_ctx->module_hashes_count_min         = MODULE_DEFAULT;
  module_ctx->module_hashes_count_max         = MODULE_DEFAULT;
  module_ctx->module_hlfmt_disable            = MODULE_DEFAULT;
  module_ctx->module_hook_extra_param_size    = MODULE_DEFAULT;
  module_ctx->module_hook_extra_param_init    = MODULE_DEFAULT;
  module_ctx->module_hook_extra_param_term    = MODULE_DEFAULT;
  module_ctx->module_hook12                   = MODULE_DEFAULT;
  module_ctx->module_hook23                   = MODULE_DEFAULT;
  module_ctx->module_hook_salt_size           = MODULE_DEFAULT;
  module_ctx->module_hook_size                = MODULE_DEFAULT;
  module_ctx->module_jit_build_options        = MODULE_DEFAULT;
  module_ctx->module_jit_cache_disable        = MODULE_DEFAULT;
  module_ctx->module_kernel_accel_max         = MODULE_DEFAULT;
  module_ctx->module_kernel_accel_min         = MODULE_DEFAULT;
  module_ctx->module_kernel_loops_max         = MODULE_DEFAULT;
  module_ctx->module_kernel_loops_min         = MODULE_DEFAULT;
  module_ctx->module_kernel_threads_max       = MODULE_DEFAULT;
  module_ctx->module_kernel_threads_min       = MODULE_DEFAULT;
  module_ctx->module_kern_type                = module_kern_type;
  module_ctx->module_kern_type_dynamic        = MODULE_DEFAULT;
  module_ctx->module_opti_type                = module_opti_type;
  module_ctx->module_opts_type                = module_opts_type;
  module_ctx->module_outfile_check_disable    = MODULE_DEFAULT;
  module_ctx->module_outfile_check_nocomp     = MODULE_DEFAULT;
  module_ctx->module_potfile_custom_check     = MODULE_DEFAULT;
  module_ctx->module_potfile_disable          = MODULE_DEFAULT;
  module_ctx->module_potfile_keep_all_hashes  = MODULE_DEFAULT;
  module_ctx->module_pwdump_column            = MODULE_DEFAULT;
  module_ctx->module_pw_max                   = module_pw_max;
  module_ctx->module_pw_min                   = MODULE_DEFAULT;
  module_ctx->module_salt_max                 = MODULE_DEFAULT;
  module_ctx->module_salt_min                 = MODULE_DEFAULT;
  module_ctx->module_salt_type                = module_salt_type;
  module_ctx->module_separator                = MODULE_DEFAULT;
  module_ctx->module_st_hash                  = module_st_hash;
  module_ctx->module_st_pass                  = module_st_pass;
  module_ctx->module_tmp_size                 = module_tmp_size;
  module_ctx->module_unstable_warning         = MODULE_DEFAULT;
  module_ctx->module_warmup_disable           = MODULE_DEFAULT;
}
90
tools/test_modules/m70100.pm
Normal file
@ -0,0 +1,90 @@

#!/usr/bin/env perl

##
## Author......: See docs/credits.txt
## License.....: MIT
##

use strict;
use warnings;

use Crypt::ScryptKDF qw (scrypt_hash);
use MIME::Base64 qw (decode_base64);

sub module_constraints { [[0, 256], [1, 15], [-1, -1], [-1, -1], [-1, -1]] }

sub module_generate_hash
{
  my $word = shift;
  my $salt = shift;
  my $N    = shift // 16384;
  my $r    = shift // 8;
  my $p    = shift // 2;

  my $hash_buf = scrypt_hash ($word, $salt, $N, $r, $p, 32);

  my $hash = sprintf ('%s', $hash_buf);

  return $hash;
}
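
# e.g. with the defaults above this yields a line shaped like the module's
# self-test hash: SCRYPT:16384:8:2:<base64 salt>:<base64 digest>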

sub module_verify_hash
{
  my $line = shift;

  # scrypt
  return unless (substr ($line, 0, 7) eq 'SCRYPT:');

  # get hash
  my $index1 = index ($line, ":", 7);

  return if $index1 < 1;

  # N
  my $N = substr ($line, 7, $index1 - 7);

  my $index2 = index ($line, ":", $index1 + 1);

  return if $index2 < 1;

  # r
  my $r = substr ($line, $index1 + 1, $index2 - $index1 - 1);

  $index1 = index ($line, ":", $index2 + 1);

  return if $index1 < 1;

  # p
  my $p = substr ($line, $index2 + 1, $index1 - $index2 - 1);

  $index2 = index ($line, ":", $index1 + 1);

  return if $index2 < 1;

  # salt
  my $salt = substr ($line, $index1 + 1, $index2 - $index1 - 1);

  $salt = decode_base64 ($salt);

  $index1 = index ($line, ":", $index2 + 1);

  return if $index1 < 1;

  # digest

  my $word = substr ($line, $index1 + 1);

  return unless defined $salt;
  return unless defined $word;
  return unless defined $N;
  return unless defined $r;
  return unless defined $p;

  $word = pack_if_HEX_notation ($word);

  my $new_hash = module_generate_hash ($word, $salt, $N, $r, $p);

  return ($new_hash, $word);
}

1;