diff --git a/deps/yescrypt-master/CHANGES b/deps/yescrypt-master/CHANGES new file mode 100644 index 000000000..e1dc1e2e8 --- /dev/null +++ b/deps/yescrypt-master/CHANGES @@ -0,0 +1,200 @@ + Changes made since 1.1.0 (2019/06/30). + +Use AVX512VL XOP-like bit rotates for faster Salsa20 on supporting CPUs. + +Implemented a little-known SHA-2 Maj() optimization proposed by Wei Dai. + +Minor code cleanups and documentation updates. + + + Changes made between 1.0.3 (2018/06/13) and 1.1.0 (2019/06/30). + +Merged yescrypt-opt.c and yescrypt-simd.c into one source file, which is +a closer match to -simd but is called -opt (and -simd is now gone). +With this change, performance of SIMD builds should be almost unchanged, +while scalar builds should be faster than before on register-rich 64-bit +architectures but may be slower than before on register-starved 32-bit +architectures (this shortcoming may be addressed later). This also +happens to make SSE prefetch available even in otherwise-scalar builds +and it paves the way for adding SIMD support on big-endian architectures +(previously, -simd assumed little-endian). + +Added x32 ABI support (x86-64 with 32-bit pointers). + + + Changes made between 1.0.2 (2018/06/06) and 1.0.3 (2018/06/13). + +In SMix1, optimized out the indexing of V for the sequential writes. + + + Changes made between 1.0.1 (2018/04/22) and 1.0.2 (2018/06/06). + +Don't use MAP_POPULATE anymore because new multi-threaded benchmarks on +RHEL6'ish and RHEL7'ish systems revealed that it sometimes has adverse +effect far in excess of its occasional positive effect. + +In the SIMD code, we now reuse the same buffer for BlockMix_pwxform's +input and output in SMix2. This might slightly improve cache hit rate +and thus performance. + +Also in the SIMD code, a compiler memory barrier has been added between +sub-blocks to ensure that none of the writes into what was S2 during +processing of the previous sub-block are postponed until after a read +from S0 or S1 in the inline asm code for the current sub-block. This +potential problem was never observed so far due to other constraints +that we have, but strictly speaking those constraints were insufficient +to guarantee it couldn't occur. + + + Changes made between 1.0.0 (2018/03/09) and 1.0.1 (2018/04/22). + +The included documentation has been improved, most notably adding new +text files PARAMETERS (guidelines on parameter selection, and currently +recommended parameter sets by use case) and COMPARISON (comparison to +scrypt and Argon2). + +Code cleanups have been made, including removal of AVX2 support, which +was deliberately temporarily preserved for the 1.0.0 release, but which +almost always hurt performance with currently recommended low-level +yescrypt parameters on Intel & AMD CPUs tested so far. (The low-level +parameters are chosen with consideration for relative performance of +defensive vs. offensive implementations on different hardware, and not +only for seemingly best performance on CPUs. It is possible to change +them such that AVX2 would be worthwhile, and this might happen in the +future, but currently this wouldn't be obviously beneficial overall.) + + + Changes made between 0.8.1 (2015/10/25) and 1.0.0 (2018/03/09). + +Hash string encoding has been finalized under the "$y$" prefix for both +native yescrypt and classic scrypt hashes, using a new variable-length +and extremely compact encoding of (ye)scrypt's many parameters. 
(Also +still recognized under the "$7$" prefix is the previously used encoding +for classic scrypt hashes, which is fixed-length and not so compact.) + +Optional format-preserving salt and hash (re-)encryption has been added, +using the Luby-Rackoff construction with SHA-256 as the PRF. + +Support for hash upgrades has been temporarily excluded to allow for its +finalization at a later time and based on actual needs (e.g., will 3x +ROM size upgrades be in demand now that Intel went from 4 to 6 memory +channels in their server CPUs, bringing a factor of 3 into RAM sizes?) + +ROM initialization has been sped up through a new simplified algorithm. + +ROM tags (magic constant values) and digests (values that depend on the +entire computation of the ROM contents) have been added to the last +block of ROM. (The placement of these tags/digests is such that nested +ROMs are possible, to allow for ROM size upgrades later.) + +The last block of ROM is now checked for the tag and is always used for +hash computation before a secret-dependent memory access is first made. +This ensures that hashes won't be computed with a partially initialized +ROM or with one initialized using different machine word endianness, and +that they will be consistently miscomputed if the ROM digest is other +than what the caller expected. This in turn helps early detection of +problems with ROM initialization even if the calling application fails +to check for them. This also helps mitigate cache-timing attacks when +the attacker doesn't know the contents of the last block of ROM. + +Many implementation changes have been made, such as for performance, +portability, security (intentional reuse and thus rewrite of memory +where practical and optional zeroization elsewhere), and coding style. +This includes addition of optional SSE2 inline assembly code (a macro +with 8 instructions) to yescrypt-simd.c, which tends to slightly +outperform compiler-generated code, including AVX(2)-enabled code, for +yescrypt's currently recommended settings. This is no surprise since +yescrypt was designed to fit the 64-bit mode extended SSE2 instruction +set perfectly (including SSE2's lack of 3-register instructions), so for +its optimal implementation AVX would merely result in extra instruction +prefixes and not provide any benefit (except for the uses of Salsa20 +inherited from scrypt, but those are infrequent). + +The auxiliary files inherited from scrypt have been sync'ed with scrypt +1.2.1, and the implementation of PBKDF2 has been further optimized, +especially for its use in (ye)scrypt where the "iteration count" is 1 +but the output size is relatively large. (The speedup is measurable at +realistically low settings for yescrypt, such as at 2 MiB of memory.) + +The included tests have been revised and test vectors regenerated to +account for the ROM initialization/use updates and hash (re-)encryption. + +The PHC test vectors have been compacted into a single SHA-256 hash of +the expected output of phc.c, but have otherwise remained unchanged as +none of the incompatible changes have affected the subset of yescrypt +exposed via the PHS() interface for the Password Hashing Competition. + +The specification document and extra programs that were included with +the PHC submission and its updates are now excluded from this release. + +The rest of documentation files have been updated for the 1.0.0 release. + + + Changes made between 0.7.1 (2015/01/31) and 0.8.1 (2015/10/25). + +pwxform became stateful, through writes to its S-boxes. 
This further +discourages TMTO attacks on yescrypt as a whole, as well as on pwxform +S-boxes separately. It also increases the total size of the S-boxes by +a factor of 1.5 (8 KiB to 12 KiB by default) and it puts the previously +mostly idle L1 cache write ports on CPUs to use. + +Salsa20/8 in BlockMix_pwxform has been replaced with Salsa20/2. + +An extra HMAC-SHA256 update of the password buffer (which is eventually +passed into the final PBKDF2 invocation) is now performed right after +the pwxform S-boxes initialization. + +Nloop_rw rounding has been adjusted to be the same as Nloop_all's. +This avoids an unnecessary invocation of SMix2 with Nloop = 2, which +would otherwise have occurred in some cases. + +t is now halved per hash upgrade (rather than reset to 0 right away on +the very first upgrade, like it was in 0.7.1). + +Minor corrections and improvements to the specification and the code +have been made. + + + Changes made between 0.6.4 (2015/01/30) and 0.7.1 (2015/01/31). + +The YESCRYPT_PARALLEL_SMIX and YESCRYPT_PWXFORM flags have been removed, +with the corresponding functionality enabled along with the YESCRYPT_RW +flag. This change has simplified the SIMD implementation a little bit +(eliminating specialized code for some flag combinations that are no +longer possible), and it should help simplify documentation, analysis, +testing, and benchmarking (fewer combinations of settings to test). + +Adjustments to pre- and post-hashing have been made to address subtle +issues and non-intuitive behavior, as well as in some cases to reduce +impact of garbage collector attacks. + +Support for hash upgrades has been added (the g parameter). + +Extra tests have been written and test vectors re-generated. + + + Changes made between 0.5.2 (2014/03/31) and 0.6.4 (2015/01/30). + +Dropped support for ROM access frequency mask since it made little sense +when supporting only one ROM at a time. (It'd make sense with two ROMs, +for simultaneous use of a ROM-in-RAM and a ROM-on-SSD. With just one +ROM, the mask could still be used for a ROM-on-SSD, but only in lieu of +a ROM-in-RAM, which would arguably be unreasonable.) + +Simplified the API by having it accept NULL for the "shared" parameter +to indicate no ROM in use. (Previously, a dummy "shared" structure had +to be created.) + +Completed the specification of pwxform, BlockMix_pwxform, Salsa20 SIMD +shuffling, and potential endianness conversion. (No change to these has +been made - they have just been specified in the included document more +completely.) + +Provided rationale for the default compile-time settings for pwxform. + +Revised the reference and optimized implementations' source code to more +closely match the current specification document in terms of identifier +names, compile-time constant expressions, source code comments, and in +some cases the ordering of source code lines. None of these changes +affect the computed hash values, hence the test vectors have remained +the same. diff --git a/deps/yescrypt-master/COMPARISON b/deps/yescrypt-master/COMPARISON new file mode 100644 index 000000000..05a9d570e --- /dev/null +++ b/deps/yescrypt-master/COMPARISON @@ -0,0 +1,129 @@ + Comparison of yescrypt to scrypt and Argon2. 
+ +yescrypt's advantages: + + + Greater resistance to offline attacks (increasing attacker's cost at + same defender's cost) + + + yescrypt supports optional ROM for protection from use of botnet + nodes (and other relatively small memory devices) + + + yescrypt has a dependency not only on RAM and maybe ROM, but + also on fast on-die local memory (such as a CPU's L1 or L2 cache), + which provides bcrypt-like anti-GPU properties even at very low + per-hash RAM sizes (where scrypt and Argon2 are more likely to lose + to bcrypt in terms of GPU attack speed) and even without ROM + + + yescrypt and scrypt currently have little low-level + parallelism within processing of a block (yescrypt allows for + tuning this later, scrypt does not), whereas Argon2 has a fixed and + currently commonly excessive amount of such parallelism, which may + be extracted to speed up e.g. GPU attacks through use of more + computing resources per the same total memory size due to each hash + computation's memory needs being split between 32 threads (yescrypt + currently has four 16-byte lanes that can be processed in parallel + within a 64-byte sub-block before running into a data dependency + for the next sub-block, whereas Argon2 allows for parallel + processing of eight 128-byte chunks within a 1 KiB block with only + two synchronization points for the entire block, as well as of four + 32-byte parts of the 128-byte chunks with only two more + synchronization points for the entire 1 KiB block) + + + yescrypt uses computation latency hardening based on integer + multiplication and local memory access speed, which ties its + per-hash RAMs up for a guaranteed minimum amount of time regardless + of possibly much higher memory bandwidth on the attacker's + hardware, whereas Argon2 uses only the multiplications and performs + 6 times fewer of those sequentially (96 sequential multiplications + per 1 KiB for yescrypt vs. 
16 per 1 KiB for Argon2, providing + correspondingly different minimum time guarantees) and scrypt does + not use this technique at all (but is no worse than Argon2 in this + respect anyway due to having less low-level parallelism) + + + yescrypt and Argon2 are time-memory trade-off (TMTO) resistant + (thus, computing them in less memory takes disproportionately + longer), whereas scrypt is deliberately TMTO-friendly (and + moreover, computing it in less memory takes up to 4x less than + proportionately longer) + + + Extra optional built-in features + + + Hash encryption so that the hashes are not crackable without + the key (to be stored separately) + + + Hash upgrade to higher settings without knowledge of password + (temporarily removed from 1.0, to be re-added later) + + + SCRAM-like client-side computation of challenge responses + (already part of the algorithm, not yet exposed via the API) + + + yescrypt's and Argon2's running time is tunable on top of + memory usage and parallelism, unlike in scrypt's + + + Cryptographic security provided by NIST-approved primitives + + + (ye)scrypt's cryptographic security is provided by SHA-256, + HMAC, and PBKDF2, which are NIST-approved and time-tested (the rest + of yescrypt's processing, while most crucial for its offline attack + resistance properties, provably does not affect its basic + cryptographic hash properties), whereas Argon2 relies on the newer + BLAKE2 (either choice is just fine for security, but use of + approved algorithms may sometimes be required for compliance) + + + SHA-256, HMAC, PBKDF2, and scrypt are usable from the same codebase + +yescrypt's drawbacks: + + - Complex (higher risk of human error occurring and remaining + unnoticed for long) + + - Cache-timing unsafe (like bcrypt, scrypt, and Argon2d, but + unlike Argon2i) + + - Not the PHC winner (Argon2 is), but is merely a "special recognition" + + - Supported in fewer third-party projects (as of this writing, there's + yescrypt support in libxcrypt, Linux-PAM, shadow, and mkpasswd) + +Other observations: + + * yescrypt's complexity is related to its current primary target use + case (mass user authentication) and is relatively small compared to + the total complexity of the authentication service, so the risk may + be justified + + * Cache-timing safety is unimportant on dedicated servers, is + mitigated for some use cases and threat models by proper use of + salts, and is fully achieved in Argon2 only in its 2i flavor and only + through reduction of resistance to the usual offline attacks compared + to the 2d flavor + + * yescrypt's single-threaded memory filling speed on an otherwise + idle machine and at our currently recommended settings is lower than + Argon2's, but that's a result of our deliberate tuning (there's a + knob to change that, but we don't recommend doing so) preventing + yescrypt from bumping into memory access speed prematurely, and is + irrelevant for determining server request rate capacity and maximum + response latency where multiple instances or threads would be run + (under that scenario, the algorithms deliver similar speeds) + + * yescrypt has been designed and currently configured to fit the + SSE2 and NEON instruction sets and 128-bit SIMD perfectly, not + benefiting from AVX's 3-register instructions (unlike classic scrypt, + which doesn't fit SSE2 as perfectly and thus benefits from AVX and + XOP) nor from AVX2's and AVX-512's wider SIMD (although it can be + reconfigured for wider SIMD later), whereas Argon2 significantly + benefits 
from those at least when running fewer threads or concurrent + instances than are supported by the hardware (yet yescrypt's SSE2 + code is competitive with Argon2's AVX2 code under full server load) + + * yescrypt vs. Argon2 benchmarks are further complicated by these + two schemes having different minimum amount of processing over memory + (yescrypt's is 4/3 of Argon2's), and thus different average memory + usage (5/8 of peak for yescrypt t=0 vs. 1/2 of peak for Argon2 t=1), + which needs to be taken into account + + * scrypt benchmarks are also different in amount of processing over + memory (twice Argon2's minimum) and average memory usage (3/4 of + peak), but that's even further complicated by scrypt's + TMTO-friendliness providing up to a 4x advantage to some attackers diff --git a/deps/yescrypt-master/Makefile b/deps/yescrypt-master/Makefile new file mode 100644 index 000000000..7abea8d62 --- /dev/null +++ b/deps/yescrypt-master/Makefile @@ -0,0 +1,85 @@ +# Copyright 2013-2018 Alexander Peslyak +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. 
+ +CC = gcc +LD = $(CC) +RM = rm -f +OMPFLAGS = -fopenmp +OMPFLAGS_MAYBE = $(OMPFLAGS) +#CFLAGS = -Wall -O2 -fomit-frame-pointer $(OMPFLAGS_MAYBE) -DSKIP_MEMZERO +CFLAGS = -Wall -O2 -march=native -fomit-frame-pointer $(OMPFLAGS_MAYBE) -DSKIP_MEMZERO +#CFLAGS = -Wall -O2 -funroll-loops -fomit-frame-pointer $(OMPFLAGS_MAYBE) -DSKIP_MEMZERO +#CFLAGS = -Wall -O2 -march=native -funroll-loops -fomit-frame-pointer $(OMPFLAGS_MAYBE) -DSKIP_MEMZERO +# -lrt is for userom's use of clock_gettime() +LDFLAGS = -s -lrt $(OMPFLAGS_MAYBE) + +PROJ = tests phc-test initrom userom +OBJS_CORE = yescrypt-opt.o +OBJS_COMMON = yescrypt-common.o sha256.o insecure_memzero.o +OBJS_TESTS = $(OBJS_CORE) $(OBJS_COMMON) tests.o +OBJS_PHC = $(OBJS_CORE) $(OBJS_COMMON) phc-test.o +OBJS_INITROM = $(OBJS_CORE) $(OBJS_COMMON) initrom.o +OBJS_USEROM = $(OBJS_CORE) $(OBJS_COMMON) userom.o +OBJS_RM = yescrypt-*.o + +all: $(PROJ) + +check: tests phc-test + @echo 'Running main tests' + @time ./tests | tee TESTS-OUT + @diff -U0 TESTS-OK TESTS-OUT && echo PASSED || echo FAILED + @if [ -e PHC-TEST-OK-SHA256 ]; then \ + echo 'Running PHC tests'; \ + time ./phc-test > PHC-TEST-OUT; \ + sha256sum -c PHC-TEST-OK-SHA256; \ + fi + +ref: + $(MAKE) $(PROJ) OBJS_CORE=yescrypt-ref.o + +check-ref: + $(MAKE) check OBJS_CORE=yescrypt-ref.o + +tests: $(OBJS_TESTS) + $(LD) $(LDFLAGS) $(OBJS_TESTS) -o $@ + +phc-test.o: phc.c + $(CC) -c $(CFLAGS) -DTEST phc.c -o $@ + +phc-test: $(OBJS_PHC) + $(LD) $(LDFLAGS) $(OBJS_PHC) -o $@ + +initrom: $(OBJS_INITROM) + $(LD) $(LDFLAGS) $(OBJS_INITROM) -o $@ + +userom: $(OBJS_USEROM) + $(LD) $(LDFLAGS) $(OMPFLAGS) $(OBJS_USEROM) -o $@ + +userom.o: userom.c + $(CC) -c $(CFLAGS) $(OMPFLAGS) $*.c + +.c.o: + $(CC) -c $(CFLAGS) $*.c + +yescrypt-opt.o: yescrypt-platform.c + +clean: + $(RM) $(PROJ) + $(RM) $(OBJS_TESTS) $(OBJS_PHC) $(OBJS_INITROM) $(OBJS_USEROM) + $(RM) $(OBJS_RM) + $(RM) TESTS-OUT PHC-TEST-OUT diff --git a/deps/yescrypt-master/PARAMETERS b/deps/yescrypt-master/PARAMETERS new file mode 100644 index 000000000..d9f5d24e6 --- /dev/null +++ b/deps/yescrypt-master/PARAMETERS @@ -0,0 +1,196 @@ + Optimal yescrypt configuration. + +yescrypt is very flexible, but configuring it optimally is complicated. +Here are some guidelines to simplify near-optimal configuration. We +start by listing the parameters and their typical values, and then give +currently recommended parameter sets by use case. + + + Parameters and their typical values. + +Set flags (yescrypt flavor) to YESCRYPT_DEFAULTS to use the currently +recommended flavor. (Other flags values exist for compatibility and for +specialized cases where you think you know what you're doing.) + +Set N (block count) based on target memory usage and running time, as +well as on the value of r (block size in 128 byte units). N must be a +power of two. + +Set r (block size) to 8 (so that N is in KiB, which is convenient) or to +another small value (if more optimal or for fine-tuning of the total +size and/or running time). Reasonable values for r are from 8 to 96. + +Set p (parallelism) to 1 meaning no thread-level parallelism within one +computation of yescrypt. (Use of thread-level parallelism within +yescrypt makes sense for ROM initialization and for key derivation at +high memory usage, but usually not for password hashing where +parallelism is available through concurrent authentication attempts. +Don't use p > 1 unnecessarily.) + +Set t (time) to 0 to use the optimal running time for a given memory +usage. 
This will allow you to maximize the memory usage (the value of +N*r) while staying within your running time constraints. (Non-zero t +makes sense in specialized cases where you can't afford higher memory +usage but can afford more time.) + +Set g (upgrades) to 0 because there have been no hash upgrades yet. + +Set NROM (block count of ROM) to 0 unless you use a ROM (see below). +NROM must be a power of two. + + + Password hashing for user authentication, no ROM. + +Small and fast (memory usage 2 MiB, performance like bcrypt cost 2^5 - +latency 2-3 ms and throughput 10,000+ per second on a 16-core server): + +flags = YESCRYPT_DEFAULTS, N = 2048, r = 8, p = 1, t = 0, g = 0, NROM = 0 + +Large and slow (memory usage 16 MiB, performance like bcrypt cost 2^8 - +latency 10-30 ms and throughput 1000+ per second on a 16-core server): + +flags = YESCRYPT_DEFAULTS, N = 4096, r = 32, p = 1, t = 0, g = 0, NROM = 0 + +Of course, even heavier and slower settings are possible, if affordable. +Simply double the value of N as many times as needed. Since N must be a +power of two, you may use r (in the range of 8 to 32) or/and t (in the +range of 0 to 2) for fine-tuning the running time, but first bring N to +the maximum you can afford. If this feels too complicated, just use one +of the two parameter sets given above (preferably the second) as-is. + + + Password hashing for user authentication, with ROM. + +It's similar to the above, except that you need to adjust r, set NROM, +and initialize the ROM. + +First decide on a ROM size, such as making it a large portion of your +dedicated authentication servers' RAM sizes. Since NROM (block count) +must be a power of two, you might need to choose r (block size) based on +how your desired ROM size corresponds to a power of two. Also tuning +for performance on current hardware, you'll likely end up with r in the +range from slightly below 16 to 32. For example, to use 15/16 of a +server's 256 GiB RAM as ROM (thus, making it 240 GiB), you could use +r=15 or r=30. To use 23/24 of a server's 384 GiB RAM as ROM (thus, +making it 368 GiB), you'd use r=23. Then set NROM to your desired ROM +size in KiB divided by 128*r. Note that these examples might (or might +not) be too extreme, leaving little memory for the rest of the system. +You could as well opt for 7/8 with r=14 or 11/12 with r=11 or r=22. + +Note that higher r may make placing of ROM in e.g. NVMe flash memory +instead of in RAM more reasonable (or less unreasonable) than it would +have been with a lower r. If this is a concern as it relates to +possible attacks and you do not intend to ever do it defensively, you +might want to keep r lower (e.g., prefer r=15 over r=30 in the example +above, even if 30 performs slightly faster). + +Your adjustments to r, if you deviate from powers of two, will also +result in weirder memory usage per hash. Like 1.75 MiB at r=14 instead +of 2 MiB at r=8 that you would have used without a ROM. That's OK. 
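+
+To illustrate the arithmetic behind these figures: a block is 128*r
+bytes, memory usage per hash is N*128*r bytes, and ROM size is
+NROM*128*r bytes.  Thus the 1.75 MiB above corresponds to N=1024 at
+r=14, the 2 MiB to N=2048 at r=8, and a 240 GiB ROM at r=15 to
+NROM=2^27 blocks of 1920 bytes each.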
+ +For ROM initialization, which you do with yescrypt_init_shared(), use +the same r and NROM that you'd later use for password hashing, choose p +based on your servers' physical and/or logical CPU count (maybe +considering eventual upgrades as you won't be able to change this later, +but without going unnecessarily high - e.g., p=28, p=56, or p=112 make +sense on servers that currently have 28 physical / 56 logical CPUs), and +set the rest of the parameters to: + +flags = YESCRYPT_DEFAULTS, N = 0, t = 0, g = 0 + +N is set to 0 because it isn't relevant during ROM initialization (you +can use different values of N for hashing passwords with the same ROM). + +To keep the ROM in e.g. SysV shared memory and reuse it across your +authentication service restarts, you'd need to allocate the memory and +set the flags to "YESCRYPT_DEFAULTS | YESCRYPT_SHARED_PREALLOCATED". + +For actual password hashing, you'd use your chosen values for N, r, +NROM, and set the rest of the parameters to: + +flags = YESCRYPT_DEFAULTS, p = 1, t = 0, g = 0 + +Note that although you'd use a large p for ROM initialization, you +should use p=1 for actual password hashing like you would without a ROM. + +Do not forget to pass the ROM into the actual password hashing (and keep +r and NROM set accordingly). + +Since N must be a power of two and r is dependent on ROM size, you may +use t (in the range of 0 to 2) for fine-tuning the running time, but +first bring N to the maximum you can afford. + +If this feels too complicated, or even if it doesn't, please consider +engaging Openwall for your yescrypt deployment. We'd be happy to help. + + + Password-based key derivation. + +(Or rather passphrase-based.) + +Use settings similar to those for password hashing without a ROM, but +adjusted for higher memory usage and running time, and optionally with +thread-level parallelism. + +Small and fast (memory usage 128 MiB, running time under 100 ms on a +fast desktop): + +flags = YESCRYPT_DEFAULTS, N = 32768, r = 32, p = 1, t = 0, g = 0, NROM = 0 + +Large and fast (memory usage 1 GiB, running time under 200 ms on a fast +quad-core desktop not including memory allocation overhead, under 250 ms +with the overhead included), but requires build with OpenMP support (or +otherwise will run as slow as yet be weaker than its p=1 alternative): + +flags = YESCRYPT_DEFAULTS, N = 262144, r = 32, p = 4, t = 0, g = 0, NROM = 0 + +Large and slower (memory usage 1 GiB, running time under 300 ms on a +fast quad-core desktop not including memory allocation overhead, under +350 ms with the overhead included), also requires build with OpenMP +support (or otherwise will run slower than the p=1 alternative below): + +flags = YESCRYPT_DEFAULTS, N = 262144, r = 32, p = 4, t = 2, g = 0, NROM = 0 + +Large and slow (memory usage 1 GiB, running time under 600 ms on a fast +desktop not including memory allocation overhead, under 650 ms with the +overhead included): + +flags = YESCRYPT_DEFAULTS, N = 262144, r = 32, p = 1, t = 0, g = 0, NROM = 0 + +Just like with password hashing, even heavier and slower settings are +possible, if affordable, and you achieve them by adjusting N, r, t in +the same way and in the same preferred ranges (please see the section on +password hashing without a ROM, above). Unlike with password hashing, +it makes some sense to go above t=2 if you expect that your users might +not be able to afford more memory but can afford more time. 
However, +increasing the memory usage provides better protection, and we don't +recommend forcing your users to wait for more than 1 second as they +could as well type more characters in that time. If this feels too +complicated, just use one of the above parameter sets as-is. + + + Amortization of memory allocation overhead. + +It takes a significant fraction of yescrypt's total running time to +allocate memory from the operating system, especially considering that +the kernel zeroizes the memory before handing it over to your program. + +Unless you naturally need to compute yescrypt just once per process, you +may achieve greater efficiency by fully using advanced yescrypt APIs +that let you preserve and reuse the memory allocation across yescrypt +invocations. This is done by reusing the structure pointed to by the +"yescrypt_local_t *local" argument of yescrypt_r() or yescrypt_kdf() +without calling yescrypt_free_local() inbetween the repeated invocations +of yescrypt. + + + YESCRYPT_DEFAULTS macro. + +Please note that the value of the YESCRYPT_DEFAULTS macro might change +later, so if you use the macro like it's recommended here then for +results reproducible across versions you might need to store its value +somewhere along with the hashes or the encrypted data. + +If you use yescrypt's standard hash string encoding, then yescrypt +already encodes and decodes this value for you, so you don't need to +worry about this. diff --git a/deps/yescrypt-master/PERFORMANCE b/deps/yescrypt-master/PERFORMANCE new file mode 100644 index 000000000..7d303d395 --- /dev/null +++ b/deps/yescrypt-master/PERFORMANCE @@ -0,0 +1,206 @@ +Although yescrypt is usable for a variety of purposes (password hashing, +KDF, PoW) and is extremely scalable (from 1 CPU core to many, from +kilobytes to terabytes and beyond) while achieving good security +properties across this whole range of use cases and settings, at this +time we're primarily targeting the mass user authentication use case. +Hence, this is what the setup and benchmarks shown in here focus on. + +The test system is a server (kindly provided by Packet.net) with dual +Xeon Gold 5120 CPUs (2.2 GHz, turbo to up to 3.2 GHz) and 384 GiB RAM +(12x DDR4-2400 ECC Reg). These CPUs have 14 cores and 6 memory channels +each, for a total of 28 physical cores, 56 logical CPUs (HT is enabled), +and 12 memory channels. The OS is Ubuntu 17.10 with kernel +"4.13.0-25-generic #29-Ubuntu SMP Mon Jan 8 21:14:41 UTC 2018 x86_64" +and compiler "gcc (Ubuntu 7.2.0-8ubuntu3.2) 7.2.0". + +First, we need to configure the Linux system, as root. Grant our user +account's group the privilege to access "huge pages" (2 MiB or 1 GiB as +opposed to x86's default 4 KiB pages): + +# sysctl -w vm.hugetlb_shm_group=1000 + +(You may need to replace the "1000" with your user account's actual +group id.) + +Disable swap, so that it doesn't get in the way: + +# swapoff -a + +Let processes allocate shared memory segments of up to 368 GiB each, and +up to 369 GiB total for the system: + +# sysctl -w kernel.shmmax=395136991232 +# sysctl -w kernel.shmall=396210733056 + +(The allowance for an extra gigabyte is in case any processes unrelated +to ours make use of SysV shared memory as well.) 
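+
+(For reference, these two values are 368 GiB and 369 GiB expressed in
+bytes: 368*2^30 = 395,136,991,232 and 369*2^30 = 396,210,733,056.)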
+ +Preallocate the 368 GiB and 2 GiB more (to be potentially used for +threads' "RAM" lookup tables), thus 370 GiB total, into huge pages (this +will only work when existing memory allocations aren't too fragmented +yet, so is normally to be performed right upon system bootup): + +# sysctl -w vm.nr_hugepages=189440 + +Check that the preallocation has succeeded by examining /proc/meminfo: + +# grep ^Huge /proc/meminfo +HugePages_Total: 189440 +HugePages_Free: 189440 + +(If the memory were too fragmented, this would show lower numbers, which +would be problematic - worse than no use of huge pages at all.) + +This is quite extreme. Although it sort of leaves 14 GiB free (as the +difference between 370 GiB and the physical RAM of 384 GiB), in practice +on our test system only less than 5 GiB remains allocatable by user +processes after this point. For actual use, you might consider using a +slightly lower fraction (than 23/24 that we're targeting here) of total +memory for yescrypt ROM, or/and not pre-allocating any huge pages for +the RAMs (especially if they won't be used that way anyway, which +depends on the HUGEPAGE_THRESHOLD setting in yescrypt-platform.c - +currently at 32 MiB). + +Now initialization of the ROM is possible, and we can work as non-root +from this point on: + +$ GOMP_CPU_AFFINITY=0-55 time ./initrom 368 1 +r=23 N=2^9 NROM=2^27 +Will use 385875968.00 KiB ROM + 1472.00 KiB RAM +Initializing ROM ... DONE (98764b03) +'$y$j6K5O$LdJMENpBABJJ3hIHjB1Bi.$VtHhEYlX3mDbxmXUUYt9Xldf.2R5/G0E/tMioNUQ/F8' +1058.64user 163.70system 0:22.04elapsed 5544%CPU (0avgtext+0avgdata 4692maxresident)k +0inputs+0outputs (0major+193094minor)pagefaults 0swaps + +It took 22 seconds to initialize our 368 GiB ROM, and now we may hash +passwords from another process (this may be the authentication service): + +$ GOMP_CPU_AFFINITY=0-55 ./userom 368 1 +r=23 N=2^9 NROM=2^27 +Will use 385875968.00 KiB ROM + 1472.00 KiB RAM +Plaintext: '$y$j6K5O$LdJMENpBABJJ3hIHjB1Bi.$VtHhEYlX3mDbxmXUUYt9Xldf.2R5/G0E/tMioNUQ/F8' +Encrypted: '$y$j6K5O$LdJMENpBABJJ3hIHjB1Bi.$LZropFwwbIVeo/8DfHbxg6VhFLkUqdvdNy7L.T8tud.' +Benchmarking 1 thread ... +809 c/s real, 812 c/s virtual (2047 hashes in 2.53 seconds) +Benchmarking 56 threads ... +21307 c/s real, 384 c/s virtual (114632 hashes in 5.38 seconds) +min 1.393 ms, avg 2.591 ms, max 3.628 ms + +$ GOMP_CPU_AFFINITY=0-55 ./userom 368 2 +r=23 N=2^10 NROM=2^27 +Will use 385875968.00 KiB ROM + 2944.00 KiB RAM +Plaintext: '$y$j7K5O$LdJMENpBABJJ3hIHjB1Bi.$ljg0jm5/lpMa98qlF1GeAI9YkqWXSA4KVTGxidC6Gy0' +Encrypted: '$y$j7K5O$LdJMENpBABJJ3hIHjB1Bi.$7X1fd6TFYK5VQOH.7M2dRJMxvFTZbv5d1i7.GwQ/7YC' +Benchmarking 1 thread ... +419 c/s real, 420 c/s virtual (1023 hashes in 2.44 seconds) +Benchmarking 56 threads ... +10248 c/s real, 184 c/s virtual (57288 hashes in 5.59 seconds) +min 2.571 ms, avg 5.413 ms, max 6.704 ms + +$ GOMP_CPU_AFFINITY=0-55 ./userom 368 23 +r=23 N=2^13 NROM=2^27 +Will use 385875968.00 KiB ROM + 23552.00 KiB RAM +Plaintext: '$y$jAK5O$LdJMENpBABJJ3hIHjB1Bi.$UL29LYGiz.rXa6c620meFuqT3IiZmBO0BlW6HenRmA4' +Encrypted: '$y$jAK5O$LdJMENpBABJJ3hIHjB1Bi.$U15LiKcR4vHbUmCbt7SUllXp/jUyNXYOC1I.426Vk80' +Benchmarking 1 thread ... +50 c/s real, 50 c/s virtual (127 hashes in 2.52 seconds) +Benchmarking 56 threads ... 
+1201 c/s real, 21 c/s virtual (7112 hashes in 5.92 seconds) +min 32.444 ms, avg 46.362 ms, max 48.067 ms + +While using the ROM, we're able to compute over 21k, over 10k, or around +1200 password hashes per second with per-thread RAM sizes of 1.4375 MiB, +2.875 MiB, or 23 MiB, respectively. + +We can also reasonably use yescrypt without a ROM: + +$ GOMP_CPU_AFFINITY=0-55 ./userom 0 2 +r=16 N=2^10 NROM=2^0 +Will use 0.00 KiB ROM + 2048.00 KiB RAM +Plaintext: '$y$j7D$LdJMENpBABJJ3hIHjB1Bi.$MpcIFGNF/2yn.6pugGKCS3k6Js5sbJ7j3qLBBqKLUk4' +Encrypted: '$y$j7D$LdJMENpBABJJ3hIHjB1Bi.$7yuShztNep5CDrsQE9Ms9DkH1zqJzTy8wRiSHozJy.9' +Benchmarking 1 thread ... +828 c/s real, 828 c/s virtual (2047 hashes in 2.47 seconds) +Benchmarking 56 threads ... +21710 c/s real, 388 c/s virtual (114632 hashes in 5.28 seconds) +min 1.679 ms, avg 2.571 ms, max 3.591 ms + +$ GOMP_CPU_AFFINITY=0-55 ./userom 0 4 +r=16 N=2^11 NROM=2^0 +Will use 0.00 KiB ROM + 4096.00 KiB RAM +Plaintext: '$y$j8D$LdJMENpBABJJ3hIHjB1Bi.$dT8UO1PVT6lpQcAuWsreFpgdw9TeYdEkqsCp5syNoL9' +Encrypted: '$y$j8D$LdJMENpBABJJ3hIHjB1Bi.$evEI7SjEM6GKYxcIaNYmanAesDLMRezuOfT4V01aj33' +Benchmarking 1 thread ... +417 c/s real, 417 c/s virtual (1023 hashes in 2.45 seconds) +Benchmarking 56 threads ... +10434 c/s real, 186 c/s virtual (57288 hashes in 5.49 seconds) +min 3.120 ms, avg 5.339 ms, max 6.878 ms + +$ GOMP_CPU_AFFINITY=0-55 ./userom 0 16 +r=16 N=2^13 NROM=2^0 +Will use 0.00 KiB ROM + 16384.00 KiB RAM +Plaintext: '$y$jAD$LdJMENpBABJJ3hIHjB1Bi.$Cap65IlIDN8g9Lh0aVhLLWORQhpwxvh0rhkIB6OOpqC' +Encrypted: '$y$jAD$LdJMENpBABJJ3hIHjB1Bi.$d5Qoew0sKNt63xBRsAxNDhGV52p1jHAFN1/fglibMbA' +Benchmarking 1 thread ... +100 c/s real, 100 c/s virtual (255 hashes in 2.54 seconds) +Benchmarking 56 threads ... +2314 c/s real, 41 c/s virtual (14280 hashes in 6.17 seconds) +min 13.440 ms, avg 24.049 ms, max 25.467 ms + +$ GOMP_CPU_AFFINITY=0-55 ./userom 0 32 +r=16 N=2^14 NROM=2^0 +Will use 0.00 KiB ROM + 32768.00 KiB RAM +Plaintext: '$y$jBD$LdJMENpBABJJ3hIHjB1Bi.$zdJjnnDFSqeRbC8ZUQFZShGpP2gvFCGjAZ01h10dWa9' +Encrypted: '$y$jBD$LdJMENpBABJJ3hIHjB1Bi.$U45EV25V/KtqyetJ7AHsJaeeNTJwvQ3hBG7lokzkyR6' +Benchmarking 1 thread ... +48 c/s real, 49 c/s virtual (127 hashes in 2.61 seconds) +Benchmarking 56 threads ... +1136 c/s real, 20 c/s virtual (7112 hashes in 6.26 seconds) +min 27.844 ms, avg 48.837 ms, max 50.792 ms + +Slightly higher speeds are possible for 4 MiB and higher with larger +yescrypt block size (r=32 instead of r=16, thus 4 KiB blocks instead of +2 KiB blocks benchmarked above). Here they are for 4 MiB, 16 MiB, and +32 MiB, respectively: + +10589 c/s real, 189 c/s virtual (57288 hashes in 5.41 seconds) +min 3.465 ms, avg 5.260 ms, max 6.705 ms + +2462 c/s real, 44 c/s virtual (14280 hashes in 5.80 seconds) +min 13.923 ms, avg 22.638 ms, max 24.042 ms + +1221 c/s real, 21 c/s virtual (7112 hashes in 5.82 seconds) +min 28.909 ms, avg 45.658 ms, max 47.265 ms + +Thus, when not using a ROM we're able to compute over 21k, over 10k, +around 2400, or around 1200 hashes per second with per-thread RAM sizes +of 2 MiB, 4 MiB, 16 MiB, or 32 MiB, respectively. + +The same might not hold on another machine. + +By the way, here's what our SysV shared memory segment looks like: + +$ ipcs -m + +------ Shared Memory Segments -------- +key shmid owner perms bytes nattch status +[...] +0x7965730a 327683 user 640 395136991232 0 + +The 395+ GB size corresponds to 368 GiB. 
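+
+(368 GiB is 395,136,991,232 bytes, the same figure passed to
+kernel.shmmax earlier; ipcs reports segment sizes in bytes, hence a
+little over 395 decimal gigabytes.)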
+ +To cleanup, let's remove the SysV shared memory segment holding the ROM: + +$ ipcrm -M 0x7965730a + +and free up the preallocated huge pages, as root: + +# sysctl -w vm.nr_hugepages=0 diff --git a/deps/yescrypt-master/PHC-TEST-OK-SHA256 b/deps/yescrypt-master/PHC-TEST-OK-SHA256 new file mode 100644 index 000000000..aab409ae7 --- /dev/null +++ b/deps/yescrypt-master/PHC-TEST-OK-SHA256 @@ -0,0 +1 @@ +05ad49b9d9fb9eb65df78c919c3a778b7952e2e63f2e8ddb6b2b3f4b645e3cd0 PHC-TEST-OUT diff --git a/deps/yescrypt-master/README b/deps/yescrypt-master/README new file mode 100644 index 000000000..f017c6717 --- /dev/null +++ b/deps/yescrypt-master/README @@ -0,0 +1,197 @@ + What is yescrypt? + +yescrypt is a password-based key derivation function (KDF) and password +hashing scheme. It builds upon Colin Percival's scrypt. This +implementation is able to compute native yescrypt hashes as well as +classic scrypt. + +As of this writing, yescrypt is the default password hashing scheme on +recent ALT Linux, Arch Linux, Debian 11+, Fedora 35+, Kali Linux 2021.1+, +and Ubuntu 22.04+. It is also supported in Fedora 29+, RHEL 9+, and +Ubuntu 20.04+, and is recommended for new passwords in Fedora CoreOS. + + + Why yescrypt? + +Like it or not, password authentication remains relevant (including as +one of several authentication factors), password hash database leaks +happen, the leaks are not always detected and fully dealt with right +away, and even once they are many users' same or similar passwords +reused elsewhere remain exposed. To mitigate these risks (as well as +those present in other scenarios where password-based key derivation or +password hashing is relevant), computationally expensive (bcrypt, +PBKDF2, etc.) and more recently also memory-hard (scrypt, Argon2, etc.) +password hashing schemes have been introduced. Unfortunately, at high +target throughput and/or low target latency their memory usage is +unreasonably low, up to the point where they're not obviously better +than the much older bcrypt (considering attackers with pre-existing +hardware). This is a primary drawback that yescrypt addresses. + +Most notable for large-scale deployments is yescrypt's optional +initialization and reuse of a large lookup table, typically occupying +at least tens of gigabytes of RAM and essentially forming a +site-specific ROM. This limits attackers' use of pre-existing hardware +such as botnet nodes. + +yescrypt's other changes from scrypt additionally slow down GPUs and to +a lesser extent FPGAs and ASICs even when its memory usage is low and +even when there's no ROM, and provide extra knobs and built-in features. + +Technically, yescrypt is the most scalable password hashing scheme so +far, providing near-optimal security from offline password cracking +across the whole range from kilobytes to terabytes and beyond. However, +the price for this is complexity, and we recognize that complexity is a +major drawback of any software. Thus, at this time we focus on +large-scale deployments, where the added complexity is relatively small +compared to the total complexity of the authentication service setup. +For smaller deployments, bcrypt with its simplicity and existing library +support is a reasonable short-term choice (although we're making +progress towards more efficient FPGA attacks on bcrypt under a separate +project). We might introduce a cut-down yescrypt-lite later or/and +yescrypt might become part of standard or popular libraries, making it +more suitable for smaller deployments as well. 
+ + + Parameter selection. + +Please refer to PARAMETERS for guidelines on parameter selection and the +currently recommended parameter sets by use case (password hashing with +or without a ROM, and KDF). + + + Performance. + +Please refer to PERFORMANCE for example setup and benchmarks relevant to +the mass user authentication use case. + +The test system is a server (kindly provided by Packet.net) with dual +Xeon Gold 5120 CPUs (2.2 GHz, turbo to up to 3.2 GHz) and 384 GiB RAM +(12x DDR4-2400 ECC Reg). These CPUs have 14 cores and 6 memory channels +each, for a total of 28 physical cores, 56 logical CPUs (HT is enabled), +and 12 memory channels. + +Some highlights: initialization of a 368 GiB ROM takes 22 seconds (to +be done on server bootup), and while using the ROM we're able to compute +over 21k, over 10k, or around 1200 hashes per second with per-hash RAM +usage of 1.4375 MiB, 2.875 MiB, or 23 MiB, respectively. + +When not using a ROM, we're able to compute over 21k, over 10k, or +around 1200 hashes per second with per-hash RAM usage of 2 MiB, 4 MiB, +or 32 MiB, respectively. + + + Comparison to scrypt and Argon2. + +yescrypt's advantages: + + + Greater resistance to offline attacks + + Extra optional built-in features + + Cryptographic security provided by NIST-approved primitives + + SHA-256, HMAC, PBKDF2, and scrypt are usable from the same codebase + +yescrypt's drawbacks: + + - Complex + - Cache-timing unsafe (like scrypt and Argon2d, but unlike Argon2i) + - Not the PHC winner (Argon2 is), but is merely a "special recognition" + - Supported in fewer third-party projects + +Please refer to COMPARISON for a lot more detail and other observations. + + + A note on cryptocurrencies. + +For historical reasons, multiple CPU mining focused cryptocurrencies use +yescrypt 0.5'ish as their proof-of-work (PoW) scheme. We currently have +a separate project for the PoW use case: yespower. Thus, rather than +misuse yescrypt 1.0+ for PoW, those and other projects are advised to +use yespower 1.0+ instead. The yespower homepage is: + + https://www.openwall.com/yespower/ + + + How to test yescrypt for proper operation. + +On a Unix-like system, invoke "make check". This will build and run a +program called "tests", and check its output against the supplied file +TESTS-OK. It will also build a program called "phc-test", and if a file +called PHC-TEST-OK-SHA256 is present will run that program and check its +output against that file's contents. If everything matches, each of +these two sets of tests prints one word "PASSED", so there will be two +such lines among "make check" output, one of them being the final line +of output. + +We do most of our testing on Linux systems with gcc. The supplied +Makefile assumes that you use gcc. + + + ROM in SysV shared memory demo and benchmark. + +Also included with this version of yescrypt are "initrom" and "userom" +demo programs. They're built by simply typing "make". Please refer to +PERFORMANCE for their usage. + + + Alternate code versions and make targets. + +Two implementations of yescrypt are included: reference and optimized. +By default, the optimized implementation is built. Internally, the +optimized implementation uses conditional compilation to choose between +usage of various SIMD instruction sets where supported and scalar code. + +The reference implementation is unoptimized and is very slow, but it has +simpler and shorter source code. 
Its purpose is to provide a simple +human- and machine-readable specification that implementations intended +for actual use should be tested against. It is deliberately mostly not +optimized, and it is not meant to be used in production. + +Similarly to "make check", there's "make check-ref" to build and test +the reference implementation. There's also "make ref" to build the +reference implementation and have the "initrom" and "userom" programs +use it. + +"make clean" may need to be run between making different builds. + + + Development status. + +This yescrypt distribution is a work-in-progress. Its interfaces other +than crypto_scrypt() are subject to change in future revisions, however +no incompatible changes to the yescrypt algorithm are expected. + + + Credits. + +scrypt has been designed by Colin Percival. yescrypt has been designed +by Solar Designer building upon scrypt. + +The following other people and projects have also indirectly helped make +yescrypt what it is: + + - Bill Cox + - Rich Felker + - Anthony Ferrara + - Christian Forler + - Taylor Hornby + - Dmitry Khovratovich + - Samuel Neves + - Marcos Simplicio + - Ken T Takusagawa + - Jakob Wenzel + - Christian Winnerlein + + - DARPA Cyber Fast Track + - Password Hashing Competition + + + Contact info. + +First, please check the yescrypt homepage for new versions, etc.: + + https://www.openwall.com/yescrypt/ + +If you have anything valuable to add or a non-trivial question to ask, +you may join and post to the yescrypt mailing list (referenced on the +yescrypt homepage above) or contact the maintainer of yescrypt at: + + Solar Designer diff --git a/deps/yescrypt-master/TESTS-OK b/deps/yescrypt-master/TESTS-OK new file mode 100644 index 000000000..ace7fb2ef --- /dev/null +++ b/deps/yescrypt-master/TESTS-OK @@ -0,0 +1,80 @@ +scrypt("", "", 16, 1, 1) = 77 d6 57 62 38 65 7b 20 3b 19 ca 42 c1 8a 04 97 f1 6b 48 44 e3 07 4a e8 df df fa 3f ed e2 14 42 fc d0 06 9d ed 09 48 f8 32 6a 75 3a 0f c8 1f 17 e8 d3 e0 fb 2e 0d 36 28 cf 35 e2 0c 38 d1 89 06 +scrypt("password", "NaCl", 1024, 8, 16) = fd ba be 1c 9d 34 72 00 78 56 e7 19 0d 01 e9 fe 7c 6a d7 cb c8 23 78 30 e7 73 76 63 4b 37 31 62 2e af 30 d9 2e 22 a3 88 6f f1 09 27 9d 98 30 da c7 27 af b9 4a 83 ee 6d 83 60 cb df a2 cc 06 40 +scrypt("pleaseletmein", "SodiumChloride", 16384, 8, 1) = 70 23 bd cb 3a fd 73 48 46 1c 06 cd 81 fd 38 eb fd a8 fb ba 90 4f 8e 3e a9 b5 43 f6 54 5d a1 f2 d5 43 29 55 61 3f 0f cf 62 d4 97 05 24 2a 9a f9 e6 1e 85 dc 0d 65 1e 40 df cf 01 7b 45 57 58 87 +scrypt("pleaseletmein", "SodiumChloride", 1048576, 8, 1) = 21 01 cb 9b 6a 51 1a ae ad db be 09 cf 70 f8 81 ec 56 8d 57 4a 2f fd 4d ab e5 ee 98 20 ad aa 47 8e 56 fd 8f 4b a5 d0 9f fa 1c 6d 92 7c 40 f4 c3 37 30 40 49 e8 a9 52 fb cb f4 5c 6f a7 7a 41 a4 +yescrypt("", "", 0, 16, 1, 1, 0, 0) = 77 d6 57 62 38 65 7b 20 3b 19 ca 42 c1 8a 04 97 f1 6b 48 44 e3 07 4a e8 df df fa 3f ed e2 14 42 fc d0 06 9d ed 09 48 f8 32 6a 75 3a 0f c8 1f 17 e8 d3 e0 fb 2e 0d 36 28 cf 35 e2 0c 38 d1 89 06 +yescrypt("", "", 0, 16, 1, 1, 0, 0) = 77 d6 57 62 38 65 7b 20 +yescrypt("", "", 0, 4, 1, 1, 0, 0) = ef ad 0c 23 31 4c b5 72 bc 3c fb 15 43 da 42 f8 a8 b0 73 00 4c 86 6b 64 ab 50 55 a4 f0 9f a5 f5 71 14 2e bf e7 e0 5a 3b 92 c4 32 f3 1d ea 95 ad 5f 9c 85 4b 64 56 46 2f 4b d0 f7 32 b7 cd c5 49 +yescrypt("", "", 1, 4, 1, 1, 0, 0) = 85 dd a4 8c 9e c9 de 2f 7f 1a e8 b4 df ed a5 1f 8b 6d 56 f3 08 1b e1 a7 c0 83 3b a2 71 9a 36 ab 02 88 5d ae 36 55 7d 34 26 86 b1 7b a7 5f 2c 21 77 92 de 09 70 ab 1d 07 a9 c7 50 93 6d 31 42 6f +yescrypt("", "", 1, 4, 
1, 1, 0, 0) = 85 dd a4 8c 9e c9 de 2f +yescrypt("", "", 1, 4, 1, 1, 1, 0) = 4b aa 8c d8 60 8b a9 1f 3e 34 39 d9 ec 4f ae 8f 9f c0 92 d9 ca 22 b7 37 7e 31 ae 5b 9a d7 87 7c 11 68 69 11 62 dd 0e 5e f0 49 e5 70 65 0c be d4 38 4a d6 05 34 fb 0c be d1 9f f3 f0 33 c9 4b 0c +yescrypt("", "", 1, 4, 1, 1, 2, 0) = e6 e8 bb a0 9b 64 12 ff b0 b3 cc 35 e3 7d 0b 78 2a 47 fb aa dc 57 a0 76 d7 c6 cc 2e 70 91 9a 1b 8d 47 38 c4 f8 33 55 69 07 42 d9 be d7 1c 3b 8f b0 d7 eb 08 6a b1 34 c5 e5 57 07 c2 c1 3c 75 ef +yescrypt("", "", 1, 4, 1, 1, 3, 0) = ac d9 a4 20 1c f4 a4 76 ec f7 ba a6 11 3d 86 fb 65 cd 07 10 2b 40 04 e4 f9 d9 9c d3 42 55 a1 08 99 7d 70 ae 0a 64 bf 0a 4d 96 c1 73 ab f8 82 79 c1 a9 4a d9 bd f1 68 ed fb bd 90 f6 6e d5 c8 0d +yescrypt("", "", 1, 4, 1, 1, 3, 0) = ac d9 a4 20 1c f4 a4 76 ec f7 ba a6 11 3d 86 fb 65 cd 07 10 2b 40 04 e4 f9 d9 9c d3 42 55 a1 08 99 +yescrypt("", "", 1, 4, 1, 1, 3, 0) = ac d9 a4 20 1c f4 a4 76 ec f7 ba a6 11 3d 86 fb 65 cd 07 10 2b 40 04 e4 f9 d9 9c d3 42 55 a1 08 +yescrypt("", "", 1, 4, 1, 1, 3, 0) = ac d9 a4 20 1c f4 a4 76 ec f7 ba a6 11 3d 86 fb 65 cd 07 10 2b 40 04 e4 f9 d9 9c d3 42 55 a1 +yescrypt("", "", 1, 4, 1, 1, 3, 0) = ac +yescrypt("", "", 182, 4, 1, 1, 0, 0) = 0c d5 af 76 eb 24 1d f8 11 9a 9a 12 2a e3 69 20 bc c7 f4 14 b9 c0 d5 8f 45 00 80 60 da de 46 b0 c8 09 22 bd cc 16 a3 ab 5d 20 1d 4c 61 40 c6 71 be 1f 75 27 2c a9 04 73 9d 5a d1 ff 67 2b 0c 21 +yescrypt("", "", 182, 4, 1, 1, 0, 0) = 0c d5 af 76 +yescrypt("", "", 182, 4, 1, 1, 1, 0) = 23 b6 ad f0 b6 0c 9a 99 7f 58 58 3d 80 cd a4 8c 63 8c dc 2f 28 9e df 93 a7 08 07 72 5a 0d 35 c4 68 ca 36 2c 55 57 cc 04 b6 81 1e 2e 73 08 41 f5 26 d8 f4 f7 ac fb fa 9e 06 fe 1f 38 3a 71 15 5e +yescrypt("", "", 182, 4, 1, 1, 1, 0) = 23 b6 ad f0 b6 0c 9a 99 7f 58 58 3d 80 cd a4 8c 63 8c dc 2f 28 9e df 93 a7 08 07 72 5a 0d 35 c4 68 +yescrypt("", "", 182, 4, 1, 1, 1, 0) = 23 b6 ad f0 b6 0c 9a 99 7f 58 58 3d 80 cd a4 8c 63 8c dc 2f 28 9e df 93 a7 08 07 72 5a 0d 35 c4 +yescrypt("", "", 182, 4, 1, 1, 1, 0) = 23 b6 ad f0 b6 0c 9a 99 7f 58 58 3d 80 cd a4 8c 63 8c dc 2f 28 9e df 93 a7 08 07 72 5a 0d 35 +yescrypt("", "", 182, 4, 1, 1, 1, 0) = 23 +yescrypt("p", "s", 182, 16, 8, 1, 10, 0) = e1 f9 81 73 3a 94 05 2f cd 7a cb 14 05 df 0b bd e8 e4 99 b6 a1 33 1b 77 59 09 b4 8c 2f 51 6c 40 dc c8 30 16 35 b7 23 7b +yescrypt("p", "s", 1, 16, 8, 1, 10, 0) = 9e 7a 40 97 64 42 84 cf 3b 73 b6 04 50 ff 23 0c dc b6 b1 b1 9b 15 09 ee b4 82 f6 96 c4 f1 c7 05 c0 0f 74 02 16 18 3a 12 +yescrypt("p", "s", 182, 16, 8, 1, 0, 0) = c8 c7 ff 11 22 b0 b2 91 c3 f2 60 89 48 78 2c d6 89 cc 45 57 90 17 aa a5 ff 8b aa 74 a6 32 ec 99 c3 d6 69 30 fb 20 23 bb +yescrypt("p", "s", 1, 16, 8, 1, 0, 0) = 9d d6 36 c2 d0 bb 92 34 52 86 ef da f8 a6 8c fc 1b 4f fd c4 b1 ad ac cc 7d 86 4b 9a 67 87 b8 5d 6a e0 f5 28 0d a8 88 9f +yescrypt("p", "s", 182, 16, 8, 1, 0, 0) = c8 c7 ff 11 22 b0 b2 91 c3 f2 60 89 48 78 2c d6 89 cc 45 57 90 17 aa a5 ff 8b aa 74 a6 32 ec 99 +yescrypt("p", "s", 182, 16, 8, 1, 0, 0) = c8 c7 ff 11 22 b0 b2 91 +'$y$jD5.7$LdJMENpBABJJ3hIHjB1Bi.' 
+Plaintext: '$y$jD5.7$LdJMENpBABJJ3hIHjB1Bi.$HboGM6qPrsK.StKYGt6KErmUYtioHreJd98oIugoNB6' +Encrypted: '$y$jD5.7$BkbiDbyWZnzlLWOAcru671$zLAHafRUyp9n9XZWnltUbj3ULWUtMN4fteTltjWkARC' +Plaintext: '$y$jC4$LdJMENpBABJJ3hIHjB1B$jVg4HoqqpbmQv/NCpin.QCMagJ8o4QX7lXdzvVV0xFC' +Encrypted: '$y$jC4$qiyh2SQgE5vrF3ORvFho$HurI7MuukXHz..TpxrwKuakji/j9VKDh2WVUK4DIsq5' +Plaintext: '$y$/B3.6$LdJMENpBABJJ3hIHjB1$h8sE4hJo.BsdlfJr0.d8bNJNPZymH7Y3kLj4aY1Rfc8' +Encrypted: '$y$/B3.6$YiN5s/dKpjNrdKm9ND0$lBNMoUaAsw.JR0zMq9IBKIi/VPxj7lD7Sg64nB5LFw2' +Plaintext: '$y$/A2$LdJMENpBABJJ3hIHj/$5IEld1eWdmh5lylrqHLF5dvA3ISpimEM9J1Dd05n/.3' +Encrypted: '$y$/A2$TqjvmGXoqnsNU/8Y40$ZB43..7UnMt6ySU7XbpPyvyahahHSkJJgztkLWp6/IC' +Plaintext: '$y$j91.5$LdJMENpBABJJ3hIH$ebKnn23URD5vyLgF9cP2EvVosrUXf7UErGRV0KmC6e6' +Encrypted: '$y$j91.5$bICydADAaInC9UR/$rpsDDkULkr1caCVYV9PNAsiZxijoQ2/gDcbonlowRi7' +Plaintext: '$y$j80$LdJMENpBABJJ3h2$ysXVVJwuaVlI1BWoEKt/Bz3WNDDmdOWz/8KTQaHL1cC' +Encrypted: '$y$j80$fmxyeGYOC34lh19$mm9FRBs0iHLTHfSNznm1kJVchXN4PaS8hoNI6TTAlB7' +Plaintext: '$y$/7/.4$LdJMENpBABJJ3/$lXHleh7bIZMGNtJVxGVrsIWkEIXfBedlfPui/PITflC' +Encrypted: '$y$/7/.4$EuTbL3Wtm3khW0$1jvKQzIcAqYnYxfb4TEs.FeAZ7rLDl5vNQEkPPcj2KC' +Plaintext: '$y$/6.$LdJMENpBABJJ$zQITmYSih5.CTY47x0IuE4wl.b3HzYGKKCSggakaQ22' +Encrypted: '$y$/6.$x0G/jIr053ui$4O.aVGTPptkjx6eXrW8fdvqcPEV28w7a1PSos6CXV31' +Plaintext: '$y$j5..3$LdJMENpBAB3$xi27PTUNd8NsChHeLOz85JFnUOyibRHkWzprowRlR5/' +Encrypted: '$y$j5..3$/nwg3UXJWp/$5jcvDgeotKpaG9IeSJx0fJNSz33JjTYYD4Kwao3Eki5' +Plaintext: '$y$j4/$LdJMENpBA/$tHlkpTQ8V/eEnTVau1uW36T97LIXlfPrEzdeV5SE5K7' +Encrypted: '$y$j4/$yoneNBwae0$uPBnH0yXBCOM5v5BU9qlvUUtUr3QD5btS0upc6sdvf4' +Plaintext: '$y$/3..2$LdJMENpB$tNczXFuNUd3HMqypStCRsEaL4e4KF7ZYLBe8Hbeg0B7' +Encrypted: '$y$/3..2$VD20uHT3$AV5WWaN6bEKRvZlCuurj.mnHMmZmJ9ExQ9HjiReCDwC' +Plaintext: '$y$/2/$LdJMEN3$RRorHhfsw1/P/WR6Aurg4U72e9Q7qt9vFPURdyfiqK8' +Encrypted: '$y$/2/$BYujKJA$fsMwVvFm8r1caFQP.mem3OUuMYBCDGj9CEoDfSwFDLB' +Plaintext: '$y$j2..1$LdJME/$iLEt6kuTwHch6XdCxtTHfsQzYwWFmpUwgl6Ax8RH4d1' +Encrypted: '$y$j2..1$.mZga/$X6GFMkoYPxFapo.3H4LllEjltFapONQcKUOdEd9oPa/' +Plaintext: '$y$j0/$LdJM$k7BXzSDuoGHW56SY3HxROCiA0gWRscZe2aA0q5oHPM0' +Encrypted: '$y$j0/$SkNZ$DQ06H0br45bpE7lGgCD9gOxTMP9SsO6Mt1T9lo5PHz1' +Plaintext: '$y$//..0$Ld3$6BJXezMFxaMiO5wsuoEmztvtCs/79085dZO56ADlV5B' +Encrypted: '$y$//..0$lM1$60gjeUIW/3QidfN6zU9NqB09Ni1NBMfj2VaSZMjDd18' +Plaintext: '$y$///$L/$Rrrkp6OVljrIk0kcwkCDhAiHJiSthh3cKeIGHUW7Z0C' +Encrypted: '$y$///$q.$/.tR4GqigxciLYGoB8fmzudWQR7IzSu9s3dR8wp3VsD' +Plaintext: '$y$j1../$LdJMENpBABJJ3hIHjB1Bi.$L8OQFc8mxJPd7CpUFgkS7KqJM2I9jGXu3BdqX2D.647' +Encrypted: '$y$j1../$BkbiDbyWZnzlLWOAcru671$iicGI2gNZyhimPVgz2VoKrJAB9fWykBN.3Mh0AwEy29' +Plaintext: '$y$j//$LdJMENpBABJJ3hIHjB1B$U8a2MaK.yesqWySK8Owk6PWeWmp/XuagMbpP45q1/q1' +Encrypted: '$y$j//$qiyh2SQgE5vrF3ORvFho$5dD9ick8ugystfp8wa3xbV7ASDux0dpoOh0QJxFuXH5' +'$7$C6..../....SodiumChloride$kBGj9fHznVYFQMEn/qDCfrDevf9YDtcDdKvEqHJLV8D' +'$7$06..../....SodiumChloride$ENlyo6fGw4PCcDBOFepfSZjFUnVatHzCcW55.ZGz3B0' +r=8 N=2^11 NROM=2^18 +Will use 262144.00 KiB ROM + 2048.00 KiB RAM +Initializing ROM ... DONE (696ebab2) +'$y$j8567F$LdJMENpBABJJ3hIHjB1Bi.' +'$y$j8567F$LdJMENpBABJJ3hIHjB1Bi.$4XJGTsv75AjIN60Z31kPN3.86vkCYzIq7LMz2Pb2lC.' +Initializing ROM in preallocated memory ... DONE (696ebab2) +'$y$j8567F$LdJMENpBABJJ3hIHjB1Bi.$4XJGTsv75AjIN60Z31kPN3.86vkCYzIq7LMz2Pb2lC.' 
+'$y$j8567F$LdJMENpBABJJ3hIHjB1Bi.$K3wFVK9/t3QsjCk/oK2s8dKzzZ4m7QTP8Ms5uywhWv8' +'$y$j8567F$LdJMENpBABJJ3ZIHjB1Bi.$/OnmIkP0UK5OxyxD0Af/V1oL0zWvTLAUWg3Nr0bsFEB' +'$y$j8567F$LdJMENpBABJJ3ZIHjB1Bi.$DskEGULspNduIZVFK5SOK8enlXnSs/vkuXFdi0wkQ1.' +'$y$j/.5I$LdJMENpBABJJ3hIHjB1Bi.$NqCMKxN9Y9Uw821.72ScGDMpyk7U7V51qnHSRPapzW8' diff --git a/deps/yescrypt-master/initrom.c b/deps/yescrypt-master/initrom.c new file mode 100644 index 000000000..fde858cc4 --- /dev/null +++ b/deps/yescrypt-master/initrom.c @@ -0,0 +1,217 @@ +/*- + * Copyright 2013-2018 Alexander Peslyak + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#define YESCRYPT_FLAGS YESCRYPT_DEFAULTS + +#define ROM_SHM_KEY 0x7965730a +#define ROM_LOCAL_PARAM "change this before use" + +/* Maximum parallelism factor during ROM initialization */ +#define YESCRYPT_PROM_SHM 112 +#define YESCRYPT_PROM_FILE 4 + +//#define USE_HUGEPAGE +//#define DUMP_SHARED + +#include +#include +#include /* for atoi() */ +#include +#include +#include +#include +#include +#include + +#include "yescrypt.h" + +int main(int argc, const char * const *argv) +{ +#if 0 + uint64_t rom_bytes = 112 * (1024ULL*1024*1024); + uint64_t ram_bytes = 1 * (1024ULL*1024); +#else + uint64_t rom_bytes = 3 * (1024ULL*1024*1024); + uint64_t ram_bytes = 2 * (1024ULL*1024); +#endif + uint32_t r, min_r; + uint64_t NROM_log2, N_log2; + int shmid; + yescrypt_shared_t shared; + yescrypt_binary_t *digest; + const char *rom_filename = NULL; + int rom_fd; + + if (argc > 1) + rom_bytes = atoi(argv[1]) * (1024ULL*1024*1024); + if (argc > 2) + ram_bytes = atoi(argv[2]) * (1024ULL*1024); + if (argc > 3) + rom_filename = argv[3]; + + if (!rom_bytes) { + puts("Wrong ROM size requested"); + return 1; + } + + min_r = 9; + if (rom_filename) + min_r = 8 * 256; + + NROM_log2 = 0; + while (((rom_bytes >> NROM_log2) & 0xff) == 0) + NROM_log2++; + r = rom_bytes >> (7 + NROM_log2); + while (r < min_r && NROM_log2 > 0) { + r <<= 1; + NROM_log2--; + } + rom_bytes = (uint64_t)r << (7 + NROM_log2); + + N_log2 = 3; + while (((uint64_t)r << (7 + N_log2)) < ram_bytes) + N_log2++; + ram_bytes = (uint64_t)r << (7 + N_log2); + + printf("r=%u N=2^%u NROM=2^%u\n", r, + (unsigned int)N_log2, (unsigned int)NROM_log2); + + printf("Will use %.2f KiB ROM\n", rom_bytes / 1024.0); + printf(" %.2f KiB RAM\n", ram_bytes / 1024.0); + + shared.aligned_size = rom_bytes; + + if (rom_filename) { + rom_fd = open(rom_filename, O_CREAT|O_RDWR|O_EXCL, + S_IRUSR|S_IRGRP|S_IWUSR); + if (rom_fd < 0) { + perror("open"); + return 1; + } + if (ftruncate(rom_fd, rom_bytes)) { + perror("ftruncate"); + close(rom_fd); + unlink(rom_filename); + return 1; + } + + int flags = +#ifdef MAP_NOCORE + 
MAP_NOCORE | +#endif +#if defined(MAP_HUGETLB) && defined(USE_HUGEPAGE) + MAP_HUGETLB | +#endif + MAP_SHARED; + void *p = mmap(NULL, rom_bytes, PROT_READ | PROT_WRITE, + flags, rom_fd, 0); +#if defined(MAP_HUGETLB) && defined(USE_HUGEPAGE) + if (p == MAP_FAILED) + p = mmap(NULL, rom_bytes, PROT_READ | PROT_WRITE, + flags & ~MAP_HUGETLB, rom_fd, 0); +#endif + if (p == MAP_FAILED) { + perror("mmap"); + close(rom_fd); + unlink(rom_filename); + return 1; + } + close(rom_fd); + shared.base = shared.aligned = p; + } else { + shmid = shmget(ROM_SHM_KEY, shared.aligned_size, +#ifdef SHM_HUGETLB + SHM_HUGETLB | +#endif + IPC_CREAT|IPC_EXCL | S_IRUSR|S_IRGRP|S_IWUSR); + if (shmid == -1) { +#ifdef SHM_HUGETLB + perror("shmget"); + puts("Retrying without SHM_HUGETLB"); + shmid = shmget(ROM_SHM_KEY, shared.aligned_size, + IPC_CREAT|IPC_EXCL | S_IRUSR|S_IRGRP|S_IWUSR); +#endif + if (shmid == -1) { + perror("shmget"); + return 1; + } + } + + shared.base = shared.aligned = shmat(shmid, NULL, 0); + if (shared.base == (void *)-1) { + int save_errno = errno; + shmctl(shmid, IPC_RMID, NULL); + errno = save_errno; + perror("shmat"); + return 1; + } + } + + printf("Initializing ROM ..."); + fflush(stdout); + yescrypt_params_t rom_params = { + .flags = YESCRYPT_DEFAULTS | YESCRYPT_SHARED_PREALLOCATED, + .NROM = (uint64_t)1 << NROM_log2, + .r = r, + .p = rom_filename ? YESCRYPT_PROM_FILE : YESCRYPT_PROM_SHM }; + if (yescrypt_init_shared(&shared, + (uint8_t *)ROM_LOCAL_PARAM, strlen(ROM_LOCAL_PARAM), + &rom_params)) { + puts(" FAILED"); + if (rom_filename) + unlink(rom_filename); + return 1; + } +#ifdef DUMP_SHARED + fwrite(shared.aligned, shared.aligned_size, 1, stderr); +#endif + digest = yescrypt_digest_shared(&shared); + printf(" DONE (%02x%02x%02x%02x)\n", + digest->uc[0], digest->uc[1], digest->uc[2], digest->uc[3]); + + { + yescrypt_local_t local; + const uint8_t *setting; + uint8_t hash[128]; + + if (yescrypt_init_local(&local)) { + puts("yescrypt_init_local() FAILED"); + return 1; + } + + yescrypt_params_t params = rom_params; + params.flags = YESCRYPT_FLAGS; + params.N = (uint64_t)1 << N_log2; + params.p = 1; + setting = yescrypt_encode_params(¶ms, + (const uint8_t *)"WZaPV7LSUEKMo34.", 16); + + printf("'%s'\n", (char *)yescrypt_r(&shared, &local, + (const uint8_t *)"pleaseletmein", 13, setting, NULL, + hash, sizeof(hash))); + } + + if (rom_filename && munmap(shared.base, rom_bytes)) { + perror("munmap"); + return 1; + } + + return 0; +} diff --git a/deps/yescrypt-master/insecure_memzero.c b/deps/yescrypt-master/insecure_memzero.c new file mode 100644 index 000000000..d0e8cbad7 --- /dev/null +++ b/deps/yescrypt-master/insecure_memzero.c @@ -0,0 +1,49 @@ +/*- + * Copyright 2014 Colin Percival + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef SKIP_MEMZERO + +#include +#include + +#include "insecure_memzero.h" + +/* Function which does the zeroing. */ +static void +insecure_memzero_func(volatile void * buf, size_t len) +{ + volatile uint8_t * _buf = buf; + size_t i; + + for (i = 0; i < len; i++) + _buf[i] = 0; +} + +/* Pointer to memory-zeroing function. */ +void (* volatile insecure_memzero_ptr)(volatile void *, size_t) = + insecure_memzero_func; + +#endif diff --git a/deps/yescrypt-master/insecure_memzero.h b/deps/yescrypt-master/insecure_memzero.h new file mode 100644 index 000000000..d5a41d157 --- /dev/null +++ b/deps/yescrypt-master/insecure_memzero.h @@ -0,0 +1,69 @@ +/*- + * Copyright 2014 Colin Percival + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _INSECURE_MEMZERO_H_ +#define _INSECURE_MEMZERO_H_ + +#ifdef SKIP_MEMZERO +#define insecure_memzero(buf, len) /* empty */ +#else + +#include + +/* Pointer to memory-zeroing function. */ +extern void (* volatile insecure_memzero_ptr)(volatile void *, size_t); + +/** + * insecure_memzero(buf, len): + * Attempt to zero ${len} bytes at ${buf} in spite of optimizing compilers' + * best (standards-compliant) attempts to remove the buffer-zeroing. In + * particular, to avoid performing the zeroing, a compiler would need to + * use optimistic devirtualization; recognize that non-volatile objects do not + * need to be treated as volatile, even if they are accessed via volatile + * qualified pointers; and perform link-time optimization; in addition to the + * dead-code elimination which often causes buffer-zeroing to be elided. 
+ * + * Note however that zeroing a buffer does not guarantee that the data held + * in the buffer is not stored elsewhere; in particular, there may be copies + * held in CPU registers or in anonymous allocations on the stack, even if + * every named variable is successfully sanitized. Solving the "wipe data + * from the system" problem will require a C language extension which does not + * yet exist. + * + * For more information, see: + * http://www.daemonology.net/blog/2014-09-04-how-to-zero-a-buffer.html + * http://www.daemonology.net/blog/2014-09-06-zeroing-buffers-is-insufficient.html + */ +static inline void +insecure_memzero(volatile void * buf, size_t len) +{ + + (insecure_memzero_ptr)(buf, len); +} + +#endif + +#endif /* !_INSECURE_MEMZERO_H_ */ diff --git a/deps/yescrypt-master/phc.c b/deps/yescrypt-master/phc.c new file mode 100644 index 000000000..ceb5935cc --- /dev/null +++ b/deps/yescrypt-master/phc.c @@ -0,0 +1,145 @@ +/*- + * Copyright 2014-2016,2018 Alexander Peslyak + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#define YESCRYPT_FLAGS YESCRYPT_DEFAULTS +#define YESCRYPT_BASE_N 8 +#define YESCRYPT_R 8 +#define YESCRYPT_P 1 + +#include "yescrypt.h" + +#ifdef TEST +static +#endif +int PHS(void *out, size_t outlen, const void *in, size_t inlen, + const void *salt, size_t saltlen, unsigned int t_cost, unsigned int m_cost) +{ + yescrypt_local_t local; + yescrypt_params_t params = { + .flags = YESCRYPT_FLAGS, + .N = (uint64_t)YESCRYPT_BASE_N << m_cost, + .r = YESCRYPT_R, + .p = YESCRYPT_P, + .t = t_cost, + .g = 0 }; + int retval; + + if (yescrypt_init_local(&local)) + return -1; + retval = yescrypt_kdf(NULL, &local, in, inlen, salt, saltlen, ¶ms, + out, outlen); + if (yescrypt_free_local(&local)) + return -1; + return retval; +} + +#ifdef TEST +#include +#include /* for sysconf() */ +#include + +static void print_hex(const uint8_t *buf, size_t buflen, const char *sep) +{ + size_t i; + + putchar('"'); + for (i = 0; i < buflen; i++) + printf("\\x%02x", buf[i]); + printf("\"%s", sep); +} + +static void print_PHS(const void *in, size_t inlen, + const void *salt, size_t saltlen, unsigned int t_cost, unsigned int m_cost) +{ + uint8_t dk[32]; + + printf("PHS("); + print_hex(in, inlen, ", "); + print_hex(salt, saltlen, ", "); + printf("%u, %u) = ", t_cost, m_cost); + + if (PHS(dk, sizeof(dk), in, inlen, salt, saltlen, t_cost, m_cost)) { + puts("FAILED"); + return; + } + + print_hex(dk, sizeof(dk), "\n"); +} + +static void print_all_PHS(unsigned int t_cost, unsigned int m_cost) +{ + clock_t clk_tck = sysconf(_SC_CLK_TCK); + struct tms start_tms, end_tms; + clock_t start = times(&start_tms), end, start_v, end_v; + const size_t count = 0x102; + size_t inlen, i, j; + + inlen = 0; + for (i = 0; i < count; i++) { + uint8_t in[128], salt[16]; + + for (j = 0; j < inlen; j++) + in[j] = (i + j) & 0xff; + for (j = 0; j < sizeof(salt); j++) + salt[j] = ~(i + j) & 0xff; + + print_PHS(in, inlen, salt, sizeof(salt), t_cost, m_cost); + + if (++inlen > sizeof(in)) + inlen = 0; + } + + end = times(&end_tms); + + start_v = start_tms.tms_utime + start_tms.tms_stime + + start_tms.tms_cutime + start_tms.tms_cstime; + end_v = end_tms.tms_utime + end_tms.tms_stime + + end_tms.tms_cutime + end_tms.tms_cstime; + + if (end == start) + end++; + if (end_v == start_v) + end_v++; + + fprintf(stderr, "m_cost=%u (%.0f KiB), t_cost=%u\n" + "%llu c/s real, %llu c/s virtual (%llu hashes in %.2f seconds)\n", + m_cost, (YESCRYPT_BASE_N << m_cost) * YESCRYPT_R / 8.0, t_cost, + (unsigned long long)count * clk_tck / (end - start), + (unsigned long long)count * clk_tck / (end_v - start_v), + (unsigned long long)count, (double)(end - start) / clk_tck); +} + +int main(void) +{ +#if 0 + setvbuf(stdout, NULL, _IOLBF, 0); +#endif + + print_all_PHS(0, 0); + print_all_PHS(0, 7); + print_all_PHS(0, 8); + print_all_PHS(1, 8); + print_all_PHS(2, 8); + print_all_PHS(3, 8); + print_all_PHS(0, 11); + + return 0; +} +#endif diff --git a/deps/yescrypt-master/sha256.c b/deps/yescrypt-master/sha256.c new file mode 100644 index 000000000..1a5372793 --- /dev/null +++ b/deps/yescrypt-master/sha256.c @@ -0,0 +1,652 @@ +/*- + * Copyright 2005-2016 Colin Percival + * Copyright 2016-2018,2021 Alexander Peslyak + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include + +#include "insecure_memzero.h" +#include "sysendian.h" + +#include "sha256.h" + +#ifdef __ICC +/* Miscompile with icc 14.0.0 (at least), so don't use restrict there */ +#define restrict +#elif __STDC_VERSION__ >= 199901L +/* Have restrict */ +#elif defined(__GNUC__) +#define restrict __restrict +#else +#define restrict +#endif + +/* + * Encode a length len*2 vector of (uint32_t) into a length len*8 vector of + * (uint8_t) in big-endian form. + */ +static void +be32enc_vect(uint8_t * dst, const uint32_t * src, size_t len) +{ + + /* Encode vector, two words at a time. */ + do { + be32enc(&dst[0], src[0]); + be32enc(&dst[4], src[1]); + src += 2; + dst += 8; + } while (--len); +} + +/* + * Decode a big-endian length len*8 vector of (uint8_t) into a length + * len*2 vector of (uint32_t). + */ +static void +be32dec_vect(uint32_t * dst, const uint8_t * src, size_t len) +{ + + /* Decode vector, two words at a time. */ + do { + dst[0] = be32dec(&src[0]); + dst[1] = be32dec(&src[4]); + src += 8; + dst += 2; + } while (--len); +} + +/* SHA256 round constants. 
*/ +static const uint32_t Krnd[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +/* Elementary functions used by SHA256 */ +#define Ch(x, y, z) ((x & (y ^ z)) ^ z) +#if 1 /* Explicit caching/reuse of common subexpression between rounds */ +#define Maj(x, y, z) (y ^ ((x_xor_y = x ^ y) & y_xor_z)) +#else /* Let the compiler cache/reuse or not */ +#define Maj(x, y, z) (y ^ ((x ^ y) & (y ^ z))) +#endif +#define SHR(x, n) (x >> n) +#define ROTR(x, n) ((x >> n) | (x << (32 - n))) +#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) +#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) +#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3)) +#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10)) + +/* SHA256 round function */ +#define RND(a, b, c, d, e, f, g, h, k) \ + h += S1(e) + Ch(e, f, g) + k; \ + d += h; \ + h += S0(a) + Maj(a, b, c); \ + y_xor_z = x_xor_y; + +/* Adjusted round function for rotating state */ +#define RNDr(S, W, i, ii) \ + RND(S[(64 - i) % 8], S[(65 - i) % 8], \ + S[(66 - i) % 8], S[(67 - i) % 8], \ + S[(68 - i) % 8], S[(69 - i) % 8], \ + S[(70 - i) % 8], S[(71 - i) % 8], \ + W[i + ii] + Krnd[i + ii]) + +/* Message schedule computation */ +#define MSCH(W, ii, i) \ + W[i + ii + 16] = s1(W[i + ii + 14]) + W[i + ii + 9] + s0(W[i + ii + 1]) + W[i + ii] + +/* + * SHA256 block compression function. The 256-bit state is transformed via + * the 512-bit input block to produce a new state. + */ +static void +SHA256_Transform(uint32_t state[static restrict 8], + const uint8_t block[static restrict 64], + uint32_t W[static restrict 64], uint32_t S[static restrict 8]) +{ + int i; + + /* 1. Prepare the first part of the message schedule W. */ + be32dec_vect(W, block, 8); + + /* 2. Initialize working variables. */ + memcpy(S, state, 32); + + /* 3. Mix. */ + for (i = 0; i < 64; i += 16) { + uint32_t x_xor_y, y_xor_z = S[(65 - i) % 8] ^ S[(66 - i) % 8]; + RNDr(S, W, 0, i); + RNDr(S, W, 1, i); + RNDr(S, W, 2, i); + RNDr(S, W, 3, i); + RNDr(S, W, 4, i); + RNDr(S, W, 5, i); + RNDr(S, W, 6, i); + RNDr(S, W, 7, i); + RNDr(S, W, 8, i); + RNDr(S, W, 9, i); + RNDr(S, W, 10, i); + RNDr(S, W, 11, i); + RNDr(S, W, 12, i); + RNDr(S, W, 13, i); + RNDr(S, W, 14, i); + RNDr(S, W, 15, i); + + if (i == 48) + break; + MSCH(W, 0, i); + MSCH(W, 1, i); + MSCH(W, 2, i); + MSCH(W, 3, i); + MSCH(W, 4, i); + MSCH(W, 5, i); + MSCH(W, 6, i); + MSCH(W, 7, i); + MSCH(W, 8, i); + MSCH(W, 9, i); + MSCH(W, 10, i); + MSCH(W, 11, i); + MSCH(W, 12, i); + MSCH(W, 13, i); + MSCH(W, 14, i); + MSCH(W, 15, i); + } + + /* 4. Mix local working variables into global state. 
*/ + state[0] += S[0]; + state[1] += S[1]; + state[2] += S[2]; + state[3] += S[3]; + state[4] += S[4]; + state[5] += S[5]; + state[6] += S[6]; + state[7] += S[7]; +} + +static const uint8_t PAD[64] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +/* Add padding and terminating bit-count. */ +static void +SHA256_Pad(SHA256_CTX * ctx, uint32_t tmp32[static restrict 72]) +{ + size_t r; + + /* Figure out how many bytes we have buffered. */ + r = (ctx->count >> 3) & 0x3f; + + /* Pad to 56 mod 64, transforming if we finish a block en route. */ + if (r < 56) { + /* Pad to 56 mod 64. */ + memcpy(&ctx->buf[r], PAD, 56 - r); + } else { + /* Finish the current block and mix. */ + memcpy(&ctx->buf[r], PAD, 64 - r); + SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]); + + /* The start of the final block is all zeroes. */ + memset(&ctx->buf[0], 0, 56); + } + + /* Add the terminating bit-count. */ + be64enc(&ctx->buf[56], ctx->count); + + /* Mix in the final block. */ + SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]); +} + +/* Magic initialization constants. */ +static const uint32_t initial_state[8] = { + 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, + 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 +}; + +/** + * SHA256_Init(ctx): + * Initialize the SHA256 context ${ctx}. + */ +void +SHA256_Init(SHA256_CTX * ctx) +{ + + /* Zero bits processed so far. */ + ctx->count = 0; + + /* Initialize state. */ + memcpy(ctx->state, initial_state, sizeof(initial_state)); +} + +/** + * SHA256_Update(ctx, in, len): + * Input ${len} bytes from ${in} into the SHA256 context ${ctx}. + */ +static void +_SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len, + uint32_t tmp32[static restrict 72]) +{ + uint32_t r; + const uint8_t * src = in; + + /* Return immediately if we have nothing to do. */ + if (len == 0) + return; + + /* Number of bytes left in the buffer from previous updates. */ + r = (ctx->count >> 3) & 0x3f; + + /* Update number of bits. */ + ctx->count += (uint64_t)(len) << 3; + + /* Handle the case where we don't need to perform any transforms. */ + if (len < 64 - r) { + memcpy(&ctx->buf[r], src, len); + return; + } + + /* Finish the current block. */ + memcpy(&ctx->buf[r], src, 64 - r); + SHA256_Transform(ctx->state, ctx->buf, &tmp32[0], &tmp32[64]); + src += 64 - r; + len -= 64 - r; + + /* Perform complete blocks. */ + while (len >= 64) { + SHA256_Transform(ctx->state, src, &tmp32[0], &tmp32[64]); + src += 64; + len -= 64; + } + + /* Copy left over data into buffer. */ + memcpy(ctx->buf, src, len); +} + +/* Wrapper function for intermediate-values sanitization. */ +void +SHA256_Update(SHA256_CTX * ctx, const void * in, size_t len) +{ + uint32_t tmp32[72]; + + /* Call the real function. */ + _SHA256_Update(ctx, in, len, tmp32); + + /* Clean the stack. */ + insecure_memzero(tmp32, 288); +} + +/** + * SHA256_Final(digest, ctx): + * Output the SHA256 hash of the data input to the context ${ctx} into the + * buffer ${digest}. + */ +static void +_SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx, + uint32_t tmp32[static restrict 72]) +{ + + /* Add padding. */ + SHA256_Pad(ctx, tmp32); + + /* Write the hash. */ + be32enc_vect(digest, ctx->state, 4); +} + +/* Wrapper function for intermediate-values sanitization. 
*/ +void +SHA256_Final(uint8_t digest[32], SHA256_CTX * ctx) +{ + uint32_t tmp32[72]; + + /* Call the real function. */ + _SHA256_Final(digest, ctx, tmp32); + + /* Clear the context state. */ + insecure_memzero(ctx, sizeof(SHA256_CTX)); + + /* Clean the stack. */ + insecure_memzero(tmp32, 288); +} + +/** + * SHA256_Buf(in, len, digest): + * Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}. + */ +void +SHA256_Buf(const void * in, size_t len, uint8_t digest[32]) +{ + SHA256_CTX ctx; + uint32_t tmp32[72]; + + SHA256_Init(&ctx); + _SHA256_Update(&ctx, in, len, tmp32); + _SHA256_Final(digest, &ctx, tmp32); + + /* Clean the stack. */ + insecure_memzero(&ctx, sizeof(SHA256_CTX)); + insecure_memzero(tmp32, 288); +} + +/** + * HMAC_SHA256_Init(ctx, K, Klen): + * Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from + * ${K}. + */ +static void +_HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen, + uint32_t tmp32[static restrict 72], uint8_t pad[static restrict 64], + uint8_t khash[static restrict 32]) +{ + const uint8_t * K = _K; + size_t i; + + /* If Klen > 64, the key is really SHA256(K). */ + if (Klen > 64) { + SHA256_Init(&ctx->ictx); + _SHA256_Update(&ctx->ictx, K, Klen, tmp32); + _SHA256_Final(khash, &ctx->ictx, tmp32); + K = khash; + Klen = 32; + } + + /* Inner SHA256 operation is SHA256(K xor [block of 0x36] || data). */ + SHA256_Init(&ctx->ictx); + memset(pad, 0x36, 64); + for (i = 0; i < Klen; i++) + pad[i] ^= K[i]; + _SHA256_Update(&ctx->ictx, pad, 64, tmp32); + + /* Outer SHA256 operation is SHA256(K xor [block of 0x5c] || hash). */ + SHA256_Init(&ctx->octx); + memset(pad, 0x5c, 64); + for (i = 0; i < Klen; i++) + pad[i] ^= K[i]; + _SHA256_Update(&ctx->octx, pad, 64, tmp32); +} + +/* Wrapper function for intermediate-values sanitization. */ +void +HMAC_SHA256_Init(HMAC_SHA256_CTX * ctx, const void * _K, size_t Klen) +{ + uint32_t tmp32[72]; + uint8_t pad[64]; + uint8_t khash[32]; + + /* Call the real function. */ + _HMAC_SHA256_Init(ctx, _K, Klen, tmp32, pad, khash); + + /* Clean the stack. */ + insecure_memzero(tmp32, 288); + insecure_memzero(khash, 32); + insecure_memzero(pad, 64); +} + +/** + * HMAC_SHA256_Update(ctx, in, len): + * Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}. + */ +static void +_HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len, + uint32_t tmp32[static restrict 72]) +{ + + /* Feed data to the inner SHA256 operation. */ + _SHA256_Update(&ctx->ictx, in, len, tmp32); +} + +/* Wrapper function for intermediate-values sanitization. */ +void +HMAC_SHA256_Update(HMAC_SHA256_CTX * ctx, const void * in, size_t len) +{ + uint32_t tmp32[72]; + + /* Call the real function. */ + _HMAC_SHA256_Update(ctx, in, len, tmp32); + + /* Clean the stack. */ + insecure_memzero(tmp32, 288); +} + +/** + * HMAC_SHA256_Final(digest, ctx): + * Output the HMAC-SHA256 of the data input to the context ${ctx} into the + * buffer ${digest}. + */ +static void +_HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx, + uint32_t tmp32[static restrict 72], uint8_t ihash[static restrict 32]) +{ + + /* Finish the inner SHA256 operation. */ + _SHA256_Final(ihash, &ctx->ictx, tmp32); + + /* Feed the inner hash to the outer SHA256 operation. */ + _SHA256_Update(&ctx->octx, ihash, 32, tmp32); + + /* Finish the outer SHA256 operation. */ + _SHA256_Final(digest, &ctx->octx, tmp32); +} + +/* Wrapper function for intermediate-values sanitization. 
*/ +void +HMAC_SHA256_Final(uint8_t digest[32], HMAC_SHA256_CTX * ctx) +{ + uint32_t tmp32[72]; + uint8_t ihash[32]; + + /* Call the real function. */ + _HMAC_SHA256_Final(digest, ctx, tmp32, ihash); + + /* Clean the stack. */ + insecure_memzero(tmp32, 288); + insecure_memzero(ihash, 32); +} + +/** + * HMAC_SHA256_Buf(K, Klen, in, len, digest): + * Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of + * length ${Klen}, and write the result to ${digest}. + */ +void +HMAC_SHA256_Buf(const void * K, size_t Klen, const void * in, size_t len, + uint8_t digest[32]) +{ + HMAC_SHA256_CTX ctx; + uint32_t tmp32[72]; + uint8_t tmp8[96]; + + _HMAC_SHA256_Init(&ctx, K, Klen, tmp32, &tmp8[0], &tmp8[64]); + _HMAC_SHA256_Update(&ctx, in, len, tmp32); + _HMAC_SHA256_Final(digest, &ctx, tmp32, &tmp8[0]); + + /* Clean the stack. */ + insecure_memzero(&ctx, sizeof(HMAC_SHA256_CTX)); + insecure_memzero(tmp32, 288); + insecure_memzero(tmp8, 96); +} + +/* Add padding and terminating bit-count, but don't invoke Transform yet. */ +static int +SHA256_Pad_Almost(SHA256_CTX * ctx, uint8_t len[static restrict 8], + uint32_t tmp32[static restrict 72]) +{ + uint32_t r; + + r = (ctx->count >> 3) & 0x3f; + if (r >= 56) + return -1; + + /* + * Convert length to a vector of bytes -- we do this now rather + * than later because the length will change after we pad. + */ + be64enc(len, ctx->count); + + /* Add 1--56 bytes so that the resulting length is 56 mod 64. */ + _SHA256_Update(ctx, PAD, 56 - r, tmp32); + + /* Add the terminating bit-count. */ + ctx->buf[63] = len[7]; + _SHA256_Update(ctx, len, 7, tmp32); + + return 0; +} + +/** + * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen): + * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and + * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1). + */ +void +PBKDF2_SHA256(const uint8_t * passwd, size_t passwdlen, const uint8_t * salt, + size_t saltlen, uint64_t c, uint8_t * buf, size_t dkLen) +{ + HMAC_SHA256_CTX Phctx, PShctx, hctx; + uint32_t tmp32[72]; + union { + uint8_t tmp8[96]; + uint32_t state[8]; + } u; + size_t i; + uint8_t ivec[4]; + uint8_t U[32]; + uint8_t T[32]; + uint64_t j; + int k; + size_t clen; + + /* Sanity-check. */ + assert(dkLen <= 32 * (size_t)(UINT32_MAX)); + + if (c == 1 && (dkLen & 31) == 0 && (saltlen & 63) <= 51) { + uint32_t oldcount; + uint8_t * ivecp; + + /* Compute HMAC state after processing P and S. */ + _HMAC_SHA256_Init(&hctx, passwd, passwdlen, + tmp32, &u.tmp8[0], &u.tmp8[64]); + _HMAC_SHA256_Update(&hctx, salt, saltlen, tmp32); + + /* Prepare ictx padding. */ + oldcount = hctx.ictx.count & (0x3f << 3); + _HMAC_SHA256_Update(&hctx, "\0\0\0", 4, tmp32); + if ((hctx.ictx.count & (0x3f << 3)) < oldcount || + SHA256_Pad_Almost(&hctx.ictx, u.tmp8, tmp32)) + goto generic; /* Can't happen due to saltlen check */ + ivecp = hctx.ictx.buf + (oldcount >> 3); + + /* Prepare octx padding. */ + hctx.octx.count += 32 << 3; + SHA256_Pad_Almost(&hctx.octx, u.tmp8, tmp32); + + /* Iterate through the blocks. */ + for (i = 0; i * 32 < dkLen; i++) { + /* Generate INT(i + 1). */ + be32enc(ivecp, (uint32_t)(i + 1)); + + /* Compute U_1 = PRF(P, S || INT(i)). 
*/ + memcpy(u.state, hctx.ictx.state, sizeof(u.state)); + SHA256_Transform(u.state, hctx.ictx.buf, + &tmp32[0], &tmp32[64]); + be32enc_vect(hctx.octx.buf, u.state, 4); + memcpy(u.state, hctx.octx.state, sizeof(u.state)); + SHA256_Transform(u.state, hctx.octx.buf, + &tmp32[0], &tmp32[64]); + be32enc_vect(&buf[i * 32], u.state, 4); + } + + goto cleanup; + } + +generic: + /* Compute HMAC state after processing P. */ + _HMAC_SHA256_Init(&Phctx, passwd, passwdlen, + tmp32, &u.tmp8[0], &u.tmp8[64]); + + /* Compute HMAC state after processing P and S. */ + memcpy(&PShctx, &Phctx, sizeof(HMAC_SHA256_CTX)); + _HMAC_SHA256_Update(&PShctx, salt, saltlen, tmp32); + + /* Iterate through the blocks. */ + for (i = 0; i * 32 < dkLen; i++) { + /* Generate INT(i + 1). */ + be32enc(ivec, (uint32_t)(i + 1)); + + /* Compute U_1 = PRF(P, S || INT(i)). */ + memcpy(&hctx, &PShctx, sizeof(HMAC_SHA256_CTX)); + _HMAC_SHA256_Update(&hctx, ivec, 4, tmp32); + _HMAC_SHA256_Final(T, &hctx, tmp32, u.tmp8); + + if (c > 1) { + /* T_i = U_1 ... */ + memcpy(U, T, 32); + + for (j = 2; j <= c; j++) { + /* Compute U_j. */ + memcpy(&hctx, &Phctx, sizeof(HMAC_SHA256_CTX)); + _HMAC_SHA256_Update(&hctx, U, 32, tmp32); + _HMAC_SHA256_Final(U, &hctx, tmp32, u.tmp8); + + /* ... xor U_j ... */ + for (k = 0; k < 32; k++) + T[k] ^= U[k]; + } + } + + /* Copy as many bytes as necessary into buf. */ + clen = dkLen - i * 32; + if (clen > 32) + clen = 32; + memcpy(&buf[i * 32], T, clen); + } + + /* Clean the stack. */ + insecure_memzero(&Phctx, sizeof(HMAC_SHA256_CTX)); + insecure_memzero(&PShctx, sizeof(HMAC_SHA256_CTX)); + insecure_memzero(U, 32); + insecure_memzero(T, 32); + +cleanup: + insecure_memzero(&hctx, sizeof(HMAC_SHA256_CTX)); + insecure_memzero(tmp32, 288); + insecure_memzero(&u, sizeof(u)); +} diff --git a/deps/yescrypt-master/sha256.h b/deps/yescrypt-master/sha256.h new file mode 100644 index 000000000..6210502ff --- /dev/null +++ b/deps/yescrypt-master/sha256.h @@ -0,0 +1,129 @@ +/*- + * Copyright 2005-2016 Colin Percival + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _SHA256_H_ +#define _SHA256_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Use #defines in order to avoid namespace collisions with anyone else's + * SHA256 code (e.g., the code in OpenSSL). + */ +#define SHA256_Init libcperciva_SHA256_Init +#define SHA256_Update libcperciva_SHA256_Update +#define SHA256_Final libcperciva_SHA256_Final +#define SHA256_Buf libcperciva_SHA256_Buf +#define SHA256_CTX libcperciva_SHA256_CTX +#define HMAC_SHA256_Init libcperciva_HMAC_SHA256_Init +#define HMAC_SHA256_Update libcperciva_HMAC_SHA256_Update +#define HMAC_SHA256_Final libcperciva_HMAC_SHA256_Final +#define HMAC_SHA256_Buf libcperciva_HMAC_SHA256_Buf +#define HMAC_SHA256_CTX libcperciva_HMAC_SHA256_CTX + +/* Context structure for SHA256 operations. */ +typedef struct { + uint32_t state[8]; + uint64_t count; + uint8_t buf[64]; +} SHA256_CTX; + +/** + * SHA256_Init(ctx): + * Initialize the SHA256 context ${ctx}. + */ +void SHA256_Init(SHA256_CTX *); + +/** + * SHA256_Update(ctx, in, len): + * Input ${len} bytes from ${in} into the SHA256 context ${ctx}. + */ +void SHA256_Update(SHA256_CTX *, const void *, size_t); + +/** + * SHA256_Final(digest, ctx): + * Output the SHA256 hash of the data input to the context ${ctx} into the + * buffer ${digest}. + */ +void SHA256_Final(uint8_t[32], SHA256_CTX *); + +/** + * SHA256_Buf(in, len, digest): + * Compute the SHA256 hash of ${len} bytes from ${in} and write it to ${digest}. + */ +void SHA256_Buf(const void *, size_t, uint8_t[32]); + +/* Context structure for HMAC-SHA256 operations. */ +typedef struct { + SHA256_CTX ictx; + SHA256_CTX octx; +} HMAC_SHA256_CTX; + +/** + * HMAC_SHA256_Init(ctx, K, Klen): + * Initialize the HMAC-SHA256 context ${ctx} with ${Klen} bytes of key from + * ${K}. + */ +void HMAC_SHA256_Init(HMAC_SHA256_CTX *, const void *, size_t); + +/** + * HMAC_SHA256_Update(ctx, in, len): + * Input ${len} bytes from ${in} into the HMAC-SHA256 context ${ctx}. + */ +void HMAC_SHA256_Update(HMAC_SHA256_CTX *, const void *, size_t); + +/** + * HMAC_SHA256_Final(digest, ctx): + * Output the HMAC-SHA256 of the data input to the context ${ctx} into the + * buffer ${digest}. + */ +void HMAC_SHA256_Final(uint8_t[32], HMAC_SHA256_CTX *); + +/** + * HMAC_SHA256_Buf(K, Klen, in, len, digest): + * Compute the HMAC-SHA256 of ${len} bytes from ${in} using the key ${K} of + * length ${Klen}, and write the result to ${digest}. + */ +void HMAC_SHA256_Buf(const void *, size_t, const void *, size_t, uint8_t[32]); + +/** + * PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, c, buf, dkLen): + * Compute PBKDF2(passwd, salt, c, dkLen) using HMAC-SHA256 as the PRF, and + * write the output to buf. The value dkLen must be at most 32 * (2^32 - 1). + */ +void PBKDF2_SHA256(const uint8_t *, size_t, const uint8_t *, size_t, + uint64_t, uint8_t *, size_t); + +#ifdef __cplusplus +} +#endif + +#endif /* !_SHA256_H_ */ diff --git a/deps/yescrypt-master/sysendian.h b/deps/yescrypt-master/sysendian.h new file mode 100644 index 000000000..c51730d1c --- /dev/null +++ b/deps/yescrypt-master/sysendian.h @@ -0,0 +1,122 @@ +/*- + * Copyright 2007-2014 Colin Percival + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _SYSENDIAN_H_ +#define _SYSENDIAN_H_ + +#include + +/* Avoid namespace collisions with BSD . */ +#define be32dec libcperciva_be32dec +#define be32enc libcperciva_be32enc +#define be64enc libcperciva_be64enc +#define le32dec libcperciva_le32dec +#define le32enc libcperciva_le32enc +#define le64dec libcperciva_le64dec +#define le64enc libcperciva_le64enc + +static inline uint32_t +be32dec(const void * pp) +{ + const uint8_t * p = (uint8_t const *)pp; + + return ((uint32_t)(p[3]) + ((uint32_t)(p[2]) << 8) + + ((uint32_t)(p[1]) << 16) + ((uint32_t)(p[0]) << 24)); +} + +static inline void +be32enc(void * pp, uint32_t x) +{ + uint8_t * p = (uint8_t *)pp; + + p[3] = x & 0xff; + p[2] = (x >> 8) & 0xff; + p[1] = (x >> 16) & 0xff; + p[0] = (x >> 24) & 0xff; +} + +static inline void +be64enc(void * pp, uint64_t x) +{ + uint8_t * p = (uint8_t *)pp; + + p[7] = x & 0xff; + p[6] = (x >> 8) & 0xff; + p[5] = (x >> 16) & 0xff; + p[4] = (x >> 24) & 0xff; + p[3] = (x >> 32) & 0xff; + p[2] = (x >> 40) & 0xff; + p[1] = (x >> 48) & 0xff; + p[0] = (x >> 56) & 0xff; +} + +static inline uint32_t +le32dec(const void * pp) +{ + const uint8_t * p = (uint8_t const *)pp; + + return ((uint32_t)(p[0]) + ((uint32_t)(p[1]) << 8) + + ((uint32_t)(p[2]) << 16) + ((uint32_t)(p[3]) << 24)); +} + +static inline void +le32enc(void * pp, uint32_t x) +{ + uint8_t * p = (uint8_t *)pp; + + p[0] = x & 0xff; + p[1] = (x >> 8) & 0xff; + p[2] = (x >> 16) & 0xff; + p[3] = (x >> 24) & 0xff; +} + +static inline uint64_t +le64dec(const void * pp) +{ + const uint8_t * p = (uint8_t const *)pp; + + return ((uint64_t)(p[0]) + ((uint64_t)(p[1]) << 8) + + ((uint64_t)(p[2]) << 16) + ((uint64_t)(p[3]) << 24) + + ((uint64_t)(p[4]) << 32) + ((uint64_t)(p[5]) << 40) + + ((uint64_t)(p[6]) << 48) + ((uint64_t)(p[7]) << 56)); +} + +static inline void +le64enc(void * pp, uint64_t x) +{ + uint8_t * p = (uint8_t *)pp; + + p[0] = x & 0xff; + p[1] = (x >> 8) & 0xff; + p[2] = (x >> 16) & 0xff; + p[3] = (x >> 24) & 0xff; + p[4] = (x >> 32) & 0xff; + p[5] = (x >> 40) & 0xff; + p[6] = (x >> 48) & 0xff; + p[7] = (x >> 56) & 0xff; +} + +#endif /* !_SYSENDIAN_H_ */ diff --git a/deps/yescrypt-master/tests.c b/deps/yescrypt-master/tests.c new file mode 100644 index 000000000..ed342449d --- /dev/null +++ b/deps/yescrypt-master/tests.c @@ -0,0 +1,418 @@ +/*- + * Copyright 2013-2018 Alexander Peslyak + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include + +#define YESCRYPT_FLAGS YESCRYPT_DEFAULTS +#if 1 +#define YESCRYPT_P 11 +#define YESCRYPT_PROM 8 +#else +#define YESCRYPT_P 1 +#define YESCRYPT_PROM 1 +#endif + +#undef TEST_PBKDF2_SHA256 +#define TEST_SCRYPT +#define TEST_YESCRYPT_KDF +#define TEST_YESCRYPT_ENCODING +#define TEST_ROM +#define TEST_ROM_PREALLOC + +#ifdef TEST_ROM_PREALLOC +#include /* for malloc() */ +#endif + +#ifdef TEST_PBKDF2_SHA256 +#include + +#include "sha256.h" + +static void print_PBKDF2_SHA256_raw(const char *passwd, size_t passwdlen, + const char *salt, size_t saltlen, uint64_t c, size_t dkLen) +{ + uint8_t dk[64]; + size_t i; + + assert(dkLen <= sizeof(dk)); + + /* XXX This prints the strings truncated at first NUL */ + printf("PBKDF2_SHA256(\"%s\", \"%s\", %llu, %llu) =", + passwd, salt, (unsigned long long)c, (unsigned long long)dkLen); + + PBKDF2_SHA256((const uint8_t *) passwd, passwdlen, + (const uint8_t *) salt, saltlen, c, dk, dkLen); + + for (i = 0; i < dkLen; i++) + printf(" %02x", dk[i]); + puts(""); +} + +static void print_PBKDF2_SHA256(const char *passwd, + const char *salt, uint64_t c, size_t dkLen) +{ + print_PBKDF2_SHA256_raw(passwd, strlen(passwd), salt, strlen(salt), c, + dkLen); +} +#endif + +#if defined(TEST_SCRYPT) || defined(TEST_YESCRYPT_ENCODING) +#include "yescrypt.h" +#endif + +#ifdef TEST_SCRYPT +static void print_scrypt(const char *passwd, const char *salt, + uint64_t N, uint32_t r, uint32_t p) +{ + uint8_t dk[64]; + size_t i; + + printf("scrypt(\"%s\", \"%s\", %llu, %u, %u) =", + passwd, salt, (unsigned long long)N, r, p); + + if (crypto_scrypt((const uint8_t *) passwd, strlen(passwd), + (const uint8_t *) salt, strlen(salt), N, r, p, dk, sizeof(dk))) { + puts(" FAILED"); + return; + } + + for (i = 0; i < sizeof(dk); i++) + printf(" %02x", dk[i]); + puts(""); +} +#endif + +#ifdef TEST_YESCRYPT_KDF +static void print_yescrypt(const char *passwd, const char *salt, + yescrypt_flags_t flags, + uint64_t N, uint32_t r, uint32_t p, uint32_t t, uint32_t g, + uint32_t dklen) +{ + yescrypt_local_t local; + yescrypt_params_t params = {flags, N, r, p, t, g, 0}; + uint8_t dk[64]; + uint32_t i; + +#if 1 + /* Don't test hash upgrades */ + if (g) + return; +#endif + + if (dklen > sizeof(dk) || yescrypt_init_local(&local)) { + puts("FAILED"); + return; + } + + printf("yescrypt(\"%s\", \"%s\", %u, %llu, %u, %u, %u, %u) =", + passwd, salt, flags, (unsigned long long)N, r, p, t, g); + + if (yescrypt_kdf(NULL, &local, + (const uint8_t *) passwd, strlen(passwd), + (const uint8_t *) salt, strlen(salt), ¶ms, dk, dklen)) { + yescrypt_free_local(&local); + puts(" FAILED"); + return; + } + + yescrypt_free_local(&local); + + for (i = 0; i < dklen; i++) + printf(" %02x", dk[i]); + puts(""); +} +#endif + +int 
main(void) +{ + int i; + + setvbuf(stdout, NULL, _IOLBF, 0); + +#ifdef TEST_PBKDF2_SHA256 + print_PBKDF2_SHA256("password", "salt", 1, 20); + print_PBKDF2_SHA256("password", "salt", 2, 20); + print_PBKDF2_SHA256("password", "salt", 4096, 20); + print_PBKDF2_SHA256("password", "salt", 16777216, 20); + print_PBKDF2_SHA256("passwordPASSWORDpassword", + "saltSALTsaltSALTsaltSALTsaltSALTsalt", 4096, 25); + print_PBKDF2_SHA256_raw("pass\0word", 9, "sa\0lt", 5, 4096, 16); +#if 0 + print_PBKDF2_SHA256("password", "salt", 1, 32); + print_PBKDF2_SHA256("password", "salt", 2, 32); + print_PBKDF2_SHA256("password", "salt", 4096, 32); + print_PBKDF2_SHA256("password", "salt", 16777216, 32); + print_PBKDF2_SHA256("passwordPASSWORDpassword", + "saltSALTsaltSALTsaltSALTsaltSALTsalt", 4096, 40); + print_PBKDF2_SHA256("password", "salt", 4096, 16); + print_PBKDF2_SHA256("password", "salt", 1, 20); + print_PBKDF2_SHA256("password", "salt", 2, 20); + print_PBKDF2_SHA256("password", "salt", 4096, 20); + print_PBKDF2_SHA256("password", "salt", 16777216, 20); + print_PBKDF2_SHA256("password", "salt", 4096, 25); + print_PBKDF2_SHA256("password", "salt", 4096, 16); +#endif +#endif + +#ifdef TEST_SCRYPT + print_scrypt("", "", 16, 1, 1); + print_scrypt("password", "NaCl", 1024, 8, 16); + print_scrypt("pleaseletmein", "SodiumChloride", 16384, 8, 1); + print_scrypt("pleaseletmein", "SodiumChloride", 1048576, 8, 1); +#endif + +#ifdef TEST_YESCRYPT_KDF + print_yescrypt("", "", 0, 16, 1, 1, 0, 0, 64); + print_yescrypt("", "", 0, 16, 1, 1, 0, 0, 8); + print_yescrypt("", "", 0, 4, 1, 1, 0, 0, 64); + print_yescrypt("", "", YESCRYPT_WORM, 4, 1, 1, 0, 0, 64); + print_yescrypt("", "", YESCRYPT_WORM, 4, 1, 1, 0, 0, 8); + print_yescrypt("", "", YESCRYPT_WORM, 4, 1, 1, 1, 0, 64); + print_yescrypt("", "", YESCRYPT_WORM, 4, 1, 1, 2, 0, 64); + print_yescrypt("", "", YESCRYPT_WORM, 4, 1, 1, 3, 0, 64); + print_yescrypt("", "", YESCRYPT_WORM, 4, 1, 1, 3, 0, 33); + print_yescrypt("", "", YESCRYPT_WORM, 4, 1, 1, 3, 0, 32); + print_yescrypt("", "", YESCRYPT_WORM, 4, 1, 1, 3, 0, 31); + print_yescrypt("", "", YESCRYPT_WORM, 4, 1, 1, 3, 0, 1); + print_yescrypt("", "", YESCRYPT_DEFAULTS, 4, 1, 1, 0, 0, 64); + print_yescrypt("", "", YESCRYPT_DEFAULTS, 4, 1, 1, 0, 0, 4); + print_yescrypt("", "", YESCRYPT_DEFAULTS, 4, 1, 1, 1, 0, 64); + print_yescrypt("", "", YESCRYPT_DEFAULTS, 4, 1, 1, 1, 0, 33); + print_yescrypt("", "", YESCRYPT_DEFAULTS, 4, 1, 1, 1, 0, 32); + print_yescrypt("", "", YESCRYPT_DEFAULTS, 4, 1, 1, 1, 0, 31); + print_yescrypt("", "", YESCRYPT_DEFAULTS, 4, 1, 1, 1, 0, 1); + for (i = 0; i <= 6; i++) + print_yescrypt("p", "s", YESCRYPT_DEFAULTS, 16, 8, 1, i + 10, i, 40); + for (i = 0; i <= 6; i++) + print_yescrypt("p", "s", YESCRYPT_WORM, 16, 8, 1, i + 10, i, 40); + for (i = 0; i <= 6; i++) + print_yescrypt("p", "s", YESCRYPT_DEFAULTS, 16, 8, 1, 0, i, 40); + for (i = 0; i <= 6; i++) + print_yescrypt("p", "s", YESCRYPT_WORM, 16, 8, 1, 0, i, 40); + for (i = 0; i <= 2; i++) + print_yescrypt("p", "s", YESCRYPT_DEFAULTS, 16, 8, 1, 0, i, 32); + for (i = 0; i <= 2; i++) + print_yescrypt("p", "s", YESCRYPT_DEFAULTS, 16, 8, 1, 0, i, 8); +#endif + +#ifdef TEST_YESCRYPT_ENCODING + { + uint8_t *setting; + yescrypt_binary_t key = {.uc={ + 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, + 17,18,19,20,21,22,23,24,25,26,27,28,255,128,64,32}}; + + for (i = 0; i < 18; i++) { + uint32_t N_log2 = (i < 14) ? (16 - i) : 2; + uint32_t r = (i < 8) ? (8 - i) : (1 + (i & 1)); + uint32_t p = (i & 1) ? 
1 : YESCRYPT_P; + yescrypt_flags_t flags = YESCRYPT_DEFAULTS; + if ((int)p - (i / 2) > 1) + p -= i / 2; + if (i & 2) { + flags = YESCRYPT_WORM; + } else { + while ((1ULL << N_log2) / p <= 3) + N_log2++; + } + yescrypt_params_t params = + {flags, (uint64_t)1 << N_log2, r, p, 0, 0, 0}; + setting = yescrypt_encode_params(¶ms, + (const uint8_t *)"WZaPV7LSUEKMo34.", 16 - (i & 15)); + if (i == 0) + printf("'%s'\n", (char *)setting); + if (!setting) + printf("%d yescrypt_encode_params() = NULL\n", i); + if (setting) { + uint8_t *hash = yescrypt( + (const uint8_t *)"pleaseletmein", setting); + printf("Plaintext: '%s'\n", (char *)hash); + hash = (uint8_t *)strdup((char *)hash); + if (!hash || strcmp( + (char *)hash, (char *)yescrypt( + (const uint8_t *)"pleaseletmein", hash))) + puts("Validation FAILED"); + uint8_t *orig = (uint8_t *)strdup((char *)hash); + if (!yescrypt_reencrypt(hash, NULL, &key)) + printf("%d yescrypt_reencrypt() = NULL\n", i); + printf("Encrypted: '%s'\n", (char *)hash); + yescrypt_local_t local; + if (yescrypt_init_local(&local)) { + puts("yescrypt_init_local() FAILED"); + return 1; + } + uint8_t buf[128]; + if (strcmp((char *)hash, (char *)yescrypt_r( + NULL, &local, + (const uint8_t *)"pleaseletmein", 13, + hash, &key, buf, sizeof(buf)))) + puts("Validation of encrypted FAILED"); + if (!strcmp((char *)hash, (char *)yescrypt_r( + NULL, &local, + (const uint8_t *)"pleaseletmein", 13, + hash, NULL, buf, sizeof(buf)))) + puts("Validation of encrypted " + "unexpectedly succeeded"); + if (!strcmp((char *)orig, (char *)yescrypt_r( + NULL, &local, + (const uint8_t *)"pleaseletmein", 13, + orig, &key, buf, sizeof(buf)))) + puts("Validation of unencrypted " + "unexpectedly succeeded"); + yescrypt_free_local(&local); + if (!yescrypt_reencrypt(hash, &key, NULL)) + printf("%d yescrypt_reencrypt() = NULL\n", i); + if (strcmp((char *)hash, (char *)orig)) + puts("Decryption FAILED"); + free(orig); + free(hash); + } + } + + printf("'%s'\n", (char *)yescrypt( + (const uint8_t *)"pleaseletmein", + (const uint8_t *)"$7$C6..../....SodiumChloride")); + + printf("'%s'\n", (char *)yescrypt( + (const uint8_t *)"pleaseletmein", + (const uint8_t *)"$7$06..../....SodiumChloride")); + +#ifdef TEST_ROM + uint64_t rom_bytes = 256 * (1024ULL*1024); + uint64_t ram_bytes = 2 * (1024ULL*1024); + uint32_t r; + uint64_t NROM_log2, N_log2; + yescrypt_shared_t shared; + yescrypt_local_t local; + + NROM_log2 = 0; + while (((rom_bytes >> NROM_log2) & 0xff) == 0) + NROM_log2++; + r = rom_bytes >> (7 + NROM_log2); + while (r < 5 && NROM_log2 > 0) { + r <<= 1; + NROM_log2--; + } + rom_bytes = (uint64_t)r << (7 + NROM_log2); + + N_log2 = 0; + while (((uint64_t)r << (7 + N_log2)) < ram_bytes) + N_log2++; + ram_bytes = (uint64_t)r << (7 + N_log2); + + printf("r=%u N=2^%u NROM=2^%u\n", r, + (unsigned int)N_log2, (unsigned int)NROM_log2); + + printf("Will use %.2f KiB ROM\n", rom_bytes / 1024.0); + printf(" %.2f KiB RAM\n", ram_bytes / 1024.0); + + printf("Initializing ROM ..."); + fflush(stdout); + yescrypt_params_t rom_params = { YESCRYPT_DEFAULTS, + 0, r, YESCRYPT_PROM, 0, 0, (uint64_t)1 << NROM_log2 }; + if (yescrypt_init_shared(&shared, + (const uint8_t *)"local param", 12, &rom_params)) { + puts(" FAILED"); + return 1; + } + yescrypt_binary_t *digest = yescrypt_digest_shared(&shared); + printf(" DONE (%02x%02x%02x%02x)\n", + digest->uc[0], digest->uc[1], digest->uc[2], digest->uc[3]); + + if (yescrypt_init_local(&local)) { + puts("FAILED"); + return 1; + } + + yescrypt_params_t params = rom_params; + params.flags = 
YESCRYPT_FLAGS; + params.N = (uint64_t)1 << N_log2; + params.p = YESCRYPT_P; + setting = yescrypt_encode_params(¶ms, + (const uint8_t *)"WZaPV7LSUEKMo34.", 16); + printf("'%s'\n", (char *)setting); + + uint8_t hash[128]; + + printf("'%s'\n", (char *)yescrypt_r(&shared, &local, + (const uint8_t *)"pleaseletmein", 13, setting, NULL, + hash, sizeof(hash))); + +#ifdef TEST_ROM_PREALLOC + yescrypt_free_shared(&shared); + + shared.aligned_size = ((uint64_t)1 << NROM_log2) * 128 * r; + shared.base_size = shared.aligned_size + 63; + uint8_t *where = shared.base = malloc(shared.base_size); + where += 63; + where = shared.aligned = where - ((uintptr_t)where & 63); + + printf("Initializing ROM in preallocated memory ..."); + fflush(stdout); + rom_params.flags |= YESCRYPT_SHARED_PREALLOCATED; + if (yescrypt_init_shared(&shared, + (const uint8_t *)"local param", 12, &rom_params)) { + puts(" FAILED"); + return 1; + } + digest = yescrypt_digest_shared(&shared); + printf(" DONE (%02x%02x%02x%02x)\n", + digest->uc[0], digest->uc[1], digest->uc[2], digest->uc[3]); + + if ((void *)where != shared.aligned) + puts("YESCRYPT_SHARED_PREALLOCATED failed"); +#endif + + printf("'%s'\n", (char *)yescrypt_r(&shared, &local, + (const uint8_t *)"pleaseletmein", 13, setting, NULL, + hash, sizeof(hash))); + + printf("'%s'\n", (char *)yescrypt_r(&shared, &local, + (const uint8_t *)"pleaseletmeIn", 13, setting, NULL, + hash, sizeof(hash))); + + setting = yescrypt_encode_params(¶ms, + (const uint8_t *)"WZaPV7LSUEIMo34.", 16); + + printf("'%s'\n", (char *)yescrypt_r(&shared, &local, + (const uint8_t *)"pleaseletmein", 13, setting, NULL, + hash, sizeof(hash))); + + printf("'%s'\n", (char *)yescrypt_r(&shared, &local, + (const uint8_t *)"pleaseletmeIn", 13, setting, NULL, + hash, sizeof(hash))); + + params.N = 4; + params.NROM *= params.r; + params.r = 1; + params.p = 1; + setting = yescrypt_encode_params(¶ms, + (const uint8_t *)"WZaPV7LSUEKMo34.", 16); + + printf("'%s'\n", (char *)yescrypt_r(&shared, &local, + (const uint8_t *)"pleaseletmein", 13, setting, NULL, + hash, sizeof(hash))); +#endif + } +#endif + + return 0; +} diff --git a/deps/yescrypt-master/userom.c b/deps/yescrypt-master/userom.c new file mode 100644 index 000000000..d8d89d178 --- /dev/null +++ b/deps/yescrypt-master/userom.c @@ -0,0 +1,405 @@ +/*- + * Copyright 2013-2018 Alexander Peslyak + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#define YESCRYPT_FLAGS YESCRYPT_DEFAULTS +//#define YESCRYPT_FLAGS YESCRYPT_WORM +//#define YESCRYPT_FLAGS 0 + +#define ROM_SHM_KEY 0x7965730a + +//#define DISABLE_ROM +//#define DUMP_LOCAL + +#include +#include /* for atoi() */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "yescrypt.h" + +#ifdef _OPENMP +#include + +#define NSAVE 1000 + +static uint64_t time_us(void) +{ + struct timespec t; +#ifdef CLOCK_MONOTONIC_RAW + if (clock_gettime(CLOCK_MONOTONIC_RAW, &t)) + return 0; +#else + if (clock_gettime(CLOCK_MONOTONIC, &t)) + return 0; +#endif + return 1 + (uint64_t)t.tv_sec * 1000000 + t.tv_nsec / 1000; +} +#endif + +int main(int argc, const char * const *argv) +{ +#if 0 + uint64_t rom_bytes = 112 * (1024ULL*1024*1024); + uint64_t ram_bytes = 1 * (1024ULL*1024); +#else + uint64_t rom_bytes = 3 * (1024ULL*1024*1024); + uint64_t ram_bytes = 2 * (1024ULL*1024); +#endif + uint32_t r, min_r; + uint64_t NROM_log2, N_log2; + yescrypt_shared_t shared_s; + yescrypt_shared_t *shared = NULL; +#ifndef DISABLE_ROM + int shmid; +#endif + const char *rom_filename = NULL; + int rom_fd; + yescrypt_binary_t key = {.uc={ + 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, + 17,18,19,20,21,22,23,24,25,26,27,28,255,128,64,32}}; + + if (argc > 1) + rom_bytes = atoi(argv[1]) * (1024ULL*1024*1024); + if (argc > 2) + ram_bytes = atoi(argv[2]) * (1024ULL*1024); + if (argc > 3 && rom_bytes) + rom_filename = argv[3]; + + r = 16; + min_r = 9; + if (rom_filename) + min_r = 8 * 64; + + NROM_log2 = 0; + if (rom_bytes) { + while (((rom_bytes >> NROM_log2) & 0xff) == 0) + NROM_log2++; + r = rom_bytes >> (7 + NROM_log2); + while (r < min_r && NROM_log2 > 0) { + r <<= 1; + NROM_log2--; + } + rom_bytes = (uint64_t)r << (7 + NROM_log2); + } + + N_log2 = 0; + while (((uint64_t)r << (7 + N_log2)) < ram_bytes) + N_log2++; + ram_bytes = (uint64_t)r << (7 + N_log2); + + printf("r=%u N=2^%u NROM=2^%u\n", r, + (unsigned int)N_log2, (unsigned int)NROM_log2); + +#ifdef DISABLE_ROM + rom_bytes = 0; +#endif + + printf("Will use %.2f KiB ROM\n", rom_bytes / 1024.0); + printf(" %.2f KiB RAM\n", ram_bytes / 1024.0); + +#ifndef DISABLE_ROM + if (rom_filename) { + rom_fd = open(rom_filename, O_RDONLY); + if (rom_fd < 0) { + perror("open"); + return 1; + } + + int flags = +#ifdef MAP_NOCORE + MAP_NOCORE | +#endif +#ifdef MAP_HUGETLB + MAP_HUGETLB | +#endif + MAP_SHARED; + void *p = mmap(NULL, rom_bytes, PROT_READ, flags, rom_fd, 0); +#ifdef MAP_HUGETLB + if (p == MAP_FAILED) + p = mmap(NULL, rom_bytes, PROT_READ, + flags & ~MAP_HUGETLB, rom_fd, 0); +#endif + if (p == MAP_FAILED) { + perror("mmap"); + close(rom_fd); + return 1; + } + close(rom_fd); + + shared = &shared_s; + shared->base = shared->aligned = p; + shared->aligned_size = rom_bytes; + } else if (rom_bytes) { + shared = &shared_s; + shared->aligned_size = rom_bytes; + shmid = shmget(ROM_SHM_KEY, shared->aligned_size, 0); + if (shmid == -1) { + perror("shmget"); + return 1; + } + + shared->base = shared->aligned = shmat(shmid, NULL, SHM_RDONLY); + if (shared->base == (void *)-1) { + perror("shmat"); + return 1; + } + } +#endif + + { + yescrypt_local_t local; + const uint8_t *setting; + + if (yescrypt_init_local(&local)) { + puts("yescrypt_init_local() FAILED"); + return 1; + } + + yescrypt_params_t params = { + .flags = YESCRYPT_FLAGS, + .N = (uint64_t)1 << N_log2, + .NROM = NROM_log2 ? 
((uint64_t)1 << NROM_log2) : 0, + .r = r, + .p = 1 }; + setting = yescrypt_encode_params(¶ms, + (const uint8_t *)"WZaPV7LSUEKMo34.", 16); + + { + uint8_t hash[128]; + if (!yescrypt_r(shared, &local, + (const uint8_t *)"pleaseletmein", 13, setting, NULL, + hash, sizeof(hash))) { + puts("yescrypt_r() FAILED"); + return 1; + } + printf("Plaintext: '%s'\n", (char *)hash); + if (!yescrypt_r(shared, &local, + (const uint8_t *)"pleaseletmein", 13, setting, &key, + hash, sizeof(hash))) { + puts("yescrypt_r() FAILED"); + return 1; + } + printf("Encrypted: '%s'\n", (char *)hash); + } + +#ifdef DUMP_LOCAL +#if 0 + fwrite(local.aligned, local.aligned_size, 1, stderr); +#else + /* Skip B, dump only V */ + if (local.aligned_size >= ram_bytes + 128 * r) + fwrite((char *)local.aligned + 128 * r, ram_bytes, + 1, stderr); +#endif +#endif + + puts("Benchmarking 1 thread ..."); + + clock_t clk_tck = sysconf(_SC_CLK_TCK); + struct tms start_tms, end_tms; + clock_t start = times(&start_tms), end; + unsigned int i, n; + unsigned long long count; +#ifdef _OPENMP + char save[NSAVE][128]; + unsigned int nsave = 0; +#endif + unsigned int seed = start * 1812433253U; + + n = 1; + count = 0; + do { + for (i = 0; i < n; i++) { + unsigned int j = count + i; + char p[32]; + uint8_t hash[128]; + snprintf(p, sizeof(p), "%u", seed + j); +#ifdef _OPENMP + const uint8_t *h = +#endif + yescrypt_r(shared, &local, + (const uint8_t *)p, strlen(p), + setting, &key, hash, sizeof(hash)); +#ifdef _OPENMP + if (j < NSAVE) { + save[j][0] = 0; + strncat(save[j], (char *)h, + sizeof(save[j]) - 1); + nsave = j; + } +#endif + } + count += n; + + end = times(&end_tms); + n <<= 1; + } while (end - start < clk_tck * 2); + + clock_t start_v = start_tms.tms_utime + start_tms.tms_stime + + start_tms.tms_cutime + start_tms.tms_cstime; + clock_t end_v = end_tms.tms_utime + end_tms.tms_stime + + end_tms.tms_cutime + end_tms.tms_cstime; + + printf("%llu c/s real, %llu c/s virtual " + "(%llu hashes in %.2f seconds)\n", + count * clk_tck / (end - start), + count * clk_tck / (end_v - start_v), + count, (double)(end - start) / clk_tck); + +#ifdef _OPENMP + unsigned int nt = omp_get_max_threads(); + + printf("Benchmarking %u thread%s ...\n", + nt, nt == 1 ? 
"" : "s"); + + typedef struct { + yescrypt_local_t local; + uint64_t min, max, total; + } thread_data_s; + union { + thread_data_s s; + uint8_t cachelines[2][64]; /* avoid false sharing */ + } thread_data[nt]; /* tricky to align this when on stack */ + + unsigned int t; + for (t = 0; t < nt; t++) { + thread_data_s *td = &thread_data[t].s; + if (yescrypt_init_local(&td->local)) { + puts("yescrypt_init_local() FAILED"); + return 1; + } + td->min = ~(uint64_t)0; td->max = 0; td->total = 0; + } + + unsigned long long count1 = count, count_restart = 0; + + if (!geteuid()) { + puts("Running as root, so trying to set SCHED_RR"); +#pragma omp parallel + { + struct sched_param param = { .sched_priority = 1 }; + if (sched_setscheduler(getpid(), SCHED_RR, ¶m)) + perror("sched_setscheduler"); + } + } + + start = times(&start_tms); + + n = count * omp_get_max_threads(); + count = 0; + do { +#pragma omp parallel for default(none) private(i) shared(n, shared, thread_data, setting, seed, count, save, nsave, key) + for (i = 0; i < n; i++) { + unsigned int j = count + i; + char p[32]; + uint8_t hash[128]; + snprintf(p, sizeof(p), "%u", seed + j); + thread_data_s *td = &thread_data[omp_get_thread_num()].s; + uint64_t start1 = time_us(); +#if 1 + const char *h = (const char *)yescrypt_r( + shared, &td->local, + (const uint8_t *)p, strlen(p), + setting, &key, hash, sizeof(hash)); +#else + yescrypt_local_t local; + yescrypt_init_local(&local); + const char *h = (const char *)yescrypt_r( + shared, &local, + (const uint8_t *)p, strlen(p), + setting, &key, hash, sizeof(hash)); + yescrypt_free_local(&local); +#endif + uint64_t end1 = time_us(); + if (end1 < start1) + end1 = start1; + uint64_t diff1 = end1 - start1; + td->total += diff1; + if (diff1 < td->min) + td->min = diff1; + if (diff1 > td->max) + td->max = diff1; + if (j < nsave && strcmp(save[j], h)) { +#pragma omp critical + printf("Mismatch at %u, %s != %s\n", + j, save[j], h); + } + } + + count += n; + if ((count - n) < count1 && count >= count1) { +/* Disregard our repeat of single thread's results (could be partially cached + * by same core, but OTOH other cores not yet warmed up to full clock rate). 
*/ + start = times(&start_tms); + count_restart = count; + for (t = 0; t < nt; t++) { + thread_data_s *td = &thread_data[t].s; + td->min = ~(uint64_t)0; td->max = 0; td->total = 0; + } + } else { + n <<= 1; + } + + end = times(&end_tms); + } while (end - start < clk_tck); + + if (!count_restart) + puts("Didn't reach single-thread's hash count"); + count -= count_restart; + + start_v = start_tms.tms_utime + start_tms.tms_stime + + start_tms.tms_cutime + start_tms.tms_cstime; + end_v = end_tms.tms_utime + end_tms.tms_stime + + end_tms.tms_cutime + end_tms.tms_cstime; + + printf("%llu c/s real, %llu c/s virtual " + "(%llu hashes in %.2f seconds)\n", + count * clk_tck / (end - start), + count * clk_tck / (end_v - start_v), + count, (double)(end - start) / clk_tck); + + uint64_t min = ~(uint64_t)0, max = 0, total = 0; + for (t = 0; t < nt; t++) { + thread_data_s *td = &thread_data[t].s; + total += td->total; + if (td->min < min) + min = td->min; + if (td->max > max) + max = td->max; + } + printf("min %.3f ms, avg %.3f ms, max %.3f ms\n", + min / 1000.0, total / 1000.0 / count, max / 1000.0); +#endif + } + + if (rom_filename && munmap(shared->base, rom_bytes)) { + perror("munmap"); + return 1; + } + + return 0; +} diff --git a/deps/yescrypt-master/yescrypt-common.c b/deps/yescrypt-master/yescrypt-common.c new file mode 100644 index 000000000..ce32b3e52 --- /dev/null +++ b/deps/yescrypt-master/yescrypt-common.c @@ -0,0 +1,703 @@ +/*- + * Copyright 2013-2018 Alexander Peslyak + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +#include + +#include "insecure_memzero.h" +#include "sha256.h" + +#define YESCRYPT_INTERNAL +#include "yescrypt.h" + +#define BYTES2CHARS(bytes) ((((bytes) * 8) + 5) / 6) + +#define HASH_SIZE sizeof(yescrypt_binary_t) /* bytes */ +#define HASH_LEN BYTES2CHARS(HASH_SIZE) /* base-64 chars */ + +/* + * "$y$", up to 8 params of up to 6 chars each, '$', salt + * Alternatively, but that's smaller: + * "$7$", 3 params encoded as 1+5+5 chars, salt + */ +#define PREFIX_LEN (3 + 8 * 6 + 1 + BYTES2CHARS(32)) + +static const char * const itoa64 = + "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + +static const uint8_t atoi64_partial[77] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 64, 64, 64, 64, 64, 64, 64, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, + 64, 64, 64, 64, 64, 64, + 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, + 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 +}; + +static uint8_t *encode64_uint32(uint8_t *dst, size_t dstlen, + uint32_t src, uint32_t min) +{ + uint32_t start = 0, end = 47, chars = 1, bits = 0; + + if (src < min) + return NULL; + src -= min; + + do { + uint32_t count = (end + 1 - start) << bits; + if (src < count) + break; + if (start >= 63) + return NULL; + start = end + 1; + end = start + (62 - end) / 2; + src -= count; + chars++; + bits += 6; + } while (1); + + if (dstlen <= chars) /* require room for a NUL terminator */ + return NULL; + + *dst++ = itoa64[start + (src >> bits)]; + + while (--chars) { + bits -= 6; + *dst++ = itoa64[(src >> bits) & 0x3f]; + } + + *dst = 0; /* NUL terminate just in case */ + + return dst; +} + +static inline uint32_t atoi64(uint8_t src) +{ + if (src >= '.' && src <= 'z') + return atoi64_partial[src - '.']; + + return 64; +} + +static const uint8_t *decode64_uint32(uint32_t *dst, + const uint8_t *src, uint32_t min) +{ + uint32_t start = 0, end = 47, chars = 1, bits = 0; + uint32_t c; + + c = atoi64(*src++); + if (c > 63) + goto fail; + + *dst = min; + while (c > end) { + *dst += (end + 1 - start) << bits; + start = end + 1; + end = start + (62 - end) / 2; + chars++; + bits += 6; + } + + *dst += (c - start) << bits; + + while (--chars) { + c = atoi64(*src++); + if (c > 63) + goto fail; + bits -= 6; + *dst += c << bits; + } + + return src; + +fail: + *dst = 0; + return NULL; +} + +static uint8_t *encode64_uint32_fixed(uint8_t *dst, size_t dstlen, + uint32_t src, uint32_t srcbits) +{ + uint32_t bits; + + for (bits = 0; bits < srcbits; bits += 6) { + if (dstlen < 2) + return NULL; + *dst++ = itoa64[src & 0x3f]; + dstlen--; + src >>= 6; + } + + if (src || dstlen < 1) + return NULL; + + *dst = 0; /* NUL terminate just in case */ + + return dst; +} + +static uint8_t *encode64(uint8_t *dst, size_t dstlen, + const uint8_t *src, size_t srclen) +{ + size_t i; + + for (i = 0; i < srclen; ) { + uint8_t *dnext; + uint32_t value = 0, bits = 0; + do { + value |= (uint32_t)src[i++] << bits; + bits += 8; + } while (bits < 24 && i < srclen); + dnext = encode64_uint32_fixed(dst, dstlen, value, bits); + if (!dnext) + return NULL; + dstlen -= dnext - dst; + dst = dnext; + } + + if (dstlen < 1) + return NULL; + + *dst = 0; /* NUL terminate just in case */ + + return dst; +} + +static const uint8_t *decode64_uint32_fixed(uint32_t *dst, uint32_t dstbits, + const uint8_t *src) +{ + uint32_t bits; + + *dst = 0; + for (bits = 0; bits < dstbits; bits += 6) { + uint32_t c = atoi64(*src++); + if (c > 63) { + *dst = 0; + return NULL; + } + *dst |= c << bits; + 
} + + return src; +} + +static const uint8_t *decode64(uint8_t *dst, size_t *dstlen, + const uint8_t *src, size_t srclen) +{ + size_t dstpos = 0; + + while (dstpos <= *dstlen && srclen) { + uint32_t value = 0, bits = 0; + while (srclen--) { + uint32_t c = atoi64(*src); + if (c > 63) { + srclen = 0; + break; + } + src++; + value |= c << bits; + bits += 6; + if (bits >= 24) + break; + } + if (!bits) + break; + if (bits < 12) /* must have at least one full byte */ + goto fail; + while (dstpos++ < *dstlen) { + *dst++ = value; + value >>= 8; + bits -= 8; + if (bits < 8) { /* 2 or 4 */ + if (value) /* must be 0 */ + goto fail; + bits = 0; + break; + } + } + if (bits) + goto fail; + } + + if (!srclen && dstpos <= *dstlen) { + *dstlen = dstpos; + return src; + } + +fail: + *dstlen = 0; + return NULL; +} + +typedef enum { ENC = 1, DEC = -1 } encrypt_dir_t; + +static void memxor(unsigned char *dst, unsigned char *src, size_t size) +{ + while (size--) + *dst++ ^= *src++; +} + +static void encrypt(unsigned char *data, size_t datalen, + const yescrypt_binary_t *key, encrypt_dir_t dir) +{ + SHA256_CTX ctx; + unsigned char f[32 + 4]; + size_t halflen, which; + unsigned char mask, round, target; + + if (!datalen) + return; + if (datalen > 64) + datalen = 64; + + halflen = datalen >> 1; + + which = 0; /* offset to half we are working on (0 or halflen) */ + mask = 0x0f; /* current half's extra nibble mask if datalen is odd */ + + round = 0; + target = 5; /* 6 rounds due to Jacques Patarin's CRYPTO 2004 paper */ + + if (dir == DEC) { + which = halflen; /* even round count, so swap the halves */ + mask ^= 0xff; + + round = target; + target = 0; + } + + f[32] = 0; + f[33] = sizeof(*key); + f[34] = datalen; + + do { + SHA256_Init(&ctx); + f[35] = round; + SHA256_Update(&ctx, &f[32], 4); + SHA256_Update(&ctx, key, sizeof(*key)); + SHA256_Update(&ctx, &data[which], halflen); + if (datalen & 1) { + f[0] = data[datalen - 1] & mask; + SHA256_Update(&ctx, f, 1); + } + SHA256_Final(f, &ctx); + which ^= halflen; + memxor(&data[which], f, halflen); + if (datalen & 1) { + mask ^= 0xff; + data[datalen - 1] ^= f[halflen] & mask; + } + if (round == target) + break; + round += dir; + } while (1); + + /* ctx is presumably zeroized by SHA256_Final() */ + insecure_memzero(f, sizeof(f)); +} + +uint8_t *yescrypt_r(const yescrypt_shared_t *shared, yescrypt_local_t *local, + const uint8_t *passwd, size_t passwdlen, + const uint8_t *setting, + const yescrypt_binary_t *key, + uint8_t *buf, size_t buflen) +{ + unsigned char saltbin[64], hashbin[32]; + const uint8_t *src, *saltstr, *salt; + uint8_t *dst; + size_t need, prefixlen, saltstrlen, saltlen; + yescrypt_params_t params = { .p = 1 }; + + if (setting[0] != '$' || + (setting[1] != '7' && setting[1] != 'y') || + setting[2] != '$') + return NULL; + src = setting + 3; + + if (setting[1] == '7') { + uint32_t N_log2 = atoi64(*src++); + if (N_log2 < 1 || N_log2 > 63) + return NULL; + params.N = (uint64_t)1 << N_log2; + + src = decode64_uint32_fixed(¶ms.r, 30, src); + if (!src) + return NULL; + + src = decode64_uint32_fixed(¶ms.p, 30, src); + if (!src) + return NULL; + + if (key) + return NULL; + } else { + uint32_t flavor, N_log2; + + src = decode64_uint32(&flavor, src, 0); + if (!src) + return NULL; + + if (flavor < YESCRYPT_RW) { + params.flags = flavor; + } else if (flavor <= YESCRYPT_RW + (YESCRYPT_RW_FLAVOR_MASK >> 2)) { + params.flags = YESCRYPT_RW + ((flavor - YESCRYPT_RW) << 2); + } else { + return NULL; + } + + src = decode64_uint32(&N_log2, src, 1); + if (!src || N_log2 > 63) 
+ return NULL; + params.N = (uint64_t)1 << N_log2; + + src = decode64_uint32(¶ms.r, src, 1); + if (!src) + return NULL; + + if (*src != '$') { + uint32_t have; + + src = decode64_uint32(&have, src, 1); + if (!src) + return NULL; + + if (have & 1) { + src = decode64_uint32(¶ms.p, src, 2); + if (!src) + return NULL; + } + + if (have & 2) { + src = decode64_uint32(¶ms.t, src, 1); + if (!src) + return NULL; + } + + if (have & 4) { + src = decode64_uint32(¶ms.g, src, 1); + if (!src) + return NULL; + } + + if (have & 8) { + uint32_t NROM_log2; + src = decode64_uint32(&NROM_log2, src, 1); + if (!src || NROM_log2 > 63) + return NULL; + params.NROM = (uint64_t)1 << NROM_log2; + } + } + + if (*src++ != '$') + return NULL; + } + + prefixlen = src - setting; + + saltstr = src; + src = (uint8_t *)strrchr((char *)saltstr, '$'); + if (src) + saltstrlen = src - saltstr; + else + saltstrlen = strlen((char *)saltstr); + + if (setting[1] == '7') { + salt = saltstr; + saltlen = saltstrlen; + } else { + const uint8_t *saltend; + + saltlen = sizeof(saltbin); + saltend = decode64(saltbin, &saltlen, saltstr, saltstrlen); + + if (!saltend || (size_t)(saltend - saltstr) != saltstrlen) + goto fail; + + salt = saltbin; + + if (key) + encrypt(saltbin, saltlen, key, ENC); + } + + need = prefixlen + saltstrlen + 1 + HASH_LEN + 1; + if (need > buflen || need < saltstrlen) + goto fail; + + if (yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, + ¶ms, hashbin, sizeof(hashbin))) + goto fail; + + if (key) { + insecure_memzero(saltbin, sizeof(saltbin)); + encrypt(hashbin, sizeof(hashbin), key, ENC); + } + + dst = buf; + memcpy(dst, setting, prefixlen + saltstrlen); + dst += prefixlen + saltstrlen; + *dst++ = '$'; + + dst = encode64(dst, buflen - (dst - buf), hashbin, sizeof(hashbin)); + insecure_memzero(hashbin, sizeof(hashbin)); + if (!dst || dst >= buf + buflen) + return NULL; + + *dst = 0; /* NUL termination */ + + return buf; + +fail: + insecure_memzero(saltbin, sizeof(saltbin)); + insecure_memzero(hashbin, sizeof(hashbin)); + return NULL; +} + +uint8_t *yescrypt(const uint8_t *passwd, const uint8_t *setting) +{ + /* prefix, '$', hash, NUL */ + static uint8_t buf[PREFIX_LEN + 1 + HASH_LEN + 1]; + yescrypt_local_t local; + uint8_t *retval; + + if (yescrypt_init_local(&local)) + return NULL; + retval = yescrypt_r(NULL, &local, + passwd, strlen((char *)passwd), setting, NULL, buf, sizeof(buf)); + if (yescrypt_free_local(&local)) + return NULL; + return retval; +} + +uint8_t *yescrypt_reencrypt(uint8_t *hash, + const yescrypt_binary_t *from_key, + const yescrypt_binary_t *to_key) +{ + uint8_t *retval = NULL, *saltstart, *hashstart; + const uint8_t *hashend; + unsigned char saltbin[64], hashbin[32]; + size_t saltstrlen, saltlen = 0, hashlen; + + if (strncmp((char *)hash, "$y$", 3)) + return NULL; + + saltstart = NULL; + hashstart = (uint8_t *)strrchr((char *)hash, '$'); + if (hashstart) { + if (hashstart > (uint8_t *)hash) { + saltstart = hashstart - 1; + while (*saltstart != '$' && saltstart > hash) + saltstart--; + if (*saltstart == '$') + saltstart++; + } + hashstart++; + } else { + hashstart = hash; + } + saltstrlen = saltstart ? 
(hashstart - 1 - saltstart) : 0; + if (saltstrlen > BYTES2CHARS(64) || + strlen((char *)hashstart) != HASH_LEN) + return NULL; + + if (saltstrlen) { + const uint8_t *saltend; + saltlen = sizeof(saltbin); + saltend = decode64(saltbin, &saltlen, saltstart, saltstrlen); + if (!saltend || *saltend != '$' || saltlen < 1 || saltlen > 64) + goto out; + + if (from_key) + encrypt(saltbin, saltlen, from_key, ENC); + if (to_key) + encrypt(saltbin, saltlen, to_key, DEC); + } + + hashlen = sizeof(hashbin); + hashend = decode64(hashbin, &hashlen, hashstart, HASH_LEN); + if (!hashend || *hashend || hashlen != sizeof(hashbin)) + goto out; + + if (from_key) + encrypt(hashbin, hashlen, from_key, DEC); + if (to_key) + encrypt(hashbin, hashlen, to_key, ENC); + + if (saltstrlen) { + if (!encode64(saltstart, saltstrlen + 1, saltbin, saltlen)) + goto out; /* can't happen */ + *(saltstart + saltstrlen) = '$'; + } + + if (!encode64(hashstart, HASH_LEN + 1, hashbin, hashlen)) + goto out; /* can't happen */ + + retval = hash; + +out: + insecure_memzero(saltbin, sizeof(saltbin)); + insecure_memzero(hashbin, sizeof(hashbin)); + + return retval; +} + +static uint32_t N2log2(uint64_t N) +{ + uint32_t N_log2; + + if (N < 2) + return 0; + + N_log2 = 2; + while (N >> N_log2 != 0) + N_log2++; + N_log2--; + + if (N >> N_log2 != 1) + return 0; + + return N_log2; +} + +uint8_t *yescrypt_encode_params_r(const yescrypt_params_t *params, + const uint8_t *src, size_t srclen, + uint8_t *buf, size_t buflen) +{ + uint32_t flavor, N_log2, NROM_log2, have; + uint8_t *dst; + + if (srclen > SIZE_MAX / 16) + return NULL; + + if (params->flags < YESCRYPT_RW) { + flavor = params->flags; + } else if ((params->flags & YESCRYPT_MODE_MASK) == YESCRYPT_RW && + params->flags <= (YESCRYPT_RW | YESCRYPT_RW_FLAVOR_MASK)) { + flavor = YESCRYPT_RW + (params->flags >> 2); + } else { + return NULL; + } + + N_log2 = N2log2(params->N); + if (!N_log2) + return NULL; + + NROM_log2 = N2log2(params->NROM); + if (params->NROM && !NROM_log2) + return NULL; + + if ((uint64_t)params->r * (uint64_t)params->p >= (1U << 30)) + return NULL; + + dst = buf; + *dst++ = '$'; + *dst++ = 'y'; + *dst++ = '$'; + + dst = encode64_uint32(dst, buflen - (dst - buf), flavor, 0); + if (!dst) + return NULL; + + dst = encode64_uint32(dst, buflen - (dst - buf), N_log2, 1); + if (!dst) + return NULL; + + dst = encode64_uint32(dst, buflen - (dst - buf), params->r, 1); + if (!dst) + return NULL; + + have = 0; + if (params->p != 1) + have |= 1; + if (params->t) + have |= 2; + if (params->g) + have |= 4; + if (NROM_log2) + have |= 8; + + if (have) { + dst = encode64_uint32(dst, buflen - (dst - buf), have, 1); + if (!dst) + return NULL; + } + + if (params->p != 1) { + dst = encode64_uint32(dst, buflen - (dst - buf), params->p, 2); + if (!dst) + return NULL; + } + + if (params->t) { + dst = encode64_uint32(dst, buflen - (dst - buf), params->t, 1); + if (!dst) + return NULL; + } + + if (params->g) { + dst = encode64_uint32(dst, buflen - (dst - buf), params->g, 1); + if (!dst) + return NULL; + } + + if (NROM_log2) { + dst = encode64_uint32(dst, buflen - (dst - buf), NROM_log2, 1); + if (!dst) + return NULL; + } + + if (dst >= buf + buflen) + return NULL; + + *dst++ = '$'; + + dst = encode64(dst, buflen - (dst - buf), src, srclen); + if (!dst || dst >= buf + buflen) + return NULL; + + *dst = 0; /* NUL termination */ + + return buf; +} + +uint8_t *yescrypt_encode_params(const yescrypt_params_t *params, + const uint8_t *src, size_t srclen) +{ + /* prefix, NUL */ + static uint8_t buf[PREFIX_LEN + 
1]; + return yescrypt_encode_params_r(params, src, srclen, buf, sizeof(buf)); +} + +int crypto_scrypt(const uint8_t *passwd, size_t passwdlen, + const uint8_t *salt, size_t saltlen, uint64_t N, uint32_t r, uint32_t p, + uint8_t *buf, size_t buflen) +{ + yescrypt_local_t local; + yescrypt_params_t params = { .flags = 0, .N = N, .r = r, .p = p }; + int retval; + + if (yescrypt_init_local(&local)) + return -1; + retval = yescrypt_kdf(NULL, &local, + passwd, passwdlen, salt, saltlen, ¶ms, buf, buflen); + if (yescrypt_free_local(&local)) + return -1; + return retval; +} diff --git a/deps/yescrypt-master/yescrypt-opt.c b/deps/yescrypt-master/yescrypt-opt.c new file mode 100644 index 000000000..97ef1e44c --- /dev/null +++ b/deps/yescrypt-master/yescrypt-opt.c @@ -0,0 +1,1533 @@ +/*- + * Copyright 2009 Colin Percival + * Copyright 2012-2025 Alexander Peslyak + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * This file was originally written by Colin Percival as part of the Tarsnap + * online backup system. + */ + +/* + * AVX and especially XOP speed up Salsa20 a lot, but this mostly matters for + * classic scrypt and for YESCRYPT_WORM (which use 8 rounds of Salsa20 per + * sub-block), and much less so for YESCRYPT_RW (which uses 2 rounds of Salsa20 + * per block except during pwxform S-box initialization). + */ +#ifdef __GNUC__ +#ifdef __XOP__ +#warning "Note: XOP is enabled. That's great." +#elif defined(__AVX512VL__) +#warning "Note: AVX512VL is enabled. That's great." +#elif defined(__AVX__) +#warning "Note: AVX is enabled, which is great for classic scrypt and YESCRYPT_WORM, but is sometimes slightly slower than plain SSE2 for YESCRYPT_RW" +#elif defined(__SSE2__) +#warning "Note: AVX and XOP are not enabled, which is great for YESCRYPT_RW, but they would substantially improve performance at classic scrypt and YESCRYPT_WORM" +#elif defined(__x86_64__) || defined(__i386__) +#warning "SSE2 not enabled. Expect poor performance." +#else +#warning "Note: building generic code for non-x86. That's OK." +#endif +#endif + +/* + * The SSE4 code version has fewer instructions than the generic SSE2 version, + * but all of the instructions are SIMD, thereby wasting the scalar execution + * units. 
Thus, the generic SSE2 version below actually runs faster on some + * CPUs due to its balanced mix of SIMD and scalar instructions. + */ +#undef USE_SSE4_FOR_32BIT + +#ifdef __SSE2__ +/* + * GCC before 4.9 would by default unnecessarily use store/load (without + * SSE4.1) or (V)PEXTR (with SSE4.1 or AVX) instead of simply (V)MOV. + * This was tracked as GCC bug 54349. + * "-mtune=corei7" works around this, but is only supported for GCC 4.6+. + * We use inline asm for pre-4.6 GCC, further down this file. + */ +#if __GNUC__ == 4 && __GNUC_MINOR__ >= 6 && __GNUC_MINOR__ < 9 && \ + !defined(__clang__) && !defined(__ICC) +#pragma GCC target ("tune=corei7") +#endif +#include +#ifdef __XOP__ +#include +#elif defined(__AVX512VL__) +#include +#endif +#elif defined(__SSE__) +#include +#endif + +#include +#include +#include +#include + +#include "insecure_memzero.h" +#include "sha256.h" +#include "sysendian.h" + +#define YESCRYPT_INTERNAL +#include "yescrypt.h" + +#include "yescrypt-platform.c" + +#if __STDC_VERSION__ >= 199901L +/* Have restrict */ +#elif defined(__GNUC__) +#define restrict __restrict +#else +#define restrict +#endif + +#ifdef __GNUC__ +#define unlikely(exp) __builtin_expect(exp, 0) +#else +#define unlikely(exp) (exp) +#endif + +#ifdef __SSE__ +#define PREFETCH(x, hint) _mm_prefetch((const char *)(x), (hint)); +#else +#undef PREFETCH +#endif + +typedef union { + uint32_t w[16]; + uint64_t d[8]; +#ifdef __SSE2__ + __m128i q[4]; +#endif +} salsa20_blk_t; + +static inline void salsa20_simd_shuffle(const salsa20_blk_t *Bin, + salsa20_blk_t *Bout) +{ +#define COMBINE(out, in1, in2) \ + Bout->d[out] = Bin->w[in1 * 2] | ((uint64_t)Bin->w[in2 * 2 + 1] << 32); + COMBINE(0, 0, 2) + COMBINE(1, 5, 7) + COMBINE(2, 2, 4) + COMBINE(3, 7, 1) + COMBINE(4, 4, 6) + COMBINE(5, 1, 3) + COMBINE(6, 6, 0) + COMBINE(7, 3, 5) +#undef COMBINE +} + +static inline void salsa20_simd_unshuffle(const salsa20_blk_t *Bin, + salsa20_blk_t *Bout) +{ +#define UNCOMBINE(out, in1, in2) \ + Bout->w[out * 2] = Bin->d[in1]; \ + Bout->w[out * 2 + 1] = Bin->d[in2] >> 32; + UNCOMBINE(0, 0, 6) + UNCOMBINE(1, 5, 3) + UNCOMBINE(2, 2, 0) + UNCOMBINE(3, 7, 5) + UNCOMBINE(4, 4, 2) + UNCOMBINE(5, 1, 7) + UNCOMBINE(6, 6, 4) + UNCOMBINE(7, 3, 1) +#undef UNCOMBINE +} + +#ifdef __SSE2__ +#define DECL_X \ + __m128i X0, X1, X2, X3; +#define DECL_Y \ + __m128i Y0, Y1, Y2, Y3; +#define READ_X(in) \ + X0 = (in).q[0]; X1 = (in).q[1]; X2 = (in).q[2]; X3 = (in).q[3]; +#define WRITE_X(out) \ + (out).q[0] = X0; (out).q[1] = X1; (out).q[2] = X2; (out).q[3] = X3; + +#ifdef __XOP__ +#define ARX(out, in1, in2, s) \ + out = _mm_xor_si128(out, _mm_roti_epi32(_mm_add_epi32(in1, in2), s)); +#elif defined(__AVX512VL__) +#define ARX(out, in1, in2, s) \ + out = _mm_xor_si128(out, _mm_rol_epi32(_mm_add_epi32(in1, in2), s)); +#else +#define ARX(out, in1, in2, s) { \ + __m128i tmp = _mm_add_epi32(in1, in2); \ + out = _mm_xor_si128(out, _mm_slli_epi32(tmp, s)); \ + out = _mm_xor_si128(out, _mm_srli_epi32(tmp, 32 - s)); \ +} +#endif + +#define SALSA20_2ROUNDS \ + /* Operate on "columns" */ \ + ARX(X1, X0, X3, 7) \ + ARX(X2, X1, X0, 9) \ + ARX(X3, X2, X1, 13) \ + ARX(X0, X3, X2, 18) \ + /* Rearrange data */ \ + X1 = _mm_shuffle_epi32(X1, 0x93); \ + X2 = _mm_shuffle_epi32(X2, 0x4E); \ + X3 = _mm_shuffle_epi32(X3, 0x39); \ + /* Operate on "rows" */ \ + ARX(X3, X0, X1, 7) \ + ARX(X2, X3, X0, 9) \ + ARX(X1, X2, X3, 13) \ + ARX(X0, X1, X2, 18) \ + /* Rearrange data */ \ + X1 = _mm_shuffle_epi32(X1, 0x39); \ + X2 = _mm_shuffle_epi32(X2, 0x4E); \ + X3 = 
_mm_shuffle_epi32(X3, 0x93); + +/** + * Apply the Salsa20 core to the block provided in (X0 ... X3). + */ +#define SALSA20_wrapper(out, rounds) { \ + __m128i Z0 = X0, Z1 = X1, Z2 = X2, Z3 = X3; \ + rounds \ + (out).q[0] = X0 = _mm_add_epi32(X0, Z0); \ + (out).q[1] = X1 = _mm_add_epi32(X1, Z1); \ + (out).q[2] = X2 = _mm_add_epi32(X2, Z2); \ + (out).q[3] = X3 = _mm_add_epi32(X3, Z3); \ +} + +/** + * Apply the Salsa20/2 core to the block provided in X. + */ +#define SALSA20_2(out) \ + SALSA20_wrapper(out, SALSA20_2ROUNDS) + +#define SALSA20_8ROUNDS \ + SALSA20_2ROUNDS SALSA20_2ROUNDS SALSA20_2ROUNDS SALSA20_2ROUNDS + +#define XOR_X(in) \ + X0 = _mm_xor_si128(X0, (in).q[0]); \ + X1 = _mm_xor_si128(X1, (in).q[1]); \ + X2 = _mm_xor_si128(X2, (in).q[2]); \ + X3 = _mm_xor_si128(X3, (in).q[3]); + +#define XOR_X_2(in1, in2) \ + X0 = _mm_xor_si128((in1).q[0], (in2).q[0]); \ + X1 = _mm_xor_si128((in1).q[1], (in2).q[1]); \ + X2 = _mm_xor_si128((in1).q[2], (in2).q[2]); \ + X3 = _mm_xor_si128((in1).q[3], (in2).q[3]); + +#define XOR_X_WRITE_XOR_Y_2(out, in) \ + (out).q[0] = Y0 = _mm_xor_si128((out).q[0], (in).q[0]); \ + (out).q[1] = Y1 = _mm_xor_si128((out).q[1], (in).q[1]); \ + (out).q[2] = Y2 = _mm_xor_si128((out).q[2], (in).q[2]); \ + (out).q[3] = Y3 = _mm_xor_si128((out).q[3], (in).q[3]); \ + X0 = _mm_xor_si128(X0, Y0); \ + X1 = _mm_xor_si128(X1, Y1); \ + X2 = _mm_xor_si128(X2, Y2); \ + X3 = _mm_xor_si128(X3, Y3); + +/** + * Apply the Salsa20/8 core to the block provided in X ^ in. + */ +#define SALSA20_8_XOR_MEM(in, out) \ + XOR_X(in) \ + SALSA20_wrapper(out, SALSA20_8ROUNDS) + +#define INTEGERIFY _mm_cvtsi128_si32(X0) + +#else /* !defined(__SSE2__) */ + +#define DECL_X \ + salsa20_blk_t X; +#define DECL_Y \ + salsa20_blk_t Y; + +#define COPY(out, in) \ + (out).d[0] = (in).d[0]; \ + (out).d[1] = (in).d[1]; \ + (out).d[2] = (in).d[2]; \ + (out).d[3] = (in).d[3]; \ + (out).d[4] = (in).d[4]; \ + (out).d[5] = (in).d[5]; \ + (out).d[6] = (in).d[6]; \ + (out).d[7] = (in).d[7]; + +#define READ_X(in) COPY(X, in) +#define WRITE_X(out) COPY(out, X) + +/** + * salsa20(B): + * Apply the Salsa20 core to the provided block. 
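+ * B is expected in the SIMD-shuffled layout produced by salsa20_simd_shuffle().
+ * The macros below invoke this as salsa20(&X, &out, 1) for the Salsa20/2 core
+ * and salsa20(&X, &out, 4) for Salsa20/8; the third argument is the number of
+ * double-rounds.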
+ */ +static inline void salsa20(salsa20_blk_t *restrict B, + salsa20_blk_t *restrict Bout, uint32_t doublerounds) +{ + salsa20_blk_t X; +#define x X.w + + salsa20_simd_unshuffle(B, &X); + + do { +#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b)))) + /* Operate on columns */ + x[ 4] ^= R(x[ 0]+x[12], 7); x[ 8] ^= R(x[ 4]+x[ 0], 9); + x[12] ^= R(x[ 8]+x[ 4],13); x[ 0] ^= R(x[12]+x[ 8],18); + + x[ 9] ^= R(x[ 5]+x[ 1], 7); x[13] ^= R(x[ 9]+x[ 5], 9); + x[ 1] ^= R(x[13]+x[ 9],13); x[ 5] ^= R(x[ 1]+x[13],18); + + x[14] ^= R(x[10]+x[ 6], 7); x[ 2] ^= R(x[14]+x[10], 9); + x[ 6] ^= R(x[ 2]+x[14],13); x[10] ^= R(x[ 6]+x[ 2],18); + + x[ 3] ^= R(x[15]+x[11], 7); x[ 7] ^= R(x[ 3]+x[15], 9); + x[11] ^= R(x[ 7]+x[ 3],13); x[15] ^= R(x[11]+x[ 7],18); + + /* Operate on rows */ + x[ 1] ^= R(x[ 0]+x[ 3], 7); x[ 2] ^= R(x[ 1]+x[ 0], 9); + x[ 3] ^= R(x[ 2]+x[ 1],13); x[ 0] ^= R(x[ 3]+x[ 2],18); + + x[ 6] ^= R(x[ 5]+x[ 4], 7); x[ 7] ^= R(x[ 6]+x[ 5], 9); + x[ 4] ^= R(x[ 7]+x[ 6],13); x[ 5] ^= R(x[ 4]+x[ 7],18); + + x[11] ^= R(x[10]+x[ 9], 7); x[ 8] ^= R(x[11]+x[10], 9); + x[ 9] ^= R(x[ 8]+x[11],13); x[10] ^= R(x[ 9]+x[ 8],18); + + x[12] ^= R(x[15]+x[14], 7); x[13] ^= R(x[12]+x[15], 9); + x[14] ^= R(x[13]+x[12],13); x[15] ^= R(x[14]+x[13],18); +#undef R + } while (--doublerounds); +#undef x + + { + uint32_t i; + salsa20_simd_shuffle(&X, Bout); + for (i = 0; i < 16; i += 4) { + B->w[i] = Bout->w[i] += B->w[i]; + B->w[i + 1] = Bout->w[i + 1] += B->w[i + 1]; + B->w[i + 2] = Bout->w[i + 2] += B->w[i + 2]; + B->w[i + 3] = Bout->w[i + 3] += B->w[i + 3]; + } + } + +#if 0 + /* Too expensive */ + insecure_memzero(&X, sizeof(X)); +#endif +} + +/** + * Apply the Salsa20/2 core to the block provided in X. + */ +#define SALSA20_2(out) \ + salsa20(&X, &out, 1); + +#define XOR(out, in1, in2) \ + (out).d[0] = (in1).d[0] ^ (in2).d[0]; \ + (out).d[1] = (in1).d[1] ^ (in2).d[1]; \ + (out).d[2] = (in1).d[2] ^ (in2).d[2]; \ + (out).d[3] = (in1).d[3] ^ (in2).d[3]; \ + (out).d[4] = (in1).d[4] ^ (in2).d[4]; \ + (out).d[5] = (in1).d[5] ^ (in2).d[5]; \ + (out).d[6] = (in1).d[6] ^ (in2).d[6]; \ + (out).d[7] = (in1).d[7] ^ (in2).d[7]; + +#define XOR_X(in) XOR(X, X, in) +#define XOR_X_2(in1, in2) XOR(X, in1, in2) +#define XOR_X_WRITE_XOR_Y_2(out, in) \ + XOR(Y, out, in) \ + COPY(out, Y) \ + XOR(X, X, Y) + +/** + * Apply the Salsa20/8 core to the block provided in X ^ in. + */ +#define SALSA20_8_XOR_MEM(in, out) \ + XOR_X(in); \ + salsa20(&X, &out, 4); + +#define INTEGERIFY (uint32_t)X.d[0] +#endif + +/** + * blockmix_salsa8(Bin, Bout, r): + * Compute Bout = BlockMix_{salsa20/8, r}(Bin). The input Bin must be 128r + * bytes in length; the output Bout must also be the same size. 
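+ * In this implementation Bin and Bout are arrays of 2*r salsa20_blk_t (64
+ * bytes each), so e.g. r = 8 means 16 sub-blocks (1 KiB) per call.
+ * Even-numbered input sub-blocks produce Bout[0..r-1] and odd-numbered ones
+ * produce Bout[r..2r-1].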
+ */ +static void blockmix_salsa8(const salsa20_blk_t *restrict Bin, + salsa20_blk_t *restrict Bout, size_t r) +{ + size_t i; + DECL_X + + READ_X(Bin[r * 2 - 1]) + for (i = 0; i < r; i++) { + SALSA20_8_XOR_MEM(Bin[i * 2], Bout[i]) + SALSA20_8_XOR_MEM(Bin[i * 2 + 1], Bout[r + i]) + } +} + +static uint32_t blockmix_salsa8_xor(const salsa20_blk_t *restrict Bin1, + const salsa20_blk_t *restrict Bin2, salsa20_blk_t *restrict Bout, + size_t r) +{ + size_t i; + DECL_X + +#ifdef PREFETCH + PREFETCH(&Bin2[r * 2 - 1], _MM_HINT_T0) + for (i = 0; i < r - 1; i++) { + PREFETCH(&Bin2[i * 2], _MM_HINT_T0) + PREFETCH(&Bin2[i * 2 + 1], _MM_HINT_T0) + } + PREFETCH(&Bin2[i * 2], _MM_HINT_T0) +#endif + + XOR_X_2(Bin1[r * 2 - 1], Bin2[r * 2 - 1]) + for (i = 0; i < r; i++) { + XOR_X(Bin1[i * 2]) + SALSA20_8_XOR_MEM(Bin2[i * 2], Bout[i]) + XOR_X(Bin1[i * 2 + 1]) + SALSA20_8_XOR_MEM(Bin2[i * 2 + 1], Bout[r + i]) + } + + return INTEGERIFY; +} + +/* This is tunable */ +#define Swidth 8 + +/* Not tunable in this implementation, hard-coded in a few places */ +#define PWXsimple 2 +#define PWXgather 4 + +/* Derived values. Not tunable except via Swidth above. */ +#define PWXbytes (PWXgather * PWXsimple * 8) +#define Sbytes (3 * (1 << Swidth) * PWXsimple * 8) +#define Smask (((1 << Swidth) - 1) * PWXsimple * 8) +#define Smask2 (((uint64_t)Smask << 32) | Smask) + +#define DECL_SMASK2REG /* empty */ +#define FORCE_REGALLOC_3 /* empty */ +#define MAYBE_MEMORY_BARRIER /* empty */ + +#ifdef __SSE2__ +/* + * (V)PSRLDQ and (V)PSHUFD have higher throughput than (V)PSRLQ on some CPUs + * starting with Sandy Bridge. Additionally, PSHUFD uses separate source and + * destination registers, whereas the shifts would require an extra move + * instruction for our code when building without AVX. Unfortunately, PSHUFD + * is much slower on Conroe (4 cycles latency vs. 1 cycle latency for PSRLQ) + * and somewhat slower on some non-Intel CPUs (luckily not including AMD + * Bulldozer and Piledriver). + */ +#ifdef __AVX__ +#define HI32(X) \ + _mm_srli_si128((X), 4) +#elif 1 /* As an option, check for __SSE4_1__ here not to hurt Conroe */ +#define HI32(X) \ + _mm_shuffle_epi32((X), _MM_SHUFFLE(2,3,0,1)) +#else +#define HI32(X) \ + _mm_srli_epi64((X), 32) +#endif + +#if defined(__x86_64__) && \ + __GNUC__ == 4 && __GNUC_MINOR__ < 6 && !defined(__ICC) +#ifdef __AVX__ +#define MOVQ "vmovq" +#else +/* "movq" would be more correct, but "movd" is supported by older binutils + * due to an error in AMD's spec for x86-64. 
*/ +#define MOVQ "movd" +#endif +#define EXTRACT64(X) ({ \ + uint64_t result; \ + __asm__(MOVQ " %1, %0" : "=r" (result) : "x" (X)); \ + result; \ +}) +#elif defined(__x86_64__) && !defined(_MSC_VER) && !defined(__OPEN64__) +/* MSVC and Open64 had bugs */ +#define EXTRACT64(X) _mm_cvtsi128_si64(X) +#elif defined(__x86_64__) && defined(__SSE4_1__) +/* No known bugs for this intrinsic */ +#include +#define EXTRACT64(X) _mm_extract_epi64((X), 0) +#elif defined(USE_SSE4_FOR_32BIT) && defined(__SSE4_1__) +/* 32-bit */ +#include +#if 0 +/* This is currently unused by the code below, which instead uses these two + * intrinsics explicitly when (!defined(__x86_64__) && defined(__SSE4_1__)) */ +#define EXTRACT64(X) \ + ((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \ + ((uint64_t)(uint32_t)_mm_extract_epi32((X), 1) << 32)) +#endif +#else +/* 32-bit or compilers with known past bugs in _mm_cvtsi128_si64() */ +#define EXTRACT64(X) \ + ((uint64_t)(uint32_t)_mm_cvtsi128_si32(X) | \ + ((uint64_t)(uint32_t)_mm_cvtsi128_si32(HI32(X)) << 32)) +#endif + +#if defined(__x86_64__) && (defined(__AVX__) || !defined(__GNUC__)) +/* 64-bit with AVX */ +/* Force use of 64-bit AND instead of two 32-bit ANDs */ +#undef DECL_SMASK2REG +#if defined(__GNUC__) && !defined(__ICC) +#define DECL_SMASK2REG uint64_t Smask2reg = Smask2; +/* Force use of lower-numbered registers to reduce number of prefixes, relying + * on out-of-order execution and register renaming. */ +#define FORCE_REGALLOC_1 \ + __asm__("" : "=a" (x), "+d" (Smask2reg), "+S" (S0), "+D" (S1)); +#define FORCE_REGALLOC_2 \ + __asm__("" : : "c" (lo)); +#else +static volatile uint64_t Smask2var = Smask2; +#define DECL_SMASK2REG uint64_t Smask2reg = Smask2var; +#define FORCE_REGALLOC_1 /* empty */ +#define FORCE_REGALLOC_2 /* empty */ +#endif +#define PWXFORM_SIMD(X) { \ + uint64_t x; \ + FORCE_REGALLOC_1 \ + uint32_t lo = x = EXTRACT64(X) & Smask2reg; \ + FORCE_REGALLOC_2 \ + uint32_t hi = x >> 32; \ + X = _mm_mul_epu32(HI32(X), X); \ + X = _mm_add_epi64(X, *(__m128i *)(S0 + lo)); \ + X = _mm_xor_si128(X, *(__m128i *)(S1 + hi)); \ +} +#elif defined(__x86_64__) +/* 64-bit without AVX. This relies on out-of-order execution and register + * renaming. It may actually be fastest on CPUs with AVX(2) as well - e.g., + * it runs great on Haswell. */ +#warning "Note: using x86-64 inline assembly for YESCRYPT_RW. That's great." +/* We need a compiler memory barrier between sub-blocks to ensure that none of + * the writes into what was S2 during processing of the previous sub-block are + * postponed until after a read from S0 or S1 in the inline asm code below. 
*/ +#undef MAYBE_MEMORY_BARRIER +#define MAYBE_MEMORY_BARRIER \ + __asm__("" : : : "memory"); +#ifdef __ILP32__ /* x32 */ +#define REGISTER_PREFIX "e" +#else +#define REGISTER_PREFIX "r" +#endif +#define PWXFORM_SIMD(X) { \ + __m128i H; \ + __asm__( \ + "movd %0, %%rax\n\t" \ + "pshufd $0xb1, %0, %1\n\t" \ + "andq %2, %%rax\n\t" \ + "pmuludq %1, %0\n\t" \ + "movl %%eax, %%ecx\n\t" \ + "shrq $0x20, %%rax\n\t" \ + "paddq (%3,%%" REGISTER_PREFIX "cx), %0\n\t" \ + "pxor (%4,%%" REGISTER_PREFIX "ax), %0\n\t" \ + : "+x" (X), "=x" (H) \ + : "d" (Smask2), "S" (S0), "D" (S1) \ + : "cc", "ax", "cx"); \ +} +#elif defined(USE_SSE4_FOR_32BIT) && defined(__SSE4_1__) +/* 32-bit with SSE4.1 */ +#define PWXFORM_SIMD(X) { \ + __m128i x = _mm_and_si128(X, _mm_set1_epi64x(Smask2)); \ + __m128i s0 = *(__m128i *)(S0 + (uint32_t)_mm_cvtsi128_si32(x)); \ + __m128i s1 = *(__m128i *)(S1 + (uint32_t)_mm_extract_epi32(x, 1)); \ + X = _mm_mul_epu32(HI32(X), X); \ + X = _mm_add_epi64(X, s0); \ + X = _mm_xor_si128(X, s1); \ +} +#else +/* 32-bit without SSE4.1 */ +#define PWXFORM_SIMD(X) { \ + uint64_t x = EXTRACT64(X) & Smask2; \ + __m128i s0 = *(__m128i *)(S0 + (uint32_t)x); \ + __m128i s1 = *(__m128i *)(S1 + (x >> 32)); \ + X = _mm_mul_epu32(HI32(X), X); \ + X = _mm_add_epi64(X, s0); \ + X = _mm_xor_si128(X, s1); \ +} +#endif + +#define PWXFORM_ROUND \ + PWXFORM_SIMD(X0) \ + PWXFORM_SIMD(X1) \ + PWXFORM_SIMD(X2) \ + PWXFORM_SIMD(X3) + +#if defined(__x86_64__) && defined(__GNUC__) && !defined(__ICC) +#undef FORCE_REGALLOC_3 +#define FORCE_REGALLOC_3 __asm__("" : : "b" (Sw)); +#endif + +#else /* !defined(__SSE2__) */ + +#define PWXFORM_SIMD(x0, x1) { \ + uint64_t x = x0 & Smask2; \ + uint64_t *p0 = (uint64_t *)(S0 + (uint32_t)x); \ + uint64_t *p1 = (uint64_t *)(S1 + (x >> 32)); \ + x0 = ((x0 >> 32) * (uint32_t)x0 + p0[0]) ^ p1[0]; \ + x1 = ((x1 >> 32) * (uint32_t)x1 + p0[1]) ^ p1[1]; \ +} + +#define PWXFORM_ROUND \ + PWXFORM_SIMD(X.d[0], X.d[1]) \ + PWXFORM_SIMD(X.d[2], X.d[3]) \ + PWXFORM_SIMD(X.d[4], X.d[5]) \ + PWXFORM_SIMD(X.d[6], X.d[7]) +#endif + +/* + * This offset helps address the 256-byte write block via the single-byte + * displacements encodable in x86(-64) instructions. It is needed because the + * displacements are signed. Without it, we'd get 4-byte displacements for + * half of the writes. Setting it to 0x80 instead of 0x7c would avoid needing + * a displacement for one of the writes, but then the LEA instruction would + * need a 4-byte displacement. + */ +#define PWXFORM_WRITE_OFFSET 0x7c + +#define PWXFORM_WRITE \ + WRITE_X(*(salsa20_blk_t *)(Sw - PWXFORM_WRITE_OFFSET)) \ + Sw += 64; + +#define PWXFORM { \ + uint8_t *Sw = S2 + w + PWXFORM_WRITE_OFFSET; \ + FORCE_REGALLOC_3 \ + MAYBE_MEMORY_BARRIER \ + PWXFORM_ROUND \ + PWXFORM_ROUND PWXFORM_WRITE \ + PWXFORM_ROUND PWXFORM_WRITE \ + PWXFORM_ROUND PWXFORM_WRITE \ + PWXFORM_ROUND PWXFORM_WRITE \ + PWXFORM_ROUND \ + w = (w + 64 * 4) & Smask2; \ + { \ + uint8_t *Stmp = S2; \ + S2 = S1; \ + S1 = S0; \ + S0 = Stmp; \ + } \ +} + +typedef struct { + uint8_t *S0, *S1, *S2; + size_t w; +} pwxform_ctx_t; + +#define Salloc (Sbytes + ((sizeof(pwxform_ctx_t) + 63) & ~63U)) + +/** + * blockmix_pwxform(Bin, Bout, r, S): + * Compute Bout = BlockMix_pwxform{salsa20/2, r, S}(Bin). The input Bin must + * be 128r bytes in length; the output Bout must also be the same size. 
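+ * ctx carries the three S-box regions S0, S1, S2 and the current write
+ * position w; each PWXFORM invocation writes into S2 and then rotates the
+ * three pointers, and the updated values are stored back into ctx on return.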
+ */ +static void blockmix(const salsa20_blk_t *restrict Bin, + salsa20_blk_t *restrict Bout, size_t r, pwxform_ctx_t *restrict ctx) +{ + uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2; + size_t w = ctx->w; + size_t i; + DECL_X + + /* Convert count of 128-byte blocks to max index of 64-byte block */ + r = r * 2 - 1; + + READ_X(Bin[r]) + + DECL_SMASK2REG + + i = 0; + do { + XOR_X(Bin[i]) + PWXFORM + if (unlikely(i >= r)) + break; + WRITE_X(Bout[i]) + i++; + } while (1); + + ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2; + ctx->w = w; + + SALSA20_2(Bout[i]) +} + +static uint32_t blockmix_xor(const salsa20_blk_t *Bin1, + const salsa20_blk_t *restrict Bin2, salsa20_blk_t *Bout, + size_t r, int Bin2_in_ROM, pwxform_ctx_t *restrict ctx) +{ + uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2; + size_t w = ctx->w; + size_t i; + DECL_X + + /* Convert count of 128-byte blocks to max index of 64-byte block */ + r = r * 2 - 1; + +#ifdef PREFETCH + if (Bin2_in_ROM) { + PREFETCH(&Bin2[r], _MM_HINT_NTA) + for (i = 0; i < r; i++) { + PREFETCH(&Bin2[i], _MM_HINT_NTA) + } + } else { + PREFETCH(&Bin2[r], _MM_HINT_T0) + for (i = 0; i < r; i++) { + PREFETCH(&Bin2[i], _MM_HINT_T0) + } + } +#else + (void)Bin2_in_ROM; /* unused */ +#endif + + XOR_X_2(Bin1[r], Bin2[r]) + + DECL_SMASK2REG + + i = 0; + r--; + do { + XOR_X(Bin1[i]) + XOR_X(Bin2[i]) + PWXFORM + WRITE_X(Bout[i]) + + XOR_X(Bin1[i + 1]) + XOR_X(Bin2[i + 1]) + PWXFORM + + if (unlikely(i >= r)) + break; + + WRITE_X(Bout[i + 1]) + + i += 2; + } while (1); + i++; + + ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2; + ctx->w = w; + + SALSA20_2(Bout[i]) + + return INTEGERIFY; +} + +static uint32_t blockmix_xor_save(salsa20_blk_t *restrict Bin1out, + salsa20_blk_t *restrict Bin2, + size_t r, pwxform_ctx_t *restrict ctx) +{ + uint8_t *S0 = ctx->S0, *S1 = ctx->S1, *S2 = ctx->S2; + size_t w = ctx->w; + size_t i; + DECL_X + DECL_Y + + /* Convert count of 128-byte blocks to max index of 64-byte block */ + r = r * 2 - 1; + +#ifdef PREFETCH + PREFETCH(&Bin2[r], _MM_HINT_T0) + for (i = 0; i < r; i++) { + PREFETCH(&Bin2[i], _MM_HINT_T0) + } +#endif + + XOR_X_2(Bin1out[r], Bin2[r]) + + DECL_SMASK2REG + + i = 0; + r--; + do { + XOR_X_WRITE_XOR_Y_2(Bin2[i], Bin1out[i]) + PWXFORM + WRITE_X(Bin1out[i]) + + XOR_X_WRITE_XOR_Y_2(Bin2[i + 1], Bin1out[i + 1]) + PWXFORM + + if (unlikely(i >= r)) + break; + + WRITE_X(Bin1out[i + 1]) + + i += 2; + } while (1); + i++; + + ctx->S0 = S0; ctx->S1 = S1; ctx->S2 = S2; + ctx->w = w; + + SALSA20_2(Bin1out[i]) + + return INTEGERIFY; +} + +/** + * integerify(B, r): + * Return the result of parsing B_{2r-1} as a little-endian integer. + */ +static inline uint32_t integerify(const salsa20_blk_t *B, size_t r) +{ +/* + * Our 64-bit words are in host byte order, which is why we don't just read + * w[0] here (would be wrong on big-endian). Also, our 32-bit words are + * SIMD-shuffled (so the next 32 bits would be part of d[6]), but currently + * this does not matter as we only care about the least significant 32 bits. + */ + return (uint32_t)B[2 * r - 1].d[0]; +} + +/** + * smix1(B, r, N, flags, V, NROM, VROM, XY, ctx): + * Compute first loop of B = SMix_r(B, N). The input B must be 128r bytes in + * length; the temporary storage V must be 128rN bytes in length; the temporary + * storage XY must be 128r+64 bytes in length. N must be even and at least 4. + * The array V must be aligned to a multiple of 64 bytes, and arrays B and XY + * to a multiple of at least 16 bytes. 
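+ * Roughly: B is little-endian decoded and SIMD-shuffled into the start of V,
+ * successive BlockMix outputs fill V sequentially, and the final output is
+ * produced in XY, which is then unshuffled and encoded back into B.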
+ */ +static void smix1(uint8_t *B, size_t r, uint32_t N, yescrypt_flags_t flags, + salsa20_blk_t *V, uint32_t NROM, const salsa20_blk_t *VROM, + salsa20_blk_t *XY, pwxform_ctx_t *ctx) +{ + size_t s = 2 * r; + salsa20_blk_t *X = V, *Y = &V[s]; + uint32_t i, j; + + for (i = 0; i < 2 * r; i++) { + const salsa20_blk_t *src = (salsa20_blk_t *)&B[i * 64]; + salsa20_blk_t *tmp = Y; + salsa20_blk_t *dst = &X[i]; + size_t k; + for (k = 0; k < 16; k++) + tmp->w[k] = le32dec(&src->w[k]); + salsa20_simd_shuffle(tmp, dst); + } + + if (VROM) { + uint32_t n; + const salsa20_blk_t *V_j; + + V_j = &VROM[(NROM - 1) * s]; + j = blockmix_xor(X, V_j, Y, r, 1, ctx) & (NROM - 1); + V_j = &VROM[j * s]; + X = Y + s; + j = blockmix_xor(Y, V_j, X, r, 1, ctx); + + for (n = 2; n < N; n <<= 1) { + uint32_t m = (n < N / 2) ? n : (N - 1 - n); + for (i = 1; i < m; i += 2) { + j &= n - 1; + j += i - 1; + V_j = &V[j * s]; + Y = X + s; + j = blockmix_xor(X, V_j, Y, r, 0, ctx) & (NROM - 1); + V_j = &VROM[j * s]; + X = Y + s; + j = blockmix_xor(Y, V_j, X, r, 1, ctx); + } + } + n >>= 1; + + j &= n - 1; + j += N - 2 - n; + V_j = &V[j * s]; + Y = X + s; + j = blockmix_xor(X, V_j, Y, r, 0, ctx) & (NROM - 1); + V_j = &VROM[j * s]; + blockmix_xor(Y, V_j, XY, r, 1, ctx); + } else if (flags & YESCRYPT_RW) { + uint32_t n; + salsa20_blk_t *V_j; + + blockmix(X, Y, r, ctx); + X = Y + s; + blockmix(Y, X, r, ctx); + j = integerify(X, r); + + for (n = 2; n < N; n <<= 1) { + uint32_t m = (n < N / 2) ? n : (N - 1 - n); + for (i = 1; i < m; i += 2) { + Y = X + s; + j &= n - 1; + j += i - 1; + V_j = &V[j * s]; + j = blockmix_xor(X, V_j, Y, r, 0, ctx); + j &= n - 1; + j += i; + V_j = &V[j * s]; + X = Y + s; + j = blockmix_xor(Y, V_j, X, r, 0, ctx); + } + } + n >>= 1; + + j &= n - 1; + j += N - 2 - n; + V_j = &V[j * s]; + Y = X + s; + j = blockmix_xor(X, V_j, Y, r, 0, ctx); + j &= n - 1; + j += N - 1 - n; + V_j = &V[j * s]; + blockmix_xor(Y, V_j, XY, r, 0, ctx); + } else { + N -= 2; + do { + blockmix_salsa8(X, Y, r); + X = Y + s; + blockmix_salsa8(Y, X, r); + Y = X + s; + } while ((N -= 2)); + + blockmix_salsa8(X, Y, r); + blockmix_salsa8(Y, XY, r); + } + + for (i = 0; i < 2 * r; i++) { + const salsa20_blk_t *src = &XY[i]; + salsa20_blk_t *tmp = &XY[s]; + salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64]; + size_t k; + for (k = 0; k < 16; k++) + le32enc(&tmp->w[k], src->w[k]); + salsa20_simd_unshuffle(tmp, dst); + } +} + +/** + * smix2(B, r, N, Nloop, flags, V, NROM, VROM, XY, ctx): + * Compute second loop of B = SMix_r(B, N). The input B must be 128r bytes in + * length; the temporary storage V must be 128rN bytes in length; the temporary + * storage XY must be 256r bytes in length. N must be a power of 2 and at + * least 2. Nloop must be even. The array V must be aligned to a multiple of + * 64 bytes, and arrays B and XY to a multiple of at least 16 bytes. 
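+ * Nloop is the number of BlockMix applications to perform; the main loop
+ * below performs two of them per iteration, which is why Nloop must be even.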
+ */ +static void smix2(uint8_t *B, size_t r, uint32_t N, uint64_t Nloop, + yescrypt_flags_t flags, salsa20_blk_t *V, uint32_t NROM, + const salsa20_blk_t *VROM, salsa20_blk_t *XY, pwxform_ctx_t *ctx) +{ + size_t s = 2 * r; + salsa20_blk_t *X = XY, *Y = &XY[s]; + uint32_t i, j; + + if (Nloop == 0) + return; + + for (i = 0; i < 2 * r; i++) { + const salsa20_blk_t *src = (salsa20_blk_t *)&B[i * 64]; + salsa20_blk_t *tmp = Y; + salsa20_blk_t *dst = &X[i]; + size_t k; + for (k = 0; k < 16; k++) + tmp->w[k] = le32dec(&src->w[k]); + salsa20_simd_shuffle(tmp, dst); + } + + j = integerify(X, r) & (N - 1); + +/* + * Normally, VROM implies YESCRYPT_RW, but we check for these separately + * because our SMix resets YESCRYPT_RW for the smix2() calls operating on the + * entire V when p > 1. + */ + if (VROM && (flags & YESCRYPT_RW)) { + do { + salsa20_blk_t *V_j = &V[j * s]; + const salsa20_blk_t *VROM_j; + j = blockmix_xor_save(X, V_j, r, ctx) & (NROM - 1); + VROM_j = &VROM[j * s]; + j = blockmix_xor(X, VROM_j, X, r, 1, ctx) & (N - 1); + } while (Nloop -= 2); + } else if (VROM) { + do { + const salsa20_blk_t *V_j = &V[j * s]; + j = blockmix_xor(X, V_j, X, r, 0, ctx) & (NROM - 1); + V_j = &VROM[j * s]; + j = blockmix_xor(X, V_j, X, r, 1, ctx) & (N - 1); + } while (Nloop -= 2); + } else if (flags & YESCRYPT_RW) { + do { + salsa20_blk_t *V_j = &V[j * s]; + j = blockmix_xor_save(X, V_j, r, ctx) & (N - 1); + V_j = &V[j * s]; + j = blockmix_xor_save(X, V_j, r, ctx) & (N - 1); + } while (Nloop -= 2); + } else if (ctx) { + do { + const salsa20_blk_t *V_j = &V[j * s]; + j = blockmix_xor(X, V_j, X, r, 0, ctx) & (N - 1); + V_j = &V[j * s]; + j = blockmix_xor(X, V_j, X, r, 0, ctx) & (N - 1); + } while (Nloop -= 2); + } else { + do { + const salsa20_blk_t *V_j = &V[j * s]; + j = blockmix_salsa8_xor(X, V_j, Y, r) & (N - 1); + V_j = &V[j * s]; + j = blockmix_salsa8_xor(Y, V_j, X, r) & (N - 1); + } while (Nloop -= 2); + } + + for (i = 0; i < 2 * r; i++) { + const salsa20_blk_t *src = &X[i]; + salsa20_blk_t *tmp = Y; + salsa20_blk_t *dst = (salsa20_blk_t *)&B[i * 64]; + size_t k; + for (k = 0; k < 16; k++) + le32enc(&tmp->w[k], src->w[k]); + salsa20_simd_unshuffle(tmp, dst); + } +} + +/** + * p2floor(x): + * Largest power of 2 not greater than argument. + */ +static uint64_t p2floor(uint64_t x) +{ + uint64_t y; + while ((y = x & (x - 1))) + x = y; + return x; +} + +/** + * smix(B, r, N, p, t, flags, V, NROM, VROM, XY, S, passwd): + * Compute B = SMix_r(B, N). The input B must be 128rp bytes in length; the + * temporary storage V must be 128rN bytes in length; the temporary storage + * XY must be 256r or 256rp bytes in length (the larger size is required with + * OpenMP-enabled builds). N must be a power of 2 and at least 4. The array V + * must be aligned to a multiple of 64 bytes, and arrays B and XY to a multiple + * of at least 16 bytes (aligning them to 64 bytes as well saves cache lines + * and helps avoid false sharing in OpenMP-enabled builds when p > 1, but it + * might also result in cache bank conflicts). 
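+ * As a worked example of t: with YESCRYPT_RW, p = 1 and t = 0, Nloop_all
+ * becomes roughly N/3 (rounded up to even), so smix2() performs about a third
+ * as many BlockMix applications as smix1() did while filling V.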
+ */ +//hashcat: removed static, need direct access +//static void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, +void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, + yescrypt_flags_t flags, + salsa20_blk_t *V, uint32_t NROM, const salsa20_blk_t *VROM, + salsa20_blk_t *XY, uint8_t *S, uint8_t *passwd) +{ + size_t s = 2 * r; + uint32_t Nchunk; + uint64_t Nloop_all, Nloop_rw; + uint32_t i; + + Nchunk = N / p; + Nloop_all = Nchunk; + if (flags & YESCRYPT_RW) { + if (t <= 1) { + if (t) + Nloop_all *= 2; /* 2/3 */ + Nloop_all = (Nloop_all + 2) / 3; /* 1/3, round up */ + } else { + Nloop_all *= t - 1; + } + } else if (t) { + if (t == 1) + Nloop_all += (Nloop_all + 1) / 2; /* 1.5, round up */ + Nloop_all *= t; + } + + Nloop_rw = 0; + if (flags & YESCRYPT_INIT_SHARED) + Nloop_rw = Nloop_all; + else if (flags & YESCRYPT_RW) + Nloop_rw = Nloop_all / p; + + Nchunk &= ~(uint32_t)1; /* round down to even */ + Nloop_all++; Nloop_all &= ~(uint64_t)1; /* round up to even */ + Nloop_rw++; Nloop_rw &= ~(uint64_t)1; /* round up to even */ + +#ifdef _OPENMP +#pragma omp parallel if (p > 1) default(none) private(i) shared(B, r, N, p, flags, V, NROM, VROM, XY, S, passwd, s, Nchunk, Nloop_all, Nloop_rw) + { +#pragma omp for +#endif + for (i = 0; i < p; i++) { + uint32_t Vchunk = i * Nchunk; + uint32_t Np = (i < p - 1) ? Nchunk : (N - Vchunk); + uint8_t *Bp = &B[128 * r * i]; + salsa20_blk_t *Vp = &V[Vchunk * s]; +#ifdef _OPENMP + salsa20_blk_t *XYp = &XY[i * (2 * s)]; +#else + salsa20_blk_t *XYp = XY; +#endif + pwxform_ctx_t *ctx_i = NULL; + if (flags & YESCRYPT_RW) { + uint8_t *Si = S + i * Salloc; + smix1(Bp, 1, Sbytes / 128, 0 /* no flags */, + (salsa20_blk_t *)Si, 0, NULL, XYp, NULL); + ctx_i = (pwxform_ctx_t *)(Si + Sbytes); + ctx_i->S2 = Si; + ctx_i->S1 = Si + Sbytes / 3; + ctx_i->S0 = Si + Sbytes / 3 * 2; + ctx_i->w = 0; + if (i == 0) + HMAC_SHA256_Buf(Bp + (128 * r - 64), 64, + passwd, 32, passwd); + } + smix1(Bp, r, Np, flags, Vp, NROM, VROM, XYp, ctx_i); + smix2(Bp, r, p2floor(Np), Nloop_rw, flags, Vp, + NROM, VROM, XYp, ctx_i); + } + + if (Nloop_all > Nloop_rw) { +#ifdef _OPENMP +#pragma omp for +#endif + for (i = 0; i < p; i++) { + uint8_t *Bp = &B[128 * r * i]; +#ifdef _OPENMP + salsa20_blk_t *XYp = &XY[i * (2 * s)]; +#else + salsa20_blk_t *XYp = XY; +#endif + pwxform_ctx_t *ctx_i = NULL; + if (flags & YESCRYPT_RW) { + uint8_t *Si = S + i * Salloc; + ctx_i = (pwxform_ctx_t *)(Si + Sbytes); + } + smix2(Bp, r, N, Nloop_all - Nloop_rw, + flags & ~YESCRYPT_RW, V, NROM, VROM, XYp, ctx_i); + } + } +#ifdef _OPENMP + } +#endif +} + +/** + * yescrypt_kdf_body(shared, local, passwd, passwdlen, salt, saltlen, + * flags, N, r, p, t, NROM, buf, buflen): + * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, + * p, buflen), or a revision of scrypt as requested by flags and shared, and + * write the result into buf. + * + * shared and flags may request special modes as described in yescrypt.h. + * + * local is the thread-local data structure, allowing to preserve and reuse a + * memory allocation across calls, thereby reducing its overhead. + * + * t controls computation time while not affecting peak memory usage. + * + * Return 0 on success; or -1 on error. + * + * This optimized implementation currently limits N to the range from 4 to + * 2^31, but other implementations might not. 
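+ * Note that with YESCRYPT_ALLOC_ONLY this function intentionally returns -2
+ * or -3 after performing only the memory allocation; yescrypt_kdf() and
+ * yescrypt_init_shared() rely on these values as the expected outcome.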
+ */ +static int yescrypt_kdf_body(const yescrypt_shared_t *shared, + yescrypt_local_t *local, + const uint8_t *passwd, size_t passwdlen, + const uint8_t *salt, size_t saltlen, + yescrypt_flags_t flags, uint64_t N, uint32_t r, uint32_t p, uint32_t t, + uint64_t NROM, + uint8_t *buf, size_t buflen) +{ + yescrypt_region_t tmp; + const salsa20_blk_t *VROM; + size_t B_size, V_size, XY_size, need; + uint8_t *B, *S; + salsa20_blk_t *V, *XY; + uint8_t sha256[32]; + uint8_t dk[sizeof(sha256)], *dkp = buf; + + /* Sanity-check parameters */ + switch (flags & YESCRYPT_MODE_MASK) { + case 0: /* classic scrypt - can't have anything non-standard */ + if (flags || t || NROM) + goto out_EINVAL; + break; + case YESCRYPT_WORM: + if (flags != YESCRYPT_WORM || NROM) + goto out_EINVAL; + break; + case YESCRYPT_RW: + if (flags != (flags & YESCRYPT_KNOWN_FLAGS)) + goto out_EINVAL; +#if PWXsimple == 2 && PWXgather == 4 && Sbytes == 12288 + if ((flags & YESCRYPT_RW_FLAVOR_MASK) == + (YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | + YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K)) + break; +#else +#error "Unsupported pwxform settings" +#endif + /* FALLTHRU */ + default: + goto out_EINVAL; + } +#if SIZE_MAX > UINT32_MAX + if (buflen > (((uint64_t)1 << 32) - 1) * 32) + goto out_EINVAL; +#endif + if ((uint64_t)r * (uint64_t)p >= 1 << 30) + goto out_EINVAL; + if (N > UINT32_MAX) + goto out_EINVAL; + if ((N & (N - 1)) != 0 || N <= 3 || r < 1 || p < 1) + goto out_EINVAL; + if (r > SIZE_MAX / 256 / p || + N > SIZE_MAX / 128 / r) + goto out_EINVAL; + if (flags & YESCRYPT_RW) { + if (N / p <= 3 || p > SIZE_MAX / Salloc) + goto out_EINVAL; + } +#ifdef _OPENMP + else if (N > SIZE_MAX / 128 / (r * p)) { + goto out_EINVAL; + } +#endif + + VROM = NULL; + if (shared) { + uint64_t expected_size = (size_t)128 * r * NROM; + if ((NROM & (NROM - 1)) != 0 || + NROM <= 1 || NROM > UINT32_MAX || + shared->aligned_size < expected_size) + goto out_EINVAL; + if (!(flags & YESCRYPT_INIT_SHARED)) { + uint64_t *tag = (uint64_t *) + ((uint8_t *)shared->aligned + expected_size - 48); + if (tag[0] != YESCRYPT_ROM_TAG1 || tag[1] != YESCRYPT_ROM_TAG2) + goto out_EINVAL; + } + VROM = shared->aligned; + } else { + if (NROM) + goto out_EINVAL; + } + + /* Allocate memory */ + V = NULL; + V_size = (size_t)128 * r * N; +#ifdef _OPENMP + if (!(flags & YESCRYPT_RW)) + V_size *= p; +#endif + need = V_size; + if (flags & YESCRYPT_INIT_SHARED) { + if (local->aligned_size < need) { + if (local->base || local->aligned || + local->base_size || local->aligned_size) + goto out_EINVAL; + if (!alloc_region(local, need)) + return -1; + } + if (flags & YESCRYPT_ALLOC_ONLY) + return -2; /* expected "failure" */ + V = (salsa20_blk_t *)local->aligned; + need = 0; + } + B_size = (size_t)128 * r * p; + need += B_size; + if (need < B_size) + goto out_EINVAL; + XY_size = (size_t)256 * r; +#ifdef _OPENMP + XY_size *= p; +#endif + need += XY_size; + if (need < XY_size) + goto out_EINVAL; + if (flags & YESCRYPT_RW) { + size_t S_size = (size_t)Salloc * p; + need += S_size; + if (need < S_size) + goto out_EINVAL; + } + if (flags & YESCRYPT_INIT_SHARED) { + if (!alloc_region(&tmp, need)) + return -1; + B = (uint8_t *)tmp.aligned; + XY = (salsa20_blk_t *)((uint8_t *)B + B_size); + } else { + init_region(&tmp); + if (local->aligned_size < need) { + if (free_region(local)) + return -1; + if (!alloc_region(local, need)) + return -1; + } + if (flags & YESCRYPT_ALLOC_ONLY) + return -3; /* expected "failure" */ + B = (uint8_t *)local->aligned; + V = (salsa20_blk_t *)((uint8_t *)B + B_size); + XY = 
(salsa20_blk_t *)((uint8_t *)V + V_size); + } + S = NULL; + if (flags & YESCRYPT_RW) + S = (uint8_t *)XY + XY_size; + + if (flags) { + HMAC_SHA256_Buf("yescrypt-prehash", + (flags & YESCRYPT_PREHASH) ? 16 : 8, + passwd, passwdlen, sha256); + passwd = sha256; + passwdlen = sizeof(sha256); + } + + PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1, B, B_size); + + if (flags) + memcpy(sha256, B, sizeof(sha256)); + + if (p == 1 || (flags & YESCRYPT_RW)) { + smix(B, r, N, p, t, flags, V, NROM, VROM, XY, S, sha256); + } else { + uint32_t i; +#ifdef _OPENMP +#pragma omp parallel for default(none) private(i) shared(B, r, N, p, t, flags, V, NROM, VROM, XY, S) +#endif + for (i = 0; i < p; i++) { +#ifdef _OPENMP + smix(&B[(size_t)128 * r * i], r, N, 1, t, flags, + &V[(size_t)2 * r * i * N], + NROM, VROM, + &XY[(size_t)4 * r * i], NULL, NULL); +#else + smix(&B[(size_t)128 * r * i], r, N, 1, t, flags, V, + NROM, VROM, XY, NULL, NULL); +#endif + } + } + + dkp = buf; + if (flags && buflen < sizeof(dk)) { + PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, dk, sizeof(dk)); + dkp = dk; + } + + PBKDF2_SHA256(passwd, passwdlen, B, B_size, 1, buf, buflen); + + /* + * Except when computing classic scrypt, allow all computation so far + * to be performed on the client. The final steps below match those of + * SCRAM (RFC 5802), so that an extension of SCRAM (with the steps so + * far in place of SCRAM's use of PBKDF2 and with SHA-256 in place of + * SCRAM's use of SHA-1) would be usable with yescrypt hashes. + */ + if (flags && !(flags & YESCRYPT_PREHASH)) { + /* Compute ClientKey */ + HMAC_SHA256_Buf(dkp, sizeof(dk), "Client Key", 10, sha256); + /* Compute StoredKey */ + { + size_t clen = buflen; + if (clen > sizeof(dk)) + clen = sizeof(dk); + SHA256_Buf(sha256, sizeof(sha256), dk); + memcpy(buf, dk, clen); + } + } + + if (flags) { + insecure_memzero(sha256, sizeof(sha256)); + insecure_memzero(dk, sizeof(dk)); + } + + if (free_region(&tmp)) { + insecure_memzero(buf, buflen); /* must preserve errno */ + return -1; + } + + /* Success! */ + return 0; + +out_EINVAL: + errno = EINVAL; + return -1; +} + +/** + * yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, params, + * buf, buflen): + * Compute scrypt or its revision as requested by the parameters. The inputs + * to this function are the same as those for yescrypt_kdf_body() above, with + * the addition of g, which controls hash upgrades (0 for no upgrades so far). 
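+ * A minimal calling sketch (illustrative parameter values, error handling
+ * omitted), following the same pattern as crypto_scrypt() in
+ * yescrypt-common.c:
+ *
+ *	yescrypt_local_t local;
+ *	yescrypt_params_t params = {
+ *		.flags = YESCRYPT_DEFAULTS, .N = 4096, .r = 32, .p = 1 };
+ *	uint8_t dk[32];
+ *	yescrypt_init_local(&local);
+ *	yescrypt_kdf(NULL, &local, passwd, passwdlen, salt, saltlen,
+ *	    &params, dk, sizeof(dk));
+ *	yescrypt_free_local(&local);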
+ */ +int yescrypt_kdf(const yescrypt_shared_t *shared, yescrypt_local_t *local, + const uint8_t *passwd, size_t passwdlen, + const uint8_t *salt, size_t saltlen, + const yescrypt_params_t *params, + uint8_t *buf, size_t buflen) +{ + yescrypt_flags_t flags = params->flags; + uint64_t N = params->N; + uint32_t r = params->r; + uint32_t p = params->p; + uint32_t t = params->t; + uint32_t g = params->g; + uint64_t NROM = params->NROM; + uint8_t dk[32]; + int retval; + + /* Support for hash upgrades has been temporarily removed */ + if (g) { + errno = EINVAL; + return -1; + } + + if ((flags & (YESCRYPT_RW | YESCRYPT_INIT_SHARED)) == YESCRYPT_RW && + p >= 1 && N / p >= 0x100 && N / p * r >= 0x20000) { + if (yescrypt_kdf_body(shared, local, + passwd, passwdlen, salt, saltlen, + flags | YESCRYPT_ALLOC_ONLY, N, r, p, t, NROM, + buf, buflen) != -3) { + errno = EINVAL; + return -1; + } + if ((retval = yescrypt_kdf_body(shared, local, + passwd, passwdlen, salt, saltlen, + flags | YESCRYPT_PREHASH, N >> 6, r, p, 0, NROM, + dk, sizeof(dk)))) + return retval; + passwd = dk; + passwdlen = sizeof(dk); + } + + retval = yescrypt_kdf_body(shared, local, + passwd, passwdlen, salt, saltlen, + flags, N, r, p, t, NROM, buf, buflen); +#ifndef SKIP_MEMZERO + if (passwd == dk) + insecure_memzero(dk, sizeof(dk)); +#endif + return retval; +} + +int yescrypt_init_shared(yescrypt_shared_t *shared, + const uint8_t *seed, size_t seedlen, + const yescrypt_params_t *params) +{ + yescrypt_params_t subparams; + yescrypt_shared_t half1, half2; + uint8_t salt[32]; + uint64_t *tag; + + subparams = *params; + subparams.flags |= YESCRYPT_INIT_SHARED; + subparams.N = params->NROM; + subparams.NROM = 0; + + if (!(params->flags & YESCRYPT_RW) || params->N || params->g) + return -1; + + if (params->flags & YESCRYPT_SHARED_PREALLOCATED) { + if (!shared->aligned || !shared->aligned_size) + return -1; + +/* Overwrite a possible old ROM tag before we overwrite the rest */ + tag = (uint64_t *) + ((uint8_t *)shared->aligned + shared->aligned_size - 48); + memset(tag, 0, 48); + } else { + init_region(shared); + + subparams.flags |= YESCRYPT_ALLOC_ONLY; + if (yescrypt_kdf(NULL, shared, NULL, 0, NULL, 0, &subparams, + NULL, 0) != -2 || !shared->aligned) + return -1; + subparams.flags -= YESCRYPT_ALLOC_ONLY; + } + + subparams.N /= 2; + + half1 = *shared; + half1.aligned_size /= 2; + half2 = half1; + half2.aligned = (uint8_t *)half2.aligned + half1.aligned_size; + + if (yescrypt_kdf(NULL, &half1, + seed, seedlen, (const uint8_t *)"yescrypt-ROMhash", 16, &subparams, + salt, sizeof(salt))) + goto fail; + + subparams.NROM = subparams.N; + + if (yescrypt_kdf(&half1, &half2, + seed, seedlen, salt, sizeof(salt), &subparams, salt, sizeof(salt))) + goto fail; + + if (yescrypt_kdf(&half2, &half1, + seed, seedlen, salt, sizeof(salt), &subparams, salt, sizeof(salt))) + goto fail; + + tag = (uint64_t *) + ((uint8_t *)shared->aligned + shared->aligned_size - 48); + tag[0] = YESCRYPT_ROM_TAG1; + tag[1] = YESCRYPT_ROM_TAG2; + tag[2] = le64dec(salt); + tag[3] = le64dec(salt + 8); + tag[4] = le64dec(salt + 16); + tag[5] = le64dec(salt + 24); + + insecure_memzero(salt, sizeof(salt)); + return 0; + +fail: + insecure_memzero(salt, sizeof(salt)); + if (!(params->flags & YESCRYPT_SHARED_PREALLOCATED)) + free_region(shared); + return -1; +} + +yescrypt_binary_t *yescrypt_digest_shared(yescrypt_shared_t *shared) +{ + static yescrypt_binary_t digest; + uint64_t *tag; + + if (shared->aligned_size < 48) + return NULL; + + tag = (uint64_t *) + ((uint8_t *)shared->aligned 
+ shared->aligned_size - 48);
+
+ if (tag[0] != YESCRYPT_ROM_TAG1 || tag[1] != YESCRYPT_ROM_TAG2)
+ return NULL;
+
+ le64enc(digest.uc, tag[2]);
+ le64enc(digest.uc + 8, tag[3]);
+ le64enc(digest.uc + 16, tag[4]);
+ le64enc(digest.uc + 24, tag[5]);
+
+ return &digest;
+}
+
+int yescrypt_free_shared(yescrypt_shared_t *shared)
+{
+ return free_region(shared);
+}
+
+int yescrypt_init_local(yescrypt_local_t *local)
+{
+ init_region(local);
+ return 0;
+}
+
+int yescrypt_free_local(yescrypt_local_t *local)
+{
+ return free_region(local);
+}
diff --git a/deps/yescrypt-master/yescrypt-platform.c b/deps/yescrypt-master/yescrypt-platform.c
new file mode 100644
index 000000000..b04acee2a
--- /dev/null
+++ b/deps/yescrypt-master/yescrypt-platform.c
@@ -0,0 +1,111 @@
+/*-
+ * Copyright 2013-2018,2022 Alexander Peslyak
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef __unix__
+#include <sys/mman.h>
+#endif
+#ifdef __linux__
+#include <linux/mman.h> /* for MAP_HUGE_2MB */
+#endif
+
+#define HUGEPAGE_THRESHOLD (32 * 1024 * 1024)
+
+#ifdef __x86_64__
+#define HUGEPAGE_SIZE (2 * 1024 * 1024)
+#else
+#undef HUGEPAGE_SIZE
+#endif
+
+static void *alloc_region(yescrypt_region_t *region, size_t size)
+{
+ size_t base_size = size;
+ uint8_t *base, *aligned;
+#ifdef MAP_ANON
+ int flags =
+#ifdef MAP_NOCORE
+ MAP_NOCORE |
+#endif
+ MAP_ANON | MAP_PRIVATE;
+#if defined(MAP_HUGETLB) && defined(MAP_HUGE_2MB) && defined(HUGEPAGE_SIZE)
+ size_t new_size = size;
+ const size_t hugepage_mask = (size_t)HUGEPAGE_SIZE - 1;
+ if (size >= HUGEPAGE_THRESHOLD && size + hugepage_mask >= size) {
+ flags |= MAP_HUGETLB | MAP_HUGE_2MB;
+/*
+ * Linux's munmap() fails on MAP_HUGETLB mappings if size is not a multiple of
+ * huge page size, so let's round up to huge page size here.
+ */
+ new_size = size + hugepage_mask;
+ new_size &= ~hugepage_mask;
+ }
+ base = mmap(NULL, new_size, PROT_READ | PROT_WRITE, flags, -1, 0);
+ if (base != MAP_FAILED) {
+ base_size = new_size;
+ } else if (flags & MAP_HUGETLB) {
+ flags &= ~(MAP_HUGETLB | MAP_HUGE_2MB);
+ base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
+ }
+
+#else
+ base = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
+#endif
+ if (base == MAP_FAILED)
+ base = NULL;
+ aligned = base;
+#elif defined(HAVE_POSIX_MEMALIGN)
+ if ((errno = posix_memalign((void **)&base, 64, size)) != 0)
+ base = NULL;
+ aligned = base;
+#else
+ base = aligned = NULL;
+ if (size + 63 < size) {
+ errno = ENOMEM;
+ } else if ((base = malloc(size + 63)) != NULL) {
+ aligned = base + 63;
+ aligned -= (uintptr_t)aligned & 63;
+ }
+#endif
+ region->base = base;
+ region->aligned = aligned;
+ region->base_size = base ? base_size : 0;
+ region->aligned_size = base ? size : 0;
+ return aligned;
+}
+
+static inline void init_region(yescrypt_region_t *region)
+{
+ region->base = region->aligned = NULL;
+ region->base_size = region->aligned_size = 0;
+}
+
+static int free_region(yescrypt_region_t *region)
+{
+return 0;
+ if (region->base) {
+#ifdef MAP_ANON
+ if (munmap(region->base, region->base_size))
+ return -1;
+#else
+ free(region->base);
+#endif
+ }
+ init_region(region);
+ return 0;
+}
diff --git a/deps/yescrypt-master/yescrypt-ref.c b/deps/yescrypt-master/yescrypt-ref.c
new file mode 100644
index 000000000..532304519
--- /dev/null
+++ b/deps/yescrypt-master/yescrypt-ref.c
@@ -0,0 +1,925 @@
+/*-
+ * Copyright 2009 Colin Percival
+ * Copyright 2013-2018 Alexander Peslyak
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * This file was originally written by Colin Percival as part of the Tarsnap
+ * online backup system.
+ *
+ * This is the reference implementation. Its purpose is to provide a simple
+ * human- and machine-readable specification that implementations intended
+ * for actual use should be tested against. It is deliberately mostly not
+ * optimized, and it is not meant to be used in production. Instead, use
+ * yescrypt-opt.c.
+ */
+
+#ifdef __GNUC__
+#warning "This reference implementation is deliberately mostly not optimized, nor does it make any attempt not to leave sensitive data in memory. Use yescrypt-opt.c instead unless you're testing (against) the reference implementation on purpose."
+#endif
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sha256.h"
+#include "sysendian.h"
+
+#define YESCRYPT_INTERNAL
+#include "yescrypt.h"
+
+static void blkcpy(uint32_t *dst, const uint32_t *src, size_t count)
+{
+ do {
+ *dst++ = *src++;
+ } while (--count);
+}
+
+static void blkxor(uint32_t *dst, const uint32_t *src, size_t count)
+{
+ do {
+ *dst++ ^= *src++;
+ } while (--count);
+}
+
+/**
+ * salsa20(B):
+ * Apply the Salsa20 core to the provided block.
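+ * The rounds argument selects the variant: blockmix_salsa8() below calls this
+ * as salsa20(X, 8) (Salsa20/8, as inherited from classic scrypt), while
+ * blockmix_pwxform() calls it with rounds = 2 (Salsa20/2).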
+ */ +static void salsa20(uint32_t B[16], uint32_t rounds) +{ + uint32_t x[16]; + size_t i; + + /* SIMD unshuffle */ + for (i = 0; i < 16; i++) + x[i * 5 % 16] = B[i]; + + for (i = 0; i < rounds; i += 2) { +#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b)))) + /* Operate on columns */ + x[ 4] ^= R(x[ 0]+x[12], 7); x[ 8] ^= R(x[ 4]+x[ 0], 9); + x[12] ^= R(x[ 8]+x[ 4],13); x[ 0] ^= R(x[12]+x[ 8],18); + + x[ 9] ^= R(x[ 5]+x[ 1], 7); x[13] ^= R(x[ 9]+x[ 5], 9); + x[ 1] ^= R(x[13]+x[ 9],13); x[ 5] ^= R(x[ 1]+x[13],18); + + x[14] ^= R(x[10]+x[ 6], 7); x[ 2] ^= R(x[14]+x[10], 9); + x[ 6] ^= R(x[ 2]+x[14],13); x[10] ^= R(x[ 6]+x[ 2],18); + + x[ 3] ^= R(x[15]+x[11], 7); x[ 7] ^= R(x[ 3]+x[15], 9); + x[11] ^= R(x[ 7]+x[ 3],13); x[15] ^= R(x[11]+x[ 7],18); + + /* Operate on rows */ + x[ 1] ^= R(x[ 0]+x[ 3], 7); x[ 2] ^= R(x[ 1]+x[ 0], 9); + x[ 3] ^= R(x[ 2]+x[ 1],13); x[ 0] ^= R(x[ 3]+x[ 2],18); + + x[ 6] ^= R(x[ 5]+x[ 4], 7); x[ 7] ^= R(x[ 6]+x[ 5], 9); + x[ 4] ^= R(x[ 7]+x[ 6],13); x[ 5] ^= R(x[ 4]+x[ 7],18); + + x[11] ^= R(x[10]+x[ 9], 7); x[ 8] ^= R(x[11]+x[10], 9); + x[ 9] ^= R(x[ 8]+x[11],13); x[10] ^= R(x[ 9]+x[ 8],18); + + x[12] ^= R(x[15]+x[14], 7); x[13] ^= R(x[12]+x[15], 9); + x[14] ^= R(x[13]+x[12],13); x[15] ^= R(x[14]+x[13],18); +#undef R + } + + /* SIMD shuffle */ + for (i = 0; i < 16; i++) + B[i] += x[i * 5 % 16]; +} + +/** + * blockmix_salsa8(B, Y, r): + * Compute B = BlockMix_{salsa20/8, r}(B). The input B must be 128r bytes in + * length; the temporary space Y must also be the same size. + */ +static void blockmix_salsa8(uint32_t *B, uint32_t *Y, size_t r) +{ + uint32_t X[16]; + size_t i; + + /* 1: X <-- B_{2r - 1} */ + blkcpy(X, &B[(2 * r - 1) * 16], 16); + + /* 2: for i = 0 to 2r - 1 do */ + for (i = 0; i < 2 * r; i++) { + /* 3: X <-- H(X xor B_i) */ + blkxor(X, &B[i * 16], 16); + salsa20(X, 8); + + /* 4: Y_i <-- X */ + blkcpy(&Y[i * 16], X, 16); + } + + /* 6: B' <-- (Y_0, Y_2 ... Y_{2r-2}, Y_1, Y_3 ... Y_{2r-1}) */ + for (i = 0; i < r; i++) + blkcpy(&B[i * 16], &Y[(i * 2) * 16], 16); + for (i = 0; i < r; i++) + blkcpy(&B[(i + r) * 16], &Y[(i * 2 + 1) * 16], 16); +} + +/* These are tunable, but they must meet certain constraints */ +#define PWXsimple 2 +#define PWXgather 4 +#define PWXrounds 6 +#define Swidth 8 + +/* Derived values. Not tunable on their own. */ +#define PWXbytes (PWXgather * PWXsimple * 8) +#define PWXwords (PWXbytes / sizeof(uint32_t)) +#define Sbytes (3 * (1 << Swidth) * PWXsimple * 8) +#define Swords (Sbytes / sizeof(uint32_t)) +#define Smask (((1 << Swidth) - 1) * PWXsimple * 8) +#define rmin ((PWXbytes + 127) / 128) + +typedef struct { + uint32_t *S; + uint32_t (*S0)[2], (*S1)[2], (*S2)[2]; + size_t w; +} pwxform_ctx_t; + +/** + * pwxform(B): + * Transform the provided block using the provided S-boxes. 
+ */ +static void pwxform(uint32_t *B, pwxform_ctx_t *ctx) +{ + uint32_t (*X)[PWXsimple][2] = (uint32_t (*)[PWXsimple][2])B; + uint32_t (*S0)[2] = ctx->S0, (*S1)[2] = ctx->S1, (*S2)[2] = ctx->S2; + size_t w = ctx->w; + size_t i, j, k; + + /* 1: for i = 0 to PWXrounds - 1 do */ + for (i = 0; i < PWXrounds; i++) { + /* 2: for j = 0 to PWXgather - 1 do */ + for (j = 0; j < PWXgather; j++) { + uint32_t xl = X[j][0][0]; + uint32_t xh = X[j][0][1]; + uint32_t (*p0)[2], (*p1)[2]; + + /* 3: p0 <-- (lo(B_{j,0}) & Smask) / (PWXsimple * 8) */ + p0 = S0 + (xl & Smask) / sizeof(*S0); + /* 4: p1 <-- (hi(B_{j,0}) & Smask) / (PWXsimple * 8) */ + p1 = S1 + (xh & Smask) / sizeof(*S1); + + /* 5: for k = 0 to PWXsimple - 1 do */ + for (k = 0; k < PWXsimple; k++) { + uint64_t x, s0, s1; + + /* 6: B_{j,k} <-- (hi(B_{j,k}) * lo(B_{j,k}) + S0_{p0,k}) xor S1_{p1,k} */ + s0 = ((uint64_t)p0[k][1] << 32) + p0[k][0]; + s1 = ((uint64_t)p1[k][1] << 32) + p1[k][0]; + + xl = X[j][k][0]; + xh = X[j][k][1]; + + x = (uint64_t)xh * xl; + x += s0; + x ^= s1; + + X[j][k][0] = x; + X[j][k][1] = x >> 32; + + /* 8: if (i != 0) and (i != PWXrounds - 1) */ + if (i != 0 && i != PWXrounds - 1) { + /* 9: S2_w <-- B_j */ + S2[w][0] = x; + S2[w][1] = x >> 32; + /* 10: w <-- w + 1 */ + w++; + } + } + } + } + + /* 14: (S0, S1, S2) <-- (S2, S0, S1) */ + ctx->S0 = S2; + ctx->S1 = S0; + ctx->S2 = S1; + /* 15: w <-- w mod 2^Swidth */ + ctx->w = w & ((1 << Swidth) * PWXsimple - 1); +} + +/** + * blockmix_pwxform(B, ctx, r): + * Compute B = BlockMix_pwxform{salsa20/2, ctx, r}(B). The input B must be + * 128r bytes in length. + */ +static void blockmix_pwxform(uint32_t *B, pwxform_ctx_t *ctx, size_t r) +{ + uint32_t X[PWXwords]; + size_t r1, i; + + /* Convert 128-byte blocks to PWXbytes blocks */ + /* 1: r_1 <-- 128r / PWXbytes */ + r1 = 128 * r / PWXbytes; + + /* 2: X <-- B'_{r_1 - 1} */ + blkcpy(X, &B[(r1 - 1) * PWXwords], PWXwords); + + /* 3: for i = 0 to r_1 - 1 do */ + for (i = 0; i < r1; i++) { + /* 4: if r_1 > 1 */ + if (r1 > 1) { + /* 5: X <-- X xor B'_i */ + blkxor(X, &B[i * PWXwords], PWXwords); + } + + /* 7: X <-- pwxform(X) */ + pwxform(X, ctx); + + /* 8: B'_i <-- X */ + blkcpy(&B[i * PWXwords], X, PWXwords); + } + + /* 10: i <-- floor((r_1 - 1) * PWXbytes / 64) */ + i = (r1 - 1) * PWXbytes / 64; + + /* 11: B_i <-- H(B_i) */ + salsa20(&B[i * 16], 2); + +#if 1 /* No-op with our current pwxform settings, but do it to make sure */ + /* 12: for i = i + 1 to 2r - 1 do */ + for (i++; i < 2 * r; i++) { + /* 13: B_i <-- H(B_i xor B_{i-1}) */ + blkxor(&B[i * 16], &B[(i - 1) * 16], 16); + salsa20(&B[i * 16], 2); + } +#endif +} + +/** + * integerify(B, r): + * Return the result of parsing B_{2r-1} as a little-endian integer. + */ +static uint64_t integerify(const uint32_t *B, size_t r) +{ +/* + * Our 32-bit words are in host byte order, and word 13 is the second word of + * B_{2r-1} due to SIMD shuffling. The 64-bit value we return is also in host + * byte order, as it should be. + */ + const uint32_t *X = &B[(2 * r - 1) * 16]; + return ((uint64_t)X[13] << 32) + X[0]; +} + +/** + * p2floor(x): + * Largest power of 2 not greater than argument. + */ +static uint64_t p2floor(uint64_t x) +{ + uint64_t y; + while ((y = x & (x - 1))) + x = y; + return x; +} + +/** + * wrap(x, i): + * Wrap x to the range 0 to i-1. + */ +static uint64_t wrap(uint64_t x, uint64_t i) +{ + uint64_t n = p2floor(i); + return (x & (n - 1)) + (i - n); +} + +/** + * smix1(B, r, N, flags, V, NROM, VROM, XY, ctx): + * Compute first loop of B = SMix_r(B, N). 
The input B must be 128r bytes in + * length; the temporary storage V must be 128rN bytes in length; the temporary + * storage XY must be 256r bytes in length. + */ +static void smix1(uint32_t *B, size_t r, uint64_t N, yescrypt_flags_t flags, + uint32_t *V, uint64_t NROM, const uint32_t *VROM, + uint32_t *XY, pwxform_ctx_t *ctx) +{ + size_t s = 32 * r; + uint32_t *X = XY; + uint32_t *Y = &XY[s]; + uint64_t i, j; + size_t k; + + /* 1: X <-- B */ + for (k = 0; k < 2 * r; k++) + for (i = 0; i < 16; i++) + X[k * 16 + i] = le32dec(&B[k * 16 + (i * 5 % 16)]); + + /* 2: for i = 0 to N - 1 do */ + for (i = 0; i < N; i++) { + /* 3: V_i <-- X */ + blkcpy(&V[i * s], X, s); + + if (VROM && i == 0) { + /* X <-- X xor VROM_{NROM-1} */ + blkxor(X, &VROM[(NROM - 1) * s], s); + } else if (VROM && (i & 1)) { + /* j <-- Integerify(X) mod NROM */ + j = integerify(X, r) & (NROM - 1); + + /* X <-- X xor VROM_j */ + blkxor(X, &VROM[j * s], s); + } else if ((flags & YESCRYPT_RW) && i > 1) { + /* j <-- Wrap(Integerify(X), i) */ + j = wrap(integerify(X, r), i); + + /* X <-- X xor V_j */ + blkxor(X, &V[j * s], s); + } + + /* 4: X <-- H(X) */ + if (ctx) + blockmix_pwxform(X, ctx, r); + else + blockmix_salsa8(X, Y, r); + } + + /* B' <-- X */ + for (k = 0; k < 2 * r; k++) + for (i = 0; i < 16; i++) + le32enc(&B[k * 16 + (i * 5 % 16)], X[k * 16 + i]); +} + +/** + * smix2(B, r, N, Nloop, flags, V, NROM, VROM, XY, ctx): + * Compute second loop of B = SMix_r(B, N). The input B must be 128r bytes in + * length; the temporary storage V must be 128rN bytes in length; the temporary + * storage XY must be 256r bytes in length. The value N must be a power of 2 + * greater than 1. + */ +static void smix2(uint32_t *B, size_t r, uint64_t N, uint64_t Nloop, + yescrypt_flags_t flags, uint32_t *V, uint64_t NROM, + const uint32_t *VROM, uint32_t *XY, pwxform_ctx_t *ctx) +{ + size_t s = 32 * r; + uint32_t *X = XY; + uint32_t *Y = &XY[s]; + uint64_t i, j; + size_t k; + + /* X <-- B */ + for (k = 0; k < 2 * r; k++) + for (i = 0; i < 16; i++) + X[k * 16 + i] = le32dec(&B[k * 16 + (i * 5 % 16)]); + + /* 6: for i = 0 to N - 1 do */ + for (i = 0; i < Nloop; i++) { + if (VROM && (i & 1)) { + /* j <-- Integerify(X) mod NROM */ + j = integerify(X, r) & (NROM - 1); + + /* X <-- H(X xor VROM_j) */ + blkxor(X, &VROM[j * s], s); + } else { + /* 7: j <-- Integerify(X) mod N */ + j = integerify(X, r) & (N - 1); + + /* 8.1: X <-- X xor V_j */ + blkxor(X, &V[j * s], s); + /* V_j <-- X */ + if (flags & YESCRYPT_RW) + blkcpy(&V[j * s], X, s); + } + + /* 8.2: X <-- H(X) */ + if (ctx) + blockmix_pwxform(X, ctx, r); + else + blockmix_salsa8(X, Y, r); + } + + /* 10: B' <-- X */ + for (k = 0; k < 2 * r; k++) + for (i = 0; i < 16; i++) + le32enc(&B[k * 16 + (i * 5 % 16)], X[k * 16 + i]); +} + +/** + * smix(B, r, N, p, t, flags, V, NROM, VROM, XY, ctx, passwd): + * Compute B = SMix_r(B, N). The input B must be 128rp bytes in length; the + * temporary storage V must be 128rN bytes in length; the temporary storage + * XY must be 256r bytes in length. The value N must be a power of 2 greater + * than 1. 
+ */ +static void smix(uint32_t *B, size_t r, uint64_t N, uint32_t p, uint32_t t, + yescrypt_flags_t flags, + uint32_t *V, uint64_t NROM, const uint32_t *VROM, + uint32_t *XY, pwxform_ctx_t *ctx, uint8_t *passwd) +{ + size_t s = 32 * r; + uint64_t Nchunk, Nloop_all, Nloop_rw, Vchunk; + uint32_t i; + + /* 1: n <-- N / p */ + Nchunk = N / p; + + /* 2: Nloop_all <-- fNloop(n, t, flags) */ + Nloop_all = Nchunk; + if (flags & YESCRYPT_RW) { + if (t <= 1) { + if (t) + Nloop_all *= 2; /* 2/3 */ + Nloop_all = (Nloop_all + 2) / 3; /* 1/3, round up */ + } else { + Nloop_all *= t - 1; + } + } else if (t) { + if (t == 1) + Nloop_all += (Nloop_all + 1) / 2; /* 1.5, round up */ + Nloop_all *= t; + } + + /* 6: Nloop_rw <-- 0 */ + Nloop_rw = 0; + if (flags & YESCRYPT_INIT_SHARED) { + Nloop_rw = Nloop_all; + } else { + /* 3: if YESCRYPT_RW flag is set */ + if (flags & YESCRYPT_RW) { + /* 4: Nloop_rw <-- Nloop_all / p */ + Nloop_rw = Nloop_all / p; + } + } + + /* 8: n <-- n - (n mod 2) */ + Nchunk &= ~(uint64_t)1; /* round down to even */ + /* 9: Nloop_all <-- Nloop_all + (Nloop_all mod 2) */ + Nloop_all++; Nloop_all &= ~(uint64_t)1; /* round up to even */ + /* 10: Nloop_rw <-- Nloop_rw + (Nloop_rw mod 2) */ + Nloop_rw++; Nloop_rw &= ~(uint64_t)1; /* round up to even */ + + /* 11: for i = 0 to p - 1 do */ + /* 12: u <-- in */ + for (i = 0, Vchunk = 0; i < p; i++, Vchunk += Nchunk) { + /* 13: if i = p - 1 */ + /* 14: n <-- N - u */ + /* 15: end if */ + /* 16: v <-- u + n - 1 */ + uint64_t Np = (i < p - 1) ? Nchunk : (N - Vchunk); + uint32_t *Bp = &B[i * s]; + uint32_t *Vp = &V[Vchunk * s]; + pwxform_ctx_t *ctx_i = NULL; + /* 17: if YESCRYPT_RW flag is set */ + if (flags & YESCRYPT_RW) { + ctx_i = &ctx[i]; + /* 18: SMix1_1(B_i, Sbytes / 128, S_i, no flags) */ + smix1(Bp, 1, Sbytes / 128, 0 /* no flags */, + ctx_i->S, 0, NULL, XY, NULL); + /* 19: S2_i <-- S_{i,0...2^Swidth-1} */ + ctx_i->S2 = (uint32_t (*)[2])ctx_i->S; + /* 20: S1_i <-- S_{i,2^Swidth...2*2^Swidth-1} */ + ctx_i->S1 = ctx_i->S2 + (1 << Swidth) * PWXsimple; + /* 21: S0_i <-- S_{i,2*2^Swidth...3*2^Swidth-1} */ + ctx_i->S0 = ctx_i->S1 + (1 << Swidth) * PWXsimple; + /* 22: w_i <-- 0 */ + ctx_i->w = 0; + /* 23: if i = 0 */ + if (i == 0) { + /* 24: passwd <-- HMAC-SHA256(B_{0,2r-1}, passwd) */ + HMAC_SHA256_Buf(Bp + (s - 16), 64, + passwd, 32, passwd); + } + } + /* 27: SMix1_r(B_i, n, V_{u..v}, flags) */ + smix1(Bp, r, Np, flags, Vp, NROM, VROM, XY, ctx_i); + /* 28: SMix2_r(B_i, p2floor(n), Nloop_rw, V_{u..v}, flags) */ + smix2(Bp, r, p2floor(Np), Nloop_rw, flags, Vp, + NROM, VROM, XY, ctx_i); + } + + /* 30: for i = 0 to p - 1 do */ + for (i = 0; i < p; i++) { + uint32_t *Bp = &B[i * s]; + /* 31: SMix2_r(B_i, N, Nloop_all - Nloop_rw, V, flags excluding YESCRYPT_RW) */ + smix2(Bp, r, N, Nloop_all - Nloop_rw, flags & ~YESCRYPT_RW, + V, NROM, VROM, XY, (flags & YESCRYPT_RW) ? &ctx[i] : NULL); + } +} + +/** + * yescrypt_kdf_body(shared, local, passwd, passwdlen, salt, saltlen, + * flags, N, r, p, t, NROM, buf, buflen): + * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, + * p, buflen), or a revision of scrypt as requested by flags and shared, and + * write the result into buf. + * + * shared and flags may request special modes as described in yescrypt.h. + * + * local is the thread-local data structure, allowing optimized implementations + * to preserve and reuse a memory allocation across calls, thereby reducing its + * overhead (this reference implementation does not make that optimization). 
+ * + * t controls computation time while not affecting peak memory usage. + * + * Return 0 on success; or -1 on error. + */ +static int yescrypt_kdf_body(const yescrypt_shared_t *shared, + yescrypt_local_t *local, + const uint8_t *passwd, size_t passwdlen, + const uint8_t *salt, size_t saltlen, + yescrypt_flags_t flags, uint64_t N, uint32_t r, uint32_t p, uint32_t t, + uint64_t NROM, + uint8_t *buf, size_t buflen) +{ + int retval = -1; + const uint32_t *VROM; + size_t B_size, V_size; + uint32_t *B, *V, *XY, *S; + pwxform_ctx_t *pwxform_ctx; + uint32_t sha256[8]; + uint8_t dk[sizeof(sha256)], *dkp = buf; + uint32_t i; + + /* Sanity-check parameters */ + switch (flags & YESCRYPT_MODE_MASK) { + case 0: /* classic scrypt - can't have anything non-standard */ + if (flags || t || NROM) + goto out_EINVAL; + break; + case YESCRYPT_WORM: + if (flags != YESCRYPT_WORM || NROM) + goto out_EINVAL; + break; + case YESCRYPT_RW: + if (flags != (flags & YESCRYPT_KNOWN_FLAGS)) + goto out_EINVAL; +#if PWXsimple == 2 && PWXgather == 4 && PWXrounds == 6 && Sbytes == 12288 + if ((flags & YESCRYPT_RW_FLAVOR_MASK) == + (YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | + YESCRYPT_SIMPLE_2 | YESCRYPT_SBOX_12K)) + break; +#else +#error "Unsupported pwxform settings" +#endif + /* FALLTHRU */ + default: + goto out_EINVAL; + } +#if SIZE_MAX > UINT32_MAX + if (buflen > (((uint64_t)1 << 32) - 1) * 32) + goto out_EINVAL; +#endif + if ((uint64_t)r * (uint64_t)p >= 1 << 30) + goto out_EINVAL; + if ((N & (N - 1)) != 0 || N <= 1 || r < 1 || p < 1) + goto out_EINVAL; + if (r > SIZE_MAX / 128 / p || +#if SIZE_MAX / 256 <= UINT32_MAX + r > SIZE_MAX / 256 || +#endif + N > SIZE_MAX / 128 / r) + goto out_EINVAL; + if (N > UINT64_MAX / ((uint64_t)t + 1)) + goto out_EINVAL; + if (flags & YESCRYPT_RW) { + if (N / p <= 1 || r < rmin || + p > SIZE_MAX / Sbytes || + p > SIZE_MAX / sizeof(*pwxform_ctx)) + goto out_EINVAL; + } + + VROM = NULL; + if (shared) { + uint64_t expected_size = (size_t)128 * r * NROM; + if ((NROM & (NROM - 1)) != 0 || NROM <= 1 || + shared->aligned_size < expected_size) + goto out_EINVAL; + if (!(flags & YESCRYPT_INIT_SHARED)) { + uint32_t *tag = (uint32_t *) + ((uint8_t *)shared->aligned + expected_size - 48); + uint64_t tag1 = ((uint64_t)tag[1] << 32) + tag[0]; + uint64_t tag2 = ((uint64_t)tag[3] << 32) + tag[2]; + if (tag1 != YESCRYPT_ROM_TAG1 || tag2 != YESCRYPT_ROM_TAG2) + goto out_EINVAL; + } + VROM = shared->aligned; + } else { + if (NROM) + goto out_EINVAL; + } + + /* Allocate memory */ + V_size = (size_t)128 * r * N; + if (flags & YESCRYPT_INIT_SHARED) { + V = (uint32_t *)local->aligned; + if (local->aligned_size < V_size) { + if (local->base || local->aligned || + local->base_size || local->aligned_size) + goto out_EINVAL; + if ((V = malloc(V_size)) == NULL) + return -1; + local->base = local->aligned = V; + local->base_size = local->aligned_size = V_size; + } + if (flags & YESCRYPT_ALLOC_ONLY) + return -2; /* expected "failure" */ + } else { + if ((V = malloc(V_size)) == NULL) + return -1; + } + B_size = (size_t)128 * r * p; + if ((B = malloc(B_size)) == NULL) + goto free_V; + if ((XY = malloc((size_t)256 * r)) == NULL) + goto free_B; + S = NULL; + pwxform_ctx = NULL; + if (flags & YESCRYPT_RW) { + if ((S = malloc((size_t)Sbytes * p)) == NULL) + goto free_XY; + if ((pwxform_ctx = malloc(sizeof(*pwxform_ctx) * p)) == NULL) + goto free_S; + } + + if (flags) { + HMAC_SHA256_Buf("yescrypt-prehash", + (flags & YESCRYPT_PREHASH) ? 
16 : 8, + passwd, passwdlen, (uint8_t *)sha256); + passwd = (uint8_t *)sha256; + passwdlen = sizeof(sha256); + } + + /* 1: (B_0 ... B_{p-1}) <-- PBKDF2(P, S, 1, p * MFLen) */ + PBKDF2_SHA256(passwd, passwdlen, salt, saltlen, 1, + (uint8_t *)B, B_size); + + if (flags) + blkcpy(sha256, B, sizeof(sha256) / sizeof(sha256[0])); + + if (flags & YESCRYPT_RW) { + for (i = 0; i < p; i++) + pwxform_ctx[i].S = &S[i * Swords]; + smix(B, r, N, p, t, flags, V, NROM, VROM, XY, pwxform_ctx, + (uint8_t *)sha256); + } else { + /* 2: for i = 0 to p - 1 do */ + for (i = 0; i < p; i++) { + /* 3: B_i <-- MF(B_i, N) */ + smix(&B[(size_t)32 * r * i], r, N, 1, t, flags, V, + NROM, VROM, XY, NULL, NULL); + } + } + + dkp = buf; + if (flags && buflen < sizeof(dk)) { + PBKDF2_SHA256(passwd, passwdlen, (uint8_t *)B, B_size, 1, + dk, sizeof(dk)); + dkp = dk; + } + + /* 5: DK <-- PBKDF2(P, B, 1, dkLen) */ + PBKDF2_SHA256(passwd, passwdlen, (uint8_t *)B, B_size, 1, buf, buflen); + + /* + * Except when computing classic scrypt, allow all computation so far + * to be performed on the client. The final steps below match those of + * SCRAM (RFC 5802), so that an extension of SCRAM (with the steps so + * far in place of SCRAM's use of PBKDF2 and with SHA-256 in place of + * SCRAM's use of SHA-1) would be usable with yescrypt hashes. + */ + if (flags && !(flags & YESCRYPT_PREHASH)) { + /* Compute ClientKey */ + HMAC_SHA256_Buf(dkp, sizeof(dk), "Client Key", 10, + (uint8_t *)sha256); + /* Compute StoredKey */ + { + size_t clen = buflen; + if (clen > sizeof(dk)) + clen = sizeof(dk); + SHA256_Buf((uint8_t *)sha256, sizeof(sha256), dk); + memcpy(buf, dk, clen); + } + } + + /* Success! */ + retval = 0; + + /* Free memory */ + free(pwxform_ctx); +free_S: + free(S); +free_XY: + free(XY); +free_B: + free(B); +free_V: + if (!(flags & YESCRYPT_INIT_SHARED)) + free(V); + + return retval; + +out_EINVAL: + errno = EINVAL; + return -1; +} + +/** + * yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, params, + * buf, buflen): + * Compute scrypt or its revision as requested by the parameters. The inputs + * to this function are the same as those for yescrypt_kdf_body() above, with + * the addition of g, which controls hash upgrades (0 for no upgrades so far). + */ +int yescrypt_kdf(const yescrypt_shared_t *shared, yescrypt_local_t *local, + const uint8_t *passwd, size_t passwdlen, + const uint8_t *salt, size_t saltlen, + const yescrypt_params_t *params, + uint8_t *buf, size_t buflen) +{ + yescrypt_flags_t flags = params->flags; + uint64_t N = params->N; + uint32_t r = params->r; + uint32_t p = params->p; + uint32_t t = params->t; + uint32_t g = params->g; + uint64_t NROM = params->NROM; + uint8_t dk[32]; + + /* Support for hash upgrades has been temporarily removed */ + if (g) { + errno = EINVAL; + return -1; + } + + if ((flags & YESCRYPT_RW) && + p >= 1 && N / p >= 0x100 && N / p * r >= 0x20000) { + /* + * This reference implementation's yescrypt_kdf_body() + * (de)allocates memory on each call, which defeats the purpose + * of this pre-hashing. The optimized implementations, which + * you should actually use, make the larger allocation first + * and then reuse it. Thus, this implementation doing things + * differently serves as a test that the computation result is + * unaffected by such differences. 
+ */ + int retval = yescrypt_kdf_body(shared, local, + passwd, passwdlen, salt, saltlen, + flags | YESCRYPT_PREHASH, N >> 6, r, p, 0, NROM, + dk, sizeof(dk)); + if (retval) + return retval; + passwd = dk; + passwdlen = sizeof(dk); + } + + return yescrypt_kdf_body(shared, local, + passwd, passwdlen, salt, saltlen, + flags, N, r, p, t, NROM, buf, buflen); +} + +int yescrypt_init_shared(yescrypt_shared_t *shared, + const uint8_t *seed, size_t seedlen, + const yescrypt_params_t *params) +{ + yescrypt_flags_t flags = params->flags; + uint64_t N = params->NROM; + uint32_t r = params->r; + uint32_t p = params->p; + uint32_t t = params->t; + yescrypt_shared_t half1, half2; + uint8_t salt[32]; + uint32_t *tag; + + if (!(params->flags & YESCRYPT_RW) || params->N || params->g) + return -1; + + if (flags & YESCRYPT_SHARED_PREALLOCATED) { + if (!shared->aligned || !shared->aligned_size) + return -1; + +/* Overwrite a possible old ROM tag before we overwrite the rest */ + tag = (uint32_t *) + ((uint8_t *)shared->aligned + shared->aligned_size - 48); + memset(tag, 0, 48); + } else { + shared->base = shared->aligned = NULL; + shared->base_size = shared->aligned_size = 0; + + if (yescrypt_kdf_body(NULL, shared, NULL, 0, NULL, 0, + flags | YESCRYPT_INIT_SHARED | YESCRYPT_ALLOC_ONLY, + N, r, p, t, 0, NULL, 0) != -2 || !shared->aligned) + goto fail; + } + + half1 = half2 = *shared; + half1.aligned_size /= 2; + half2.aligned = (uint8_t *)half2.aligned + half1.aligned_size; + half2.aligned_size = half1.aligned_size; + N /= 2; + + if (yescrypt_kdf_body(NULL, &half1, + seed, seedlen, (const uint8_t *)"yescrypt-ROMhash", 16, + flags | YESCRYPT_INIT_SHARED, N, r, p, t, 0, + salt, sizeof(salt))) + goto fail; + + if (yescrypt_kdf_body(&half1, &half2, + seed, seedlen, salt, sizeof(salt), + flags | YESCRYPT_INIT_SHARED, N, r, p, t, N, + salt, sizeof(salt))) + goto fail; + + if (yescrypt_kdf_body(&half2, &half1, + seed, seedlen, salt, sizeof(salt), + flags | YESCRYPT_INIT_SHARED, N, r, p, t, N, + salt, sizeof(salt))) + goto fail; + + tag = (uint32_t *) + ((uint8_t *)shared->aligned + shared->aligned_size - 48); + tag[0] = YESCRYPT_ROM_TAG1 & 0xffffffffU; + tag[1] = YESCRYPT_ROM_TAG1 >> 32; + tag[2] = YESCRYPT_ROM_TAG2 & 0xffffffffU; + tag[3] = YESCRYPT_ROM_TAG2 >> 32; + tag[4] = le32dec(salt); + tag[5] = le32dec(salt + 4); + tag[6] = le32dec(salt + 8); + tag[7] = le32dec(salt + 12); + tag[8] = le32dec(salt + 16); + tag[9] = le32dec(salt + 20); + tag[10] = le32dec(salt + 24); + tag[11] = le32dec(salt + 28); + + return 0; + +fail: + if (!(flags & YESCRYPT_SHARED_PREALLOCATED)) + free(shared->base); + return -1; +} + +yescrypt_binary_t *yescrypt_digest_shared(yescrypt_shared_t *shared) +{ + static yescrypt_binary_t digest; + uint32_t *tag; + uint64_t tag1, tag2; + + if (shared->aligned_size < 48) + return NULL; + + tag = (uint32_t *) + ((uint8_t *)shared->aligned + shared->aligned_size - 48); + + tag1 = ((uint64_t)tag[1] << 32) + tag[0]; + tag2 = ((uint64_t)tag[3] << 32) + tag[2]; + if (tag1 != YESCRYPT_ROM_TAG1 || tag2 != YESCRYPT_ROM_TAG2) + return NULL; + + le32enc(digest.uc, tag[4]); + le32enc(digest.uc + 4, tag[5]); + le32enc(digest.uc + 8, tag[6]); + le32enc(digest.uc + 12, tag[7]); + le32enc(digest.uc + 16, tag[8]); + le32enc(digest.uc + 20, tag[9]); + le32enc(digest.uc + 24, tag[10]); + le32enc(digest.uc + 28, tag[11]); + + return &digest; +} + +int yescrypt_free_shared(yescrypt_shared_t *shared) +{ + free(shared->base); + shared->base = shared->aligned = NULL; + shared->base_size = shared->aligned_size = 0; + 
return 0;
+}
+
+int yescrypt_init_local(yescrypt_local_t *local)
+{
+/* The reference implementation doesn't use the local structure */
+ local->base = local->aligned = NULL;
+ local->base_size = local->aligned_size = 0;
+ return 0;
+}
+
+int yescrypt_free_local(yescrypt_local_t *local)
+{
+/* The reference implementation frees its memory in yescrypt_kdf() */
+ (void)local; /* unused */
+ return 0;
+}
diff --git a/deps/yescrypt-master/yescrypt.h b/deps/yescrypt-master/yescrypt.h
new file mode 100644
index 000000000..f00b5a88a
--- /dev/null
+++ b/deps/yescrypt-master/yescrypt.h
@@ -0,0 +1,346 @@
+/*-
+ * Copyright 2009 Colin Percival
+ * Copyright 2013-2018 Alexander Peslyak
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * This file was originally written by Colin Percival as part of the Tarsnap
+ * online backup system.
+ */
+#ifndef _YESCRYPT_H_
+#define _YESCRYPT_H_
+
+#include <stdint.h>
+#include <stdlib.h> /* for size_t */
+
+#ifdef __cplusplus
extern "C" {
+#endif
+
+/**
+ * crypto_scrypt(passwd, passwdlen, salt, saltlen, N, r, p, buf, buflen):
+ * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r,
+ * p, buflen) and write the result into buf. The parameters r, p, and buflen
+ * must satisfy r * p < 2^30 and buflen <= (2^32 - 1) * 32. The parameter N
+ * must be a power of 2 greater than 1.
+ *
+ * Return 0 on success; or -1 on error.
+ *
+ * MT-safe as long as buf is local to the thread.
+ */
+extern int crypto_scrypt(const uint8_t *passwd, size_t passwdlen,
+ const uint8_t *salt, size_t saltlen,
+ uint64_t N, uint32_t r, uint32_t p, uint8_t *buf, size_t buflen);
+
+/**
+ * Internal type used by the memory allocator. Please do not use it directly.
+ * Use yescrypt_shared_t and yescrypt_local_t as appropriate instead, since
+ * they might differ from each other in a future version.
+ */
+typedef struct {
+ void *base, *aligned;
+ size_t base_size, aligned_size;
+} yescrypt_region_t;
+
+/**
+ * Types for shared (ROM) and thread-local (RAM) data structures.
+ */
+typedef yescrypt_region_t yescrypt_shared_t;
+typedef yescrypt_region_t yescrypt_local_t;
+
+/**
+ * Two 64-bit tags placed 48 bytes from the end of a ROM in host byte endianness
+ * (and followed by 32 bytes of the ROM digest).
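+ *
+ * Illustrative sketch (rom and rom_size are hypothetical variables describing
+ * a caller's mapping of a precomputed ROM); this mirrors the check yescrypt
+ * performs internally before using a ROM:
+ *
+ *   uint64_t *tag = (uint64_t *)((uint8_t *)rom + rom_size - 48);
+ *   int ok = (tag[0] == YESCRYPT_ROM_TAG1) && (tag[1] == YESCRYPT_ROM_TAG2);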
+ */ +#define YESCRYPT_ROM_TAG1 0x7470797263736579ULL /* "yescrypt" */ +#define YESCRYPT_ROM_TAG2 0x687361684d4f522dULL /* "-ROMhash" */ + +/** + * Type and possible values for the flags argument of yescrypt_kdf(), + * yescrypt_encode_params_r(), yescrypt_encode_params(). Most of these may be + * OR'ed together, except that YESCRYPT_WORM stands on its own. + * Please refer to the description of yescrypt_kdf() below for the meaning of + * these flags. + */ +typedef uint32_t yescrypt_flags_t; +/* Public */ +#define YESCRYPT_WORM 1 +#define YESCRYPT_RW 0x002 +#define YESCRYPT_ROUNDS_3 0x000 +#define YESCRYPT_ROUNDS_6 0x004 +#define YESCRYPT_GATHER_1 0x000 +#define YESCRYPT_GATHER_2 0x008 +#define YESCRYPT_GATHER_4 0x010 +#define YESCRYPT_GATHER_8 0x018 +#define YESCRYPT_SIMPLE_1 0x000 +#define YESCRYPT_SIMPLE_2 0x020 +#define YESCRYPT_SIMPLE_4 0x040 +#define YESCRYPT_SIMPLE_8 0x060 +#define YESCRYPT_SBOX_6K 0x000 +#define YESCRYPT_SBOX_12K 0x080 +#define YESCRYPT_SBOX_24K 0x100 +#define YESCRYPT_SBOX_48K 0x180 +#define YESCRYPT_SBOX_96K 0x200 +#define YESCRYPT_SBOX_192K 0x280 +#define YESCRYPT_SBOX_384K 0x300 +#define YESCRYPT_SBOX_768K 0x380 +/* Only valid for yescrypt_init_shared() */ +#define YESCRYPT_SHARED_PREALLOCATED 0x10000 +#ifdef YESCRYPT_INTERNAL +/* Private */ +#define YESCRYPT_MODE_MASK 0x003 +#define YESCRYPT_RW_FLAVOR_MASK 0x3fc +#define YESCRYPT_INIT_SHARED 0x01000000 +#define YESCRYPT_ALLOC_ONLY 0x08000000 +#define YESCRYPT_PREHASH 0x10000000 +#endif + +#define YESCRYPT_RW_DEFAULTS \ + (YESCRYPT_RW | \ + YESCRYPT_ROUNDS_6 | YESCRYPT_GATHER_4 | YESCRYPT_SIMPLE_2 | \ + YESCRYPT_SBOX_12K) + +#define YESCRYPT_DEFAULTS YESCRYPT_RW_DEFAULTS + +#ifdef YESCRYPT_INTERNAL +#define YESCRYPT_KNOWN_FLAGS \ + (YESCRYPT_MODE_MASK | YESCRYPT_RW_FLAVOR_MASK | \ + YESCRYPT_SHARED_PREALLOCATED | \ + YESCRYPT_INIT_SHARED | YESCRYPT_ALLOC_ONLY | YESCRYPT_PREHASH) +#endif + +/** + * yescrypt parameters combined into one struct. N, r, p are the same as in + * classic scrypt, except that the meaning of p changes when YESCRYPT_RW is + * set. flags, t, g, NROM are special to yescrypt. + */ +typedef struct { + yescrypt_flags_t flags; + uint64_t N; + uint32_t r, p, t, g; + uint64_t NROM; +} yescrypt_params_t; + +/** + * A 256-bit yescrypt hash, or a hash encryption key (which may itself have + * been derived as a yescrypt hash of a human-specified key string). + */ +typedef union { + unsigned char uc[32]; + uint64_t u64[4]; +} yescrypt_binary_t; + +/** + * yescrypt_init_shared(shared, seed, seedlen, params): + * Optionally allocate memory for and initialize the shared (ROM) data + * structure. The parameters flags, NROM, r, p, and t specify how the ROM is + * to be initialized, and seed and seedlen specify the initial seed affecting + * the data with which the ROM is filled. + * + * Return 0 on success; or -1 on error. + * + * If bit YESCRYPT_SHARED_PREALLOCATED in flags is set, then memory for the + * ROM is assumed to have been preallocated by the caller, with shared->aligned + * being the start address of the ROM and shared->aligned_size being its size + * (which must be sufficient for NROM, r, p). This may be used e.g. when the + * ROM is to be placed in a SysV shared memory segment allocated by the caller. + * + * MT-safe as long as shared is local to the thread. 
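+ *
+ * A minimal usage sketch; seed, seedlen, and handle_error() are placeholders,
+ * and the parameter values are illustrative only:
+ *
+ *   yescrypt_shared_t shared;
+ *   yescrypt_params_t rom_params = {
+ *       .flags = YESCRYPT_DEFAULTS, .N = 0, .r = 8, .p = 1,
+ *       .NROM = (uint64_t)1 << 20   // 1 GiB of ROM at r = 8
+ *   };
+ *   if (yescrypt_init_shared(&shared, seed, seedlen, &rom_params))
+ *       handle_error();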
+ */ +extern int yescrypt_init_shared(yescrypt_shared_t *shared, + const uint8_t *seed, size_t seedlen, const yescrypt_params_t *params); + +/** + * yescrypt_digest_shared(shared): + * Extract the previously stored message digest of the provided yescrypt ROM. + * + * Return pointer to the message digest on success; or NULL on error. + * + * MT-unsafe. + */ +extern yescrypt_binary_t *yescrypt_digest_shared(yescrypt_shared_t *shared); + +/** + * yescrypt_free_shared(shared): + * Free memory that had been allocated with yescrypt_init_shared(). + * + * Return 0 on success; or -1 on error. + * + * MT-safe as long as shared is local to the thread. + */ +extern int yescrypt_free_shared(yescrypt_shared_t *shared); + +/** + * yescrypt_init_local(local): + * Initialize the thread-local (RAM) data structure. Actual memory allocation + * is currently fully postponed until a call to yescrypt_kdf() or yescrypt_r(). + * + * Return 0 on success; or -1 on error. + * + * MT-safe as long as local is local to the thread. + */ +extern int yescrypt_init_local(yescrypt_local_t *local); + +/** + * yescrypt_free_local(local): + * Free memory that may have been allocated for an initialized thread-local + * (RAM) data structure. + * + * Return 0 on success; or -1 on error. + * + * MT-safe as long as local is local to the thread. + */ +extern int yescrypt_free_local(yescrypt_local_t *local); + +/** + * yescrypt_kdf(shared, local, passwd, passwdlen, salt, saltlen, params, + * buf, buflen): + * Compute scrypt(passwd[0 .. passwdlen - 1], salt[0 .. saltlen - 1], N, r, + * p, buflen), or a revision of scrypt as requested by flags and shared, and + * write the result into buf. The parameters N, r, p, and buflen must satisfy + * the same conditions as with crypto_scrypt(). t controls computation time + * while not affecting peak memory usage (t = 0 is optimal unless higher N*r + * is not affordable while higher t is). g controls hash upgrades (g = 0 for + * no upgrades so far). shared and flags may request special modes. local is + * the thread-local data structure, allowing to preserve and reuse a memory + * allocation across calls, thereby reducing processing overhead. + * + * Return 0 on success; or -1 on error. + * + * Classic scrypt is available by setting shared = NULL, flags = 0, and t = 0. + * + * Setting YESCRYPT_WORM enables only minimal deviations from classic scrypt: + * support for the t parameter, and pre- and post-hashing. + * + * Setting YESCRYPT_RW fully enables yescrypt. As a side effect of differences + * between the algorithms, it also prevents p > 1 from growing the threads' + * combined processing time and memory allocation (like it did with classic + * scrypt and YESCRYPT_WORM), treating p as a divider rather than a multiplier. + * + * Passing a shared structure, with ROM contents previously computed by + * yescrypt_init_shared(), enables the use of ROM and requires YESCRYPT_RW. + * + * In order to allow for initialization of the ROM to be split into a separate + * program (or separate invocation of the same program), the shared->aligned + * and shared->aligned_size fields may optionally be set by the caller directly + * (e.g., to a mapped SysV shm segment), without using yescrypt_init_shared(). + * + * local must be initialized with yescrypt_init_local(). + * + * MT-safe as long as local and buf are local to the thread. 
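+ *
+ * A minimal ROM-less usage sketch; passwd, passwdlen, salt, saltlen, and
+ * handle_error() are placeholders, and the parameter values are illustrative
+ * only:
+ *
+ *   yescrypt_local_t local;
+ *   yescrypt_params_t params = {
+ *       .flags = YESCRYPT_DEFAULTS, .N = 4096, .r = 8, .p = 1
+ *   };
+ *   uint8_t hash[32];
+ *   if (yescrypt_init_local(&local) ||
+ *       yescrypt_kdf(NULL, &local, passwd, passwdlen, salt, saltlen,
+ *       &params, hash, sizeof(hash)))
+ *       handle_error();
+ *   yescrypt_free_local(&local);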
+ */ +extern int yescrypt_kdf(const yescrypt_shared_t *shared, + yescrypt_local_t *local, + const uint8_t *passwd, size_t passwdlen, + const uint8_t *salt, size_t saltlen, + const yescrypt_params_t *params, + uint8_t *buf, size_t buflen); + +/** + * yescrypt_r(shared, local, passwd, passwdlen, setting, key, buf, buflen): + * Compute and encode an scrypt or enhanced scrypt hash of passwd given the + * parameters and salt value encoded in setting. If shared is not NULL, a ROM + * is used and YESCRYPT_RW is required. Otherwise, whether to compute classic + * scrypt, YESCRYPT_WORM (a slight deviation from classic scrypt), or + * YESCRYPT_RW (time-memory tradeoff discouraging modification) is determined + * by the setting string. shared (if not NULL) and local must be initialized + * as described above for yescrypt_kdf(). buf must be large enough (as + * indicated by buflen) to hold the encoded hash string. + * + * Return the encoded hash string on success; or NULL on error. + * + * MT-safe as long as local and buf are local to the thread. + */ +extern uint8_t *yescrypt_r(const yescrypt_shared_t *shared, + yescrypt_local_t *local, + const uint8_t *passwd, size_t passwdlen, + const uint8_t *setting, + const yescrypt_binary_t *key, + uint8_t *buf, size_t buflen); + +/** + * yescrypt(passwd, setting): + * Compute and encode an scrypt or enhanced scrypt hash of passwd given the + * parameters and salt value encoded in setting. Whether to compute classic + * scrypt, YESCRYPT_WORM (a slight deviation from classic scrypt), or + * YESCRYPT_RW (time-memory tradeoff discouraging modification) is determined + * by the setting string. + * + * Return the encoded hash string on success; or NULL on error. + * + * This is a crypt(3)-like interface, which is simpler to use than + * yescrypt_r(), but it is not MT-safe, it does not allow for the use of a ROM, + * and it is slower than yescrypt_r() for repeated calls because it allocates + * and frees memory on each call. + * + * MT-unsafe. + */ +extern uint8_t *yescrypt(const uint8_t *passwd, const uint8_t *setting); + +/** + * yescrypt_reencrypt(hash, from_key, to_key): + * Re-encrypt a yescrypt hash from one key to another. Either key may be NULL + * to indicate unencrypted hash. The encoded hash string is modified in-place. + * + * Return the hash pointer on success; or NULL on error (in which case the hash + * string is left unmodified). + * + * MT-safe as long as hash is local to the thread. + */ +extern uint8_t *yescrypt_reencrypt(uint8_t *hash, + const yescrypt_binary_t *from_key, + const yescrypt_binary_t *to_key); + +/** + * yescrypt_encode_params_r(params, src, srclen, buf, buflen): + * Generate a setting string for use with yescrypt_r() and yescrypt() by + * encoding into it the parameters flags, N, r, p, t, g, and a salt given by + * src (of srclen bytes). buf must be large enough (as indicated by buflen) + * to hold the setting string. + * + * Return the setting string on success; or NULL on error. + * + * MT-safe as long as buf is local to the thread. + */ +extern uint8_t *yescrypt_encode_params_r(const yescrypt_params_t *params, + const uint8_t *src, size_t srclen, + uint8_t *buf, size_t buflen); + +/** + * yescrypt_encode_params(params, src, srclen): + * Generate a setting string for use with yescrypt_r() and yescrypt(). This + * function is the same as yescrypt_encode_params_r() except that it uses a + * static buffer and thus is not MT-safe. + * + * Return the setting string on success; or NULL on error. + * + * MT-unsafe. 
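+ *
+ * Illustrative sketch (params, salt, saltlen, and passwd are assumed to be
+ * set up by the caller):
+ *
+ *   const uint8_t *setting = yescrypt_encode_params(&params, salt, saltlen);
+ *   uint8_t *hash = setting ? yescrypt(passwd, setting) : NULL;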
+ */ +extern uint8_t *yescrypt_encode_params(const yescrypt_params_t *params, + const uint8_t *src, size_t srclen); + +#ifdef __cplusplus +} +#endif + +#endif /* !_YESCRYPT_H_ */ diff --git a/src/bridges/bridge_scrypt_yescrypt.c b/src/bridges/bridge_scrypt_yescrypt.c new file mode 100644 index 000000000..d0bfd3284 --- /dev/null +++ b/src/bridges/bridge_scrypt_yescrypt.c @@ -0,0 +1,285 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "common.h" +#include "types.h" +#include "bridges.h" +#include "memory.h" + +#include "yescrypt.h" + +void smix(uint8_t *B, size_t r, uint32_t N, uint32_t p, uint32_t t, + yescrypt_flags_t flags, + void *V, uint32_t NROM, const void *VROM, + void *XY, uint8_t *S, uint8_t *passwd); + +// good: we can use this multiplier do reduce copy overhead to increase the guessing speed, +// bad: but we also increase the password candidate batch size. +// slow hashes which make use of this bridge probably are used with smaller wordlists, +// and therefore it's easier for hashcat to parallelize if this multiplier is low. +// in the end, it's a trade-off. + +#define N_ACCEL 8 + +#define SCRYPT_R_MAX 16 +#define SCRYPT_P_MAX 16 + +#define SCRYPT_TMP_SIZE (128ULL * SCRYPT_R_MAX * SCRYPT_P_MAX) +#define SCRYPT_TMP_SIZE4 (SCRYPT_TMP_SIZE / 4) + +typedef struct +{ + u32 B[SCRYPT_TMP_SIZE4]; + +} scrypt_tmp_t; + +typedef struct +{ + u32 salt_buf[64]; + u32 salt_len; + + u32 digest_buf[64]; + u32 digest_len; + + u32 N; + u32 r; + u32 p; + +} scrypt_t; + +typedef struct +{ + void *V; + void *XY; + + // implementation specific + + char unit_info_buf[1024]; + int unit_info_len; + + u64 workitem_count; + size_t workitem_size; + +} unit_t; + +typedef struct +{ + unit_t *units_buf; + int units_cnt; + +} bridge_scrypt_yescrypt_t; + +static bool units_init (bridge_scrypt_yescrypt_t *bridge_scrypt_yescrypt) +{ + #if defined (_WIN) + + SYSTEM_INFO sysinfo; + + GetSystemInfo (&sysinfo); + + int num_devices = sysinfo.dwNumberOfProcessors; + + #else + + int num_devices = sysconf (_SC_NPROCESSORS_ONLN); + + #endif + + unit_t *units_buf = (unit_t *) hccalloc (num_devices, sizeof (unit_t)); + + int units_cnt = 0; + + for (int i = 0; i < num_devices; i++) + { + unit_t *unit_buf = &units_buf[i]; + + unit_buf->unit_info_len = snprintf (unit_buf->unit_info_buf, sizeof (unit_buf->unit_info_buf) - 1, + "%s", + "Scrypt-Yescrypt"); + + unit_buf->unit_info_buf[unit_buf->unit_info_len] = 0; + + unit_buf->workitem_count = N_ACCEL; + + units_cnt++; + } + + bridge_scrypt_yescrypt->units_buf = units_buf; + bridge_scrypt_yescrypt->units_cnt = units_cnt; + + return true; +} + +static void units_term (bridge_scrypt_yescrypt_t *bridge_scrypt_yescrypt) +{ + if (bridge_scrypt_yescrypt) + { + hcfree (bridge_scrypt_yescrypt->units_buf); + } +} + +void *platform_init () +{ + // bridge_scrypt_yescrypt_t will be our platform context + + bridge_scrypt_yescrypt_t *bridge_scrypt_yescrypt = (bridge_scrypt_yescrypt_t *) hcmalloc (sizeof (bridge_scrypt_yescrypt_t)); + + if (units_init (bridge_scrypt_yescrypt) == false) + { + hcfree (bridge_scrypt_yescrypt); + + return NULL; + } + + return bridge_scrypt_yescrypt; +} + +void platform_term (void *platform_context) +{ + bridge_scrypt_yescrypt_t *bridge_scrypt_yescrypt = platform_context; + + if (bridge_scrypt_yescrypt) + { + units_term (bridge_scrypt_yescrypt); + + hcfree (bridge_scrypt_yescrypt); + } +} + +int get_unit_count (void *platform_context) +{ + bridge_scrypt_yescrypt_t *bridge_scrypt_yescrypt = platform_context; + + return 
bridge_scrypt_yescrypt->units_cnt; +} + +// we support units of mixed speed, that's why the workitem count is unit specific + +int get_workitem_count (void *platform_context, const int unit_idx) +{ + bridge_scrypt_yescrypt_t *bridge_scrypt_yescrypt = platform_context; + + unit_t *unit_buf = &bridge_scrypt_yescrypt->units_buf[unit_idx]; + + return unit_buf->workitem_count; +} + +char *get_unit_info (void *platform_context, const int unit_idx) +{ + bridge_scrypt_yescrypt_t *bridge_scrypt_yescrypt = platform_context; + + unit_t *unit_buf = &bridge_scrypt_yescrypt->units_buf[unit_idx]; + + return unit_buf->unit_info_buf; +} + +bool salt_prepare (void *platform_context, MAYBE_UNUSED hashconfig_t *hashconfig, MAYBE_UNUSED hashes_t *hashes) +{ + // selftest hash + + scrypt_t *scrypt_st = (scrypt_t *) hashes->st_esalts_buf; + + size_t largest_V = 128 * scrypt_st->r * scrypt_st->N; // yescrypt: the temporary storage V must be 128rN bytes in length + size_t largest_XY = 256 * scrypt_st->r * scrypt_st->p; // yescrypt: the temporary storage XY must be 256r or 256rp bytes in length + + // from here regular hashes + + scrypt_t *scrypt = (scrypt_t *) hashes->esalts_buf; + + for (u32 salt_idx = 0; salt_idx < hashes->salts_cnt; salt_idx++, scrypt++) + { + const size_t sz_V = 128 * scrypt->r * scrypt->N; // yescrypt: the temporary storage V must be 128rN bytes in length + const size_t sz_XY = 256 * scrypt->r * scrypt->p; // yescrypt: the temporary storage XY must be 256r or 256rp bytes in length + + if (sz_V > largest_V) largest_V = sz_V; + if (sz_XY > largest_XY) largest_XY = sz_XY; + } + + bridge_scrypt_yescrypt_t *bridge_scrypt_yescrypt = platform_context; + + for (int unit_idx = 0; unit_idx < bridge_scrypt_yescrypt->units_cnt; unit_idx++) + { + unit_t *unit_buf = &bridge_scrypt_yescrypt->units_buf[unit_idx]; + + unit_buf->V = hcmalloc_aligned (largest_V, 64); + unit_buf->XY = hcmalloc_aligned (largest_XY, 64); + } + + return true; +} + +void salt_destroy (void *platform_context, MAYBE_UNUSED hashconfig_t *hashconfig, MAYBE_UNUSED hashes_t *hashes) +{ + bridge_scrypt_yescrypt_t *bridge_scrypt_yescrypt = platform_context; + + for (int unit_idx = 0; unit_idx < bridge_scrypt_yescrypt->units_cnt; unit_idx++) + { + unit_t *unit_buf = &bridge_scrypt_yescrypt->units_buf[unit_idx]; + + hcfree_aligned (unit_buf->V); + hcfree_aligned (unit_buf->XY); + } +} + +bool launch_loop (MAYBE_UNUSED void *platform_context, MAYBE_UNUSED hc_device_param_t *device_param, MAYBE_UNUSED hashconfig_t *hashconfig, MAYBE_UNUSED hashes_t *hashes, MAYBE_UNUSED const u32 salt_pos, MAYBE_UNUSED const u64 pws_cnt) +{ + bridge_scrypt_yescrypt_t *bridge_scrypt_yescrypt = platform_context; + + const int unit_idx = device_param->bridge_link_device; + + unit_t *unit_buf = &bridge_scrypt_yescrypt->units_buf[unit_idx]; + + scrypt_t *esalts_buf = (scrypt_t *) hashes->esalts_buf; + + scrypt_t *esalt_buf = &esalts_buf[salt_pos]; + + // hashcat guarantees h_tmps[] is 64 byte aligned, so is *B + + scrypt_tmp_t *scrypt_tmp = (scrypt_tmp_t *) device_param->h_tmps; + + for (u64 pw_cnt = 0; pw_cnt < pws_cnt; pw_cnt++) + { + u8 *B = (u8 *) scrypt_tmp->B; + + // We could use p-based parallelization from yescrypt instead, + // but since we're already multi-threading, there's no need to run OpenMP. + // With that in mind, we can optimize by using a constant p=1, + // allowing the compiler to eliminate branches in smix(). 
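+    // (For reference: with p = 1, flags = 0, and t = 0 each call below matches
+    // classic scrypt's independent B_i = MF(B_i, N) step; compare the p > 1
+    // loop in yescrypt-ref.c's yescrypt_kdf_body().)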
+ + for (u32 i = 0; i < esalt_buf->p; i++) + { + // Same here: using constants allows the compiler to optimize away branches in smix(), + // so there's no need to call smix1()/smix2() directly and unnecessarily complicate the code. + + smix (B, esalt_buf->r, esalt_buf->N, 1, 0, 0, unit_buf->V, 0, NULL, unit_buf->XY, NULL, NULL); + + B += 128 * esalt_buf->r; + } + + scrypt_tmp++; + } + + return true; +} + +void bridge_init (bridge_ctx_t *bridge_ctx) +{ + bridge_ctx->bridge_context_size = BRIDGE_CONTEXT_SIZE_CURRENT; + bridge_ctx->bridge_interface_version = BRIDGE_INTERFACE_VERSION_CURRENT; + + bridge_ctx->platform_init = platform_init; + bridge_ctx->platform_term = platform_term; + bridge_ctx->get_unit_count = get_unit_count; + bridge_ctx->get_unit_info = get_unit_info; + bridge_ctx->get_workitem_count = get_workitem_count; + bridge_ctx->thread_init = BRIDGE_DEFAULT; + bridge_ctx->thread_term = BRIDGE_DEFAULT; + bridge_ctx->salt_prepare = salt_prepare; + bridge_ctx->salt_destroy = salt_destroy; + bridge_ctx->launch_loop = launch_loop; + bridge_ctx->launch_loop2 = BRIDGE_DEFAULT; + bridge_ctx->st_update_hash = BRIDGE_DEFAULT; + bridge_ctx->st_update_pass = BRIDGE_DEFAULT; +} diff --git a/src/bridges/bridge_scrypt_yescrypt.mk b/src/bridges/bridge_scrypt_yescrypt.mk new file mode 100644 index 000000000..715bbb950 --- /dev/null +++ b/src/bridges/bridge_scrypt_yescrypt.mk @@ -0,0 +1,30 @@ + +SCRYPT_YESCRYPT := deps/yescrypt-master +SCRYPT_YESCRYPT_CFLAGS := -I$(SCRYPT_YESCRYPT)/ -DSKIP_MEMZERO -Wno-cpp -Wno-type-limits + +ifeq ($(BUILD_MODE),cross) +SCRYPT_YESCRYPT_CFLAGS += -mavx2 +else +ifeq ($(UNAME),Darwin) +ifeq ($(IS_APPLE_SILICON),0) +SCRYPT_YESCRYPT_CFLAGS += -mavx2 +endif +else +SCRYPT_YESCRYPT_CFLAGS += -march=native +endif +endif + +ifeq ($(BUILD_MODE),cross) +bridges/bridge_scrypt_yescrypt.so: src/bridges/bridge_scrypt_yescrypt.c $(SCRYPT_YESCRYPT)/yescrypt-opt.c $(SCRYPT_YESCRYPT)/sha256.c obj/combined.LINUX.a + $(CC_LINUX) $(CCFLAGS) $(CFLAGS_CROSS_LINUX) $^ -o $@ $(LFLAGS_CROSS_LINUX) -shared -fPIC -D BRIDGE_INTERFACE_VERSION_CURRENT=$(BRIDGE_INTERFACE_VERSION) $(SCRYPT_YESCRYPT_CFLAGS) +bridges/bridge_scrypt_yescrypt.dll: src/bridges/bridge_scrypt_yescrypt.c $(SCRYPT_YESCRYPT)/yescrypt-opt.c $(SCRYPT_YESCRYPT)/sha256.c obj/combined.WIN.a + $(CC_WIN) $(CCFLAGS) $(CFLAGS_CROSS_WIN) $^ -o $@ $(LFLAGS_CROSS_WIN) -shared -fPIC -D BRIDGE_INTERFACE_VERSION_CURRENT=$(BRIDGE_INTERFACE_VERSION) $(SCRYPT_YESCRYPT_CFLAGS) +else +ifeq ($(SHARED),1) +bridges/bridge_scrypt_yescrypt.$(BRIDGE_SUFFIX): src/bridges/bridge_scrypt_yescrypt.c $(SCRYPT_YESCRYPT)/yescrypt-opt.c $(SCRYPT_YESCRYPT)/sha256.c $(HASHCAT_LIBRARY) + $(CC) $(CCFLAGS) $(CFLAGS_NATIVE) $^ -o $@ $(LFLAGS_NATIVE) -shared -fPIC -D BRIDGE_INTERFACE_VERSION_CURRENT=$(BRIDGE_INTERFACE_VERSION) $(SCRYPT_YESCRYPT_CFLAGS) +else +bridges/bridge_scrypt_yescrypt.$(BRIDGE_SUFFIX): src/bridges/bridge_scrypt_yescrypt.c $(SCRYPT_YESCRYPT)/yescrypt-opt.c $(SCRYPT_YESCRYPT)/sha256.c obj/combined.NATIVE.a + $(CC) $(CCFLAGS) $(CFLAGS_NATIVE) $^ -o $@ $(LFLAGS_NATIVE) -shared -fPIC -D BRIDGE_INTERFACE_VERSION_CURRENT=$(BRIDGE_INTERFACE_VERSION) $(SCRYPT_YESCRYPT_CFLAGS) +endif +endif diff --git a/src/modules/module_70200.c b/src/modules/module_70200.c new file mode 100644 index 000000000..f4e6c0bef --- /dev/null +++ b/src/modules/module_70200.c @@ -0,0 +1,298 @@ +/** + * Author......: See docs/credits.txt + * License.....: MIT + */ + +#include "common.h" +#include "types.h" +#include "modules.h" +#include "bitops.h" +#include "convert.h" +#include 
"shared.h" + +static const u32 ATTACK_EXEC = ATTACK_EXEC_OUTSIDE_KERNEL; +static const u32 DGST_POS0 = 0; +static const u32 DGST_POS1 = 1; +static const u32 DGST_POS2 = 2; +static const u32 DGST_POS3 = 3; +static const u32 DGST_SIZE = DGST_SIZE_4_4; +static const u32 HASH_CATEGORY = HASH_CATEGORY_GENERIC_KDF; +static const char *HASH_NAME = "scrypt [Bridged: Scrypt-Yescrypt]"; +static const u64 KERN_TYPE = 70100; +static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; +static const u64 OPTS_TYPE = OPTS_TYPE_STOCK_MODULE + | OPTS_TYPE_PT_GENERATE_LE + | OPTS_TYPE_NATIVE_THREADS + | OPTS_TYPE_MP_MULTI_DISABLE; +static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; +static const u64 BRIDGE_TYPE = BRIDGE_TYPE_MATCH_TUNINGS // optional - improves performance + | BRIDGE_TYPE_LAUNCH_LOOP; +static const char *BRIDGE_NAME = "scrypt_yescrypt"; +static const char *ST_PASS = "hashcat"; +static const char *ST_HASH = "SCRYPT:16384:8:2:ODEzMTA2Mw==:NuOcXzv+MOqXmwTXnH6bbUEjN/vjlDG28IM7WXaUkk0="; + +u32 module_attack_exec (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ATTACK_EXEC; } +u32 module_dgst_pos0 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS0; } +u32 module_dgst_pos1 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS1; } +u32 module_dgst_pos2 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS2; } +u32 module_dgst_pos3 (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_POS3; } +u32 module_dgst_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return DGST_SIZE; } +u32 module_hash_category (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_CATEGORY; } +const char *module_hash_name (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return HASH_NAME; } +u64 module_kern_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return KERN_TYPE; } +u32 module_opti_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTI_TYPE; } +u64 module_opts_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return OPTS_TYPE; } +u32 module_salt_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return SALT_TYPE; } +const char *module_st_hash (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const 
user_options_extra_t *user_options_extra) { return ST_HASH; } +const char *module_st_pass (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return ST_PASS; } +const char *module_bridge_name (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return BRIDGE_NAME; } +u64 module_bridge_type (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) { return BRIDGE_TYPE; } + +static const char *SIGNATURE_SCRYPT = "SCRYPT"; + +typedef struct +{ + u32 salt_buf[64]; + u32 salt_len; + + u32 digest_buf[64]; + u32 digest_len; + + u32 N; + u32 r; + u32 p; + +} scrypt_t; + +u32 module_pw_max (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + // this overrides the reductions of PW_MAX in case optimized kernel is selected + // IOW, even in optimized kernel mode it support length 256 + + const u32 pw_max = PW_MAX; + + return pw_max; +} + +u64 module_esalt_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + const u64 esalt_size = (const u64) sizeof (scrypt_t); + + return esalt_size; +} + +u64 module_tmp_size (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const user_options_t *user_options, MAYBE_UNUSED const user_options_extra_t *user_options_extra) +{ + #define SCRYPT_R_MAX 16 + #define SCRYPT_P_MAX 16 + + const u64 tmp_size = 128ULL * SCRYPT_R_MAX * SCRYPT_P_MAX; + + return tmp_size; +} + +int module_hash_decode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED void *digest_buf, MAYBE_UNUSED salt_t *salt, MAYBE_UNUSED void *esalt_buf, MAYBE_UNUSED void *hook_salt_buf, MAYBE_UNUSED hashinfo_t *hash_info, const char *line_buf, MAYBE_UNUSED const int line_len) +{ + u32 *digest = (u32 *) digest_buf; + + scrypt_t *scrypt = (scrypt_t *) esalt_buf; + + hc_token_t token; + + token.token_cnt = 6; + + token.signatures_cnt = 1; + token.signatures_buf[0] = SIGNATURE_SCRYPT; + + token.len_min[0] = 6; + token.len_max[0] = 6; + token.sep[0] = ':'; + token.attr[0] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_SIGNATURE; + + token.len_min[1] = 1; + token.len_max[1] = 6; + token.sep[1] = ':'; + token.attr[1] = TOKEN_ATTR_VERIFY_LENGTH; + + token.len_min[2] = 1; + token.len_max[2] = 6; + token.sep[2] = ':'; + token.attr[2] = TOKEN_ATTR_VERIFY_LENGTH; + + token.len_min[3] = 1; + token.len_max[3] = 6; + token.sep[3] = ':'; + token.attr[3] = TOKEN_ATTR_VERIFY_LENGTH; + + token.len_min[4] = 0; + token.len_max[4] = 45; + token.sep[4] = ':'; + token.attr[4] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_BASE64A; + + token.len_min[5] = 44; + token.len_max[5] = 88; + token.sep[5] = ':'; + token.attr[5] = TOKEN_ATTR_VERIFY_LENGTH + | TOKEN_ATTR_VERIFY_BASE64A; + + const int rc_tokenizer = input_tokenizer ((const u8 *) line_buf, line_len, &token); + + if (rc_tokenizer != PARSER_OK) return (rc_tokenizer); + + // scrypt settings + + const u8 *N_pos = token.buf[1]; + const u8 *r_pos = token.buf[2]; + const u8 *p_pos = token.buf[3]; + + scrypt->N = hc_strtoul ((const char *) N_pos, NULL, 10); + scrypt->r = hc_strtoul ((const char *) r_pos, NULL, 10); + scrypt->p = hc_strtoul ((const 
+
+  if (scrypt->r > 16) return (PARSER_SALT_VALUE);
+  if (scrypt->p > 16) return (PARSER_SALT_VALUE);
+
+  // salt
+
+  const u8 *salt_pos = token.buf[4];
+  const int salt_len = token.len[4];
+
+  scrypt->salt_len = base64_decode (base64_to_int, (const u8 *) salt_pos, salt_len, (u8 *) scrypt->salt_buf);
+
+  // digest - base64 decode
+
+  const u8 *hash_pos = token.buf[5];
+  const int hash_len = token.len[5];
+
+  scrypt->digest_len = base64_decode (base64_to_int, (const u8 *) hash_pos, hash_len, (u8 *) scrypt->digest_buf);
+
+  // comparison digest
+
+  digest[0] = scrypt->digest_buf[0];
+  digest[1] = scrypt->digest_buf[1];
+  digest[2] = scrypt->digest_buf[2];
+  digest[3] = scrypt->digest_buf[3];
+
+  // fake salt, we just need to make this unique
+
+  salt->salt_buf[0] = digest[0];
+  salt->salt_buf[1] = digest[1];
+  salt->salt_buf[2] = digest[2];
+  salt->salt_buf[3] = digest[3];
+  salt->salt_buf[4] = scrypt->N;
+  salt->salt_buf[5] = scrypt->r;
+  salt->salt_buf[6] = scrypt->p;
+  salt->salt_buf[7] = 0;
+
+  salt->salt_len = 32;
+  salt->salt_iter = 1;
+
+  return (PARSER_OK);
+}
+
+int module_hash_encode (MAYBE_UNUSED const hashconfig_t *hashconfig, MAYBE_UNUSED const void *digest_buf, MAYBE_UNUSED const salt_t *salt, MAYBE_UNUSED const void *esalt_buf, MAYBE_UNUSED const void *hook_salt_buf, MAYBE_UNUSED const hashinfo_t *hash_info, char *line_buf, MAYBE_UNUSED const int line_size)
+{
+  const scrypt_t *scrypt = (const scrypt_t *) esalt_buf;
+
+  char base64_salt[64] = { 0 };
+
+  base64_encode (int_to_base64, (const u8 *) scrypt->salt_buf, scrypt->salt_len, (u8 *) base64_salt);
+
+  char base64_digest[128] = { 0 };
+
+  base64_encode (int_to_base64, (const u8 *) scrypt->digest_buf, scrypt->digest_len, (u8 *) base64_digest);
+
+  const int line_len = snprintf (line_buf, line_size, "%s:%u:%u:%u:%s:%s",
+    SIGNATURE_SCRYPT,
+    scrypt->N,
+    scrypt->r,
+    scrypt->p,
+    base64_salt,
+    base64_digest);
+
+  return line_len;
+}
+
+void module_init (module_ctx_t *module_ctx)
+{
+  module_ctx->module_context_size = MODULE_CONTEXT_SIZE_CURRENT;
+  module_ctx->module_interface_version = MODULE_INTERFACE_VERSION_CURRENT;
+
+  module_ctx->module_attack_exec = module_attack_exec;
+  module_ctx->module_benchmark_esalt = MODULE_DEFAULT;
+  module_ctx->module_benchmark_hook_salt = MODULE_DEFAULT;
+  module_ctx->module_benchmark_mask = MODULE_DEFAULT;
+  module_ctx->module_benchmark_charset = MODULE_DEFAULT;
+  module_ctx->module_benchmark_salt = MODULE_DEFAULT;
+  module_ctx->module_bridge_name = module_bridge_name;
+  module_ctx->module_bridge_type = module_bridge_type;
+  module_ctx->module_build_plain_postprocess = MODULE_DEFAULT;
+  module_ctx->module_deep_comp_kernel = MODULE_DEFAULT;
+  module_ctx->module_deprecated_notice = MODULE_DEFAULT;
+  module_ctx->module_dgst_pos0 = module_dgst_pos0;
+  module_ctx->module_dgst_pos1 = module_dgst_pos1;
+  module_ctx->module_dgst_pos2 = module_dgst_pos2;
+  module_ctx->module_dgst_pos3 = module_dgst_pos3;
+  module_ctx->module_dgst_size = module_dgst_size;
+  module_ctx->module_dictstat_disable = MODULE_DEFAULT;
+  module_ctx->module_esalt_size = module_esalt_size;
+  module_ctx->module_extra_buffer_size = MODULE_DEFAULT;
+  module_ctx->module_extra_tmp_size = MODULE_DEFAULT;
+  module_ctx->module_extra_tuningdb_block = MODULE_DEFAULT;
+  module_ctx->module_forced_outfile_format = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_count = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_parse = MODULE_DEFAULT;
+  module_ctx->module_hash_binary_save = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_postprocess = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_potfile = MODULE_DEFAULT;
+  module_ctx->module_hash_decode_zero_hash = MODULE_DEFAULT;
+  module_ctx->module_hash_decode = module_hash_decode;
+  module_ctx->module_hash_encode_status = MODULE_DEFAULT;
+  module_ctx->module_hash_encode_potfile = MODULE_DEFAULT;
+  module_ctx->module_hash_encode = module_hash_encode;
+  module_ctx->module_hash_init_selftest = MODULE_DEFAULT;
+  module_ctx->module_hash_mode = MODULE_DEFAULT;
+  module_ctx->module_hash_category = module_hash_category;
+  module_ctx->module_hash_name = module_hash_name;
+  module_ctx->module_hashes_count_min = MODULE_DEFAULT;
+  module_ctx->module_hashes_count_max = MODULE_DEFAULT;
+  module_ctx->module_hlfmt_disable = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_size = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_init = MODULE_DEFAULT;
+  module_ctx->module_hook_extra_param_term = MODULE_DEFAULT;
+  module_ctx->module_hook12 = MODULE_DEFAULT;
+  module_ctx->module_hook23 = MODULE_DEFAULT;
+  module_ctx->module_hook_salt_size = MODULE_DEFAULT;
+  module_ctx->module_hook_size = MODULE_DEFAULT;
+  module_ctx->module_jit_build_options = MODULE_DEFAULT;
+  module_ctx->module_jit_cache_disable = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_max = MODULE_DEFAULT;
+  module_ctx->module_kernel_accel_min = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_max = MODULE_DEFAULT;
+  module_ctx->module_kernel_loops_min = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_max = MODULE_DEFAULT;
+  module_ctx->module_kernel_threads_min = MODULE_DEFAULT;
+  module_ctx->module_kern_type = module_kern_type;
+  module_ctx->module_kern_type_dynamic = MODULE_DEFAULT;
+  module_ctx->module_opti_type = module_opti_type;
+  module_ctx->module_opts_type = module_opts_type;
+  module_ctx->module_outfile_check_disable = MODULE_DEFAULT;
+  module_ctx->module_outfile_check_nocomp = MODULE_DEFAULT;
+  module_ctx->module_potfile_custom_check = MODULE_DEFAULT;
+  module_ctx->module_potfile_disable = MODULE_DEFAULT;
+  module_ctx->module_potfile_keep_all_hashes = MODULE_DEFAULT;
+  module_ctx->module_pwdump_column = MODULE_DEFAULT;
+  module_ctx->module_pw_max = module_pw_max;
+  module_ctx->module_pw_min = MODULE_DEFAULT;
+  module_ctx->module_salt_max = MODULE_DEFAULT;
+  module_ctx->module_salt_min = MODULE_DEFAULT;
+  module_ctx->module_salt_type = module_salt_type;
+  module_ctx->module_separator = MODULE_DEFAULT;
+  module_ctx->module_st_hash = module_st_hash;
+  module_ctx->module_st_pass = module_st_pass;
+  module_ctx->module_tmp_size = module_tmp_size;
+  module_ctx->module_unstable_warning = MODULE_DEFAULT;
+  module_ctx->module_warmup_disable = MODULE_DEFAULT;
+}
diff --git a/tools/test_modules/m70200.pm b/tools/test_modules/m70200.pm
new file mode 100644
index 000000000..145610f6c
--- /dev/null
+++ b/tools/test_modules/m70200.pm
@@ -0,0 +1,90 @@
+#!/usr/bin/env perl
+
+##
+## Author......: See docs/credits.txt
+## License.....: MIT
+##
+
+use strict;
+use warnings;
+
+use Crypt::ScryptKDF qw (scrypt_hash);
+use MIME::Base64 qw (decode_base64);
+
+sub module_constraints { [[0, 256], [1, 15], [-1, -1], [-1, -1], [-1, -1]] }
+
+sub module_generate_hash
+{
+  my $word = shift;
+  my $salt = shift;
+  my $N = shift // 16384;
+  my $r = shift // 8;
+  my $p = shift // 2;
+
+  my $hash_buf = scrypt_hash ($word, $salt, $N, $r, $p, 32);
+
+  my $hash = sprintf ('%s', $hash_buf);
+
+  return $hash;
+}
+
+sub module_verify_hash
+{
+  my $line = shift;
+
+  # scrypt
+  return unless (substr ($line, 0, 7) eq 'SCRYPT:');
+
+  # get hash
+  my $index1 = index ($line, ":", 7);
+
+  return if $index1 < 1;
+
+  # N
+  my $N = substr ($line, 7, $index1 - 7);
+
+  my $index2 = index ($line, ":", $index1 + 1);
+
+  return if $index2 < 1;
+
+  # r
+  my $r = substr ($line, $index1 + 1, $index2 - $index1 - 1);
+
+  $index1 = index ($line, ":", $index2 + 1);
+
+  return if $index1 < 1;
+
+  # p
+  my $p = substr ($line, $index2 + 1, $index1 - $index2 - 1);
+
+  $index2 = index ($line, ":", $index1 + 1);
+
+  return if $index2 < 1;
+
+  # salt
+  my $salt = substr ($line, $index1 + 1, $index2 - $index1 - 1);
+
+  $salt = decode_base64 ($salt);
+
+  $index1 = index ($line, ":", $index2 + 1);
+
+  return if $index1 < 1;
+
+  # digest
+
+  my $word = substr ($line, $index1 + 1);
+
+  return unless defined $salt;
+  return unless defined $word;
+  return unless defined $N;
+  return unless defined $r;
+  return unless defined $p;
+
+  $word = pack_if_HEX_notation ($word);
+
+  my $new_hash = module_generate_hash ($word, $salt, $N, $r, $p);
+
+  return ($new_hash, $word);
+}
+
+1;
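
Usage sketch (an editor's illustration, not part of the patch): Crypt::ScryptKDF's scrypt_hash() natively emits the "SCRYPT:N:r:p:<base64 salt>:<base64 digest>" layout that module_hash_decode() in module_70200.c parses, which is why module_generate_hash() above can simply return its output. The password "hashcat" is the module's ST_PASS and "8131063" is the base64-decoded salt from ST_HASH, so the snippet below should reproduce the self-test string:

  use Crypt::ScryptKDF qw (scrypt_hash);

  # same call shape as module_generate_hash() in m70200.pm;
  # N=16384, r=8, p=2 and a 32-byte derived key match the self-test parameters
  print scrypt_hash ("hashcat", "8131063", 16384, 8, 2, 32), "\n";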