mirror of
https://github.com/hashcat/hashcat.git
synced 2025-08-04 12:56:00 +00:00

Until now, Metal support has been developed on an Apple M1 and on a 10-year-old Intel-based Apple machine, the latter to verify that changes remained compatible with very old devices. Recently, the code was also tested on an Apple M4 Pro, which performs about 3.7 times faster than the M1. It has also been tested sporadically on an Apple device with a discrete AMD GPU, where performance was very low. With this patch, I revisited memory management on Metal, starting with an easily configurable array mapped one-to-one to the buffers allocated by hashcat. The array configures the Storage Mode associated with each buffer; in addition, an ad hoc adjustment converts buffers with the SHARED Storage Mode to MANAGED when the device is a discrete GPU rather than an M* (Apple Silicon) or integrated (Intel) one. The result was excellent: in some very quick tests, for example, argon2 went from 10 H/s (1330.58 ms) to 465 H/s (57.26 ms), a 4550% increase in hash rate and a 2223% improvement in GPU execution time. In addition to the array that configures the buffer storage modes, a macro, HC_MTL_CREATEBUFFER, has been added. It generates the code that calls hc_mtlCreateBuffer, making the code much more readable than before. In summary, this patch lays the groundwork for further improvements to the hashcat core, both on Metal itself and for other runtimes, particularly OpenCL.
232 lines
10 KiB
C
232 lines
10 KiB
C
/**
 * Author......: See docs/credits.txt
 * License.....: MIT
 */

#ifndef HC_EXT_METAL_H
#define HC_EXT_METAL_H

#if defined (__APPLE__)

#include <objc/runtime.h>
#include <CoreFoundation/CoreFoundation.h>

/**
 * Metal object handle aliases.
 *
 * Every alias expands to the Objective-C `id` type, so the compiler treats
 * all of them as the same type; the distinct names only document which kind
 * of object a parameter or variable is meant to carry.
 */
#define mtl_device_id               id
#define mtl_command_queue           id
#define mtl_function                id
#define mtl_pipeline                id
#define mtl_library                 id
#define mtl_command_buffer          id
#define mtl_command_encoder         id
#define mtl_blit_command_encoder    id
#define mtl_compute_command_encoder id

typedef struct mtl_mem
|
|
{
|
|
id buf_ptr;
|
|
|
|
unsigned int buf_mode;
|
|
|
|
} mtl_mem_t;
|
|
|
|
/**
 * Storage modes a buffer can be configured with.
 *
 * NOTE(review): these are this header's own values; they do not match the raw
 * values of Apple's MTLStorageMode (Shared = 0, Managed = 1, Private = 2), so
 * the implementation is expected to translate them - confirm.
 */
typedef enum metalResourceStorageMode
{
  MTL_STORAGE_MODE_PRIVATE = 0, // only the GPU can access
  MTL_STORAGE_MODE_SHARED,      // both the CPU and the GPU can access
  MTL_STORAGE_MODE_MANAGED,     // CPU and GPU each keep their own copy (double allocation)

} metalResourceStorageMode_t;

/**
 * Index of each buffer in the metalResourceStorageModes table.
 *
 * The metal_d_<name>_storageMode ids are the ones HC_MTL_CREATEBUFFER builds
 * by token pasting from a buffer name, so each must keep the exact
 * "metal_d_" + <name> + "_storageMode" spelling.
 * MTL_BUFFER_CNT is the number of ids and sizes the table; it must stay last.
 */
typedef enum metalBufferStorageModeId
{
  metal_d_pws_buf_storageMode,
  metal_d_pws_amp_buf_storageMode,
  metal_d_pws_comp_buf_storageMode,
  metal_d_pws_idx_storageMode,
  metal_d_rules_storageMode,
  metal_d_rules_c_storageMode,
  metal_d_combs_storageMode,
  metal_d_combs_c_storageMode,
  metal_d_bfs_storageMode,
  metal_d_bfs_c_storageMode,
  metal_d_tm_c_storageMode,
  metal_d_bitmap_s1_a_storageMode,
  metal_d_bitmap_s1_b_storageMode,
  metal_d_bitmap_s1_c_storageMode,
  metal_d_bitmap_s1_d_storageMode,
  metal_d_bitmap_s2_a_storageMode,
  metal_d_bitmap_s2_b_storageMode,
  metal_d_bitmap_s2_c_storageMode,
  metal_d_bitmap_s2_d_storageMode,
  metal_d_plain_bufs_storageMode,
  metal_d_digests_buf_storageMode,
  metal_d_digests_shown_storageMode,
  metal_d_salt_bufs_storageMode,
  metal_d_esalt_bufs_storageMode,
  metal_d_tmps_storageMode,
  metal_d_hooks_storageMode,
  metal_d_result_storageMode,
  metal_d_extra0_buf_storageMode,
  metal_d_extra1_buf_storageMode,
  metal_d_extra2_buf_storageMode,
  metal_d_extra3_buf_storageMode,
  metal_d_root_css_buf_storageMode,
  metal_d_markov_css_buf_storageMode,
  metal_d_st_digests_buf_storageMode,
  metal_d_st_salts_buf_storageMode,
  metal_d_st_esalts_buf_storageMode,
  metal_d_kernel_param_storageMode,
  // generic ids that name a storage mode rather than a specific buffer
  metal_private_storageMode,
  metal_shared_storageMode,
  metal_managed_storageMode,
  MTL_BUFFER_CNT

} metalBufferStorageModeId_t;

static const metalResourceStorageMode_t metalResourceStorageModes[MTL_BUFFER_CNT] =
|
|
{
|
|
[metal_d_pws_buf_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_pws_amp_buf_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_pws_comp_buf_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_pws_idx_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_rules_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_rules_c_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_combs_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_combs_c_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_bfs_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_bfs_c_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_tm_c_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_bitmap_s1_a_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_bitmap_s1_b_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_bitmap_s1_c_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_bitmap_s1_d_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_bitmap_s2_a_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_bitmap_s2_b_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_bitmap_s2_c_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_bitmap_s2_d_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_plain_bufs_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_digests_buf_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_digests_shown_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_salt_bufs_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_esalt_bufs_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_tmps_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_hooks_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_result_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_extra0_buf_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_extra1_buf_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_extra2_buf_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_extra3_buf_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_root_css_buf_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_markov_css_buf_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_st_digests_buf_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_st_salts_buf_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_st_esalts_buf_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_d_kernel_param_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
//
|
|
[metal_private_storageMode] = MTL_STORAGE_MODE_PRIVATE,
|
|
[metal_shared_storageMode] = MTL_STORAGE_MODE_SHARED,
|
|
[metal_managed_storageMode] = MTL_STORAGE_MODE_MANAGED
|
|
};
|
|
|
|
/**
 * Create the device buffer called `buf_name` and bail out on failure.
 *
 * Expands to a call to hc_mtlCreateBuffer () that stores the new buffer in
 * device_param->metal_d_<buf_name> and selects its storage mode through the
 * id metal_d_<buf_name>_storageMode.
 *
 * Requirements on the call site:
 *  - a variable named `device_param` must be in scope and provide the members
 *    `metal_device` and `metal_d_<buf_name>`;
 *  - the enclosing function must return int, because the macro executes
 *    `return -1` when hc_mtlCreateBuffer () returns -1.
 *
 * ctx      - forwarded as the hashcat_ctx argument
 * size     - forwarded as the buffer size
 * ptr      - forwarded as the ptr argument
 * buf_name - buffer name without the metal_d_ prefix (e.g. pws_buf)
 */
#define HC_MTL_CREATEBUFFER(ctx, size, ptr, buf_name)                          \
  do {                                                                         \
    if (hc_mtlCreateBuffer ((ctx), device_param->metal_device, (size), (ptr),  \
                            &device_param->metal_d_##buf_name,                 \
                            metal_d_##buf_name##_storageMode) == -1)           \
    {                                                                          \
      return -1;                                                               \
    }                                                                          \
  } while (0)

/**
 * Selectors for hc_mtlDeviceGetAttribute (); the value of the chosen
 * attribute is written to that function's int output parameter.
 *
 * Numbering starts at 1, so 0 is not a valid selector.
 */
typedef enum metalDeviceAttribute
{
  MTL_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 1,
  MTL_DEVICE_ATTRIBUTE_UNIFIED_MEMORY,
  MTL_DEVICE_ATTRIBUTE_WARP_SIZE,
  MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR,
  MTL_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR,
  MTL_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK,
  MTL_DEVICE_ATTRIBUTE_CLOCK_RATE,
  MTL_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
  MTL_DEVICE_ATTRIBUTE_MAX_TRANSFER_RATE,
  MTL_DEVICE_ATTRIBUTE_HEADLESS,
  MTL_DEVICE_ATTRIBUTE_LOW_POWER,
  MTL_DEVICE_ATTRIBUTE_REMOVABLE,
  MTL_DEVICE_ATTRIBUTE_REGISTRY_ID,
  MTL_DEVICE_ATTRIBUTE_PHYSICAL_LOCATION,
  MTL_DEVICE_ATTRIBUTE_LOCATION_NUMBER,

} metalDeviceAttribute_t;

/**
 * Physical location of a GPU, mirroring Apple's MTLDeviceLocation constants.
 *
 * NOTE(review): MTL_DEVICE_LOCATION_UNSPECIFIED does not fit in an `int`, so
 * this enum relies on the compiler accepting out-of-int-range enumerators
 * (an extension before C23). Apple declares MTLDeviceLocationUnspecified as
 * NSUIntegerMax, while this header uses the 32-bit maximum - confirm how the
 * implementation maps one to the other before changing the value.
 */
typedef enum metalDeviceLocation
{
  // MTLDeviceLocationBuiltIn
  // The GPU is built into the device
  MTL_DEVICE_LOCATION_BUILTIN = 0,

  // MTLDeviceLocationSlot
  // The GPU is connected to a slot inside the computer
  MTL_DEVICE_LOCATION_SLOT = 1,

  // MTLDeviceLocationExternal
  // The GPU is connected via an external interface, such as Thunderbolt
  MTL_DEVICE_LOCATION_EXTERNAL = 2,

  // MTLDeviceLocationUnspecified
  // The GPU's location is not specified or cannot be determined
  MTL_DEVICE_LOCATION_UNSPECIFIED = 4294967295,

} metalDeviceLocation_t;

typedef struct hc_metal
|
|
{
|
|
CFArrayRef devices;
|
|
|
|
} hc_metal_t;
|
|
|
|
typedef hc_metal_t MTL_PTR;
|
|
|
|
/**
 * Runtime setup, teardown and version query.
 *
 * Every function of this interface takes the context as `void *hashcat_ctx`.
 * NOTE(review): presumably this is the global hashcat context, and the int
 * results follow the "-1 means failure" convention that HC_MTL_CREATEBUFFER
 * relies on for hc_mtlCreateBuffer () - confirm against the implementation.
 */
int mtl_init (void *hashcat_ctx);
void mtl_close (void *hashcat_ctx);

// runtimeVersion_str and size are both writable (non-const) pointers.
// NOTE(review): whether *size is read as the capacity, written as the
// resulting length, or both is not visible here - confirm.
int hc_mtlRuntimeGetVersionString (void *hashcat_ctx, char *runtimeVersion_str, size_t *size);

int hc_mtlDeviceGetCount (void *hashcat_ctx, int *count);
|
|
int hc_mtlDeviceGet (void *hashcat_ctx, mtl_device_id *metal_device, int ordinal);
|
|
int hc_mtlDeviceGetName (void *hashcat_ctx, char *name, size_t len, mtl_device_id metal_device);
|
|
int hc_mtlDeviceGetAttribute (void *hashcat_ctx, int *pi, metalDeviceAttribute_t attrib, mtl_device_id metal_device);
|
|
int hc_mtlDeviceTotalMem (void *hashcat_ctx, size_t *bytes, mtl_device_id metal_device);
|
|
int hc_mtlDeviceMaxMemAlloc (void *hashcat_ctx, size_t *bytes, mtl_device_id metal_device);
|
|
int hc_mtlMemGetInfo (void *hashcat_ctx, size_t *mem_free, size_t *mem_total);
|
|
|
|
int hc_mtlCreateCommandQueue (void *hashcat_ctx, mtl_device_id metal_device, mtl_command_queue *command_queue);
|
|
int hc_mtlCreateBuffer (void *hashcat_ctx, mtl_device_id metal_device, size_t size, void *ptr, mtl_mem_t *mem, metalBufferStorageModeId_t metal_storage_mode);
|
|
int hc_mtlCreateKernel (void *hashcat_ctx, mtl_device_id metal_device, mtl_library metal_library, const char *func_name, mtl_function *metal_function, mtl_pipeline *metal_pipeline);
|
|
|
|
/**
 * Pipeline property queries. Each one writes a single unsigned int describing
 * metal_pipeline to its output parameter.
 */
int hc_mtlGetMaxTotalThreadsPerThreadgroup (void *hashcat_ctx, mtl_pipeline metal_pipeline, unsigned int *maxTotalThreadsPerThreadgroup);
int hc_mtlGetThreadExecutionWidth (void *hashcat_ctx, mtl_pipeline metal_pipeline, unsigned int *threadExecutionWidth);
int hc_mtlGetStaticThreadgroupMemoryLength (void *hashcat_ctx, mtl_pipeline metal_pipeline, unsigned int *staticThreadgroupMemoryLength);

// copy buffer
|
|
int hc_mtlMemcpyDtoD (void *hashcat_ctx, mtl_command_queue command_queue, mtl_mem_t mem_dst, size_t mem_dst_off, mtl_mem_t mem_src, size_t mem_src_off, size_t buf_size);
|
|
// write
|
|
int hc_mtlMemcpyHtoD (void *hashcat_ctx, mtl_device_id metal_device, mtl_command_queue command_queue, mtl_mem_t mem_dst, size_t mem_dst_off, const void *mem_src, size_t buf_size);
|
|
// read
|
|
int hc_mtlMemcpyDtoH (void *hashcat_ctx, mtl_device_id metal_device, mtl_command_queue command_queue, void *mem_dst, mtl_mem_t mem_src, size_t mem_src_off, size_t buf_size);
|
|
|
|
int hc_mtlReleaseMemObject (void *hashcat_ctx, mtl_mem_t *metal_buffer);
|
|
int hc_mtlReleaseFunction (void *hashcat_ctx, mtl_function *metal_function);
|
|
int hc_mtlReleaseLibrary (void *hashcat_ctx, mtl_function *metal_library);
|
|
int hc_mtlReleaseCommandQueue (void *hashcat_ctx, mtl_command_queue command_queue);
|
|
int hc_mtlReleaseDevice (void *hashcat_ctx, mtl_device_id metal_device);
|
|
|
|
/**
 * Library creation. The resulting library is returned through metal_library.
 */

// builds from kernel source text, with build options and an include path
int hc_mtlCreateLibraryWithSource (void *hashcat_ctx, mtl_device_id metal_device, const char *kernel_sources, const char *build_options_buf, const char *include_path, mtl_library *metal_library);

// loads from the file named by cached_file
int hc_mtlCreateLibraryWithFile (void *hashcat_ctx, mtl_device_id metal_device, const char *cached_file, mtl_library *metal_library);

int hc_mtlEncodeComputeCommand_pre (void *hashcat_ctx, mtl_pipeline metal_pipeline, mtl_command_queue metal_command_queue, mtl_command_buffer *metal_command_buffer, mtl_command_encoder *metal_command_encoder);
|
|
int hc_mtlSetCommandEncoderArg (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, size_t off, size_t idx, id buf, void *host_data, size_t host_data_size);
|
|
|
|
int hc_mtlEncodeComputeCommand (void *hashcat_ctx, mtl_command_encoder metal_command_encoder, mtl_command_buffer metal_command_buffer, const unsigned int work_dim, const size_t global_work_size[3], const size_t local_work_size[3], double *ms);
|
|
|
|
#endif // __APPLE__

#endif // HC_EXT_METAL_H