From 532a1545428a6caf06a78f62783542dce5e572e8 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Tue, 27 Jul 2021 12:02:27 +0200 Subject: [PATCH 1/2] ADL: Updated support for AMD Display Library to 15.0, updated datatypes and added support for OverDrive 7 and 8 based GPUs --- docs/changes.txt | 2 +- include/ext_ADL.h | 267 ++++++++++++++++++++-------------------------- src/ext_ADL.c | 1 - src/hwmon.c | 16 ++- 4 files changed, 132 insertions(+), 154 deletions(-) diff --git a/docs/changes.txt b/docs/changes.txt index 2190b9c98..0c31c4cdf 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -30,7 +30,7 @@ ## Technical ## -- ADL: Updated support for AMD Display Library to 14.0, updated datatypes and added support for OverDrive 7 and 8 based GPUs +- ADL: Updated support for AMD Display Library to 15.0, updated datatypes and added support for OverDrive 7 and 8 based GPUs - Commandline: Throw an error if separator character given by the user with -p option is not exactly 1 byte - HIP Kernels: Got rid of hip/hip_runtime.h dependancy to enable more easy integration of the HIP backend on Windows - Kernel Cache: Add kernel threads into hash computation which is later used in the kernel cache filename diff --git a/include/ext_ADL.h b/include/ext_ADL.h index 238453b7c..369a8eb1a 100644 --- a/include/ext_ADL.h +++ b/include/ext_ADL.h @@ -13,22 +13,120 @@ #include #endif // _WIN -// Values taken from display-library-14.0.zip +// Declarations from: +// https://github.com/GPUOpen-LibrariesAndSDKs/display-library/blob/209538e1dc7273f7459411a3a5044ffe2437ed95/include/adl_defines.h +// https://github.com/GPUOpen-LibrariesAndSDKs/display-library/blob/209538e1dc7273f7459411a3a5044ffe2437ed95/include/adl_structures.h -/** - * Declarations from adl_defines.h - */ +/// Defines ADL_TRUE +#define ADL_TRUE 1 +/// Defines ADL_FALSE +#define ADL_FALSE 0 + +//Define Performance Metrics Log max sensors number +#define ADL_PMLOG_MAX_SENSORS 256 + +typedef enum ADLSensorType +{ + SENSOR_MAXTYPES = 
0, + PMLOG_CLK_GFXCLK = 1, + PMLOG_CLK_MEMCLK = 2, + PMLOG_CLK_SOCCLK = 3, + PMLOG_CLK_UVDCLK1 = 4, + PMLOG_CLK_UVDCLK2 = 5, + PMLOG_CLK_VCECLK = 6, + PMLOG_CLK_VCNCLK = 7, + PMLOG_TEMPERATURE_EDGE = 8, + PMLOG_TEMPERATURE_MEM = 9, + PMLOG_TEMPERATURE_VRVDDC = 10, + PMLOG_TEMPERATURE_VRMVDD = 11, + PMLOG_TEMPERATURE_LIQUID = 12, + PMLOG_TEMPERATURE_PLX = 13, + PMLOG_FAN_RPM = 14, + PMLOG_FAN_PERCENTAGE = 15, + PMLOG_SOC_VOLTAGE = 16, + PMLOG_SOC_POWER = 17, + PMLOG_SOC_CURRENT = 18, + PMLOG_INFO_ACTIVITY_GFX = 19, + PMLOG_INFO_ACTIVITY_MEM = 20, + PMLOG_GFX_VOLTAGE = 21, + PMLOG_MEM_VOLTAGE = 22, + PMLOG_ASIC_POWER = 23, + PMLOG_TEMPERATURE_VRSOC = 24, + PMLOG_TEMPERATURE_VRMVDD0 = 25, + PMLOG_TEMPERATURE_VRMVDD1 = 26, + PMLOG_TEMPERATURE_HOTSPOT = 27, + PMLOG_TEMPERATURE_GFX = 28, + PMLOG_TEMPERATURE_SOC = 29, + PMLOG_GFX_POWER = 30, + PMLOG_GFX_CURRENT = 31, + PMLOG_TEMPERATURE_CPU = 32, + PMLOG_CPU_POWER = 33, + PMLOG_CLK_CPUCLK = 34, + PMLOG_THROTTLER_STATUS = 35, + PMLOG_CLK_VCN1CLK1 = 36, + PMLOG_CLK_VCN1CLK2 = 37, + PMLOG_SMART_POWERSHIFT_CPU = 38, + PMLOG_SMART_POWERSHIFT_DGPU = 39, + PMLOG_BUS_SPEED = 40, + PMLOG_BUS_LANES = 41, + PMLOG_MAX_SENSORS_REAL +} ADLSensorType; + +/// Defines the maximum string length +#define ADL_MAX_CHAR 4096 +/// Defines the maximum string length +#define ADL_MAX_PATH 256 +/// Defines the maximum number of supported adapters +#define ADL_MAX_ADAPTERS 250 +/// Defines the maxumum number of supported displays +#define ADL_MAX_DISPLAYS 150 +/// Defines the maxumum string length for device name +#define ADL_MAX_DEVICENAME 32 +/// Defines for all adapters +#define ADL_ADAPTER_INDEX_ALL -1 + +/// \defgroup define_adl_results Result Codes +/// This group of definitions are the various results returned by all ADL functions \n +/// @{ +/// All OK, but need to wait +#define ADL_OK_WAIT 4 +/// All OK, but need restart +#define ADL_OK_RESTART 3 +/// All OK but need mode change +#define ADL_OK_MODE_CHANGE 2 +/// All OK, but with warning 
+#define ADL_OK_WARNING 1 /// ADL function completed successfully #define ADL_OK 0 /// Generic Error. Most likely one or more of the Escape calls to the driver failed! #define ADL_ERR -1 - +/// ADL not initialized +#define ADL_ERR_NOT_INIT -2 +/// One of the parameter passed is invalid +#define ADL_ERR_INVALID_PARAM -3 +/// One of the parameter size is invalid +#define ADL_ERR_INVALID_PARAM_SIZE -4 +/// Invalid ADL index passed +#define ADL_ERR_INVALID_ADL_IDX -5 +/// Invalid controller index passed +#define ADL_ERR_INVALID_CONTROLLER_IDX -6 +/// Invalid display index passed +#define ADL_ERR_INVALID_DIPLAY_IDX -7 /// Function not supported by the driver #define ADL_ERR_NOT_SUPPORTED -8 - -/// Defines the maximum string length -#define ADL_MAX_PATH 256 +/// Null Pointer error +#define ADL_ERR_NULL_POINTER -9 +/// Call can't be made due to disabled adapter +#define ADL_ERR_DISABLED_ADAPTER -10 +/// Invalid Callback +#define ADL_ERR_INVALID_CALLBACK -11 +/// Display Resource conflict +#define ADL_ERR_RESOURCE_CONFLICT -12 +//Failed to update some of the values. Can be returned by set request that include multiple values if not all values were successfully committed. +#define ADL_ERR_SET_INCOMPLETE -20 +/// There's no Linux XDisplay in Linux Console environment +#define ADL_ERR_NO_XDISPLAY -21 //values for ADLFanSpeedValue.iSpeedType #define ADL_DL_FANCTRL_SPEED_TYPE_PERCENT 1 @@ -37,9 +135,6 @@ //values for ADLFanSpeedValue.iFlags #define ADL_DL_FANCTRL_FLAG_USER_DEFINED_SPEED 1 -//Define Performance Metrics Log max sensors number -#define ADL_PMLOG_MAX_SENSORS 256 - /** * Declarations from adl_structures.h */ @@ -90,6 +185,7 @@ typedef struct AdapterInfo char strPNPString[ADL_MAX_PATH]; /// It is generated from EnumDisplayDevices. 
int iOSDisplayIndex; + #endif /* (_WIN32) || (_WIN64) */ #if defined (LINUX) @@ -181,90 +277,6 @@ typedef struct ADLFanSpeedValue int iFlags; } ADLFanSpeedValue; -///////////////////////////////////////////////////////////////////////////////////////////// -///\brief Structure containing information about the display device. -/// -/// This structure is used to store display device information -/// such as display index, type, name, connection status, mapped adapter and controller indexes, -/// whether or not multiple VPUs are supported, local display connections or not (through Lasso), etc. -/// This information can be returned to the user. Alternatively, it can be used to access various driver calls to set -/// or fetch various display device related settings upon the user's request. -/// \nosubgrouping -//////////////////////////////////////////////////////////////////////////////////////////// -typedef struct ADLDisplayID -{ -/// The logical display index belonging to this adapter. - int iDisplayLogicalIndex; - -///\brief The physical display index. -/// For example, display index 2 from adapter 2 can be used by current adapter 1.\n -/// So current adapter may enumerate this adapter as logical display 7 but the physical display -/// index is still 2. - int iDisplayPhysicalIndex; - -/// The persistent logical adapter index for the display. - int iDisplayLogicalAdapterIndex; - -///\brief The persistent physical adapter index for the display. -/// It can be the current adapter or a non-local adapter. \n -/// If this adapter index is different than the current adapter, -/// the Display Non Local flag is set inside DisplayInfoValue. - int iDisplayPhysicalAdapterIndex; -} ADLDisplayID, *LPADLDisplayID; - -///////////////////////////////////////////////////////////////////////////////////////////// -///\brief Structure containing information about the display device. -/// -/// This structure is used to store various information about the display device. 
This -/// information can be returned to the user, or used to access various driver calls to set -/// or fetch various display-device-related settings upon the user's request -/// \nosubgrouping -//////////////////////////////////////////////////////////////////////////////////////////// -typedef struct ADLDisplayInfo -{ -/// The DisplayID structure - ADLDisplayID displayID; - -///\deprecated The controller index to which the display is mapped.\n Will not be used in the future\n - int iDisplayControllerIndex; - -/// The display's EDID name. - char strDisplayName[ADL_MAX_PATH]; - -/// The display's manufacturer name. - char strDisplayManufacturerName[ADL_MAX_PATH]; - -/// The Display type. For example: CRT, TV, CV, DFP. - int iDisplayType; - -/// The display output type. For example: HDMI, SVIDEO, COMPONMNET VIDEO. - int iDisplayOutputType; - -/// The connector type for the device. - int iDisplayConnector; - -///\brief The bit mask identifies the number of bits ADLDisplayInfo is currently using. \n -/// It will be the sum all the bit definitions in ADL_DISPLAY_DISPLAYINFO_xxx. - int iDisplayInfoMask; - -/// The bit mask identifies the display status. \ref define_displayinfomask - int iDisplayInfoValue; -} ADLDisplayInfo, *LPADLDisplayInfo; - -///////////////////////////////////////////////////////////////////////////////////////////// -/// \brief Structure containing information about the BIOS. -/// -/// This structure is used to store various information about the Chipset. This -/// information can be returned to the user. -/// \nosubgrouping -//////////////////////////////////////////////////////////////////////////////////////////// -typedef struct ADLBiosInfo -{ - char strPartNumber[ADL_MAX_PATH]; ///< Part number. - char strVersion[ADL_MAX_PATH]; ///< Version number. - char strDate[ADL_MAX_PATH]; ///< BIOS date in yyyy/mm/dd hh:mm format. 
-} ADLBiosInfo, *LPADLBiosInfo; - ///////////////////////////////////////////////////////////////////////////////////////////// ///\brief Structure containing information about current power management related activity. /// @@ -346,7 +358,7 @@ typedef struct ADLODParameters /// This structure is used to store information about Overdrive 6 fan speed information /// \nosubgrouping //////////////////////////////////////////////////////////////////////////////////////////// -typedef struct _ADLOD6FanSpeedInfo +typedef struct ADLOD6FanSpeedInfo { /// Contains a bitmap of the valid fan speed type flags. Possible values: \ref ADL_OD6_FANSPEED_TYPE_PERCENT, \ref ADL_OD6_FANSPEED_TYPE_RPM, \ref ADL_OD6_FANSPEED_USER_DEFINED int iSpeedType; @@ -368,7 +380,7 @@ typedef struct _ADLOD6FanSpeedInfo /// This structure is used to store information about Overdrive 6 fan speed value /// \nosubgrouping //////////////////////////////////////////////////////////////////////////////////////////// -typedef struct _ADLOD6FanSpeedValue +typedef struct ADLOD6FanSpeedValue { /// Indicates the units of the fan speed. Possible values: \ref ADL_OD6_FANSPEED_TYPE_PERCENT, \ref ADL_OD6_FANSPEED_TYPE_RPM int iSpeedType; @@ -388,7 +400,7 @@ typedef struct _ADLOD6FanSpeedValue /// This structure is used to store information about current Overdrive 6 performance status. /// \nosubgrouping //////////////////////////////////////////////////////////////////////////////////////////// -typedef struct _ADLOD6CurrentStatus +typedef struct ADLOD6CurrentStatus { /// Current engine clock in 10 KHz. 
int iEngineClock; @@ -419,7 +431,7 @@ typedef struct _ADLOD6CurrentStatus /// This structure is used to store information about Overdrive 6 clock range /// \nosubgrouping //////////////////////////////////////////////////////////////////////////////////////////// -typedef struct _ADLOD6ParameterRange +typedef struct ADLOD6ParameterRange { /// The starting value of the clock range int iMin; @@ -436,7 +448,7 @@ typedef struct _ADLOD6ParameterRange /// This structure is used to store information about Overdrive 6 capabilities /// \nosubgrouping //////////////////////////////////////////////////////////////////////////////////////////// -typedef struct _ADLOD6Capabilities +typedef struct ADLOD6Capabilities { /// Contains a bitmap of the OD6 capability flags. Possible values: \ref ADL_OD6_CAPABILITY_SCLK_CUSTOMIZATION, /// \ref ADL_OD6_CAPABILITY_MCLK_CUSTOMIZATION, \ref ADL_OD6_CAPABILITY_GPU_ACTIVITY_MONITOR @@ -487,7 +499,7 @@ typedef struct ADLODPerformanceLevel /// This structure is used to store information about Overdrive 6 clock values. /// \nosubgrouping //////////////////////////////////////////////////////////////////////////////////////////// -typedef struct _ADLOD6PerformanceLevel +typedef struct ADLOD6PerformanceLevel { /// Engine (core) clock. int iEngineClock; @@ -504,7 +516,7 @@ typedef struct _ADLOD6PerformanceLevel /// are contained in the aLevels array. /// \nosubgrouping //////////////////////////////////////////////////////////////////////////////////////////// -typedef struct _ADLOD6StateInfo +typedef struct ADLOD6StateInfo { /// Number of levels. OD6 uses clock ranges instead of discrete performance levels. /// iNumberOfPerformanceLevels is always 2. 
The 1st level indicates the minimum clocks @@ -544,63 +556,18 @@ typedef struct ADLODPerformanceLevels /// This structure is used to store information about Performance Metrics data output /// \nosubgrouping //////////////////////////////////////////////////////////////////////////////////////////// -typedef struct _ADLSingleSensorData +typedef struct ADLSingleSensorData { int supported; int value; } ADLSingleSensorData; -typedef struct _ADLPMLogDataOutput +typedef struct ADLPMLogDataOutput { int size; ADLSingleSensorData sensors[ADL_PMLOG_MAX_SENSORS]; }ADLPMLogDataOutput; -typedef enum _ADLSensorType -{ - SENSOR_MAXTYPES = 0, - PMLOG_CLK_GFXCLK = 1, - PMLOG_CLK_MEMCLK = 2, - PMLOG_CLK_SOCCLK = 3, - PMLOG_CLK_UVDCLK1 = 4, - PMLOG_CLK_UVDCLK2 = 5, - PMLOG_CLK_VCECLK = 6, - PMLOG_CLK_VCNCLK = 7, - PMLOG_TEMPERATURE_EDGE = 8, - PMLOG_TEMPERATURE_MEM = 9, - PMLOG_TEMPERATURE_VRVDDC = 10, - PMLOG_TEMPERATURE_VRMVDD = 11, - PMLOG_TEMPERATURE_LIQUID = 12, - PMLOG_TEMPERATURE_PLX = 13, - PMLOG_FAN_RPM = 14, - PMLOG_FAN_PERCENTAGE = 15, - PMLOG_SOC_VOLTAGE = 16, - PMLOG_SOC_POWER = 17, - PMLOG_SOC_CURRENT = 18, - PMLOG_INFO_ACTIVITY_GFX = 19, - PMLOG_INFO_ACTIVITY_MEM = 20, - PMLOG_GFX_VOLTAGE = 21, - PMLOG_MEM_VOLTAGE = 22, - PMLOG_ASIC_POWER = 23, - PMLOG_TEMPERATURE_VRSOC = 24, - PMLOG_TEMPERATURE_VRMVDD0 = 25, - PMLOG_TEMPERATURE_VRMVDD1 = 26, - PMLOG_TEMPERATURE_HOTSPOT = 27, - PMLOG_TEMPERATURE_GFX = 28, - PMLOG_TEMPERATURE_SOC = 29, - PMLOG_GFX_POWER = 30, - PMLOG_GFX_CURRENT = 31, - PMLOG_TEMPERATURE_CPU = 32, - PMLOG_CPU_POWER = 33, - PMLOG_CLK_CPUCLK = 34, - PMLOG_THROTTLER_STATUS = 35, - PMLOG_CLK_VCN1CLK1 = 36, - PMLOG_CLK_VCN1CLK2 = 37, - PMLOG_SMART_POWERSHIFT_CPU = 38, - PMLOG_SMART_POWERSHIFT_DGPU = 39, - PMLOG_MAX_SENSORS_REAL -} ADLSensorType; - /// \brief Handle to ADL client context. /// /// ADL clients obtain context handle from initial call to \ref ADL2_Main_Control_Create. 
@@ -628,7 +595,6 @@ typedef int HM_ADAPTER_ADL; typedef int (ADL_API_CALL *ADL_ADAPTER_ACTIVE_GET ) ( int, int* ); typedef int (ADL_API_CALL *ADL_ADAPTER_ADAPTERINFO_GET ) ( LPAdapterInfo, int ); typedef int (ADL_API_CALL *ADL_ADAPTER_NUMBEROFADAPTERS_GET ) ( int* ); -typedef int (ADL_API_CALL *ADL_DISPLAY_DISPLAYINFO_GET ) ( int, int *, ADLDisplayInfo **, int ); typedef int (ADL_API_CALL *ADL_MAIN_CONTROL_CREATE )(ADL_MAIN_MALLOC_CALLBACK, int ); typedef int (ADL_API_CALL *ADL_MAIN_CONTROL_DESTROY )(); typedef int (ADL_API_CALL *ADL_OVERDRIVE5_CURRENTACTIVITY_GET ) (int iAdapterIndex, ADLPMActivity *lpActivity); @@ -654,7 +620,6 @@ typedef struct hm_adl_lib ADL_ADAPTER_ACTIVE_GET ADL_Adapter_Active_Get; ADL_ADAPTER_ADAPTERINFO_GET ADL_Adapter_AdapterInfo_Get; ADL_ADAPTER_NUMBEROFADAPTERS_GET ADL_Adapter_NumberOfAdapters_Get; - ADL_DISPLAY_DISPLAYINFO_GET ADL_Display_DisplayInfo_Get; ADL_MAIN_CONTROL_CREATE ADL_Main_Control_Create; ADL_MAIN_CONTROL_DESTROY ADL_Main_Control_Destroy; ADL_OVERDRIVE5_CURRENTACTIVITY_GET ADL_Overdrive5_CurrentActivity_Get; diff --git a/src/ext_ADL.c b/src/ext_ADL.c index 08ba55e1e..9676c916f 100644 --- a/src/ext_ADL.c +++ b/src/ext_ADL.c @@ -53,7 +53,6 @@ int adl_init (void *hashcat_ctx) HC_LOAD_FUNC(adl, ADL_Adapter_Active_Get, ADL_ADAPTER_ACTIVE_GET, ADL, 0); HC_LOAD_FUNC(adl, ADL_Adapter_AdapterInfo_Get, ADL_ADAPTER_ADAPTERINFO_GET, ADL, 0); HC_LOAD_FUNC(adl, ADL_Adapter_NumberOfAdapters_Get, ADL_ADAPTER_NUMBEROFADAPTERS_GET, ADL, 0); - HC_LOAD_FUNC(adl, ADL_Display_DisplayInfo_Get, ADL_DISPLAY_DISPLAYINFO_GET, ADL, 0); HC_LOAD_FUNC(adl, ADL_Main_Control_Create, ADL_MAIN_CONTROL_CREATE, ADL, 0); HC_LOAD_FUNC(adl, ADL_Main_Control_Destroy, ADL_MAIN_CONTROL_DESTROY, ADL, 0); HC_LOAD_FUNC(adl, ADL_Overdrive5_CurrentActivity_Get, ADL_OVERDRIVE5_CURRENTACTIVITY_GET, ADL, 0); diff --git a/src/hwmon.c b/src/hwmon.c index ee5a001e8..4e6a4f986 100644 --- a/src/hwmon.c +++ b/src/hwmon.c @@ -682,7 +682,21 @@ int hm_get_buslanes_with_devices_idx 
(hashcat_ctx_t *hashcat_ctx, const int back return PMActivity.iCurrentBusLanes; } - // NO OD8 + if (hwmon_ctx->hm_device[backend_device_idx].od_version == 8) + { + ADLPMLogDataOutput odlpDataOutput; + + memset (&odlpDataOutput, 0, sizeof (ADLPMLogDataOutput)); + + if (hm_ADL2_New_QueryPMLogData_Get (hashcat_ctx, hwmon_ctx->hm_device[backend_device_idx].adl, &odlpDataOutput) == -1) + { + hwmon_ctx->hm_device[backend_device_idx].buslanes_get_supported = false; + + return -1; + } + + return odlpDataOutput.sensors[PMLOG_BUS_LANES].value; + } } if (hwmon_ctx->hm_sysfs_amdgpu) From 25f1c12e3c7ff4efc71b9dd10aa2de7f81dfc122 Mon Sep 17 00:00:00 2001 From: Jens Steube Date: Wed, 28 Jul 2021 07:51:27 +0200 Subject: [PATCH 2/2] SCRYPT Kernels: Add more optimized values for some new NV/AMD GPUs and new semi-automated derivation process description Blowfish Kernels: Backport optimizations reducing bank conflicts from bcrypt to Password Safe v2 and Open Document Format (ODF) 1.1 --- docs/changes.txt | 4 ++ hashcat.hctune | 84 ++++++++++++++++++++++++++------------ src/modules/module_09000.c | 65 +++++++++++++++++++++++------ 3 files changed, 114 insertions(+), 39 deletions(-) diff --git a/docs/changes.txt b/docs/changes.txt index 0c31c4cdf..8cad8aa31 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -19,6 +19,7 @@ ## - AMD GPUs: Add inline assembly code for md5crypt/sha256crypt, PDF 1.7, 7-Zip, RAR3, Samsung Android and Windows Phone 8+ +- AMD GPUs: On Apple OpenCL platform, we ask for the preferred kernel thread size rather than hard-coding 32 - Blake Kernels: Optimize BLAKE2B_ROUND() 64 bit rotates giving a 5% performance increase - Blowfish Kernels: Backport optimizations reducing bank conflicts from bcrypt to Password Safe v2 and Open Document Format (ODF) 1.1 - Brain Session: Adds hashconfig specific opti_type and opts_type parameters to hashcat session computation to cover features like -O and -M @@ -31,7 +32,10 @@ ## - ADL: Updated support for AMD Display Library to 
15.0, updated datatypes and added support for OverDrive 7 and 8 based GPUs +- AMD Driver: Updated requirement for AMD Linux driver to ROCm 4.4 or later because of new HIP Interface +- AMD Driver: Updated requirement for AMD Windows driver to Adrenalin 21.2.1 or later because of new ADL library - Commandline: Throw an error if separator character given by the user with -p option is not exactly 1 byte +- ECC secp256k1: Removed the inline assembly code for AMD GPUs because the latest JIT compilers optimize it with the same efficiency - HIP Kernels: Got rid of hip/hip_runtime.h dependancy to enable more easy integration of the HIP backend on Windows - Kernel Cache: Add kernel threads into hash computation which is later used in the kernel cache filename - SCRYPT Kernels: Add more optimized values for some new NV/AMD GPUs diff --git a/hashcat.hctune b/hashcat.hctune index 2e1951eef..2b99ee149 100644 --- a/hashcat.hctune +++ b/hashcat.hctune @@ -279,7 +279,14 @@ GeForce_RTX_3090 ALIAS_nv_sm50_or_higher ## Device_738c ALIAS_AMD_MI100 + +AMD_Radeon_(TM)_RX_480_Graphics ALIAS_AMD_RX480 + +Vega_10_XL/XT_[Radeon_RX_Vega_56/64] ALIAS_AMD_Vega64 +AMD_Radeon_Vega_64 ALIAS_AMD_Vega64 + Device_73bf ALIAS_AMD_RX6900XT +AMD_Radeon_RX_6900_XT ALIAS_AMD_RX6900XT ############# ## ENTRIES ## @@ -486,22 +493,41 @@ DEVICE_TYPE_GPU * 14500 1 A ## ## Find the ideal -n value, then store it here along with the proper compute device name. ## Formatting guidelines are availabe at the top of this document. +## +## ------------------------------------------------- +## +## You can also ignore all theoretical derivations and semi-automate the process in the real scenario (I prefer this approach): +## +## 1. For example, to find the value for 8900, first create a valid hash for 8900 as follows: +## +## $ ./hashcat --example-hashes -m 8900 | grep Example.Hash | grep -v Format | cut -b 25- > tmp.hash.8900 +## +## 2. Now let it iterate through all -n values to a certain point. 
In this case, I'm using 200, but in general it should be a value that is at least twice the number of multiprocessors of the device. If you don't mind, you can just leave it as it is; it just runs a little longer. +## +## $ export i=1; while [ $i -ne 201 ]; do echo $i; ./hashcat --quiet tmp.hash.8900 --keep-guessing --self-test-disable --markov-disable --restore-disable --outfile-autohex-disable --wordlist-autohex-disable --potfile-disable --logfile-disable --hwmon-disable --status --status-timer 1 --runtime 28 --machine-readable --optimized-kernel-enable --workload-profile 3 --hash-type 8900 --attack-mode 3 ?b?b?b?b?b?b?b --backend-devices 1 --force -n $i; i=$(($i+1)); done | tee x +## +## 3. Determine the highest measured H/s speed. But don't just use the highest value. Instead, use the number that seems most stable, usually at the beginning. +## +## $ grep "$(printf 'STATUS\t3')" x | cut -f4 -d$'\t' | sort -n | tail +## +## 4. To match the speed you have chosen to the correct -n value, simply search for that speed in the "x" file. Then scroll up a little to the start of the block in which you found it. The -n value is the single number printed just before the block starts. 
If you have multiple blocks at the same speed, choose the lowest value for -n +## ## 4GB -GeForce_GTX_980 * 8900 1 28 A +GeForce_GTX_980 * 8900 1 29 A GeForce_GTX_980 * 9300 1 128 A -GeForce_GTX_980 * 15700 1 28 A -GeForce_GTX_980 * 22700 1 28 A +GeForce_GTX_980 * 15700 1 24 A +GeForce_GTX_980 * 22700 1 29 A ## 8GB -GeForce_GTX_1080 * 8900 1 14 A +GeForce_GTX_1080 * 8900 1 15 A GeForce_GTX_1080 * 9300 1 256 A -GeForce_GTX_1080 * 15700 1 14 A -GeForce_GTX_1080 * 22700 1 14 A +GeForce_GTX_1080 * 15700 1 28 A +GeForce_GTX_1080 * 22700 1 15 A ## 11GB GeForce_RTX_2080_Ti * 8900 1 68 A -GeForce_RTX_2080_Ti * 9300 1 532 A +GeForce_RTX_2080_Ti * 9300 1 528 A GeForce_RTX_2080_Ti * 15700 1 68 A GeForce_RTX_2080_Ti * 22700 1 68 A @@ -509,7 +535,7 @@ GeForce_RTX_2080_Ti * 22700 1 68 GeForce_RTX_3060_Ti * 8900 1 51 A GeForce_RTX_3060_Ti * 9300 1 256 A GeForce_RTX_3060_Ti * 15700 1 11 A -GeForce_RTX_3060_Ti * 22700 1 43 A +GeForce_RTX_3060_Ti * 22700 1 51 A ## 8GB GeForce_RTX_3070 * 8900 1 46 A @@ -517,26 +543,32 @@ GeForce_RTX_3070 * 9300 1 368 GeForce_RTX_3070 * 15700 1 22 A GeForce_RTX_3070 * 22700 1 46 A +## 24GB +GeForce_RTX_3090 * 8900 1 82 A +GeForce_RTX_3090 * 9300 1 984 A +GeForce_RTX_3090 * 15700 1 82 A +GeForce_RTX_3090 * 22700 1 82 A + ## 4GB -AMD_Radeon_(TM)_RX_480_Graphics * 8900 1 14 A -AMD_Radeon_(TM)_RX_480_Graphics * 9300 1 126 A -AMD_Radeon_(TM)_RX_480_Graphics * 15700 1 14 A -AMD_Radeon_(TM)_RX_480_Graphics * 22700 1 14 A +ALIAS_AMD_RX480 * 8900 1 15 A +ALIAS_AMD_RX480 * 9300 1 232 A +ALIAS_AMD_RX480 * 15700 1 58 A +ALIAS_AMD_RX480 * 22700 1 15 A ## 8GB -Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 8900 1 28 A -Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 9300 1 442 A -Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 15700 1 28 A -Vega_10_XL/XT_[Radeon_RX_Vega_56/64] * 22700 1 28 A +ALIAS_AMD_Vega64 * 8900 1 31 A +ALIAS_AMD_Vega64 * 9300 1 440 A +ALIAS_AMD_Vega64 * 15700 1 53 A +ALIAS_AMD_Vega64 * 22700 1 31 A -## 32GB, WF64 -ALIAS_AMD_MI100 * 8900 1 76 A -ALIAS_AMD_MI100 * 9300 1 
288 A -ALIAS_AMD_MI100 * 15700 1 76 A -ALIAS_AMD_MI100 * 22700 1 76 A +## 32GB +ALIAS_AMD_MI100 * 8900 1 79 A +ALIAS_AMD_MI100 * 9300 1 1000 A +ALIAS_AMD_MI100 * 15700 1 120 A +ALIAS_AMD_MI100 * 22700 1 79 A -## 16GB, WF32 -ALIAS_AMD_RX6900XT * 8900 1 62 A -ALIAS_AMD_RX6900XT * 9300 1 704 A -ALIAS_AMD_RX6900XT * 15700 1 62 A -ALIAS_AMD_RX6900XT * 22700 1 62 A +## 16GB +ALIAS_AMD_RX6900XT * 8900 1 59 A +ALIAS_AMD_RX6900XT * 9300 1 720 A +ALIAS_AMD_RX6900XT * 15700 1 56 A +ALIAS_AMD_RX6900XT * 22700 1 59 A diff --git a/src/modules/module_09000.c b/src/modules/module_09000.c index 3e3158c0a..28963a1fc 100644 --- a/src/modules/module_09000.c +++ b/src/modules/module_09000.c @@ -22,7 +22,8 @@ static const u64 KERN_TYPE = 9000; static const u32 OPTI_TYPE = OPTI_TYPE_ZERO_BYTE; static const u64 OPTS_TYPE = OPTS_TYPE_PT_GENERATE_LE | OPTS_TYPE_BINARY_HASHFILE - | OPTS_TYPE_AUTODETECT_DISABLE; + | OPTS_TYPE_AUTODETECT_DISABLE + | OPTS_TYPE_DYNAMIC_SHARED; static const u32 SALT_TYPE = SALT_TYPE_EMBEDDED; static const char *ST_PASS = "hashcat"; static const char *ST_HASH = "0a3f352686e5eb5be173e668a4fff5cd5df420927e1da2d5d4052340160637e3e6a5a92841a188ed240e13b919f3d91694bd4c0acba79271e9c08a83ea5ad387cbb74d5884066a1cb5a8caa80d847079168f84823847c631dbe3a834f1bc496acfebac3bff1608bf1c857717f8f428e07b5e2cb12aaeddfa83d7dcb6d840234d08b84f8ca6c6e562af73eea13148f7902bcaf0220d3e36eeeff1d37283dc421483a2791182614ebb"; @@ -75,16 +76,25 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY { char *jit_build_options = NULL; + // this mode heavily depends on the available shared memory size + // note the kernel need to have some special code changes in order to make use to use post-48k memory region + // we need to set some macros + + bool use_dynamic = false; + + if (device_param->is_cuda == true) + { + use_dynamic = true; + } + // this uses some nice feedback effect. 
// based on the device_local_mem_size the reqd_work_group_size in the kernel is set to some value // which is then is read from the opencl host in the kernel_preferred_wgs_multiple1/2/3 result. // therefore we do not need to set module_kernel_threads_min/max except for CPU, where the threads are set to fixed 1. - u32 fixed_local_size = 0; - if (device_param->opencl_device_type & CL_DEVICE_TYPE_CPU) { - fixed_local_size = 1; + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", 1); } else { @@ -100,29 +110,58 @@ char *module_jit_build_options (MAYBE_UNUSED const hashconfig_t *hashconfig, MAY if (device_param->is_opencl == true) { - overhead = 4; + overhead = 1; } } if (user_options->kernel_threads_chgd == true) { - fixed_local_size = user_options->kernel_threads; + u32 fixed_local_size = user_options->kernel_threads; - // otherwise out-of-bound reads - - if ((fixed_local_size * 4096) > (device_param->device_local_mem_size - overhead)) + if (use_dynamic == true) { - fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096; + if ((fixed_local_size * 4096) > device_param->kernel_dynamic_local_mem_size_memset) + { + // otherwise out-of-bound reads + + fixed_local_size = device_param->kernel_dynamic_local_mem_size_memset / 4096; + } + + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D DYNAMIC_LOCAL", fixed_local_size); + } + else + { + if ((fixed_local_size * 4096) > (device_param->device_local_mem_size - overhead)) + { + // otherwise out-of-bound reads + + fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096; + } + + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size); } } else { - fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096; + if (use_dynamic == true) + { + // using kernel_dynamic_local_mem_size_memset is a bit hackish. + // we had to brute-force this value out of an already loaded CUDA function. + // there's no official way to query for this value. 
+ + const u32 fixed_local_size = device_param->kernel_dynamic_local_mem_size_memset / 4096; + + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u -D DYNAMIC_LOCAL", fixed_local_size); + } + else + { + const u32 fixed_local_size = (device_param->device_local_mem_size - overhead) / 4096; + + hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size); + } } } - hc_asprintf (&jit_build_options, "-D FIXED_LOCAL_SIZE=%u", fixed_local_size); - return jit_build_options; }