From 9398fa0736976fdc2c3265c317c08bbc8396ce1d Mon Sep 17 00:00:00 2001 From: gojimmypi Date: Tue, 19 Sep 2023 08:21:13 -0700 Subject: [PATCH] Espressif HW Improvements (#6624) * Espressif HW Improvements * revised AES HW/SW fallback logic for ESP32 --- IDE/Espressif/ESP-IDF/README.md | 33 +- IDE/Espressif/ESP-IDF/README_32se.md | 2 +- .../ESP-IDF/examples/wolfssl_test/README.md | 16 +- .../components/wolfssl/CMakeLists.txt | 8 +- .../ESP-IDF/examples/wolfssl_test/main/main.c | 24 +- IDE/Espressif/ESP-IDF/user_settings.h | 67 +- wolfcrypt/src/aes.c | 256 +- wolfcrypt/src/port/Espressif/README.md | 15 +- wolfcrypt/src/port/Espressif/esp32_aes.c | 126 +- wolfcrypt/src/port/Espressif/esp32_mp.c | 2228 +++++++++++++---- wolfcrypt/src/port/Espressif/esp32_sha.c | 12 +- wolfcrypt/src/port/Espressif/esp32_util.c | 210 +- wolfcrypt/src/sha.c | 28 + wolfcrypt/src/sha256.c | 4 +- wolfcrypt/src/sha512.c | 3 +- wolfcrypt/src/tfm.c | 262 +- .../wolfcrypt/port/Espressif/esp32-crypt.h | 462 +++- 17 files changed, 2985 insertions(+), 771 deletions(-) diff --git a/IDE/Espressif/ESP-IDF/README.md b/IDE/Espressif/ESP-IDF/README.md index a0cc1c9e74..3117b60eb2 100644 --- a/IDE/Espressif/ESP-IDF/README.md +++ b/IDE/Espressif/ESP-IDF/README.md @@ -6,6 +6,7 @@ and have not yet been upgraded to the master branch V5. See the latest [migration guides](https://docs.espressif.com/projects/esp-idf/en/latest/esp32/migration-guides/index.html). ## Overview + ESP-IDF development framework with wolfSSL by setting *WOLFSSL_ESPIDF* definition Including the following examples: @@ -17,29 +18,57 @@ Including the following examples: The *user_settings.h* file enables some of the hardened settings. ## Requirements + 1. [ESP-IDF development framework](https://docs.espressif.com/projects/esp-idf/en/latest/get-started/) ## Setup for Linux + 1. Run `setup.sh` at _/path/to_`/wolfssl/IDE/Espressif/ESP-IDF/` to deploy files into ESP-IDF tree 2. 
Find Wolfssl files at _/path/to/esp_`/esp-idf/components/wolfssl/` - 3. Find [Example programs](https://github.com/wolfSSL/wolfssl/tree/master/IDE/Espressif/ESP-IDF/examples) under _/path/to/esp_`/esp-idf/examples/protocols/wolfssl_xxx` (where xxx is the project name) + 3. Find [Example Programs](https://github.com/wolfSSL/wolfssl/tree/master/IDE/Espressif/ESP-IDF/examples) under _/path/to/esp_`/esp-idf/examples/protocols/wolfssl_xxx` (where xxx is the project name) ## Setup for Windows + 1. Run ESP-IDF Command Prompt (cmd.exe) or Run ESP-IDF PowerShell Environment 2. Run `setup_win.bat` at `.\IDE\Espressif\ESP-IDF\` 3. Find Wolfssl files at _/path/to/esp_`/esp-idf/components/wolfssl/` 4. Find [Example programs](https://github.com/wolfSSL/wolfssl/tree/master/IDE/Espressif/ESP-IDF/examples) under _/path/to/esp_`/esp-idf/examples/protocols/wolfssl_xxx` (where xxx is the project name) +## Setup for VisualGDB + +### Clone a specific version: + +``` +C:\SysGCC\esp32\esp-idf>git clone -b v5.0.2 --recursive https://github.com/espressif/esp-idf.git v5.0.2 +``` + ## Configuration + 1. The `user_settings.h` can be found in _/path/to/esp_`/esp-idf/components/wolfssl/include/user_settings.h` ## Build examples + 1. See README in each example folder ## Support + For question please email [support@wolfssl.com] Note: This is tested with : - - OS: Ubuntu 20.04.3 LTS and Microsoft Windows 10 Pro 10.0.19041 and well as WSL Ubuntu + - OS: Ubuntu 20.04.3 LTS + - Microsoft Windows 10 Pro 10.0.19041 + - WSL Ubuntu + - ESP-IDF: ESP-IDF v4.3.2 - Module : ESP32-WROOM-32 + +## JTAG Debugging + +All of the examples are configured to use either the on-board JTAG (when available) or +the open source [Tigard multi-protocol tool for hardware hacking](https://github.com/tigard-tools/tigard). 
+ +VisualGDB users should find the configuration file in the `interface\ftdi` directory: + +``` +C:\Users\%USERNAME%\AppData\Local\VisualGDB\EmbeddedDebugPackages\com.sysprogs.esp32.core\share\openocd\scripts\interface\ftdi +``` diff --git a/IDE/Espressif/ESP-IDF/README_32se.md b/IDE/Espressif/ESP-IDF/README_32se.md index cb5171fcca..af440a8b5a 100644 --- a/IDE/Espressif/ESP-IDF/README_32se.md +++ b/IDE/Espressif/ESP-IDF/README_32se.md @@ -15,7 +15,7 @@ Including the following examples: 2. Microchip CryptoAuthentication Library: https://github.com/MicrochipTech/cryptoauthlib ## Setup -1. Comment out `#define WOLFSSL_ESPWROOM32` in `/path/to/wolfssl/IDE/Espressif/ESP-IDF/user_settings.h`\ +1. Comment out `#define WOLFSSL_ESP32` in `/path/to/wolfssl/IDE/Espressif/ESP-IDF/user_settings.h`\ Uncomment out `#define WOLFSSL_ESPWROOM32SE` in `/path/to/wolfssl/IDE/Espressif/ESP-IDF/user_settings.h` * **Note:** crypt test will fail if enabled `WOLFSSL_ESPWROOM32SE` 3. wolfSSL under ESP-IDF. Please see [README.md](https://github.com/wolfSSL/wolfssl/blob/master/IDE/Espressif/ESP-IDF/README.md) diff --git a/IDE/Espressif/ESP-IDF/examples/wolfssl_test/README.md b/IDE/Espressif/ESP-IDF/examples/wolfssl_test/README.md index 5be9875e84..c06aa79f88 100644 --- a/IDE/Espressif/ESP-IDF/examples/wolfssl_test/README.md +++ b/IDE/Espressif/ESP-IDF/examples/wolfssl_test/README.md @@ -27,11 +27,19 @@ Example build on WSL, assuming `git clone` from `c:\workspace`: # switch to test example cd /mnt/c/workspace/wolfssl/IDE/Espressif/ESP-IDF/examples/wolfssl_test -# Pick ESP-IDF install directory, this one for v4.4.2 in VisualGDB -. /mnt/c/SysGCC/esp32/esp-idf/v4.4.2/export.sh +# Pick ESP-IDF install directory, this one for v5.1 in VisualGDB +. 
/mnt/c/SysGCC/esp32/esp-idf/v5.1/export.sh -# build and flash, in this example to COM20 -idf.py build flash -p /dev/ttyS20 -b 921600 monitor +# set target chipset +idf.py set-target esp32s3 + +# erase +idf.py erase-flash -p /dev/ttyS24 -b 115200 + +# start with a low upload speed, then increase as found operational +idf.py +# build and flash, in this example to COM24 +idf.py build flash -p /dev/ttyS24 -b 115200 monitor ``` ## Example Output diff --git a/IDE/Espressif/ESP-IDF/examples/wolfssl_test/components/wolfssl/CMakeLists.txt b/IDE/Espressif/ESP-IDF/examples/wolfssl_test/components/wolfssl/CMakeLists.txt index a916facabd..70e81574b3 100644 --- a/IDE/Espressif/ESP-IDF/examples/wolfssl_test/components/wolfssl/CMakeLists.txt +++ b/IDE/Espressif/ESP-IDF/examples/wolfssl_test/components/wolfssl/CMakeLists.txt @@ -50,7 +50,6 @@ set(COMPONENT_SRCDIRS "${WOLFSSL_ROOT}/src/" "${WOLFSSL_ROOT}/wolfcrypt/src/" "${WOLFSSL_ROOT}/wolfcrypt/src/port/Espressif/" "${WOLFSSL_ROOT}/wolfcrypt/src/port/atmel/" - "${WOLFSSL_ROOT}/wolfcrypt/benchmark/" "${WOLFSSL_ROOT}/wolfcrypt/test/" ) @@ -190,9 +189,10 @@ set(COMPONENT_SRCEXCLUDE "${WOLFSSL_ROOT}/src/conf.c" "${WOLFSSL_ROOT}/src/misc.c" "${WOLFSSL_ROOT}/src/pk.c" - "${WOLFSSL_ROOT}/src/ssl_asn1.c" # included by ssl.c - "${WOLFSSL_ROOT}/src/ssl_bn.c" # included by ssl.c - "${WOLFSSL_ROOT}/src/ssl_misc.c" # included by ssl.c + "${WOLFSSL_ROOT}/src/ssl_asn1.c" # included by ssl.c + "${WOLFSSL_ROOT}/src/ssl_bn.c" # included by ssl.c + "${WOLFSSL_ROOT}/src/ssl_certman.c" # included by ssl.c + "${WOLFSSL_ROOT}/src/ssl_misc.c" # included by ssl.c "${WOLFSSL_ROOT}/src/x509.c" "${WOLFSSL_ROOT}/src/x509_str.c" "${WOLFSSL_ROOT}/wolfcrypt/src/evp.c" diff --git a/IDE/Espressif/ESP-IDF/examples/wolfssl_test/main/main.c b/IDE/Espressif/ESP-IDF/examples/wolfssl_test/main/main.c index 9b5770c7be..63aaaf27e4 100644 --- a/IDE/Espressif/ESP-IDF/examples/wolfssl_test/main/main.c +++ b/IDE/Espressif/ESP-IDF/examples/wolfssl_test/main/main.c @@ -154,6 
+154,8 @@ void app_main(void) /* some interesting settings are target specific (ESP32, -C3, -S3, etc */ #if defined(CONFIG_IDF_TARGET_ESP32C3) /* not available for C3 at this time */ +#elif defined(CONFIG_IDF_TARGET_ESP32C6) + /* not available for C6 at this time */ #elif defined(CONFIG_IDF_TARGET_ESP32S3) ESP_LOGI(TAG, "CONFIG_ESP32S3_DEFAULT_CPU_FREQ_MHZ = %u MHz", CONFIG_ESP32S3_DEFAULT_CPU_FREQ_MHZ @@ -185,8 +187,6 @@ void app_main(void) #endif #endif - - #if defined (WOLFSSL_USE_TIME_HELPER) set_time(); #endif @@ -224,9 +224,23 @@ void app_main(void) /* see wolfssl/wolfcrypt/error-crypt.h */ } - /* after the test, we'll just wait */ +#ifdef INCLUDE_uxTaskGetStackHighWaterMark + ESP_LOGI(TAG, "Stack HWM: %d", uxTaskGetStackHighWaterMark(NULL)); + + ESP_LOGI(TAG, "Stack used: %d", CONFIG_ESP_MAIN_TASK_STACK_SIZE + - (uxTaskGetStackHighWaterMark(NULL) / 4)); +#endif + + ESP_LOGI(TAG, "\n\nDone!\n\n" + "If running from idf.py monitor, press twice: Ctrl+]"); + + /* done */ while (1) { - /* nothing */ - } +#if defined(SINGLE_THREADED) + while (1); +#else + vTaskDelay(60000); +#endif + } /* done while */ #endif } diff --git a/IDE/Espressif/ESP-IDF/user_settings.h b/IDE/Espressif/ESP-IDF/user_settings.h index 043f14d965..2ac4ac9ecd 100644 --- a/IDE/Espressif/ESP-IDF/user_settings.h +++ b/IDE/Espressif/ESP-IDF/user_settings.h @@ -18,12 +18,24 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA */ + #undef WOLFSSL_ESPIDF #undef WOLFSSL_ESP32 #undef WOLFSSL_ESPWROOM32SE #undef WOLFSSL_ESP32 #undef WOLFSSL_ESP8266 +/* The Espressif sdkconfig will have chipset info. 
+** +** Possible values: +** +** CONFIG_IDF_TARGET_ESP32 +** CONFIG_IDF_TARGET_ESP32S3 +** CONFIG_IDF_TARGET_ESP32C3 +** CONFIG_IDF_TARGET_ESP32C6 +*/ +#include + #define WOLFSSL_ESPIDF /* @@ -85,11 +97,11 @@ /* #define CUSTOM_SLOT_ALLOCATION */ #endif -/* rsa primitive specific definition */ +/* RSA primitive specific definition */ #if defined(WOLFSSL_ESP32) || defined(WOLFSSL_ESPWROOM32SE) /* Define USE_FAST_MATH and SMALL_STACK */ #define ESP32_USE_RSA_PRIMITIVE - /* threshold for performance adjustment for hw primitive use */ + /* threshold for performance adjustment for HW primitive use */ /* X bits of G^X mod P greater than */ #define EPS_RSA_EXPT_XBTIS 36 /* X and Y of X * Y mod P greater than */ @@ -107,11 +119,50 @@ /* #define NO_ASN_TIME */ /* #define XTIME time */ -/* when you want not to use HW acceleration */ -/* #define NO_ESP32_CRYPT */ -/* #define NO_WOLFSSL_ESP32_CRYPT_HASH*/ -/* #define NO_WOLFSSL_ESP32_CRYPT_AES */ -/* #define NO_WOLFSSL_ESP32_CRYPT_RSA_PRI */ -/* adjust wait-timeout count if you see timeout in rsa hw acceleration */ +/* adjust wait-timeout count if you see timeout in RSA HW acceleration */ #define ESP_RSA_TIMEOUT_CNT 0x249F00 + +#if defined(CONFIG_IDF_TARGET_ESP32) + /* when you want not to use HW acceleration on ESP32 (below for S3, etc */ + /* #define NO_ESP32_CRYPT */ + /* #define NO_WOLFSSL_ESP32_CRYPT_HASH */ + /* #define NO_WOLFSSL_ESP32_CRYPT_AES */ + /* #define NO_WOLFSSL_ESP32_CRYPT_RSA_PRI */ +#elif defined(CONFIG_IDF_TARGET_ESP32S2) + /* ESP32-S2 disabled by default; not implemented */ + #define NO_ESP32_CRYPT + #define NO_WOLFSSL_ESP32_CRYPT_HASH + #define NO_WOLFSSL_ESP32_CRYPT_AES + #define NO_WOLFSSL_ESP32_CRYPT_RSA_PRI +#elif defined(CONFIG_IDF_TARGET_ESP32S3) + /* when you want not to use HW acceleration on ESP32-S3 */ + /* #define NO_ESP32_CRYPT */ + /* #define NO_WOLFSSL_ESP32_CRYPT_HASH */ + /* #define NO_WOLFSSL_ESP32_CRYPT_AES */ + /* #define NO_WOLFSSL_ESP32_CRYPT_RSA_PRI */ +#elif 
defined(CONFIG_IDF_TARGET_ESP32C3) + /* ESP32-C3 disabled by default, not implemented */ + #define NO_ESP32_CRYPT + #define NO_WOLFSSL_ESP32_CRYPT_HASH + #define NO_WOLFSSL_ESP32_CRYPT_AES + #define NO_WOLFSSL_ESP32_CRYPT_RSA_PRI +#elif defined(CONFIG_IDF_TARGET_ESP32C6) + /* ESP32-C6 disabled by default, not implemented */ + #define NO_ESP32_CRYPT + #define NO_WOLFSSL_ESP32_CRYPT_HASH + #define NO_WOLFSSL_ESP32_CRYPT_AES + #define NO_WOLFSSL_ESP32_CRYPT_RSA_PRI +#elif defined(CONFIG_IDF_TARGET_ESP32H2) + /* ESP32-H2 disabled by default, not implemented */ + #define NO_ESP32_CRYPT + #define NO_WOLFSSL_ESP32_CRYPT_HASH + #define NO_WOLFSSL_ESP32_CRYPT_AES + #define NO_WOLFSSL_ESP32_CRYPT_RSA_PRI +#else + /* anything else unknown will have HW disabled by default */ + #define NO_ESP32_CRYPT + #define NO_WOLFSSL_ESP32_CRYPT_HASH + #define NO_WOLFSSL_ESP32_CRYPT_AES + #define NO_WOLFSSL_ESP32_CRYPT_RSA_PRI +#endif diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index 1da560be1f..fd2f9e0e5c 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -469,24 +469,57 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits #elif defined(WOLFSSL_ESP32_CRYPT) && \ !defined(NO_WOLFSSL_ESP32_CRYPT_AES) - - #include "wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h" - - #if defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) - static WARN_UNUSED_RESULT int wc_AesEncrypt( + #include + #include + const char* TAG = "aes"; + + /* We'll use SW for fallback: + * unsupported key lengths. (e.g. ESP32-S3) + * chipsets not implemented. + * hardware busy. 
*/ + #define NEED_AES_TABLES + #define NEED_AES_HW_FALLBACK + #define NEED_SOFTWARE_AES_SETKEY + #undef WOLFSSL_AES_DIRECT + #define WOLFSSL_AES_DIRECT + + /* If we choose to never have a fallback to SW: */ + #if !defined(NEED_AES_HW_FALLBACK) && (defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT)) + static WARN_UNUSED_RESULT int wc_AesEncrypt( /* calling this one when NO_AES_192 is defined */ Aes* aes, const byte* inBlock, byte* outBlock) { + int ret; /* Thread mutex protection handled in esp_aes_hw_InUse */ - return wc_esp32AesEncrypt(aes, inBlock, outBlock); + #ifdef NEED_AES_HW_FALLBACK + if (wc_esp32AesSupportedKeyLen(aes)) { + ret = wc_esp32AesEncrypt(aes, inBlock, outBlock); + } + #else + ret = wc_esp32AesEncrypt(aes, inBlock, outBlock); + #endif + return ret; } #endif - #if defined(HAVE_AES_DECRYPT) && defined(WOLFSSL_AES_DIRECT) + /* If we choose to never have a fallback to SW */ + #if !defined(NEED_AES_HW_FALLBACK) && (defined(HAVE_AES_DECRYPT) && defined(WOLFSSL_AES_DIRECT)) static WARN_UNUSED_RESULT int wc_AesDecrypt( Aes* aes, const byte* inBlock, byte* outBlock) { + int ret = 0; /* Thread mutex protection handled in esp_aes_hw_InUse */ - return wc_esp32AesDecrypt(aes, inBlock, outBlock); + #ifdef NEED_AES_HW_FALLBACK + if (wc_esp32AesSupportedKeyLen(aes)) { + ret = wc_esp32AesDecrypt(aes, inBlock, outBlock); + } + else { + ret = wc_AesDecrypt_SW(aes, inBlock, outBlock); + } + #else + /* if we don't need fallback, always use HW */ + ret = wc_esp32AesDecrypt(aes, inBlock, outBlock); + #endif + return ret; } #endif @@ -848,7 +881,10 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits #ifdef NEED_AES_TABLES -#if !defined(WOLFSSL_SILABS_SE_ACCEL) +#if (!defined(WOLFSSL_SILABS_SE_ACCEL) && \ + !defined(WOLFSSL_ESP32_CRYPT_RSA_PRI) \ + ) || \ + (defined(WOLFSSL_ESP32_CRYPT_RSA_PRI) && defined(NEED_AES_HW_FALLBACK)) static const FLASH_QUALIFIER word32 rcon[] = { 0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 
0x20000000, 0x40000000, 0x80000000, @@ -1775,14 +1811,21 @@ static word32 GetTable8_4(const byte* t, byte o0, byte o1, byte o2, byte o3) ((word32)(t)[o2] << 8) | ((word32)(t)[o3] << 0)) #endif -/* Software AES - ECB Encrypt */ -static WARN_UNUSED_RESULT int wc_AesEncrypt( + /* this section disabled with NO_AES_192 */ +static WARN_UNUSED_RESULT int wc_AesEncrypt( /* calling this one when missing NO_AES_192 */ Aes* aes, const byte* inBlock, byte* outBlock) { word32 s0, s1, s2, s3; word32 t0, t1, t2, t3; - word32 r = aes->rounds >> 1; - const word32* rk = aes->key; + word32 r; + const word32* rk; + + if (aes == NULL) { + return BAD_FUNC_ARG; + } + + r = aes->rounds >> 1; + rk = aes->key; if (r > 7 || r == 0) { WOLFSSL_ERROR_VERBOSE(KEYUSAGE_E); @@ -1854,6 +1897,21 @@ static WARN_UNUSED_RESULT int wc_AesEncrypt( } #endif +#if defined(WOLFSSL_ESPIDF) && defined(NEED_AES_HW_FALLBACK) + ESP_LOGV(TAG, "wc_AesEncrypt fallback check"); + if (wc_esp32AesSupportedKeyLen(aes)) { + return wc_esp32AesEncrypt(aes, inBlock, outBlock); + } + else { + /* For example, the ESP32-S3 does not support HW for len = 24, + * so fall back to SW */ + #ifdef DEBUG_WOLFSSL + ESP_LOGW(TAG, "wc_AesEncrypt HW Falling back, unsupported keylen = %d", + aes->keylen); + #endif + } +#endif + /* * map byte array block to cipher state * and add initial round key: @@ -2114,7 +2172,7 @@ static WARN_UNUSED_RESULT int wc_AesEncrypt( XMEMCPY(outBlock + 3 * sizeof(s0), &s3, sizeof(s3)); return 0; -} +} /* wc_AesEncrypt */ #endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT || HAVE_AESGCM */ #if defined(HAVE_AES_DECRYPT) @@ -2163,8 +2221,15 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( { word32 s0, s1, s2, s3; word32 t0, t1, t2, t3; - word32 r = aes->rounds >> 1; - const word32* rk = aes->key; + word32 r; + const word32* rk; + + if (aes == NULL) { + return BAD_FUNC_ARG; + } + + r = aes->rounds >> 1; + rk = aes->key; if (r > 7 || r == 0) { WOLFSSL_ERROR_VERBOSE(KEYUSAGE_E); @@ -2210,6 +2275,19 @@ static 
WARN_UNUSED_RESULT int wc_AesDecrypt( AES_DECRYPTION, kAlgorithm_SSS_AES_ECB); } #endif +#if defined(WOLFSSL_ESPIDF) && defined(NEED_AES_HW_FALLBACK) + if (wc_esp32AesSupportedKeyLen(aes)) { + return wc_esp32AesDecrypt(aes, inBlock, outBlock); + } + else { + /* For example, the ESP32-S3 does not support HW for len = 24, + * so fall back to SW */ + #ifdef DEBUG_WOLFSSL + ESP_LOGW(TAG, "wc_AesDecrypt HW Falling back, " + "unsupported keylen = %d", aes->keylen); + #endif + } /* else !wc_esp32AesSupportedKeyLen for ESP32 */ +#endif /* * map byte array block to cipher state @@ -2422,7 +2500,7 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( XMEMCPY(outBlock + 3 * sizeof(s0), &s3, sizeof(s3)); return 0; -} +} /* wc_AesDecrypt[_SW]() */ #endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT */ #endif /* HAVE_AES_DECRYPT */ @@ -2661,15 +2739,16 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( { return wc_AesSetKey(aes, userKey, keylen, iv, dir); } -#elif defined(WOLFSSL_ESP32_CRYPT) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_AES) - - int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, +#elif defined(WOLFSSL_ESP32_CRYPT) && !defined(NO_WOLFSSL_ESP32_CRYPT_AES) + /* This is the only definition for HW only. + * but needs to be renamed when fallback needed. 
+ * See call in wc_AesSetKey() */ int wc_AesSetKey_for_ESP32(Aes* aes, const byte* userKey, word32 keylen, const byte* iv, int dir) { (void)dir; (void)iv; - + ESP_LOGV(TAG, "wc_AesSetKey_for_ESP32"); if (aes == NULL || (keylen != 16 && keylen != 24 && keylen != 32)) { return BAD_FUNC_ARG; } @@ -2700,13 +2779,9 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( aes->left = 0; #endif return wc_AesSetIV(aes, iv); - } + } /* wc_AesSetKey_for_ESP32 */ - int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, - const byte* iv, int dir) - { - return wc_AesSetKey(aes, userKey, keylen, iv, dir); - } + /* end #elif ESP32 */ #elif defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES) int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv, @@ -2797,7 +2872,13 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( /* implemented in wolfcrypt/src/port/silabs/silabs_aes.c */ #else + #define NEED_SOFTWARE_AES_SETKEY +#endif +/* Either we fell through with no HW support at all, + * or perhaps there's HW support for *some* keylengths + * and we need both HW and SW. 
*/ +#ifdef NEED_SOFTWARE_AES_SETKEY /* Software AES - SetKey */ static WARN_UNUSED_RESULT int wc_AesSetKeyLocal( Aes* aes, const byte* userKey, word32 keylen, const byte* iv, int dir, @@ -2971,10 +3052,26 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( rk = aes->key; XMEMCPY(rk, userKey, keylen); + #if defined(LITTLE_ENDIAN_ORDER) && !defined(WOLFSSL_PIC32MZ_CRYPT) && \ (!defined(WOLFSSL_ESP32_CRYPT) || \ defined(NO_WOLFSSL_ESP32_CRYPT_AES)) - ByteReverseWords(rk, rk, keylen); + /* software */ + ByteReverseWords(rk, rk, keylen); + #elif defined(WOLFSSL_ESP32_CRYPT) && !defined(NO_WOLFSSL_ESP32_CRYPT_AES) + if (wc_esp32AesSupportedKeyLen(aes)) { + /* supported lengths don't get reversed */ + ESP_LOGV(TAG, "wc_AesSetKeyLocal (no ByteReverseWords)"); + } + else { + /* For example, the ESP32-S3 does not support HW for len = 24, + * so fall back to SW */ + #ifdef DEBUG_WOLFSSL + ESP_LOGW(TAG, "wc_AesSetKeyLocal ByteReverseWords"); + #endif + /* When not ESP32 HW, we need to reverse endianess */ + ByteReverseWords(rk, rk, keylen); + } #endif #ifdef WOLFSSL_IMXRT_DCP @@ -3179,10 +3276,10 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( wc_MemZero_Check(&temp, sizeof(temp)); #endif return ret; - } + } /* wc_AesSetKeyLocal */ int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, - const byte* iv, int dir) + const byte* iv, int dir) { if (aes == NULL) { return BAD_FUNC_ARG; @@ -3191,24 +3288,38 @@ static WARN_UNUSED_RESULT int wc_AesDecrypt( return BAD_FUNC_ARG; } + /* sometimes hardware may not support all keylengths (e.g. 
ESP32-S3) */ + #if defined(WOLFSSL_ESPIDF) && defined(NEED_AES_HW_FALLBACK) + ESP_LOGV(TAG, "wc_AesSetKey fallback check %d", keylen); + if (wc_esp32AesSupportedKeyLenValue(keylen)) { + ESP_LOGV(TAG, "wc_AesSetKey calling wc_AesSetKey_for_ESP32"); + return wc_AesSetKey_for_ESP32(aes, userKey, keylen, iv, dir); + } + else { + #ifdef DEBUG_WOLFSSL + ESP_LOGW(TAG, "wc_AesSetKey HW Fallback, unsupported keylen = %d", + keylen); + #endif + } + #endif return wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir, 1); - } + } /* wc_AesSetKey() */ #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) /* AES-CTR and AES-DIRECT need to use this for key setup */ /* This function allows key sizes that are not 128/192/256 bits */ - int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, - const byte* iv, int dir) - { - if (aes == NULL) { - return BAD_FUNC_ARG; - } - if (keylen > sizeof(aes->key)) { - return BAD_FUNC_ARG; - } - - return wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir, 0); + int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + if (aes == NULL) { + return BAD_FUNC_ARG; + } + if (keylen > sizeof(aes->key)) { + return BAD_FUNC_ARG; } + + return wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir, 0); + } #endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ #endif /* wc_AesSetKey block */ @@ -3951,14 +4062,12 @@ int wc_AesSetIV(Aes* aes, const byte* iv) #elif defined(WOLFSSL_ESP32_CRYPT) && \ !defined(NO_WOLFSSL_ESP32_CRYPT_AES) - int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) - { - return wc_esp32AesCbcEncrypt(aes, out, in, sz); - } - int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) - { - return wc_esp32AesCbcDecrypt(aes, out, in, sz); - } + /* We'll use SW for fall back: + * unsupported key lengths + * hardware busy */ + #define NEED_SW_AESCBC + #define NEED_AESCBC_HW_FALLBACK + #elif defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES) int 
wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) { @@ -3988,9 +4097,15 @@ int wc_AesSetIV(Aes* aes, const byte* iv) /* implemented in wolfcrypt/src/port/psa/psa_aes.c */ #else + /* Reminder: Some HW implementations may also define this as needed. + * (e.g. for unsupported key length fallback) */ + #define NEED_SW_AESCBC +#endif +#ifdef NEED_SW_AESCBC /* Software AES - CBC Encrypt */ - int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + +int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) { word32 blocks; @@ -4108,6 +4223,21 @@ int wc_AesSetIV(Aes* aes, const byte* iv) } #endif + #if defined(WOLFSSL_ESPIDF) && defined(NEED_AESCBC_HW_FALLBACK) + if (wc_esp32AesSupportedKeyLen(aes)) { + ESP_LOGV(TAG, "wc_AesCbcEncrypt calling wc_esp32AesCbcEncrypt"); + return wc_esp32AesCbcEncrypt(aes, out, in, sz); + } + else { + /* For example, the ESP32-S3 does not support HW for len = 24, + * so fall back to SW */ + #ifdef DEBUG_WOLFSSL + ESP_LOGW(TAG, "wc_AesCbcEncrypt HW Falling back, " + "unsupported keylen = %d", aes->keylen); + #endif + } + #endif + while (blocks--) { int ret; xorbuf((byte*)aes->reg, in, AES_BLOCK_SIZE); @@ -4121,14 +4251,13 @@ int wc_AesSetIV(Aes* aes, const byte* iv) } return 0; - } + } /* wc_AesCbcEncrypt */ - #ifdef HAVE_AES_DECRYPT +#ifdef HAVE_AES_DECRYPT /* Software AES - CBC Decrypt */ int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) { word32 blocks; - if (aes == NULL || out == NULL || in == NULL) { return BAD_FUNC_ARG; } @@ -4137,6 +4266,21 @@ int wc_AesSetIV(Aes* aes, const byte* iv) return 0; } + #if defined(WOLFSSL_ESPIDF) && defined(NEED_AESCBC_HW_FALLBACK) + if (wc_esp32AesSupportedKeyLen(aes)) { + ESP_LOGV(TAG, "wc_AesCbcDecrypt calling wc_esp32AesCbcDecrypt"); + return wc_esp32AesCbcDecrypt(aes, out, in, sz); + } + else { + /* For example, the ESP32-S3 does not support HW for len = 24, + * so fall back to SW */ + #ifdef DEBUG_WOLFSSL + ESP_LOGW(TAG, "wc_AesCbcDecrypt HW 
Falling back, " + "unsupported keylen = %d", aes->keylen); + #endif + } + #endif + blocks = sz / AES_BLOCK_SIZE; if (sz % AES_BLOCK_SIZE) { #ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS @@ -4242,7 +4386,7 @@ int wc_AesSetIV(Aes* aes, const byte* iv) return 0; } - #endif /* HAVE_AES_DECRYPT */ +#endif /* HAVE_AES_DECRYPT */ #endif /* AES-CBC block */ #endif /* HAVE_AES_CBC */ diff --git a/wolfcrypt/src/port/Espressif/README.md b/wolfcrypt/src/port/Espressif/README.md index fe98e3be65..77c096d8ac 100644 --- a/wolfcrypt/src/port/Espressif/README.md +++ b/wolfcrypt/src/port/Espressif/README.md @@ -11,25 +11,30 @@ For detail about ESP32 HW Acceleration, you can find in [Technical Reference Man To enable hw acceleration : * Uncomment out `#define WOLFSSL_ESPIDF` in `/path/to/wolfssl/wolfssl/wolfcrypt/settings.h` -* Uncomment out `#define WOLFSSL_ESPWROOM32` in `/path/to/wolfssl/wolfssl/wolfcrypt/settings.h` +* Uncomment out `#define WOLFSSL_ESP32` in `/path/to/wolfssl/wolfssl/wolfcrypt/settings.h` To disable portions of the hardware acceleration you can optionally define: ```c /* Disabled SHA, AES and RSA acceleration */ -#define NO_ESP32WROOM32_CRYPT +#define NO_ESP32_CRYPT /* Disabled AES acceleration */ -#define NO_WOLFSSL_ESP32WROOM32_CRYPT_AES +#define NO_WOLFSSL_ESP32_CRYPT_AES /* Disabled SHA acceleration */ -#define NO_WOLFSSL_ESP32WROOM32_CRYPT_HASH +#define NO_WOLFSSL_ESP32_CRYPT_HASH /* Disabled RSA Primitive acceleration */ -#define NO_WOLFSSL_ESP32WROOM32_CRYPT_RSA_PRI +#define NO_WOLFSSL_ESP32_CRYPT_RSA_PRI ``` ### Coding In your application you must include `` before any other wolfSSL headers. If building the sources directly we recommend defining `WOLFSSL_USER_SETTINGS` and adding your own `user_settings.h` file. You can find a good reference for this in `IDE/GCC-ARM/Header/user_settings.h`. +To view disassembly, add `__attribute__((section(".iram1")))` decorator. 
For example: + +``` +static int __attribute__((section(".iram1"))) memblock_peek(volatile u_int32_t mem_address) +``` ### Benchmarks diff --git a/wolfcrypt/src/port/Espressif/esp32_aes.c b/wolfcrypt/src/port/Espressif/esp32_aes.c index d219f65ec3..8d677a34f3 100644 --- a/wolfcrypt/src/port/Espressif/esp32_aes.c +++ b/wolfcrypt/src/port/Espressif/esp32_aes.c @@ -125,7 +125,7 @@ static int esp_aes_hw_Set_KeyMode(Aes *ctx, ESP32_AESPROCESS mode) word32 i; word32 mode_ = 0; - ESP_LOGV(TAG, " enter esp_aes_hw_Set_KeyMode"); + ESP_LOGV(TAG, " enter esp_aes_hw_Set_KeyMode %d", mode); /* check mode */ if (mode == ESP32_AES_UPDATEKEY_ENCRYPT) { @@ -141,37 +141,39 @@ static int esp_aes_hw_Set_KeyMode(Aes *ctx, ESP32_AESPROCESS mode) } } /* if mode */ - if (ret == 0) { + /* + ** ESP32: see table 22-1 in ESP32 Technical Reference + ** ESP32S3: see table 19-2 in ESP32S3 Technical Reference + ** mode Algorithm ESP32 ESP32S3 + ** 0 AES-128 Encryption y y + ** 1 AES-192 Encryption y n + ** 2 AES-256 Encryption y y + ** 4 AES-128 Decryption y y + ** 5 AES-192 Decryption y n + ** 6 AES-256 Decryption y y + */ + switch(ctx->keylen){ + case 24: mode_ += 1; break; + case 32: mode_ += 2; break; + default: break; + } + +#if CONFIG_IDF_TARGET_ESP32S3 + if (mode_ == 1 || mode_ == 5 || mode_ == 7) { + /* this should have been detected in aes.c and fall back to SW */ + ESP_LOGE(TAG, "esp_aes_hw_Set_KeyMode unsupported mode: %i", mode_); + ret = BAD_FUNC_ARG; + } +#endif + if (ret == 0) { /* update key */ for (i = 0; i < (ctx->keylen) / sizeof(word32); i++) { - DPORT_REG_WRITE(AES_KEY_BASE + (i * 4), *(((word32*)ctx->key) + i)); - } - - /* - ** ESP32: see table 22-1 in ESP32 Technical Reference - ** ESP32S3: see table 19-2 in ESP32S3 Technical Reference - ** mode Algorithm ESP32 ESP32S3 - ** 0 AES-128 Encryption y y - ** 1 AES-192 Encryption y n - ** 2 AES-256 Encryption y y - ** 4 AES-128 Decryption y y - ** 5 AES-192 Decryption y n - ** 6 AES-256 Decryption y y - */ - switch(ctx->keylen){ - 
case 24: mode_ += 1; break; - case 32: mode_ += 2; break; - default: break; + DPORT_REG_WRITE((volatile uint32_t*)(AES_KEY_BASE + (i * 4)), + *(((word32*)ctx->key) + i) + ); } - #if CONFIG_IDF_TARGET_ESP32S3 - if (mode_ == 1 || mode_ == 5 || mode_ == 7) { - ESP_LOGE(TAG, "esp_aes_hw_Set_KeyMode unsupported mode: %i", mode_); - ret = BAD_FUNC_ARG; - } - #endif - if (ret == 0) { DPORT_REG_WRITE(AES_MODE_REG, mode_); } @@ -243,6 +245,60 @@ static void esp_aes_bk(const byte* in, byte* out) ESP_LOGV(TAG, "leave esp_aes_bk"); } /* esp_aes_bk */ +/* +* wc_esp32AesSupportedKeyLenValue +* @brief: returns 1 if AES key length supported in HW, 0 if not +* @param keylen: the AES key length value, in bytes */ +WOLFSSL_LOCAL int wc_esp32AesSupportedKeyLenValue(int keylen) +{ + int ret = 0; +#if defined(CONFIG_IDF_TARGET_ESP32) + if (keylen == 16 || keylen == 24 || keylen == 32) { + ret = 1; + } + else { + ret = 0; /* not a 16, 24 or 32 byte key: not supported */ + } + +#elif defined(CONFIG_IDF_TARGET_ESP32S2) + ret = 0; /* not supported */ + +#elif defined(CONFIG_IDF_TARGET_ESP32S3) + if (keylen == 16 || keylen == 32) { + ret = 1; + } + else { + ret = 0; /* keylen 24 (192 bit) not supported */ + } + +#elif defined(CONFIG_IDF_TARGET_ESP32C3) + ret = 0; /* not supported */ +#elif defined(CONFIG_IDF_TARGET_ESP32C6) + ret = 0; /* not supported */ +#elif defined(CONFIG_IDF_TARGET_ESP32H2) + ret = 0; /* not supported */ +#else + ret = 0; /* if we don't know, then it is not supported */ +#endif + return ret; +} + +/* +* wc_esp32AesSupportedKeyLen +* @brief: returns 1 if AES key length supported in HW, 0 if not +* @param aes: a pointer of the AES object used to encrypt data */ +WOLFSSL_LOCAL int wc_esp32AesSupportedKeyLen(struct Aes* aes) +{ + int ret; + if (aes == NULL) { + ret = 0; /* we need a valid aes object to get its keylength */ + } + else { + ret = wc_esp32AesSupportedKeyLenValue(aes->keylen); + } + return ret; +} + /* * wc_esp32AesEncrypt * @brief: a one block encrypt of the input block, into the output 
block @@ -252,7 +308,7 @@ static void esp_aes_bk(const byte* in, byte* out) * the encrypted message * @return: 0 on success, BAD_FUNC_ARG if the AES algorithm isn't supported. */ -int wc_esp32AesEncrypt(Aes *aes, const byte* in, byte* out) +WOLFSSL_LOCAL int wc_esp32AesEncrypt(Aes *aes, const byte* in, byte* out) { int ret = 0; @@ -287,7 +343,7 @@ int wc_esp32AesEncrypt(Aes *aes, const byte* in, byte* out) * the decrypted message * @return: 0 on success, BAD_FUNC_ARG if the AES algorithm isn't supported. */ -int wc_esp32AesDecrypt(Aes *aes, const byte* in, byte* out) +WOLFSSL_LOCAL int wc_esp32AesDecrypt(Aes *aes, const byte* in, byte* out) { int ret; @@ -325,7 +381,7 @@ int wc_esp32AesDecrypt(Aes *aes, const byte* in, byte* out) * @param sz : size of input message * @return: 0 on success, BAD_FUNC_ARG if the AES algorithm isn't supported. */ -int wc_esp32AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +WOLFSSL_LOCAL int wc_esp32AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) { int ret; int i; @@ -343,7 +399,7 @@ int wc_esp32AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) if (ret == 0) { ret = esp_aes_hw_Set_KeyMode(aes, ESP32_AES_UPDATEKEY_ENCRYPT); if (ret != 0) { - ESP_LOGE(TAG, "wc_esp32AesCbcEncrypt failed HW Set KeyMode"); + ESP_LOGW(TAG, "wc_esp32AesCbcEncrypt failed HW Set KeyMode"); } } /* if set esp_aes_hw_InUse successful */ @@ -367,7 +423,7 @@ int wc_esp32AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) esp_aes_hw_Leave(); ESP_LOGV(TAG, "leave wc_esp32AesCbcEncrypt"); - return 0; + return ret; } /* wc_esp32AesCbcEncrypt */ /* @@ -382,7 +438,7 @@ int wc_esp32AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) * @param sz : size of input message * @return: 0 on success, BAD_FUNC_ARG if the AES algorithm isn't supported. 
*/ -int wc_esp32AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) +WOLFSSL_LOCAL int wc_esp32AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) { int ret; @@ -401,7 +457,7 @@ int wc_esp32AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) if (ret == 0) { ret = esp_aes_hw_Set_KeyMode(aes, ESP32_AES_UPDATEKEY_DECRYPT); if (ret != 0) { - ESP_LOGE(TAG, "wc_esp32AesCbcDecrypt failed HW Set KeyMode"); + ESP_LOGW(TAG, "wc_esp32AesCbcDecrypt failed HW Set KeyMode"); } } @@ -425,7 +481,7 @@ int wc_esp32AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) esp_aes_hw_Leave(); ESP_LOGV(TAG, "leave wc_esp32AesCbcDecrypt"); - return 0; + return ret; } /* wc_esp32AesCbcDecrypt */ #endif /* WOLFSSL_ESP32_CRYPT */ diff --git a/wolfcrypt/src/port/Espressif/esp32_mp.c b/wolfcrypt/src/port/Espressif/esp32_mp.c index b2dc3c4cf8..67caac77b7 100644 --- a/wolfcrypt/src/port/Espressif/esp32_mp.c +++ b/wolfcrypt/src/port/Espressif/esp32_mp.c @@ -18,15 +18,32 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA */ -#include -#include + +/* + * See ESP32 Technical Reference Manual - RSA Accelerator Chapter + * + * esp_mp_exptmod() Large Number Modular Exponentiation Z = X^Y mod M + * esp_mp_mulmod() Large Number Modular Multiplication Z = X × Y mod M + * esp_mp_mul() Large Number Multiplication Z = X × Y + * + * The ESP32 RSA Accelerator supports operand lengths of: + * N ∈ {512, 1024, 1536, 2048, 2560, 3072, 3584, 4096} bits. The bit length + * of arguments Z, X, Y , M, and r can be any one from the N set, but all + * numbers in a calculation must be of the same length. + * + * The bit length of M′ is always 32. 
+ * + * Also, beware: "we have uint32_t == unsigned long for both Xtensa and RISC-V" + * see https://github.com/espressif/esp-idf/issues/9511#issuecomment-1207342464 + * https://docs.espressif.com/projects/esp-idf/en/latest/esp32/migration-guides/release-5.x/5.0/gcc.html + */ #ifdef HAVE_CONFIG_H #include #endif #include -#include "wolfssl/wolfcrypt/logging.h" +#include #if !defined(NO_RSA) || defined(HAVE_ECC) @@ -41,25 +58,94 @@ #endif #include -static const char* const TAG = "wolfssl_mp"; +#ifndef SINGLE_THREADED + /* Espressif freeRTOS */ + #include +#endif #define ESP_HW_RSAMAX_BIT 4096 #define ESP_HW_MULTI_RSAMAX_BITS 2048 #define ESP_HW_RSAMIN_BIT 512 -#define BYTE_TO_WORDS(s) (((s+3)>>2)) /* (s+(4-1))/ 4 */ -#define BITS_TO_WORDS(s) (((s+31)>>3)>>2) /* (s+(32-1))/ 8/ 4*/ +#define BYTE_TO_WORDS(s) (((s+3)>>2)) /* (s+(4-1))/ 4 */ +#define BITS_TO_WORDS(s) (((s+31)>>3)>>2) /* (s+(32-1))/ 8/ 4*/ #define BITS_IN_ONE_WORD 32 -#define MP_NG -1 - #define ESP_TIMEOUT(cnt) (cnt >= ESP_RSA_TIMEOUT_CNT) +static const char* const TAG = "wolfssl_esp32_mp"; + +#ifdef DEBUG_WOLFSSL + static int hw_validation = 0; /* validating HW and SW? (prevent HW call) */ + #define SET_HW_VALIDATION {hw_validation = 1;} + #define CLR_HW_VALIDATION {hw_validation = 0;} + #define IS_HW_VALIDATION (hw_validation == 1) + #undef WOLFSSL_HW_METRICS + #define WOLFSSL_HW_METRICS /* usage metrics always on during debug */ +#endif + +/* For esp_mp_exptmod and esp_mp_mulmod we need a variety of calculated helper +** values to properly setup the hardware. 
See esp_mp_montgomery_init() */ +struct esp_mp_helper +{ + MATH_INT_T r_inv; /* result of calculated montgomery helper */ + word32 exp; + word32 Xs; /* how many bits in X operand */ + word32 Ys; /* how many bits in Y operand */ + word32 Ms; /* how many bits in M operand */ + word32 Rs; /* how many bits in R_inv calc */ + word32 maxWords_sz; /* maximum words expected */ + word32 hwWords_sz; + mp_digit mp; /* result of calculated montgomery M' helper */ +#ifdef DEBUG_WOLFSSL + mp_digit mp2; /* optional compare to alternate montgomery calc */ +#endif +}; + +/* usage metrics can be turned on independently of debugging */ +#ifdef WOLFSSL_HW_METRICS + static unsigned long esp_mp_mul_usage_ct = 0; + static unsigned long esp_mp_mul_error_ct = 0; + + static unsigned long esp_mp_mulmod_usage_ct = 0; + static unsigned long esp_mp_mulmod_fallback_ct = 0; + static unsigned long esp_mp_mulmod_even_mod_ct = 0; + static unsigned long esp_mp_mulmod_small_x_ct = 0; + static unsigned long esp_mp_mulmod_small_y_ct = 0; + static unsigned long esp_mp_mulmod_error_ct = 0; + + static unsigned long esp_mp_exptmod_usage_ct = 0; + static unsigned long esp_mp_exptmod_error_ct = 0; + static unsigned long esp_mp_exptmod_fallback_ct = 0; + static unsigned long esp_mp_max_used = 0; +#endif + /* mutex */ -static wolfSSL_Mutex mp_mutex; -static int espmp_CryptHwMutexInit = 0; +#ifdef SINGLE_THREADED + int single_thread_locked = 0; +#else + static wolfSSL_Mutex mp_mutex; + static int espmp_CryptHwMutexInit = 0; +#endif + +#ifdef DEBUG_WOLFSSL + /* when debugging, we'll double-check the mutex with call depth */ + #ifndef NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD + static int esp_mp_exptmod_depth_counter = 0; + #endif /* NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD */ +#endif /* DEBUG_WOLFSSL */ + /* * check if the HW is ready before accessing it * +* See 24.3.1 Initialization of ESP32 Technical Reference Manual +* 
https://www.espressif.com/sites/default/files/documentation/esp32_technical_reference_manual_en.pdf +* +* The RSA Accelerator is activated by enabling the corresponding peripheral +* clock, and by clearing the DPORT_RSA_PD bit in the DPORT_RSA_PD_CTRL_REG +* register. This releases the RSA Accelerator from reset. +* +* See esp_mp_hw_lock(). +* * When the RSA Accelerator is released from reset, the register RSA_CLEAN_REG * reads 0 and an initialization process begins. Hardware initializes the four * memory blocks by setting them to 0. After initialization is complete, @@ -69,30 +155,71 @@ static int espmp_CryptHwMutexInit = 0; */ static int esp_mp_hw_wait_clean(void) { - int ret = MP_OKAY; + int ret = MP_OKAY; word32 timeout = 0; -#if CONFIG_IDF_TARGET_ESP32S3 +#if defined(CONFIG_IDF_TARGET_ESP32) + /* RSA_CLEAN_REG is now called RSA_QUERY_CLEAN_REG. + ** hwcrypto_reg.h maintains RSA_CLEAN_REG for backwards compatibility: + ** so this block _might_ not be needed in some circumstances. */ + ESP_EM__PRE_MP_HW_WAIT_CLEAN + /* wait until ready, + ** or timeout counter exceeds ESP_RSA_TIMEOUT_CNT in user_settings */ + while(!ESP_TIMEOUT(++timeout) && DPORT_REG_READ(RSA_CLEAN_REG) == 0) { + /* wait. expected delay 1 to 2 uS */ + ESP_EM__MP_HW_WAIT_CLEAN + } +#elif defined(CONFIG_IDF_TARGET_ESP32S3) + ESP_EM__PRE_MP_HW_WAIT_CLEAN while (!ESP_TIMEOUT(++timeout) && DPORT_REG_READ(RSA_QUERY_CLEAN_REG) != 1) { - /* wait. expected delay 1 to 2 uS */ - } -#else - /* RSA_CLEAN_REG is now called RSA_QUERY_CLEAN_REG. hwcrypto_reg.h maintains - * RSA_CLEAN_REG for backwards compatibility so this block _might_ be not needed. */ - while(!ESP_TIMEOUT(++timeout) && DPORT_REG_READ(RSA_CLEAN_REG) != 1) { /* wait. expected delay 1 to 2 uS */ + ESP_EM__MP_HW_WAIT_CLEAN } +#else + /* no HW timeout if we don't know the platform. 
assumes no HW */ #endif if (ESP_TIMEOUT(timeout)) { ESP_LOGE(TAG, "esp_mp_hw_wait_clean waiting HW ready timed out."); - ret = MP_NG; + ret = WC_HW_WAIT_E; /* hardware is busy, MP_HW_BUSY; */ } return ret; } +/* +** esp_mp_hw_islocked() - detect if we've locked the HW for use. +** +** WARNING: this does *not* detect separate calls to the +** periph_module_disable() and periph_module_enable(). +*/ +static int esp_mp_hw_islocked(void) +{ + int ret = 0; +#ifdef SINGLE_THREADED + if (single_thread_locked == 0) { + /* not in use */ + ESP_LOGV(TAG, "SINGLE_THREADED esp_mp_hw_islocked = false"); + } + else { + ESP_LOGV(TAG, "SINGLE_THREADED esp_mp_hw_islocked = true"); + ret = 1; + } +#else + TaskHandle_t mutexHolder = xSemaphoreGetMutexHolder(mp_mutex); + if (mutexHolder == NULL) { + /* Mutex is not in use */ + ESP_LOGV(TAG, "multi-threaded esp_mp_hw_islocked = false"); + } + else { + ESP_LOGV(TAG, "multi-threaded esp_mp_hw_islocked = true"); + ret = 1; + } +#endif + return ret; +} + /* * esp_mp_hw_lock() * @@ -120,7 +247,9 @@ static int esp_mp_hw_lock() int ret = 0; ESP_LOGV(TAG, "enter esp_mp_hw_lock"); - +#ifdef SINGLE_THREADED + single_thread_locked = 1; +#else if (espmp_CryptHwMutexInit == 0) { ret = esp_CryptHwMutexInit(&mp_mutex); if (ret == 0) { @@ -132,81 +261,147 @@ static int esp_mp_hw_lock() } } else { - /* ESP AES has already been initialized */ + /* mp_mutex has already been initialized */ } + /* Set our mutex to indicate the HW is in use */ if (ret == 0) { /* lock hardware */ - ret = esp_CryptHwMutexLock(&mp_mutex, portMAX_DELAY); + ret = esp_CryptHwMutexLock(&mp_mutex, ESP_MP_HW_LOCK_MAX_DELAY); if (ret != 0) { ESP_LOGE(TAG, "mp engine lock failed."); - ret = MP_NG; + ret = WC_HW_WAIT_E; /* caller is expected to fall back to SW */ } } +#endif /* not SINGLE_THREADED */ -#if CONFIG_IDF_TARGET_ESP32S3 - /* Activate the RSA accelerator. See 20.3 of ESP32-S3 technical manual. 
- * periph_module_enable doesn't seem to be documented and in private folder - * with v5 release. Maybe it will be deprecated? */ +#if defined(CONFIG_IDF_TARGET_ESP32) + /* Enable RSA hardware */ if (ret == 0) { periph_module_enable(PERIPH_RSA_MODULE); /* clear bit to enable hardware operation; (set to disable) */ - DPORT_REG_CLR_BIT(SYSTEM_RSA_PD_CTRL_REG, SYSTEM_RSA_MEM_PD); + DPORT_REG_CLR_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD); + ESP_EM__POST_SP_MP_HW_LOCK } -#else - /* Enable RSA hardware */ +#elif defined(CONFIG_IDF_TARGET_ESP32S3) + /* Activate the RSA accelerator. See 20.3 of ESP32-S3 technical manual. + * periph_module_enable doesn't seem to be documented and in private folder + * with v5 release. Maybe it will be deprecated? */ if (ret == 0) { periph_module_enable(PERIPH_RSA_MODULE); /* clear bit to enable hardware operation; (set to disable) */ - DPORT_REG_CLR_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD); + DPORT_REG_CLR_BIT(SYSTEM_RSA_PD_CTRL_REG, SYSTEM_RSA_MEM_PD); } +#else + /* when unknown or not implemented, assume there's no HW to lock */ #endif /* reminder: wait until RSA_CLEAN_REG reads 1 - * see esp_mp_hw_wait_clean() - */ - + ** see esp_mp_hw_wait_clean() */ ESP_LOGV(TAG, "leave esp_mp_hw_lock"); return ret; } /* -* Release HW engine +** Release RSA HW engine */ -static void esp_mp_hw_unlock( void ) +static int esp_mp_hw_unlock( void ) { -#if CONFIG_IDF_TARGET_ESP32S3 - /* Deactivate the RSA accelerator. See 20.3 of ESP32-S3 technical manual. - * periph_module_enable doesn't seem to be documented and in private folder - * with v5 release. Maybe it will be deprecated? 
*/ - DPORT_REG_SET_BIT(SYSTEM_RSA_PD_CTRL_REG, SYSTEM_RSA_MEM_PD); - periph_module_disable(PERIPH_RSA_MODULE); - + int ret = MP_OKAY; + if (esp_mp_hw_islocked()) { + +#if defined(CONFIG_IDF_TARGET_ESP32) + /* set bit to disabled hardware operation; (clear to enable) */ + DPORT_REG_SET_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD); + + /* Disable RSA hardware */ + periph_module_disable(PERIPH_RSA_MODULE); +#elif defined(CONFIG_IDF_TARGET_ESP32S3) + /* Deactivate the RSA accelerator. See 20.3 of ESP32-S3 technical manual. + * periph_module_enable doesn't seem to be documented and in private folder + * with v5 release. Maybe it will be deprecated? */ + DPORT_REG_SET_BIT(SYSTEM_RSA_PD_CTRL_REG, SYSTEM_RSA_MEM_PD); + periph_module_disable(PERIPH_RSA_MODULE); #else - /* set bit to disabled hardware operation; (clear to enable) - */ - DPORT_REG_SET_BIT(DPORT_RSA_PD_CTRL_REG, DPORT_RSA_PD); - - /* Disable RSA hardware */ - periph_module_disable(PERIPH_RSA_MODULE); + /* unknown platform, assume no HW to unlock */ #endif + /* unlock */ +#if defined(SINGLE_THREADED) + single_thread_locked = 0; +#else + esp_CryptHwMutexUnLock(&mp_mutex); +#endif /* SINGLE_THREADED */ + + ESP_LOGV(TAG, "esp_mp_hw_unlock"); + } + else { + ESP_LOGW(TAG, "Warning: esp_mp_hw_unlock called when not locked."); + } - /* unlock */ - esp_CryptHwMutexUnLock(&mp_mutex); + return ret; } -/* this is based on an article by Cetin Kaya Koc, - * A New Algorithm for Inversion: mod p^k, June 28 2017 */ + +/* Only mulmod and mulexp_mod HW accelerator need montgomery math prep: M' */ +#if !defined(NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD) \ + || \ + !defined(NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_MULMOD) + static int esp_calc_Mdash(MATH_INT_T *M, word32 k, mp_digit* md) { + int ret = MP_OKAY; + +#ifdef USE_ALT_MPRIME + /* M' = M^(-1) mod b; b = 2^32 */ + + /* Call Large Number Modular Exponentiation + * + * Z = X^Y mod M + * + * mp_exptmod notation: Y = (G ^ X) mod P + * + * G is our parameter: M + */ + MATH_INT_T X[1] = { }; + 
MATH_INT_T P[1] = { }; + MATH_INT_T Y[1] = { }; + word32 Xs; + + ESP_LOGV(TAG, "\nBegin esp_calc_Mdash USE_ALT_MPRIME\n"); + + mp_init(X); + mp_init(P); + mp_init(Y); + + /* MATH_INT_T value of (-1) */ + X->dp[0] = 1; + X->sign = MP_NEG; + X->used = 1; + + Xs = mp_count_bits(X); + + /* MATH_INT_T value of 2^32 */ + P->dp[1] = 1; + P->used = 2; + + /* this fails due to even P number; ((b & 1) == 0) in fp_montgomery_setup() + * called from _fp_exptmod_ct, called from fp_exptmod */ + ret = mp_exptmod(M, X, P, Y); + + *md = Y->dp[0]; + ESP_LOGI(TAG, "esp_calc_Mdash %u", *md); +#else + /* this is based on an article by Cetin Kaya Koc, + * A New Algorithm for Inversion: mod p^k, June 28 2017 */ int i; int xi; int b0 = 1; int bi; word32 N = 0; word32 x; + ESP_LOGV(TAG, "\nBegin esp_calc_Mdash\n"); N = M->dp[0]; bi = b0; @@ -222,74 +417,262 @@ static int esp_calc_Mdash(MATH_INT_T *M, word32 k, mp_digit* md) } /* 2's complement */ *md = ~x + 1; - return MP_OKAY; +#endif + + ESP_LOGV(TAG, "\nEnd esp_calc_Mdash \n"); + return ret; +} +#endif /* !NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_[MULMOD/EXPTMOD] for M' */ + +/* the result may need to have extra bytes zeroed or used length adjusted */ +static int esp_clean_result(MATH_INT_T* Z, int used_padding) +{ + int ret = MP_OKAY; + uint16_t this_extra; + size_t i; + +/* TODO remove this section if MP_SIZE accepted into sp_int.h +** See https://github.com/wolfSSL/wolfssl/pull/6565 */ + uint16_t dp_length = 0; (void) dp_length; +#ifdef USE_FAST_MATH + #undef MP_SIZE + #define MP_SIZE FP_SIZE + dp_length = FP_SIZE; +#else + #undef MP_SIZE + #define MP_SIZE 128 + dp_length = SP_INT_DIGITS; +#endif +/* TODO end */ + + this_extra = Z->used; + if (this_extra > MP_SIZE) { + ESP_LOGW(TAG, "Warning (Z->used: %d) > (MP_SIZE: %d); adjusting...", + Z->used, MP_SIZE); + this_extra = MP_SIZE; + } + + while (Z->dp[this_extra] > 0 && (this_extra < MP_SIZE)) { + ESP_LOGV(TAG, "Adjust! 
%d", this_extra); + Z->dp[this_extra] = 0; + this_extra++; + } + + /* trim any trailing zeros and adjust z.used size */ + if (Z->used > 1) { + ESP_LOGV(TAG, "ZTrim: Z->used = %d", Z->used); + for (i = Z->used; i > 1; i--) { + if (Z->dp[i - 1] == 0) { + /* last element in zero based array */ + Z->used = i - 1; + } + else { + break; /* if not zero, nothing else to do */ + } + } + ESP_LOGV(TAG, "New Z->used = %d", Z->used); + } + else { + ESP_LOGV(TAG, "no z-trim needed"); + } + +#if defined(WOLFSSL_SP_INT_NEGATIVE) || defined(USE_FAST_MATH) + if (Z->sign != 0) { + mp_setneg(Z); /* any value other than zero is assumed negative */ + } +#endif + + /* a result of 1 is interesting */ + if ((Z->dp[0] == 1) && (Z->used == 1)) { + /* + * When the exponent is 0: In this case, the result of the modular + * exponentiation operation will always be 1, regardless of the value + * of the base. + * + * When the base is 1: If the base is equal to 1, then the result of + * the modular exponentiation operation will always be 1, regardless + * of the value of the exponent. + * + * When the exponent is equal to the totient of the modulus: If the + * exponent is equal to the totient of the modulus, and the base is + * relatively prime to the modulus, then the result of the modular + * exponentiation operation will be 1. + */ + ESP_LOGV(TAG, "Z->dp[0] == 1"); + } + + return ret; } /* start HW process */ -static void process_start(word32 reg) +static void process_start(u_int32_t reg) { - /* clear interrupt */ - DPORT_REG_WRITE(RSA_INTERRUPT_REG, 1); - /* start process */ - DPORT_REG_WRITE(reg, 1); + /* see 3.16 "software needs to always use the "volatile" + ** attribute when accessing registers in these two address spaces. 
*/ + DPORT_REG_WRITE((volatile uint32_t*)reg, 1); + ESP_EM__POST_PROCESS_START; } -/* wait until done */ -static int wait_until_done(word32 reg) +/* wait until RSA math register indicates operation completed */ +static int wait_until_done(uint32_t reg) { + int ret = MP_OKAY; word32 timeout = 0; + /* wait until done && not timeout */ - while (!ESP_TIMEOUT(++timeout) && - DPORT_REG_READ(reg) != 1) { - /* wait */ + ESP_EM__MP_HW_WAIT_DONE; + while (!ESP_TIMEOUT(++timeout) && DPORT_REG_READ(reg) != 1) { + asm volatile("nop"); /* wait */ } + ESP_EM__DPORT_FIFO_READ; /* clear interrupt */ DPORT_REG_WRITE(RSA_INTERRUPT_REG, 1); if (ESP_TIMEOUT(timeout)) { - ESP_LOGE(TAG, "rsa operation is timed out."); - return MP_NG; + ESP_LOGE(TAG, "rsa operation timed out."); + ret = WC_HW_E; /* MP_HW_ERROR; */ } - return MP_OKAY; + return ret; } /* read data from memory into mp_init */ -static void esp_memblock_to_mpint(word32 mem_address, - MATH_INT_T* mp, - word32 numwords) +static int esp_memblock_to_mpint(const uint32_t mem_address, + MATH_INT_T* mp, + word32 numwords) { + int ret = MP_OKAY; + uint32_t i; + +#ifdef USE_ESP_DPORT_ACCESS_READ_BUFFER esp_dport_access_read_buffer((uint32_t*)mp->dp, mem_address, numwords); +#else + ESP_EM__PRE_DPORT_READ; + DPORT_INTERRUPT_DISABLE(); + ESP_EM__READ_NON_FIFO_REG; + for (i = 0; i < numwords; ++i) { + ESP_EM__3_16; + mp->dp[i] = DPORT_SEQUENCE_REG_READ((uint32_t)(mem_address + i * 4)); + } + DPORT_INTERRUPT_RESTORE(); +#endif mp->used = numwords; + +#if defined(ESP_VERIFY_MEMBLOCK) + ret = XMEMCMP((const uint32_t *)mem_address, /* HW reg memory */ + (const uint32_t *)&mp->dp, /* our dp value */ + numwords * sizeof(word32)); + + if (ret != 0 ) { + ESP_LOGW(TAG, "Validation Failure esp_memblock_to_mpint.\n" + "Reading %u Words at Address = 0x%08x", + (int)(numwords * sizeof(word32)), + (unsigned int)mem_address); + ESP_LOGI(TAG, "Trying again... 
"); + esp_dport_access_read_buffer((uint32_t*)mp->dp, mem_address, numwords); + mp->used = numwords; + if (0 != XMEMCMP((const void *)mem_address, + (const void *)&mp->dp, numwords * sizeof(word32))) { + ESP_LOGE(TAG, "Validation Failure esp_memblock_to_mpint " + "a second time. Giving up."); + ret = MP_VAL; + } + else { + ESP_LOGI(TAG, "Successfully re-read after Validation Failure."); + ret = MP_VAL; + } + } +#endif + return ret; } -/* write mp_init into memory block - */ -static void esp_mpint_to_memblock(word32 mem_address, const MATH_INT_T* mp, - const word32 bits, - const word32 hwords) +#ifndef NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_MP_MUL +/* Write 0x00 to [wordSz] words of register memory starting at mem_address */ +#if defined(CONFIG_IDF_TARGET_ESP32) +/* only the classic has memblock clear due to slightly different data layout */ +static int esp_zero_memblock(u_int32_t mem_address, int wordSz) { + int ret = MP_OKAY; + int i; + + ESP_EM__PRE_DPORT_WRITE; + DPORT_INTERRUPT_DISABLE(); + for (i = 0; i < wordSz; i++) { + DPORT_REG_WRITE( + (volatile u_int32_t *)(mem_address + (i * sizeof(word32))), + (u_int32_t)(0) /* zero memory blocks [wordSz] words long */ + ); + } + DPORT_INTERRUPT_RESTORE(); + return ret; +} +#endif /* CONFIG_IDF_TARGET_ESP32 */ +#endif /* not NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_MP_MUL */ + +/* write MATH_INT_T mp value (dp[]) into memory block */ +static int esp_mpint_to_memblock(u_int32_t mem_address, + const MATH_INT_T* mp, + const word32 bits, + const word32 hwords) +{ + int ret = MP_OKAY; + /* init */ - word32 i; - word32 len = (bits / 8 + ((bits & 7) != 0 ? 1 : 0)); + word32 i; /* memory offset counter */ + word32 len; /* actual number of words to write to register */ + len = (bits / 8 + ((bits & 7) != 0 ? 
1 : 0)); len = (len + sizeof(word32)-1) / sizeof(word32); - for (i=0; i < hwords; i++) { + /* write */ + ESP_EM__PRE_DPORT_WRITE; + DPORT_INTERRUPT_DISABLE(); + for (i = 0; i < hwords; i++) { if (i < len) { - DPORT_REG_WRITE(mem_address + (i * sizeof(word32)), mp->dp[i]); + /* write our data */ + ESP_LOGV(TAG, "Write i = %d value.", i); + DPORT_REG_WRITE( + (volatile u_int32_t*)(mem_address + (i * sizeof(word32))), + mp->dp[i] + ); /* DPORT_REG_WRITE */ } else { - DPORT_REG_WRITE(mem_address + (i * sizeof(word32)), 0); + /* write zeros */ + /* TODO we may be able to skip zero in certain circumstances */ + if (i == 0) { + ESP_LOGV(TAG, "esp_mpint_to_memblock zero?"); + } + ESP_LOGV(TAG, "Write i = %d value = zero.", i); + DPORT_REG_WRITE( + (volatile u_int32_t*)(mem_address + (i * sizeof(word32))), + (u_int32_t)0 /* writing 4 bytes of zero */ + ); /* DPORT_REG_WRITE */ } } + DPORT_INTERRUPT_RESTORE(); + + /* optional re-read verify */ +#if defined(ESP_VERIFY_MEMBLOCK) + len = XMEMCMP((const void *)mem_address, /* HW reg memory */ + (const void *)&mp->dp, /* our dp value */ + hwords * sizeof(word32) + ); + if (len != 0) { + ESP_LOGE(TAG, "esp_mpint_to_memblock compare fails at %d", len); + #ifdef DEBUG_WOLFSSL + esp_show_mp("mp", (MATH_INT_T*)mp); + #endif + ret = MP_VAL; + } +#endif + return ret; } /* return needed HW words. 
* supported words length - * words : {16 , 32, 48, 64, 80, 96, 112, 128} - * bits : {512,1024, 1536, 2048, 2560, 3072, 3584, 4096} + * words : { 16, 32, 48, 64, 80, 96, 112, 128} + * bits : {512, 1024, 1536, 2048, 2560, 3072, 3584, 4096} */ static word32 words2hwords(word32 wd) { @@ -307,278 +690,892 @@ static word32 bits2words(word32 bits) return ((bits + (d - 1)) / d); } +/* exptmod and mulmod helpers as needed */ +#if !defined(NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD) \ + || \ + !defined(NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_MULMOD) +/* rinv and M' only used for mulmod and mulexp_mod */ + /* get rinv */ static int esp_get_rinv(MATH_INT_T *rinv, MATH_INT_T *M, word32 exp) { - int ret = 0; +#ifdef DEBUG_WOLFSSL + MATH_INT_T rinv2[1]; + MATH_INT_T M2[1]; + int reti = MP_OKAY; +#endif + int ret = MP_OKAY; + + ESP_LOGV(TAG, "\nBegin esp_get_rinv \n"); +#ifdef DEBUG_WOLFSSL + mp_copy(M, M2); /* copy (src = M) to (dst = M2) */ + mp_copy(rinv, rinv2); /* copy (src = rinv) to (dst = rinv2) */ +#endif - /* 2^(exp)*/ - if ((ret = mp_2expt(rinv, exp)) != MP_OKAY) { + /* 2^(exp) + * + * rinv will have all zeros with a 1 in last word. + * e.g. exp=2048 will have a 1 in dp[0x40] = dp[64] + * this is the 65'th element (zero based) + * Value for used = 0x41 = 65 + **/ + ret = mp_2expt(rinv, exp); + if (ret == MP_OKAY) { + ret = mp_mod(rinv, M, rinv); + } + else { ESP_LOGE(TAG, "failed to calculate mp_2expt()"); - return ret; } /* r_inv = R^2 mod M(=P) */ - if (ret == 0 && (ret = mp_mod(rinv, M, rinv)) != MP_OKAY) { + if (ret == MP_OKAY) { + ESP_LOGV(TAG, "esp_get_rinv compute success"); + } + else { ESP_LOGE(TAG, "failed to calculate mp_mod()"); - return ret; } +#ifdef DEBUG_WOLFSSL + if (ret == MP_OKAY) { + + /* computes a = B**n mod b without division or multiplication useful for + * normalizing numbers in a Montgomery system. 
*/ + reti = mp_montgomery_calc_normalization(rinv2, M2); + if (reti == MP_OKAY) { + ESP_LOGV(TAG, "mp_montgomery_calc_normalization = %d", reti); + } + else { + ESP_LOGW(TAG, "Error montgomery calc M2 result = %d", reti); + } + } +#endif + + ESP_LOGV(TAG, "\nEnd esp_get_rinv \n"); return ret; } +#endif /* ! xEXPTMOD || ! xMULMOD for rinv */ -/* Z = X * Y; */ -int esp_mp_mul(MATH_INT_T* X, MATH_INT_T* Y, MATH_INT_T* Z) +/* during debug, we'll compare HW to SW results */ +int esp_hw_validation_active(void) { - int ret; +#ifdef DEBUG_WOLFSSL + return IS_HW_VALIDATION; +#else + return 0; /* we're never validating when not debugging */ +#endif +} -#ifdef WOLFSSL_SP_INT_NEGATIVE - /* neg check: X*Y becomes negative */ - int neg; +/* useful during debugging and error display, + * we can show all the mp helper calc values */ +int esp_show_mph(struct esp_mp_helper* mph) +{ + int ret = MP_OKAY; - /* aka (X->sign == Y->sign) ? MP_ZPOS : MP_NEG; , but with mp_isneg(): */ - neg = (mp_isneg(X) == mp_isneg(Y)) ? MP_ZPOS : MP_NEG; - if (neg) { - /* Negative numbers are relatively infrequent. 
- * May be interesting during verbose debugging: */ - ESP_LOGV(TAG, "mp_isneg(X) = %d; mp_isneg(Y) = %d; neg = %d ", - mp_isneg(X), mp_isneg(Y), neg); + if (mph == NULL) { + /* if a bad mp helper passed, we cannot use HW */ + ESP_LOGE(TAG, "ERROR: Bad esp_mp_helper for esp_show_mph"); + return MP_VAL; } -#endif - ret = MP_OKAY; /* assume success until proven wrong */ -#if CONFIG_IDF_TARGET_ESP32S3 - - int BitsInX = mp_count_bits(X); - int BitsInY = mp_count_bits(Y); + if (mph->Xs != 0) + ESP_LOGI(TAG, "Xs %d", mph->Xs); + if (mph->Ys != 0) + ESP_LOGI(TAG, "Ys %d", mph->Ys); + if (mph->Ms != 0) + ESP_LOGI(TAG, "Ms %d", mph->Ms); + if (mph->Rs != 0) + ESP_LOGI(TAG, "Rs %d", mph->Rs); + if (mph->maxWords_sz != 0) + ESP_LOGI(TAG, "maxWords_sz %d", mph->maxWords_sz); + if (mph->hwWords_sz != 0) + ESP_LOGI(TAG, "hwWords_sz %d", mph->hwWords_sz); + if (mph->mp != 0) + ESP_LOGI(TAG, "mp %d", mph->mp); +#ifdef DEBUG_WOLFSSL + if (mph->mp2 != 0) + ESP_LOGI(TAG, "mp2 %d", mph->mp2); +#endif + if (mph->r_inv.used != 0) + esp_show_mp("r_inv", &(mph->r_inv)); + return ret; +} - /* X & Y must be represented by the same number of bits. Must be - * enough to represent the larger one. */ - int MinXYBits = max(BitsInX, BitsInY); +#if !defined(NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD) \ + || \ + !defined(NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_MULMOD) +/* only when using exptmod or mulmod, we have some helper functions. */ - /* Figure out how many words we need to represent each operand & the result. */ - int WordsForOperand = bits2words(MinXYBits); - int WordsForResult = bits2words(BitsInX + BitsInY); +/* given X, Y, M - setup mp hardware and other helper values.*/ +int esp_mp_montgomery_init(MATH_INT_T* X, MATH_INT_T* Y, MATH_INT_T* M, + struct esp_mp_helper* mph) +{ + int ret = MP_OKAY; + int exp; - /* Make sure we are within capabilities of hardware. 
*/ - if ( (WordsForOperand * BITS_IN_ONE_WORD) > ESP_HW_MULTI_RSAMAX_BITS ) { - ESP_LOGW(TAG, "exceeds max bit length(2048)"); - return MP_VAL; /* Error: value is not able to be used. */ + if (mph == NULL) { + /* if a bad mp helper passed, we cannot use HW */ + ESP_LOGE(TAG, "ERROR: Bad esp_mp_helper, falling back to SW"); + return MP_HW_FALLBACK; } + if ((X == NULL) || (Y == NULL) || (M == NULL) ) { + /* if a bad oprand passed, we cannot use HW */ + ESP_LOGE(TAG, "ERROR: Bad montgomery operand, falling back to SW"); + return MP_HW_FALLBACK; + } + XMEMSET(mph, 0, sizeof(struct esp_mp_helper)); + mph->Xs = mp_count_bits(X); /* X's = the number of bits needed */ + +#if ESP_PROHIBIT_SMALL_X + /* optionally prohibit small X. + ** note this is very common in ECC: [1] * [Y] mod [M] */ + if ((X->used == 1) && (X->dp[1] < (1 << 8))) { + #ifdef WOLFSSL_HW_METRICS + esp_mp_mulmod_small_x_ct++; + #endif + ESP_LOGW(TAG, "esp_mp_montgomery_init MP_HW_FALLBACK Xs = %d", + mph->Xs); + ret = MP_HW_FALLBACK; + } +#endif - /* Steps to perform large number multiplication. Calculates Z = X x Y. The number of - * bits in the operands (X, Y) is N. N can be 32x, where x = {1,2,3,...64}, so the - * maximum number of bits in the X and Y is 2048. - * See 20.3.3 of ESP32-S3 technical manual - * 1. Lock the hardware so no-one else uses it and wait until it is ready. - * 2. Enable/disable interrupt that signals completion -- we don't use the interrupt. - * 3. Write number of words required for result to the RSA_MODE_REG (now called RSA_LENGTH_REG). - * Number of words required for the result is 2 * words for operand - 1 - * 4. Load X, Y operands to memory blocks. Note the Y value must be written to - * right aligned. - * 5. Start the operation by writing 1 to RSA_MULT_START_REG, then wait for it - * to complete by monitoring RSA_IDLE_REG (which is now called RSA_QUERY_INTERRUPT_REG). - * 6. Read the result out. - * 7. Release the hardware lock so others can use it. - * x. 
Clear the interrupt flag, if you used it (we don't). */ - - /* 1. lock HW for use & wait until it is ready. */ - if ( ((ret = esp_mp_hw_lock()) != MP_OKAY) || - ((ret = esp_mp_hw_wait_clean()) != MP_OKAY) ) { - return ret; + /* prohibit small Y */ + if (ret == MP_OKAY) { + mph->Ys = mp_count_bits(Y); /* init Y's to pass to montgomery init */ + + if (mph->Ys <= 8) { /* hard floor 8 bits, problematic in some ESP32 */ + #ifdef WOLFSSL_HW_METRICS + esp_mp_mulmod_small_y_ct++; /* track how many times we fall back */ + #endif + ESP_LOGV(TAG, "esp_mp_montgomery_init MP_HW_FALLBACK Ys = %d", + mph->Ys); + ret = MP_HW_FALLBACK; /* fall back to software calc at exit */ + } + else { + mph->Ms = mp_count_bits(M); + /* maximum bits and words for writing to HW */ + mph->maxWords_sz = bits2words(max(mph->Xs, max(mph->Ys, mph->Ms))); + mph->hwWords_sz = words2hwords(mph->maxWords_sz); + + if ((mph->hwWords_sz << 5) > ESP_HW_RSAMAX_BIT) { + ESP_LOGW(TAG, "Warning: hwWords_sz = %d (%d bits)" + " exceeds HW maximum bits (%d), " + " falling back to SW.", + mph->hwWords_sz, + mph->hwWords_sz << 5, + ESP_HW_RSAMAX_BIT); + ret = MP_HW_FALLBACK; + } + } } - /* 2. Disable completion interrupt signal; we don't use. - ** 0 => no interrupt; 1 => interrupt on completion. */ - DPORT_REG_WRITE(RSA_INTERRUPT_REG, 0); + ESP_LOGV(TAG, "hwWords_sz = %d", mph->hwWords_sz); + + /* calculate r_inv = R^2 mod M + * where: R = b^n, and b = 2^32 + * accordingly R^2 = 2^(n*32*2) + */ +#if defined(CONFIG_IDF_TARGET_ESP32) + exp = mph->hwWords_sz << 6; +#elif defined(CONFIG_IDF_TARGET_ESP32S3) + exp = mph->maxWords_sz * BITS_IN_ONE_WORD * 2; +#else + exp = 0; /* no HW, no montgomery HW init */ +#endif - /* 3. Write number of words required for result. */ - if ( (WordsForOperand * BITS_IN_ONE_WORD * 2) > ESP_HW_RSAMAX_BIT) { - ESP_LOGW(TAG, "result exceeds max bit length"); - return MP_VAL; /* Error: value is not able to be used. 
*/ + if (ret == MP_OKAY && (M != NULL)) { + ret = mp_init((mp_int*)&(mph->r_inv)); + if (ret == MP_OKAY) { + ret = esp_get_rinv( (mp_int*)&(mph->r_inv), M, exp); + if (ret == MP_OKAY) { + mph->Rs = mp_count_bits((mp_int*)&(mph->r_inv)); + } + else { + ESP_LOGE(TAG, "calculate r_inv failed."); + ret = MP_VAL; + } /* esp_get_rinv check */ + } /* mp_init success */ + else { + ESP_LOGE(TAG, "calculate r_inv failed mp_init."); + ret = MP_MEM; + } /* mp_init check */ + } /* calculate r_inv */ + + /* if we were successful in r_inv, next get M' */ + if (ret == MP_OKAY) { +#ifdef DEBUG_WOLFSSL + ret = mp_montgomery_setup(M, &(mph->mp2) ); +#endif + /* calc M' */ + /* if Pm is odd, uses mp_montgomery_setup() */ + ret = esp_calc_Mdash(M, 32/* bits */, &(mph->mp)); + if (ret != MP_OKAY) { + ESP_LOGE(TAG, "failed esp_calc_Mdash()"); + } } - DPORT_REG_WRITE(RSA_LENGTH_REG, (WordsForOperand * 2 - 1) ); - /* 4. Load X, Y operands. Maximum is 64 words (64*8*4 = 2048 bits) */ - esp_mpint_to_memblock(RSA_MEM_X_BLOCK_BASE, - X, BitsInX, WordsForOperand); - esp_mpint_to_memblock(RSA_MEM_Z_BLOCK_BASE + WordsForOperand * 4, - Y, BitsInY, WordsForOperand); +#ifdef DEBUG_WOLFSSL + if (ret == MP_OKAY) { + if (mph->mp == mph->mp2) { + ESP_LOGV(TAG, "M' match esp_calc_Mdash vs mp_montgomery_setup " + "= %ul !", mph->mp); + } + else { + ESP_LOGW(TAG, + "\n\n" + "M' MISMATCH esp_calc_Mdash = 0x%08x = %d \n" + "vs mp_montgomery_setup = 0x%08x = %d \n\n", + mph->mp, + mph->mp, + mph->mp2, + mph->mp2); + mph->mp = mph->mp2; + } + } + else { + #if 0 + esp_show_mp("X", X); + esp_show_mp("Y", Y); + esp_show_mp("M", M); + esp_show_mph(mph); + #endif + if (ret == MP_HW_FALLBACK) { + ESP_LOGV(TAG, "esp_mp_montgomery_init exit falling back."); - /* 5. Start operation and wait until it completes. 
*/ - process_start(RSA_MULT_START_REG); - ret = wait_until_done(RSA_QUERY_INTERRUPT_REG); - if (MP_OKAY != ret) { - return ret; + } + else { + ESP_LOGE(TAG, "esp_mp_montgomery_init failed: return code = %d", + ret); + } } +#endif - /* 6. read the result form MEM_Z */ - esp_memblock_to_mpint(RSA_MEM_Z_BLOCK_BASE, Z, WordsForResult); + return ret; +} /* esp_mp_montgomery_init */ - /* 7. clear and release HW */ - esp_mp_hw_unlock(); +#endif /* ! NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_[EXPTMOD|MULMOD] */ - /* end if CONFIG_IDF_TARGET_ESP32S3 */ +#ifndef NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_MP_MUL +/* Large Number Multiplication + * + * See 24.3.3 of the ESP32 Technical Reference Manual + * + * Z = X * Y; */ +int esp_mp_mul(MATH_INT_T* X, MATH_INT_T* Y, MATH_INT_T* Z) +{ +/* During debug, we may be validating against SW result. */ +#ifdef DEBUG_WOLFSSL + /* create a place to store copies to perform duplicate operations. + ** copies needed as some operations overwrite operands: e.g. X = X * Y */ + MATH_INT_T X2[1]; + MATH_INT_T Y2[1]; + MATH_INT_T Z2[1]; + MATH_INT_T PEEK[1]; +#endif + + int ret = MP_OKAY; /* assume success until proven wrong */ -#else /* not CONFIG_IDF_TARGET_ESP32S3 */ - /* assumed to be regular Xtensa here */ + /* we don't use the mph helper for mp_mul, so we'll calculate locally: */ word32 Xs; word32 Ys; word32 Zs; word32 maxWords_sz; word32 hwWords_sz; + word32 resultWords_sz; - /* ask bits number */ - Xs = mp_count_bits(X); - Ys = mp_count_bits(Y); - Zs = Xs + Ys; +#if defined(CONFIG_IDF_TARGET_ESP32) + word32 left_pad_offset = 0; +#endif - /* maximum bits and words for writing to HW */ - maxWords_sz = bits2words(max(Xs, Ys)); - hwWords_sz = words2hwords(maxWords_sz); +/* if we are supporting negative numbers, check that first since operands + * may be later modified (e.g. 
Z = Z * X) */ +#if defined(WOLFSSL_SP_INT_NEGATIVE) || defined(USE_FAST_MATH) + /* neg check: X*Y becomes negative */ + int res_sign; - /* sanity check */ - if((hwWords_sz<<5) > ESP_HW_MULTI_RSAMAX_BITS) { - ESP_LOGW(TAG, "exceeds max bit length(2048)"); - return MP_VAL; /* Error: value is not able to be used. */ + /* aka (X->sign == Y->sign) ? MP_ZPOS : MP_NEG; , but with mp_isneg(): */ + res_sign = (mp_isneg(X) == mp_isneg(Y)) ? MP_ZPOS : MP_NEG; + if (res_sign) { + /* Negative numbers are relatively infrequent. + * May be interesting during verbose debugging: */ + ESP_LOGV(TAG, "mp_isneg(X) = %d; mp_isneg(Y) = %d; neg = %d ", + mp_isneg(X), mp_isneg(Y), res_sign); } +#endif - /*Steps to use HW in the following order: - * 1. wait until clean HW engine - * 2. Write(2*N/512bits - 1 + 8) to MULT_MODE_REG - * 3. Write X and Y to memory blocks - * need to write data to each memory block only according to the length - * of the number. - * 4. Write 1 to MUL_START_REG - * 5. Wait for the first operation to be done. Poll INTERRUPT_REG until it reads 1. - * (Or until the INTER interrupt is generated.) - * 6. Write 1 to RSA_INTERRUPT_REG to clear the interrupt. - * 7. Read the Z from RSA_Z_MEM - * 8. Write 1 to RSA_INTERUPT_REG to clear the interrupt. - * 9. Release the HW engine +#ifdef WOLFSSL_HW_METRICS + esp_mp_max_used = (X->used > esp_mp_max_used) ? X->used : esp_mp_max_used; + esp_mp_max_used = (Y->used > esp_mp_max_used) ? Y->used : esp_mp_max_used; +#endif + + /* if either operand is zero, there's nothing to do. + * Y checked first, as it was observed to be zero during + * wolfcrypt tests more often than X */ + if (mp_iszero(Y) || mp_iszero(X)) { + mp_forcezero(Z); + return MP_OKAY; + } + +#ifdef DEBUG_WOLFSSL + /* The caller should have checked if the call was for a SW validation. + * During debug, we'll return an error. 
*/ + if (esp_hw_validation_active()) { + return MP_HW_VALIDATION_ACTIVE; + } + + /* these occur many times during RSA calcs */ + if (X == Z) { + ESP_LOGV(TAG, "mp_mul X == Z"); + } + if (Y == Z) { + ESP_LOGV(TAG, "mp_mul Y == Z"); + } + + mp_init(X2); + mp_init(Y2); + mp_init(Z2); + + mp_copy(X, X2); /* copy (src = X) to (dst = X2) */ + mp_copy(Y, Y2); /* copy (src = Y) to (dst = Y2) */ + mp_copy(Z, Z2); /* copy (src = Z) to (dst = Z2) */ + + if (IS_HW_VALIDATION) { + ESP_LOGE(TAG, "Caller must not try HW when validation active."); + } + else { + SET_HW_VALIDATION; + mp_mul(X2, Y2, Z2); + CLR_HW_VALIDATION; + } +#endif /* DEBUG_WOLFSSL */ + + Xs = mp_count_bits(X); + Ys = mp_count_bits(Y); + Zs = Xs + Ys; + + /* RSA Accelerator only supports Large Number Multiplication + * with operand length N = 32 × x, + * where x ∈ {1, 2, 3, . . . , 64} */ + if (Xs > 64 || Ys > 64) { + return MP_HW_FALLBACK; + } + + if (Zs <= sizeof(mp_digit)*8) { + Z->dp[0] = X->dp[0] * Y->dp[0]; + Z->used = 1; +#if defined(WOLFSSL_SP_INT_NEGATIVE) || defined(USE_FAST_MATH) + Z->sign = res_sign; /* see above mp_isneg() for negative result detection */ +#endif + return MP_OKAY; + } + + if (ret == MP_OKAY) { + + } + /* maximum bits and words for writing to HW */ + maxWords_sz = bits2words(max(Xs, Ys)); + hwWords_sz = words2hwords(maxWords_sz); + + resultWords_sz = bits2words(Xs + Ys); + /* sanity check */ + if((hwWords_sz<<5) > ESP_HW_MULTI_RSAMAX_BITS) { + ESP_LOGW(TAG, "exceeds max bit length(2048) (a)"); + ret = MP_HW_FALLBACK; /* Error: value is not able to be used. */ + } + +#if defined(CONFIG_IDF_TARGET_ESP32) + /* assumed to be regular ESP32 Xtensa here */ + + /*Steps to use HW in the following order: + * 1. wait until clean HW engine + * 2. Write(2*N/512bits - 1 + 8) to MULT_MODE_REG + * 3. Write X and Y to memory blocks + * need to write data to each memory block only according to the length + * of the number. + * 4. Write 1 to MUL_START_REG + * 5. 
Wait for the first operation to be done. Poll INTERRUPT_REG until it reads 1. + * (Or until the INTER interrupt is generated.) + * 6. Write 1 to RSA_INTERRUPT_REG to clear the interrupt. + * 7. Read the Z from RSA_Z_MEM + * 8. Write 1 to RSA_INTERUPT_REG to clear the interrupt. + * 9. Release the HW engine */ - /* lock HW for use */ - if ((ret = esp_mp_hw_lock()) != MP_OKAY) { - return ret; + + /* Y (left-extend) + * Accelerator supports large-number multiplication with only + * four operand lengths of N ∈ {512, 1024, 1536, 2048} */ + left_pad_offset = maxWords_sz << 2; + if (left_pad_offset <= 512 >> 3) { + left_pad_offset = 512 >> 3; /* 64 bytes (16 words) */ + } + else { + if (left_pad_offset <= 1024 >> 3) { + left_pad_offset = 1024 >> 3; /* 128 bytes = 32 words */ + } + else { + if (left_pad_offset <= 1536 >> 3) { + left_pad_offset = 1536 >> 3; /* 192 bytes = 48 words */ + } + else { + if (left_pad_offset <= 2048 >> 3) { + left_pad_offset = 2048 >> 3; /* 256 bytes = 64 words */ + } + else { + ret = MP_VAL; + ESP_LOGE(TAG, "Unsupported operand length: %d", + hwWords_sz); + } + } + } } - if((ret = esp_mp_hw_wait_clean()) != MP_OKAY) { - return ret; + if (ret == MP_OKAY) { + /* lock HW for use */ + ret = esp_mp_hw_lock(); /* enables HW clock */ } - /* step.1 (2*N/512) => N/256. 
512 bits => 16 words */ - DPORT_REG_WRITE(RSA_MULT_MODE_REG, (hwWords_sz >> 3) - 1 + 8); - /* step.2 write X, M and r_inv into memory */ - esp_mpint_to_memblock(RSA_MEM_X_BLOCK_BASE, - X, - Xs, - hwWords_sz); - /* Y(let-extend) */ - esp_mpint_to_memblock(RSA_MEM_Z_BLOCK_BASE + (hwWords_sz<<2), - Y, - Ys, - hwWords_sz); - /* step.3 start process */ - process_start(RSA_MULT_START_REG); - - /* step.4,5 wait until done */ - ret = wait_until_done(RSA_INTERRUPT_REG); - if (ret != MP_OKAY) { - ESP_LOGE(TAG, "wait_until_done failed."); - return ret; + if (ret == MP_OKAY) { + ret = esp_mp_hw_wait_clean(); } - /* step.6 read the result form MEM_Z */ - esp_memblock_to_mpint(RSA_MEM_Z_BLOCK_BASE, Z, BITS_TO_WORDS(Zs)); + + if (ret == MP_OKAY) { + /* step.1 (2*N/512) => N/256. 512 bits => 16 words */ + /* Write 2*N/512 - 1 + 8 */ + + DPORT_REG_WRITE(RSA_MULT_MODE_REG, + (2 * left_pad_offset * 8 / 512) - 1 + 8); + + /* step.2 write X into memory */ + esp_mpint_to_memblock(RSA_MEM_X_BLOCK_BASE, + X, + Xs, + hwWords_sz); + + /* write zeros from RSA_MEM_Z_BLOCK_BASE to left_pad_offset - 1 */ + esp_zero_memblock(RSA_MEM_Z_BLOCK_BASE, + (left_pad_offset - 1) / sizeof(int)); + + /* write the left-padded Y value into Z */ + esp_mpint_to_memblock(RSA_MEM_Z_BLOCK_BASE + (left_pad_offset), + Y, + Ys, + hwWords_sz); + + #ifdef DEBUG_WOLFSSL + /* save value to peek at the result stored in RSA_MEM_Z_BLOCK_BASE */ + esp_memblock_to_mpint(RSA_MEM_Z_BLOCK_BASE, + PEEK, + 128); + #endif + + /* step.3 start process */ + process_start(RSA_MULT_START_REG); + + /* step.4,5 wait until done */ + ret = wait_until_done(RSA_INTERRUPT_REG); + + /* step.6 read the result form MEM_Z */ + if (ret == MP_OKAY) { + esp_memblock_to_mpint(RSA_MEM_Z_BLOCK_BASE, Z, resultWords_sz); + } +#ifndef DEBUG_WOLFSSL + else { + ESP_LOGE(TAG, "ERROR: wait_until_done failed in esp32_mp"); + } +#endif + } /* end of processing */ +#elif defined(CONFIG_IDF_TARGET_ESP32S3) + /* Unlike the ESP32 that is limited to only four 
operand lengths, + * the ESP32-S3 The RSA Accelerator supports large-number modular + * multiplication with operands of 128 different lengths. + * + * X & Y must be represented by the same number of bits. Must be + * enough to represent the larger one. */ + + /* Figure out how many words we need to + * represent each operand & the result. */ + + /* Make sure we are within capabilities of hardware. */ + if ((hwWords_sz * BITS_IN_ONE_WORD) > ESP_HW_MULTI_RSAMAX_BITS) { + ESP_LOGW(TAG, "exceeds max bit length(%d)", ESP_HW_MULTI_RSAMAX_BITS); + ret = MP_HW_FALLBACK; /* let SW figure out how to deal with it */ + } + if ((hwWords_sz * BITS_IN_ONE_WORD * 2) > ESP_HW_RSAMAX_BIT) { + ESP_LOGW(TAG, "result exceeds max bit length(%d)", ESP_HW_RSAMAX_BIT ); + ret = MP_HW_FALLBACK; /* let SW figure out how to deal with it */ + } + + /* Steps to perform large number multiplication. Calculates Z = X x Y. The number of + * bits in the operands (X, Y) is N. N can be 32x, where x = {1,2,3,...64}, so the + * maximum number of bits in the X and Y is 2048. + * See 20.3.3 of ESP32-S3 technical manual + * 1. Lock the hardware so no-one else uses it and wait until it is ready. + * 2. Enable/disable interrupt that signals completion -- we don't use the interrupt. + * 3. Write number of words required for result to the RSA_MODE_REG (now called RSA_LENGTH_REG). + * Number of words required for the result is 2 * words for operand - 1 + * 4. Load X, Y operands to memory blocks. Note the Y value must be written to + * right aligned. + * 5. Start the operation by writing 1 to RSA_MULT_START_REG, then wait for it + * to complete by monitoring RSA_IDLE_REG (which is now called RSA_QUERY_INTERRUPT_REG). + * 6. Read the result out. + * 7. Release the hardware lock so others can use it. + * x. Clear the interrupt flag, if you used it (we don't). */ + + /* 1. lock HW for use & wait until it is ready. 
*/ + if (ret == MP_OKAY) { + ret = esp_mp_hw_lock(); /* enables HW clock */ + } /* the only thing we expect is success or busy */ + if (ret == MP_OKAY) { + ret = esp_mp_hw_wait_clean(); + } + + /* HW multiply */ + if (ret == MP_OKAY) { + /* 2. Disable completion interrupt signal; we don't use. + ** 0 => no interrupt; 1 => interrupt on completion. */ + DPORT_REG_WRITE(RSA_INTERRUPT_REG, 0); + + /* 3. Write number of words required for result. */ + DPORT_REG_WRITE(RSA_LENGTH_REG, (hwWords_sz * 2 - 1)); + + /* 4. Load X, Y operands. Maximum is 64 words (64*8*4 = 2048 bits) */ + esp_mpint_to_memblock(RSA_MEM_X_BLOCK_BASE, + X, + Xs, + hwWords_sz); + esp_mpint_to_memblock(RSA_MEM_Z_BLOCK_BASE + hwWords_sz * 4, + Y, + Ys, + hwWords_sz); + + /* 5. Start operation and wait until it completes. */ + process_start(RSA_MULT_START_REG); + ret = wait_until_done(RSA_QUERY_INTERRUPT_REG); + } + if (ret == MP_OKAY) { + /* 6. read the result form MEM_Z */ + esp_memblock_to_mpint(RSA_MEM_Z_BLOCK_BASE, Z, resultWords_sz); + } + + /* + ** end if CONFIG_IDF_TARGET_ESP32S3 + */ +#else + ret = MP_HW_FALLBACK; +#endif /* target HW calcs*/ + + /* common exit for all chipset types */ /* step.7 clear and release HW */ esp_mp_hw_unlock(); -#endif /* CONFIG_IDF_TARGET_ESP32S3 or not */ +#if defined(WOLFSSL_SP_INT_NEGATIVE) || defined(USE_FAST_MATH) + if (ret == MP_OKAY) { + if (!mp_iszero(Z) && res_sign) { + /* for non-zero negative numbers, set negative flag for our result: + * Z->sign = FP_NEG */ + ESP_LOGV(TAG, "Setting Z to negative result!"); + mp_setneg(Z); + } + else { + Z->sign = MP_ZPOS; + } + } +#endif - /* common exit for all chipset types */ -#ifdef WOLFSSL_SP_INT_NEGATIVE - if (!mp_iszero(Z) && neg) { - /* for non-zero negative numbers, set negative flag for our result: - * Z->sign = FP_NEG */ - mp_setneg(Z); +#ifdef DEBUG_WOLFSSL + if (mp_cmp(X, X2) != 0) { + /* this may be interesting when operands change (e.g. 
z=x*z mode m) */ + /* ESP_LOGE(TAG, "mp_mul X vs X2 mismatch!"); */ + } + if (mp_cmp(Y, Y2) != 0) { + /* this may be interesting when operands change (e.g. z=y*z mode m) */ + /* ESP_LOGE(TAG, "mp_mul Y vs Y2 mismatch!"); */ + } + if (mp_cmp(Z, Z2) != 0) { + int found_z_used = Z->used; + + ESP_LOGE(TAG, "mp_mul Z vs Z2 mismatch!"); + ESP_LOGI(TAG, "Xs = %d", Xs); + ESP_LOGI(TAG, "Ys = %d", Ys); + ESP_LOGI(TAG, "Zs = %d", Zs); + ESP_LOGI(TAG, "found_z_used = %d", found_z_used); + ESP_LOGI(TAG, "z.used = %d", Z->used); + ESP_LOGI(TAG, "hwWords_sz = %d", hwWords_sz); + ESP_LOGI(TAG, "maxWords_sz = %d", maxWords_sz); +#if defined(CONFIG_IDF_TARGET_ESP32) + ESP_LOGI(TAG, "left_pad_offset = %d", left_pad_offset); +#endif + ESP_LOGI(TAG, "hwWords_sz<<2 = %d", hwWords_sz << 2); + esp_show_mp("X", X2); /* show the copy in X2, as X may have been clobbered */ + esp_show_mp("Y", Y2); /* show the copy in Y2, as Y may have been clobbered */ + esp_show_mp("Peek Z", PEEK); /* this is the Z before start */ + esp_show_mp("Z", Z); /* this is the HW result */ + esp_show_mp("Z2", Z2); /* this is the SW result */ + #ifndef NO_RECOVER_SOFTWARE_CALC + ESP_LOGW(TAG, "Recovering mp_mul error with software result"); + mp_copy(Z2, Z); /* copy (src = Z2) to (dst = Z) */ + #else + ret = MP_VAL; + #endif } #endif +#ifdef WOLFSSL_HW_METRICS + esp_mp_mul_usage_ct++; + esp_mp_max_used = (Z->used > esp_mp_max_used) ? Z->used : esp_mp_max_used; + if (ret != MP_OKAY) { + esp_mp_mul_error_ct++; /* includes fallback */ + } +#endif + + if (ret == MP_OKAY) { + /* never clean the result for anything other than success, as we may + * fall back to SW and we don't want to muck up operand values. */ + esp_clean_result(Z, 0); + } + + ESP_LOGV(TAG, "\nEnd esp_mp_mul \n"); + return ret; -} +} /* esp_mp_mul() */ +#endif /* ! 
NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_MP_MUL*/ -/* Z = X * Y (mod M) */ +#ifndef NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_MULMOD +/* Large Number Modular Multiplication + * + * See 24.3.3 of the ESP32 Technical Reference Manual + * + * Z = X × Y mod M */ int esp_mp_mulmod(MATH_INT_T* X, MATH_INT_T* Y, MATH_INT_T* M, MATH_INT_T* Z) { - int ret = 0; - int negcheck; - word32 Xs; - word32 Ys; - word32 Ms; - word32 maxWords_sz; - word32 hwWords_sz; - word32 zwords; + struct esp_mp_helper mph[1]; /* we'll save some values in this mp helper */ + MATH_INT_T tmpZ[1] = {}; +#ifdef DEBUG_WOLFSSL + MATH_INT_T X2[1] = {}; + MATH_INT_T Y2[1] = {}; + MATH_INT_T M2[1] = {}; + MATH_INT_T Z2[1] = {}; + MATH_INT_T PEEK[1] = {}; + (void) PEEK; +#endif + + int ret = MP_OKAY; + word32 zwords = 0; + +#if defined(WOLFSSL_SP_INT_NEGATIVE) || defined(USE_FAST_MATH) + int negcheck = 0; +#endif - MATH_INT_T r_inv; - MATH_INT_T tmpZ; - mp_digit mp; +#ifdef DEBUG_WOLFSSL + int reti = 0; /* interim return value used only during HW==SW validation */ +#endif - uint32_t Exponent; -#if CONFIG_IDF_TARGET_ESP32S3 +#if defined(CONFIG_IDF_TARGET_ESP32) +#elif defined(CONFIG_IDF_TARGET_ESP32S3) uint32_t OperandBits; int WordsForOperand; -# endif +#else + ret = MP_HW_FALLBACK; +#endif - /* neg check - X*Y becomes negative */ - negcheck = mp_isneg(X) != mp_isneg(Y) ? 1 : 0; + ESP_LOGV(TAG, "\nBegin esp_mp_mulmod \n"); + +#ifdef WOLFSSL_HW_METRICS + esp_mp_max_used = (X->used > esp_mp_max_used) ? X->used : esp_mp_max_used; + esp_mp_max_used = (Y->used > esp_mp_max_used) ? Y->used : esp_mp_max_used; + esp_mp_max_used = (M->used > esp_mp_max_used) ? M->used : esp_mp_max_used; +#endif + + /* do we have an even moduli? 
*/ + if ((M->dp[0] & 1) == 0) { +#ifndef NO_ESP_MP_MUL_EVEN_ALT_CALC + /* Z = X × Y mod M in mixed HW & SW*/ + ret = esp_mp_mul(X, Y, tmpZ); /* HW X * Y */ + if (ret == MP_OKAY) { + /* z = tmpZ mod M, 0 <= Z < M */ + ret = mp_mod(tmpZ, M, Z); /* SW mod M */ + } + ESP_LOGV(TAG, "alternate mp_mul calc!"); + return ret; +#else + #ifdef WOLFSSL_HW_METRICS + esp_mp_mulmod_even_mod_ct++; + #endif + ESP_LOGV(TAG, "esp_mp_mulmod does not support even numbers"); + ret = MP_HW_FALLBACK; /* let the software figure out what to do */ + return ret; +#endif /* NO_ESP_MP_MUL_EVEN_ALTERNATE */ + } /* even moduli check */ + +#ifdef DEBUG_WOLFSSL + /* we're only validating HW when in debug mode */ + if (esp_hw_validation_active()) { + ESP_LOGV(TAG, "MP_HW_VALIDATION_ACTIVE"); + return MP_HW_VALIDATION_ACTIVE; + } +#endif - /* ask bits number */ - Xs = mp_count_bits(X); - Ys = mp_count_bits(Y); - Ms = mp_count_bits(M); - /* maximum bits and words for writing to HW */ - maxWords_sz = bits2words(max(Xs, max(Ys, Ms))); - zwords = bits2words(min(Ms, Xs + Ys)); - hwWords_sz = words2hwords(maxWords_sz); - if ((hwWords_sz << 5) > ESP_HW_RSAMAX_BIT) { - ESP_LOGE(TAG, "exceeds HW maximum bits"); - return MP_VAL; /* Error: value is not able to be used. 
*/ +#ifdef DEBUG_WOLFSSL + if (IS_HW_VALIDATION) { + ESP_LOGE(TAG, "Caller must not try HW when validation active."); } - /* calculate r_inv = R^2 mode M - * where: R = b^n, and b = 2^32 - * accordingly R^2 = 2^(n*32*2) - */ -#if CONFIG_IDF_TARGET_ESP32S3 - Exponent = maxWords_sz * BITS_IN_ONE_WORD * 2; -#else - Exponent = hwWords_sz << 6; + else { + /* when validating, save SW in [V]2 for later comparison to HW */ + mp_init(X2); + mp_init(Y2); + mp_init(M2); + mp_init(Z2); + + mp_copy(X, X2); /* copy (src = X) to (dst = X2) */ + mp_copy(Y, Y2); /* copy (src = Y) to (dst = Y2) */ + mp_copy(M, M2); /* copy (src = M) to (dst = M2) */ + mp_copy(Z, Z2); /* copy (src = Z) to (dst = Z2) */ + + SET_HW_VALIDATION; + reti = mp_mulmod(X2, Y2, M2, Z2); + if (reti == 0) { + ESP_LOGV(TAG, "wolfSSL mp_mulmod during vaidation success"); + } + else { + ESP_LOGE(TAG, "wolfSSL mp_mulmod during vaidation failed"); + } + CLR_HW_VALIDATION; + } +#endif /* DEBUG_WOLFSSL */ + + if (ret == MP_OKAY) { + + /* neg check: X*Y becomes negative, we'll need adjustment */ +#if defined(WOLFSSL_SP_INT_NEGATIVE) || defined(USE_FAST_MATH) + negcheck = mp_isneg(X) != mp_isneg(Y) ? 
1 : 0; #endif - ret = mp_init_multi(&tmpZ, &r_inv, NULL, NULL, NULL, NULL); - if (ret == 0 && (ret = esp_get_rinv(&r_inv, M, Exponent)) != MP_OKAY) { - ESP_LOGE(TAG, "calculate r_inv failed."); - mp_clear(&tmpZ); - mp_clear(&r_inv); - return ret; + + /* calculate r_inv = R^2 mod M + * where: R = b^n, and b = 2^32 + * accordingly R^2 = 2^(n*32*2) + */ + ret = esp_mp_montgomery_init(X, Y, M, mph); + if (ret == MP_OKAY) { + ESP_LOGV(TAG, "esp_mp_exptmod esp_mp_montgomery_init success."); + } + else { + #ifdef WOLFSSL_HW_METRICS + if (ret == MP_HW_FALLBACK) { + esp_mp_mulmod_fallback_ct++; + } + else { + esp_mp_mulmod_error_ct++; + } + #endif + return ret; + } + zwords = bits2words(min(mph->Ms, mph->Xs + mph->Ys)); } - /* lock HW for use */ - if ((ret = esp_mp_hw_lock()) != MP_OKAY) { - mp_clear(&tmpZ); - mp_clear(&r_inv); - return ret; + /* lock HW for use, enable peripheral clock */ + if (ret == MP_OKAY) { + ret = esp_mp_hw_lock(); } - /* Calculate M' */ - if ((ret = esp_calc_Mdash(M, 32/* bits */, &mp)) != MP_OKAY) { - ESP_LOGE(TAG, "failed to calculate M dash"); - mp_clear(&tmpZ); - mp_clear(&r_inv); - return ret; + +#if defined(CONFIG_IDF_TARGET_ESP32) + /* Classic ESP32, non-S3 Xtensa */ + + /*Steps to use HW in the following order: + * prep: wait until clean HW engine + * + * 1. Write (N/512bits - 1) to MULT_MODE_REG + * 2. Write X,M(=G, X, P) to memory blocks + * need to write data to each memory block only according to the length + * of the number. + * 3. Write M' to M_PRIME_REG + * 4. Write 1 to MODEXP_START_REG + * 5. Wait for the first round of the operation to be completed. + * Poll RSA_INTERRUPT_REG until it reads 1, + * or until the RSA_INTR interrupt is generated. + * (Or until the INTER interrupt is generated.) + * 6. Write 1 to RSA_INTERRUPT_REG to clear the interrupt. + * 7. Write Yi (i ∈ [0, n) ∩ N) to RSA_X_MEM + * Users need to write to the memory block only according to the length + * of the number. Data beyond this length is ignored. + * 8. 
Write 1 to RSA_MULT_START_REG + * 9. Wait for the second operation to be completed. + * Poll INTERRUPT_REG until it reads 1. + * 10. Read the Zi (i ∈ [0, n) ∩ N) from RSA_Z_MEM + * 11. Write 1 to RSA_INTERUPT_REG to clear the interrupt. + * + * post: Release the HW engine + * + * After the operation, the RSA_MULT_MODE_REG register, and memory blocks + * RSA_M_MEM and RSA_M_PRIME_REG remain unchanged. Users do not need to + * refresh these registers or memory blocks if the values remain the same. + */ + + if (ret == MP_OKAY) { + /* Prep wait for the engine */ + ret = esp_mp_hw_wait_clean(); } -#if CONFIG_IDF_TARGET_ESP32S3 + if (ret == MP_OKAY) { + /* step.1 + * Write (N/512bits - 1) to MULT_MODE_REG + * 512 bits => 16 words */ + DPORT_REG_WRITE(RSA_MULT_MODE_REG, (mph->hwWords_sz >> 4) - 1); +#if defined(DEBUG_WOLFSSL) + ESP_LOGV(TAG, "RSA_MULT_MODE_REG = %d", (mph->hwWords_sz >> 4) - 1); +#endif /* WOLFSSL_DEBUG */ + + /* step.2 write X, M, and r_inv into memory. + * The capacity of each memory block is 128 words. + * The memory blocks use the little endian format for storage, + * i.e. the least significant digit of each number is in lowest address.*/ + esp_mpint_to_memblock(RSA_MEM_X_BLOCK_BASE, + X, mph->Xs, mph->hwWords_sz); + esp_mpint_to_memblock(RSA_MEM_M_BLOCK_BASE, + M, mph->Ms, mph->hwWords_sz); + esp_mpint_to_memblock(RSA_MEM_Z_BLOCK_BASE, + &(mph->r_inv), mph->Rs, mph->hwWords_sz); + + /* step.3 write M' into memory */ + /* confirmed that mp2 does not support even modulus. 
+ * indeed we see a failure, but we can predict when modules is odd + * or when mp != mp2[0] */ + DPORT_REG_WRITE(RSA_M_DASH_REG, mph->mp); + ESP_EM__3_16; + + /* step.4 start process */ + process_start(RSA_MULT_START_REG); + + /* step.5,6 wait until done */ + wait_until_done(RSA_INTERRUPT_REG); + + /* step.7 Y to MEM_X */ + esp_mpint_to_memblock(RSA_MEM_X_BLOCK_BASE, Y, mph->Ys, mph->hwWords_sz); + +#ifdef DEBUG_WOLFSSL + /* save value to peek at the result stored in RSA_MEM_Z_BLOCK_BASE */ + esp_memblock_to_mpint(RSA_MEM_X_BLOCK_BASE, + PEEK, + 128); + esp_clean_result(PEEK, 0); +#endif /* DEBUG_WOLFSSL */ + + /* step.8 start process */ + process_start(RSA_MULT_START_REG); + + /* step.9,11 wait until done */ + wait_until_done(RSA_INTERRUPT_REG); + + /* step.12 read the result from MEM_Z */ + esp_memblock_to_mpint(RSA_MEM_Z_BLOCK_BASE, tmpZ, zwords); + } /* step 1 .. 12 */ + + /* step.13 clear and release HW */ + esp_mp_hw_unlock(); + +#elif defined(CONFIG_IDF_TARGET_ESP32S3) /* Steps to perform large number modular multiplication. Calculates Z = (X x Y) modulo M. * The number of bits in the operands (X, Y) is N. N can be 32x, where x = {1,2,3,...64}, so the * maximum number of bits in the X and Y is 2048. We must use the same number of words to represent @@ -597,144 +1594,207 @@ int esp_mp_mulmod(MATH_INT_T* X, MATH_INT_T* Y, MATH_INT_T* M, MATH_INT_T* Z) * x. Clear the interrupt flag, if you used it (we don't). */ /* 1. Wait until hardware is ready. */ - if ((ret = esp_mp_hw_wait_clean()) != MP_OKAY) { - return ret; + if (ret == MP_OKAY) { + ret = esp_mp_hw_wait_clean(); } - /* 2. Disable completion interrupt signal; we don't use. - ** 0 => no interrupt; 1 => interrupt on completion. */ - DPORT_REG_WRITE(RSA_INTERRUPT_REG, 0); + if (ret == MP_OKAY) { + /* 2. Disable completion interrupt signal; we don't use. + ** 0 => no interrupt; 1 => interrupt on completion. */ + DPORT_REG_WRITE(RSA_INTERRUPT_REG, 0); - /* 3. Write (N_result_bits/32 - 1) to the RSA_MODE_REG. 
*/ - OperandBits = max(max(Xs, Ys), Ms); - if (OperandBits > ESP_HW_MULTI_RSAMAX_BITS) { - ESP_LOGW(TAG, "result exceeds max bit length"); - return MP_VAL; /* Error: value is not able to be used. */ + /* 3. Write (N_result_bits/32 - 1) to the RSA_MODE_REG. */ + OperandBits = max(max(mph->Xs, mph->Ys), mph->Ms); + if (OperandBits > ESP_HW_MULTI_RSAMAX_BITS) { + ESP_LOGW(TAG, "result exceeds max bit length"); + return MP_VAL; /* Error: value is not able to be used. */ + } + WordsForOperand = bits2words(OperandBits); + DPORT_REG_WRITE(RSA_LENGTH_REG, WordsForOperand - 1); + + /* 4. Write M' value into RSA_M_PRIME_REG (now called RSA_M_DASH_REG) */ + DPORT_REG_WRITE(RSA_M_DASH_REG, mph->mp); + + /* Select acceleration options. */ + DPORT_REG_WRITE(RSA_CONSTANT_TIME_REG, 0); + + /* 5. Load X, Y, M, r' operands. + * Note RSA_MEM_RB_BLOCK_BASE == RSA_MEM_Z_BLOC_BASE on ESP32s3*/ + esp_mpint_to_memblock(RSA_MEM_X_BLOCK_BASE, + X, + mph->Xs, + mph->hwWords_sz); + esp_mpint_to_memblock(RSA_MEM_Y_BLOCK_BASE, + Y, + mph->Ys, + mph->hwWords_sz); + esp_mpint_to_memblock(RSA_MEM_M_BLOCK_BASE, + M, + mph->Ms, + mph->hwWords_sz); + esp_mpint_to_memblock(RSA_MEM_RB_BLOCK_BASE, + &(mph->r_inv), + mph->Rs, + mph->hwWords_sz); + + /* 6. Start operation and wait until it completes. */ + process_start(RSA_MOD_MULT_START_REG); /* we're here in esp_mp_mulmod */ + asm volatile("memw"); + asm volatile("nop"); + asm volatile("nop"); + asm volatile("nop"); + asm volatile("nop"); + asm volatile("nop"); + asm volatile("nop"); } - WordsForOperand = bits2words(OperandBits); - DPORT_REG_WRITE(RSA_LENGTH_REG, WordsForOperand - 1); - - /* 4. Write M' value into RSA_M_PRIME_REG (now called RSA_M_DASH_REG) */ - DPORT_REG_WRITE(RSA_M_DASH_REG, mp); - /* Select acceleration options. */ - DPORT_REG_WRITE(RSA_CONSTANT_TIME_REG, 0); - - /* 5. Load X, Y, M, r' operands. 
- * Note RSA_MEM_RB_BLOCK_BASE == RSA_MEM_Z_BLOC_BASE on ESP32s3*/ - esp_mpint_to_memblock(RSA_MEM_X_BLOCK_BASE, X, Xs, hwWords_sz); - esp_mpint_to_memblock(RSA_MEM_Y_BLOCK_BASE, Y, Ys, hwWords_sz); - esp_mpint_to_memblock(RSA_MEM_M_BLOCK_BASE, M, Ms, hwWords_sz); - esp_mpint_to_memblock(RSA_MEM_RB_BLOCK_BASE, &r_inv, mp_count_bits(&r_inv), hwWords_sz); - - /* 6. Start operation and wait until it completes. */ - process_start(RSA_MOD_MULT_START_REG); - ret = wait_until_done(RSA_QUERY_INTERRUPT_REG); - if (MP_OKAY != ret) { - return ret; + if (ret == MP_OKAY) { + ret = wait_until_done(RSA_QUERY_INTERRUPT_REG); } - /* 7. read the result form MEM_Z */ - esp_memblock_to_mpint(RSA_MEM_Z_BLOCK_BASE, &tmpZ, zwords); + if (ret == MP_OKAY) { + /* 7. read the result from MEM_Z */ + esp_memblock_to_mpint(RSA_MEM_Z_BLOCK_BASE, tmpZ, zwords); + } /* 8. clear and release HW */ esp_mp_hw_unlock(); - if (negcheck) { - mp_sub(M, &tmpZ, &tmpZ); - } - - mp_copy(&tmpZ, Z); - mp_clear(&tmpZ); - mp_clear(&r_inv); - - return ret; /* end if CONFIG_IDF_TARGET_ESP32S3 */ #else - /* non-S3 Xtensa */ - - /*Steps to use HW in the following order: - * 1. wait until clean HW engine - * 2. Write(N/512bits - 1) to MULT_MODE_REG - * 3. Write X,M(=G, X, P) to memory blocks - * need to write data to each memory block only according to the length - * of the number. - * 4. Write M' to M_PRIME_REG - * 5. Write 1 to MODEXP_START_REG - * 6. Wait for the first operation to be done. Poll INTERRUPT_REG until it reads 1. - * (Or until the INTER interrupt is generated.) - * 7. Write 1 to RSA_INTERRUPT_REG to clear the interrupt. - * 8. Write Y to RSA_X_MEM - * 9. Write 1 to RSA_MULT_START_REG - * 10. Wait for the second operation to be completed. Poll INTERRUPT_REG until it reads 1. - * 11. Read the Z from RSA_Z_MEM - * 12. Write 1 to RSA_INTERUPT_REG to clear the interrupt. - * 13. 
Release the HW engine - */ + /* for all non-supported chipsets, fall back to SW calcs */ + ret = MP_HW_FALLBACK; +#endif + if (ret == MP_OKAY) { + /* additional steps */ + /* this is needed for known issue when Z is greater than M */ + if (mp_cmp(tmpZ, M) == MP_GT) { + /* Z -= M */ + mp_sub(tmpZ, M, tmpZ); + ESP_LOGV(TAG, "Z is greater than M"); + } + #if defined(WOLFSSL_SP_INT_NEGATIVE) || defined(USE_FAST_MATH) + if (negcheck) { + mp_sub(M, tmpZ, tmpZ); + ESP_LOGV(TAG, "neg check adjustment"); + } + #endif + mp_copy(tmpZ, Z); /* copy tmpZ to result Z */ - if ( (ret = esp_mp_hw_wait_clean()) != MP_OKAY ) { - return ret; + esp_clean_result(Z, 0); } - /* step.1 512 bits => 16 words */ - DPORT_REG_WRITE(RSA_MULT_MODE_REG, (hwWords_sz >> 4) - 1); - - /* step.2 write X, M and r_inv into memory */ - esp_mpint_to_memblock(RSA_MEM_X_BLOCK_BASE, X, Xs, hwWords_sz); - esp_mpint_to_memblock(RSA_MEM_M_BLOCK_BASE, M, Ms, hwWords_sz); - esp_mpint_to_memblock(RSA_MEM_Z_BLOCK_BASE, - &r_inv, - mp_count_bits(&r_inv), - hwWords_sz); - - /* step.3 write M' into memory */ - DPORT_REG_WRITE(RSA_M_DASH_REG, mp); - - /* step.4 start process */ - process_start(RSA_MULT_START_REG); - /* step.5,6 wait until done */ - wait_until_done(RSA_INTERRUPT_REG); - /* step.7 Y to MEM_X */ - esp_mpint_to_memblock(RSA_MEM_X_BLOCK_BASE, Y, Ys, hwWords_sz); +#ifdef WOLFSSL_HW_METRICS + esp_mp_mulmod_usage_ct++; + if (ret == MP_HW_FALLBACK) { + ESP_LOGV(TAG, "esp_mp_mulmod HW Fallback tick"); + esp_mp_mulmod_fallback_ct++; + } +#endif - /* step.8 start process */ - process_start(RSA_MULT_START_REG); +#ifdef DEBUG_WOLFSSL + if (ret == MP_HW_FALLBACK) { + ESP_LOGI(TAG, "HW Fallback"); + } + else { + if (mp_cmp(X, X2) != 0) { + ESP_LOGV(TAG, "mp_mul X vs X2 mismatch!"); + } + if (mp_cmp(Y, Y2) != 0) { + ESP_LOGV(TAG, "mp_mul Y vs Y2 mismatch!"); + } - /* step.9,11 wait until done */ - wait_until_done(RSA_INTERRUPT_REG); + if (mp_cmp(Z, Z2) != 0) { + ESP_LOGE(TAG, "esp_mp_mulmod Z vs Z2 mismatch!"); + + 
esp_mp_mulmod_error_ct++; + int found_z_used = Z->used; + + ESP_LOGI(TAG, "Xs = %d", mph->Xs); + ESP_LOGI(TAG, "Ys = %d", mph->Ys); + ESP_LOGI(TAG, "found_z_used = %d", found_z_used); + ESP_LOGI(TAG, "z.used = %d", Z->used); + ESP_LOGI(TAG, "hwWords_sz = %d", mph->hwWords_sz); + ESP_LOGI(TAG, "maxWords_sz = %d", mph->maxWords_sz); + ESP_LOGI(TAG, "hwWords_sz<<2 = %d", mph->hwWords_sz << 2); + + /* parameters may have been collbered; Show cpied values */ + esp_show_mp("X", X2); + esp_show_mp("Y", Y2); + esp_show_mp("M", M2); + + esp_show_mp("r_inv", &(mph->r_inv)); /*show r_inv */ + ESP_LOGI(TAG, "mp = 0x%08x = %u", mph->mp, mph->mp); + + if (mph->mp == mph->mp2) { + ESP_LOGI(TAG, "M' match esp_calc_Mdash vs mp_montgomery_setup" + " = %d !", mph->mp); + } + else { + ESP_LOGW(TAG, + "\n\n" + "M' MISMATCH esp_calc_Mdash = 0x%08x = %d \n" + "vs mp_montgomery_setup = 0x%08x = %d \n\n", + mph->mp, + mph->mp, + mph->mp2, + mph->mp2); + mph->mp = mph->mp2; + } + + + esp_show_mp("HW Z", Z); /* this is the HW result */ + esp_show_mp("SW Z2", Z2); /* this is the SW result */ + ESP_LOGI(TAG, "esp_mp_mulmod_usage_ct = %lu tries", + esp_mp_mulmod_usage_ct); + ESP_LOGI(TAG, "esp_mp_mulmod_error_ct = %lu failures", + esp_mp_mulmod_error_ct); + ESP_LOGI(TAG, ""); + + + #ifndef NO_RECOVER_SOFTWARE_CALC + ESP_LOGW(TAG, "Recovering mp_mul error with software result"); + mp_copy(Z2, Z); /* copy (src = Z2) to (dst = Z) */ + #else + ret = MP_VAL; /* if we are not recovering, then we have an error */ + #endif + } + else { + ESP_LOGV(TAG, "esp_mp_mulmod success!"); + } + } - /* step.12 read the result from MEM_Z */ - esp_memblock_to_mpint(RSA_MEM_Z_BLOCK_BASE, &tmpZ, zwords); +#endif /* DEBUG_WOLFSSL */ - /* step.13 clear and release HW */ - esp_mp_hw_unlock(); + /* cleanup and exit */ + mp_clear(tmpZ); + mp_clear(&(mph->r_inv)); - /* additional steps */ - /* this needs for known issue when Z is greater than M */ - if (mp_cmp(&tmpZ, M) == MP_GT) { - /* Z -= M */ - mp_sub(&tmpZ, M, &tmpZ); 
+ ESP_LOGV(TAG, "\nEnd esp_mp_mulmod \n"); + if (ret == MP_OKAY) { + ESP_LOGV(TAG, "esp_mp_mulmod exit success "); } - if (negcheck) { - mp_sub(M, &tmpZ, &tmpZ); + else { + ESP_LOGW(TAG, "esp_mp_mulmod exit failed = %d", ret); } - mp_copy(&tmpZ, Z); - - mp_clear(&tmpZ); - mp_clear(&r_inv); - - return ret; +#ifdef WOLFSSL_HW_METRICS + /* calculate max used after any cleanup */ + esp_mp_max_used = (Z->used > esp_mp_max_used) ? Z->used : esp_mp_max_used; #endif -} + return ret; +} /* esp_mp_mulmod */ +#endif /* ! NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_MULMOD */ + +#ifndef NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD /* Large Number Modular Exponentiation * * Z = X^Y mod M * - * See: - * ESP32, Chapter 24, https://www.espressif.com/sites/default/files/documentation/esp32_technical_reference_manual_en.pdf - * ESP32s3, section 20.3.1, https://www.espressif.com/sites/default/files/documentation/esp32-s3_technical_reference_manual_en.pdf + * ESP32, Section 24.3.2 https://www.espressif.com/sites/default/files/documentation/esp32_technical_reference_manual_en.pdf + * ESP32S3, Section 20.3.1, https://www.espressif.com/sites/default/files/documentation/esp32-s3_technical_reference_manual_en.pdf + * * The operation is based on Montgomery multiplication. Aside from the * arguments X, Y , and M, two additional ones are needed —r and M′ .* These arguments are calculated in advance by software. @@ -745,61 +1805,165 @@ int esp_mp_mulmod(MATH_INT_T* X, MATH_INT_T* Y, MATH_INT_T* M, MATH_INT_T* Z) .* but all numbers in a calculation must be of the same length. .* The bit length of M′ is always 32. .* -.* Note some DH references may use: Y = (G ^ X) mod P - */ -int esp_mp_exptmod(MATH_INT_T* X, MATH_INT_T* Y, word32 Ys, MATH_INT_T* M, MATH_INT_T* Z) + * Z = (X ^ Y) mod M : Espressif generic notation + * Y = (G ^ X) mod P : wolfSSL DH reference notation */ +int esp_mp_exptmod(MATH_INT_T* X, MATH_INT_T* Y, MATH_INT_T* M, MATH_INT_T* Z) { - int ret = 0; + /* Danger! 
Do not initialize any function parameters, not even the result Z. + * Some operations such as (rnd = rnd^e) will wipe out the rnd operand + * value upon initialization. + * (e.g. the address of X and Z could be the same when called) */ + struct esp_mp_helper mph[1]; /* we'll save some mp helper data here */ + int ret = MP_OKAY; + +#if defined(CONFIG_IDF_TARGET_ESP32) +#elif defined(CONFIG_IDF_TARGET_ESP32S3) + uint32_t OperandBits; + uint32_t WordsForOperand; +#else +#endif - word32 Xs; - word32 Ms; - word32 maxWords_sz; - word32 hwWords_sz; + ESP_LOGV(TAG, "\nBegin esp_mp_exptmod \n"); +#ifdef WOLFSSL_HW_METRICS + esp_mp_exptmod_usage_ct++; + esp_mp_max_used = (X->used > esp_mp_max_used) ? X->used : esp_mp_max_used; + esp_mp_max_used = (Y->used > esp_mp_max_used) ? Y->used : esp_mp_max_used; + esp_mp_max_used = (M->used > esp_mp_max_used) ? M->used : esp_mp_max_used; +#endif - MATH_INT_T r_inv; - mp_digit mp; + if (mp_iszero(M)) { +#ifdef DEBUG_WOLFSSL + ESP_LOGI(TAG, "esp_mp_exptmod M is zero!"); +#endif +#ifdef WOLFSSL_HW_METRICS + esp_mp_exptmod_fallback_ct++; +#endif + return MP_HW_FALLBACK; /* fall back and let SW decide how to handle */ + } -#if CONFIG_IDF_TARGET_ESP32S3 - uint32_t OperandBits; - uint32_t WordsForOperand; + if (mp_isone(M)) { +#ifdef DEBUG_WOLFSSL + ESP_LOGI(TAG, "esp_mp_exptmod M is one!"); #endif + mp_clear(Z); + return MP_OKAY; /* anything mod one is zero */ + } - /* ask bits number */ - Xs = mp_count_bits(X); - Ms = mp_count_bits(M); - /* maximum bits and words for writing to HW */ - maxWords_sz = bits2words(max(Xs, max(Ys, Ms))); - hwWords_sz = words2hwords(maxWords_sz); + ret = esp_mp_montgomery_init(X, Y, M, mph); - if ((hwWords_sz << 5) > ESP_HW_RSAMAX_BIT) { - ESP_LOGE(TAG, "exceeds HW maximum bits"); - return MP_VAL; /* Error: value is not able to be used. 
*/ + if (ret == MP_OKAY) { + ESP_LOGV(TAG, "esp_mp_exptmod esp_mp_montgomery_init success."); } - /* calculate r_inv = R^2 mode M - * where: R = b^n, and b = 2^32 - * accordingly R^2 = 2^(n*32*2) - */ - ret = mp_init(&r_inv); - if ( (ret == 0) && - ((ret = esp_get_rinv(&r_inv, M, (hwWords_sz << 6))) != MP_OKAY) ) { - ESP_LOGE(TAG, "calculate r_inv failed."); - mp_clear(&r_inv); + else { +#ifdef WOLFSSL_HW_METRICS + if (ret == MP_HW_FALLBACK) { + esp_mp_exptmod_fallback_ct++; + } + else { + esp_mp_exptmod_error_ct++; + } +#endif return ret; } + +#ifdef DEBUG_WOLFSSL + if (esp_hw_validation_active()) { + /* recall there's only one HW for all math accelerations */ + return MP_HW_VALIDATION_ACTIVE; + } + + if (esp_mp_exptmod_depth_counter != 0) { + ESP_LOGE(TAG, "esp_mp_exptmod Depth Counter Error!"); + } + esp_mp_exptmod_depth_counter++; +#endif + + /* + max bits = 0x400 = 1024 bits +1024 / 8 = 128 bytes + 128 / 4 = 32 words (0x20) + */ + /* lock and init the HW */ - if ( (ret = esp_mp_hw_lock()) != MP_OKAY ) { - mp_clear(&r_inv); - return ret; + if (ret == MP_OKAY) { + ret = esp_mp_hw_lock(); + if (ret != MP_OKAY) { + ESP_LOGE(TAG, "esp_mp_hw_lock failed"); + #ifdef DEBUG_WOLFSSL + esp_mp_exptmod_depth_counter--; + #endif + } } - /* calc M' */ - /* if Pm is odd, uses mp_montgomery_setup() */ - if ( (ret = esp_calc_Mdash(M, 32/* bits */, &mp)) != MP_OKAY ) { - ESP_LOGE(TAG, "failed to calculate M dash"); - mp_clear(&r_inv); - return ret; + +#if defined(CONFIG_IDF_TARGET_ESP32) + /* non-ESP32S3 Xtensa (regular ESP32) */ + + /* Steps to use HW in the following order: + * 1. Write(N/512bits - 1) to MODEXP_MODE_REG + * 2. Write X, Y, M and r_inv to memory blocks + * need to write data to each memory block only according to the length + * of the number. + * 3. Write M' to M_PRIME_REG + * 4. Write 1 to MODEXP_START_REG + * 5. Wait for the operation to be done. Poll INTERRUPT_REG until it reads 1. + * (Or until the INTER interrupt is generated.) + * 6. 
Read the result Z(=Y) from Z_MEM + * 7. Write 1 to INTERRUPT_REG to clear the interrupt. + */ + if (ret == MP_OKAY) { + ret = esp_mp_hw_wait_clean(); + #ifdef WOLFSSL_HW_METRICS + if (ret != MP_OKAY) { + esp_mp_exptmod_error_ct++; + } + #endif + } + + if (ret == MP_OKAY) { + /* step.1 */ + ESP_LOGV(TAG, + "hwWords_sz = %d, num = %d", + mph->hwWords_sz, + (mph->hwWords_sz >> 4) - 1 + ); + + DPORT_REG_WRITE(RSA_MODEXP_MODE_REG, (mph->hwWords_sz >> 4) - 1); + /* step.2 write G, X, P, r_inv and M' into memory */ + esp_mpint_to_memblock(RSA_MEM_X_BLOCK_BASE, + X, + mph->Xs, + mph->hwWords_sz); + esp_mpint_to_memblock(RSA_MEM_Y_BLOCK_BASE, + Y, mph->Ys, + mph->hwWords_sz); + esp_mpint_to_memblock(RSA_MEM_M_BLOCK_BASE, + M, + mph->Ms, + mph->hwWords_sz); + esp_mpint_to_memblock(RSA_MEM_Z_BLOCK_BASE, + &(mph->r_inv), + mph->Rs, + mph->hwWords_sz); + + /* step.3 write M' into memory */ + ESP_LOGV(TAG, "M' = %d", mph->mp); + DPORT_REG_WRITE(RSA_M_DASH_REG, mph->mp); + ESP_EM__3_16; + + /* step.4 start process */ + process_start(RSA_MODEXP_START_REG); /* was RSA_START_MODEXP_REG; + * RSA_MODEXP_START_REG in docs? */ + + /* step.5 wait until done */ + wait_until_done(RSA_INTERRUPT_REG); + /* step.6 read a result from memory */ + esp_memblock_to_mpint(RSA_MEM_Z_BLOCK_BASE, Z, BITS_TO_WORDS(mph->Ms)); } -#if CONFIG_IDF_TARGET_ESP32S3 + /* step.7 clear and release HW */ + esp_mp_hw_unlock(); + +#elif defined(CONFIG_IDF_TARGET_ESP32S3) /* Steps to perform large number modular exponentiation. Calculates Z = (X ^ Y) modulo M. * The number of bits in the operands (X, Y) is N. N can be 32x, where x = {1,2,3,...64}, so the * maximum number of bits in the X and Y is 2048. @@ -817,98 +1981,168 @@ int esp_mp_exptmod(MATH_INT_T* X, MATH_INT_T* Y, word32 Ys, MATH_INT_T* M, MATH_ * x. Clear the interrupt flag, if you used it (we don't). */ /* 1. Wait until hardware is ready. 
*/ - if ((ret = esp_mp_hw_wait_clean()) != MP_OKAY) { - return ret; + if (ret == MP_OKAY) { + ret = esp_mp_hw_wait_clean(); } - /* 2. Disable completion interrupt signal; we don't use. - ** 0 => no interrupt; 1 => interrupt on completion. */ - DPORT_REG_WRITE(RSA_INTERRUPT_REG, 0); - - /* 3. Write (N_result_bits/32 - 1) to the RSA_MODE_REG. */ - OperandBits = max(max(Xs, Ys), Ms); - if (OperandBits > ESP_HW_MULTI_RSAMAX_BITS) { - ESP_LOGW(TAG, "result exceeds max bit length"); - return MP_VAL; /* Error: value is not able to be used. */ + if (ret == MP_OKAY) { + OperandBits = max(max(mph->Xs, mph->Ys), mph->Ms); + if (OperandBits > ESP_HW_MULTI_RSAMAX_BITS) { + ESP_LOGW(TAG, "result exceeds max bit length"); + ret = MP_VAL; /* Error: value is not able to be used. */ + } + else { + WordsForOperand = bits2words(OperandBits); + } } - WordsForOperand = bits2words(OperandBits); - DPORT_REG_WRITE(RSA_LENGTH_REG, WordsForOperand - 1); - - /* 4. Write M' value into RSA_M_PRIME_REG (now called RSA_M_DASH_REG) */ - DPORT_REG_WRITE(RSA_M_DASH_REG, mp); - /* 5. Load X, Y, M, r' operands. */ - esp_mpint_to_memblock(RSA_MEM_X_BLOCK_BASE, X, Xs, hwWords_sz); - esp_mpint_to_memblock(RSA_MEM_Y_BLOCK_BASE, Y, Ys, hwWords_sz); - esp_mpint_to_memblock(RSA_MEM_M_BLOCK_BASE, M, Ms, hwWords_sz); - esp_mpint_to_memblock(RSA_MEM_Z_BLOCK_BASE, &r_inv, - mp_count_bits(&r_inv), hwWords_sz); - - /* 6. Start operation and wait until it completes. */ - process_start(RSA_MODEXP_START_REG); - ret = wait_until_done(RSA_QUERY_INTERRUPT_REG); - if (MP_OKAY != ret) { - return ret; + if (ret == MP_OKAY) { + /* 2. Disable completion interrupt signal; we don't use. + ** 0 => no interrupt; 1 => interrupt on completion. */ + DPORT_REG_WRITE(RSA_INTERRUPT_REG, 0); + + /* 3. Write (N_result_bits/32 - 1) to the RSA_MODE_REG. */ + DPORT_REG_WRITE(RSA_LENGTH_REG, WordsForOperand - 1); + + /* 4. 
Write M' value into RSA_M_PRIME_REG (now called RSA_M_DASH_REG) */ + DPORT_REG_WRITE(RSA_M_DASH_REG, mph->mp); + + /* 5. Load X, Y, M, r' operands. */ + esp_mpint_to_memblock(RSA_MEM_X_BLOCK_BASE, + X, + mph->Xs, + mph->hwWords_sz); + esp_mpint_to_memblock(RSA_MEM_Y_BLOCK_BASE, + Y, + mph->Ys, + mph->hwWords_sz); + esp_mpint_to_memblock(RSA_MEM_M_BLOCK_BASE, + M, + mph->Ms, + mph->hwWords_sz); + esp_mpint_to_memblock(RSA_MEM_Z_BLOCK_BASE, + &(mph->r_inv), + mph->Rs, + mph->hwWords_sz); + + /* 6. Start operation and wait until it completes. */ + process_start(RSA_MODEXP_START_REG); + ret = wait_until_done(RSA_QUERY_INTERRUPT_REG); } - /* 7. read the result form MEM_Z */ - esp_memblock_to_mpint(RSA_MEM_Z_BLOCK_BASE, Z, BITS_TO_WORDS(Ms)); + if (MP_OKAY == ret) { + /* 7. read the result from MEM_Z */ + esp_memblock_to_mpint(RSA_MEM_Z_BLOCK_BASE, Z, BITS_TO_WORDS(mph->Ms)); + } /* 8. clear and release HW */ esp_mp_hw_unlock(); - mp_clear(&r_inv); - - return ret; /* end if CONFIG_IDF_TARGET_ESP32S3 */ #else - /* non-ESP32S3 Xtensa (regular ESP32) */ + /* unknown or unsupported targets fall back to SW */ + ret = MP_HW_FALLBACK; +#endif - /* Steps to use HW in the following order: - * 1. Write(N/512bits - 1) to MODEXP_MODE_REG - * 2. Write X, Y, M and r_inv to memory blocks - * need to write data to each memory block only according to the length - * of the number. - * 3. Write M' to M_PRIME_REG - * 4. Write 1 to MODEXP_START_REG - * 5. Wait for the operation to be done. Poll INTERRUPT_REG until it reads 1. - * (Or until the INTER interrupt is generated.) - * 6. Read the result Z(=Y) from Z_MEM - * 7. Write 1 to INTERRUPT_REG to clear the interrupt. 
- */ - if ((ret = esp_mp_hw_wait_clean()) != MP_OKAY) { - return ret; +#ifdef DEBUG_WOLFSSL + if (esp_mp_exptmod_depth_counter != 1) { + ESP_LOGE(TAG, "esp_mp_exptmod exit Depth Counter Error!"); } + esp_mp_exptmod_depth_counter--; +#endif - /* step.1 */ - DPORT_REG_WRITE(RSA_MODEXP_MODE_REG, (hwWords_sz >> 4) - 1); - /* step.2 write G, X, P, r_inv and M' into memory */ - esp_mpint_to_memblock(RSA_MEM_X_BLOCK_BASE, X, Xs, hwWords_sz); - esp_mpint_to_memblock(RSA_MEM_Y_BLOCK_BASE, Y, Ys, hwWords_sz); - esp_mpint_to_memblock(RSA_MEM_M_BLOCK_BASE, M, Ms, hwWords_sz); - esp_mpint_to_memblock(RSA_MEM_Z_BLOCK_BASE, - &r_inv, - mp_count_bits(&r_inv), - hwWords_sz); - /* step.3 write M' into memory */ - DPORT_REG_WRITE(RSA_M_DASH_REG, mp); - /* step.4 start process */ - process_start(RSA_START_MODEXP_REG); - - /* step.5 wait until done */ - wait_until_done(RSA_INTERRUPT_REG); - /* step.6 read a result form memory */ - esp_memblock_to_mpint(RSA_MEM_Z_BLOCK_BASE, Z, BITS_TO_WORDS(Ms)); - /* step.7 clear and release HW */ - esp_mp_hw_unlock(); - - mp_clear(&r_inv); + /* never modify the result if we are falling back as the result + * may be the same as one of the operands! */ + if (ret == MP_OKAY) { + esp_clean_result(Z, 0); + } +#ifdef WOLFSSL_HW_METRICS + esp_mp_max_used = (Z->used > esp_mp_max_used) ? Z->used : esp_mp_max_used; +#endif return ret; -#endif -} +} /* esp_mp_exptmod */ +#endif /* ! 
NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD + * (turns on/off mp_exptmod) */ #endif /* WOLFSSL_ESP32_CRYPT_RSA_PRI) && * !NO_WOLFSSL_ESP32_CRYPT_RSA_PRI */ #endif /* !NO_RSA || HAVE_ECC */ + +#ifdef WOLFSSL_HW_METRICS +int esp_hw_show_mp_metrics(void) +{ + int ret; +#ifdef HW_MATH_ENABLED + ret = MP_OKAY; + + /* Metrics: esp_mp_mul() */ + ESP_LOGI(TAG, "Number of calls to esp_mp_mul: %lu", + esp_mp_mul_usage_ct); + if (esp_mp_mul_error_ct == 0) { + ESP_LOGI(TAG, "Success: no esp_mp_mul() errors."); + } + else { + ESP_LOGW(TAG, "Number of esp_mp_mul failures: %lu", + esp_mp_mul_error_ct); + ret = MP_VAL; + } + + ESP_LOGI(TAG, ""); /* mulmod follows */ + + /* Metrics: esp_mp_mulmod() */ + ESP_LOGI(TAG, "Number of calls to esp_mp_mulmod: %lu", + esp_mp_mulmod_usage_ct); + ESP_LOGI(TAG, "Number of fallback to SW mp_mulmod: %lu", + esp_mp_mulmod_fallback_ct); + + if (esp_mp_mulmod_error_ct == 0) { + ESP_LOGI(TAG, "Success: no esp_mp_mulmod errors."); + } + else { + ESP_LOGW(TAG, "Number of esp_mp_mulmod failures: %lu", + esp_mp_mulmod_error_ct); + ret = MP_VAL; + } + + if (esp_mp_mulmod_even_mod_ct == 0) { + ESP_LOGI(TAG, "Success: no esp_mp_mulmod even mod."); + } + else { + ESP_LOGW(TAG, "Number of esp_mp_mulmod even mod: %lu", + esp_mp_mulmod_even_mod_ct); + } + + if (esp_mp_mulmod_error_ct == 0) { + ESP_LOGI(TAG, "Success: no esp_mp_mulmod small x or y."); + } + else { + ESP_LOGW(TAG, "Number of esp_mp_mulmod small x: %lu", + esp_mp_mulmod_small_x_ct); + ESP_LOGW(TAG, "Number of esp_mp_mulmod small y: %lu", + esp_mp_mulmod_small_y_ct); + } + + ESP_LOGI(TAG, ""); /* exptmod follows */ + + ESP_LOGI(TAG, "Number of calls to esp_mp_exptmod: %lu", + esp_mp_exptmod_usage_ct); + ESP_LOGI(TAG, "Number of fallback to SW mp_exptmod: %lu", + esp_mp_exptmod_fallback_ct); + if (esp_mp_exptmod_error_ct == 0) { + ESP_LOGI(TAG, "Success: no esp_mp_exptmod errors."); + } + else { + ESP_LOGW(TAG, "Number of esp_mp_exptmod errors: %lu", + esp_mp_exptmod_error_ct); + ret = MP_VAL; + } + 
ESP_LOGI(TAG, "Max N->used: esp_mp_max_used = %lu", esp_mp_max_used); +#else + /* no HW math, no HW math metrics */ + ret = 0; +#endif /* HW_MATH_ENABLED */ + return ret; +} +#endif diff --git a/wolfcrypt/src/port/Espressif/esp32_sha.c b/wolfcrypt/src/port/Espressif/esp32_sha.c index 8d7dacfca2..e159a1ad04 100644 --- a/wolfcrypt/src/port/Espressif/esp32_sha.c +++ b/wolfcrypt/src/port/Espressif/esp32_sha.c @@ -65,11 +65,11 @@ static const char* TAG = "wolf_hw_sha"; static int InUse = 0; #else static wolfSSL_Mutex sha_mutex = NULL; +#endif - #if defined(DEBUG_WOLFSSL) - /* Only when debugging, we'll keep tracking of block numbers. */ - static int this_block_num = 0; - #endif +#if defined(DEBUG_WOLFSSL) + /* Only when debugging, we'll keep tracking of block numbers. */ + static int this_block_num = 0; #endif /* esp_sha_init @@ -502,7 +502,7 @@ int esp_sha512_ctx_copy(struct wc_Sha512* src, struct wc_Sha512* dst) ** Returns zero for bad digest size type request. ** */ -static word32 wc_esp_sha_digest_size(enum SHA_TYPE type) +static word32 wc_esp_sha_digest_size(WC_ESP_SHA_TYPE type) { int ret = 0; ESP_LOGV(TAG, " esp_sha_digest_size"); @@ -842,7 +842,7 @@ static int esp_sha_start_process(WC_ESP32SHA* sha) #if defined(CONFIG_IDF_TARGET_ESP32C3) || defined(CONFIG_IDF_TARGET_ESP32C6) /* ESP32-C3 RISC-V TODO */ - #elif defined(CONFIG_IDF_TARGET_ESP32S3) + #elif defined(CONFIG_IDF_TARGET_ESP32S3) /* Translate from Wolf SHA type to hardware algorithm. */ HardwareAlgorithm = 0; diff --git a/wolfcrypt/src/port/Espressif/esp32_util.c b/wolfcrypt/src/port/Espressif/esp32_util.c index 4cea99a9be..809c62bec1 100644 --- a/wolfcrypt/src/port/Espressif/esp32_util.c +++ b/wolfcrypt/src/port/Espressif/esp32_util.c @@ -18,9 +18,31 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA */ + +/* +** Version / Platform info. +** +** This could evolve into a wolfSSL-wide feature. For now, here only. 
See: +** https://github.com/wolfSSL/wolfssl/pull/6149 +*/ + #include #include +#include /* needed to print MATH_INT_T value */ + +#if defined(WOLFSSL_ESPIDF) + #include + #include "sdkconfig.h" + #define WOLFSSL_VERSION_PRINTF(...) ESP_LOGI(TAG, __VA_ARGS__) +#else + #include + #define WOLFSSL_VERSION_PRINTF(...) { printf(__VA_ARGS__); printf("\n"); } +#endif + +static const char* TAG = "esp32_util"; + +/* some functions are only applicable when hardware encryption is enabled */ #if defined(WOLFSSL_ESP32_CRYPT) && \ (!defined(NO_AES) || !defined(NO_SHA) || !defined(NO_SHA256) ||\ defined(WOLFSSL_SHA384) || defined(WOLFSSL_SHA512)) @@ -29,6 +51,7 @@ #include #include +#define MAX_WORDS_ESP_SHOW_MP 32 /* * initialize our mutex used to lock hardware access @@ -39,7 +62,7 @@ * other value from wc_InitMutex() * */ -int esp_CryptHwMutexInit(wolfSSL_Mutex* mutex) { +WOLFSSL_LOCAL int esp_CryptHwMutexInit(wolfSSL_Mutex* mutex) { if (mutex == NULL) { return BAD_MUTEX_E; } @@ -51,7 +74,7 @@ int esp_CryptHwMutexInit(wolfSSL_Mutex* mutex) { * call the ESP-IDF mutex lock; xSemaphoreTake * */ -int esp_CryptHwMutexLock(wolfSSL_Mutex* mutex, TickType_t xBlockTime) { +WOLFSSL_LOCAL int esp_CryptHwMutexLock(wolfSSL_Mutex* mutex, TickType_t block_time) { if (mutex == NULL) { WOLFSSL_ERROR_MSG("esp_CryptHwMutexLock called with null mutex"); return BAD_MUTEX_E; @@ -60,7 +83,7 @@ int esp_CryptHwMutexLock(wolfSSL_Mutex* mutex, TickType_t xBlockTime) { #ifdef SINGLE_THREADED return wc_LockMutex(mutex); /* xSemaphoreTake take with portMAX_DELAY */ #else - return ((xSemaphoreTake( *mutex, xBlockTime ) == pdTRUE) ? 0 : BAD_MUTEX_E); + return ((xSemaphoreTake( *mutex, block_time ) == pdTRUE) ? 
0 : BAD_MUTEX_E); #endif } @@ -68,7 +91,7 @@ int esp_CryptHwMutexLock(wolfSSL_Mutex* mutex, TickType_t xBlockTime) { * call the ESP-IDF mutex UNlock; xSemaphoreGive * */ -int esp_CryptHwMutexUnLock(wolfSSL_Mutex* mutex) { +WOLFSSL_LOCAL int esp_CryptHwMutexUnLock(wolfSSL_Mutex* mutex) { if (mutex == NULL) { WOLFSSL_ERROR_MSG("esp_CryptHwMutexLock called with null mutex"); return BAD_MUTEX_E; @@ -81,22 +104,13 @@ int esp_CryptHwMutexUnLock(wolfSSL_Mutex* mutex) { return 0; #endif } +#endif /* WOLFSSL_ESP32_CRYPT, etc. */ -/* -** Version / Platform info. + +/* esp_ShowExtendedSystemInfo and supporting info. ** -** This could evolve into a wolfSSL-wide feature. For now, here only. See: -** https://github.com/wolfSSL/wolfssl/pull/6149 +** available regardless if HW acceleration is turned on or not. */ -#if defined(WOLFSSL_ESPIDF) - #include - #include "sdkconfig.h" - const char* TAG = "Version Info"; - #define WOLFSSL_VERSION_PRINTF(...) ESP_LOGI(TAG, __VA_ARGS__) -#else - #include - #define WOLFSSL_VERSION_PRINTF(...) 
{ printf(__VA_ARGS__); printf("\n"); } -#endif /* ******************************************************************************* @@ -182,8 +196,15 @@ static int ShowExtendedSystemInfo_platform_espressif() WOLFSSL_VERSION_PRINTF("ESP32_CRYPT is enabled for ESP32-S2."); #elif defined(CONFIG_IDF_TARGET_ESP32S3) WOLFSSL_VERSION_PRINTF("ESP32_CRYPT is enabled for ESP32-S3."); +#elif defined(CONFIG_IDF_TARGET_ESP32C3) + WOLFSSL_VERSION_PRINTF("ESP32_CRYPT is enabled for ESP32-C3."); +#elif defined(CONFIG_IDF_TARGET_ESP32C6) + WOLFSSL_VERSION_PRINTF("ESP32_CRYPT is enabled for ESP32-C6."); +#elif defined(CONFIG_IDF_TARGET_ESP32H2) + WOLFSSL_VERSION_PRINTF("ESP32_CRYPT is enabled for ESP32-H2."); #else -#error "ESP32_CRYPT not yet supported on this IDF TARGET" + /* this should have been detected & disabled in user_settings.h */ + #error "ESP32_CRYPT not yet supported on this IDF TARGET" #endif /* Even though enabled, some specifics may be disabled */ @@ -200,9 +221,10 @@ static int ShowExtendedSystemInfo_platform_espressif() #if defined(NO_WOLFSSL_ESP32_CRYPT_RSA_PRI) WOLFSSL_VERSION_PRINTF("NO_WOLFSSL_ESP32_CRYPT_RSA_PRI defined!" "(disabled HW RSA)"); -#endif #endif +#endif /* ! NO_ESP32_CRYPT */ + return 0; } #endif @@ -230,8 +252,16 @@ static int ShowExtendedSystemInfo_git() ** but not desired for introspection which requires object code to be ** maximally bitwise-invariant. 
*/ + + +#if defined(LIBWOLFSSL_VERSION_GIT_TAG) + /* git config describe --tags --abbrev=0 */ + WOLFSSL_VERSION_PRINTF("LIBWOLFSSL_VERSION_GIT_TAG = %s", + LIBWOLFSSL_VERSION_GIT_TAG); +#endif + #if defined(LIBWOLFSSL_VERSION_GIT_ORIGIN) - /* git config --get remote.origin.url */ + /* git config --get remote.origin.url */ WOLFSSL_VERSION_PRINTF("LIBWOLFSSL_VERSION_GIT_ORIGIN = %s", LIBWOLFSSL_VERSION_GIT_ORIGIN); #endif @@ -243,16 +273,19 @@ static int ShowExtendedSystemInfo_git() #endif #if defined(LIBWOLFSSL_VERSION_GIT_HASH) + /* git rev-parse HEAD */ WOLFSSL_VERSION_PRINTF("LIBWOLFSSL_VERSION_GIT_HASH = %s", LIBWOLFSSL_VERSION_GIT_HASH); #endif #if defined(LIBWOLFSSL_VERSION_GIT_SHORT_HASH ) + /* git rev-parse --short HEAD */ WOLFSSL_VERSION_PRINTF("LIBWOLFSSL_VERSION_GIT_SHORT_HASH = %s", LIBWOLFSSL_VERSION_GIT_SHORT_HASH); #endif #if defined(LIBWOLFSSL_VERSION_GIT_HASH_DATE) + /* git show --no-patch --no-notes --pretty=\'\%cd\' */ WOLFSSL_VERSION_PRINTF("LIBWOLFSSL_VERSION_GIT_HASH_DATE = %s", LIBWOLFSSL_VERSION_GIT_HASH_DATE); #endif @@ -292,10 +325,9 @@ static int ShowExtendedSystemInfo_platform() /* ******************************************************************************* -** The public ShowExtendedSystemInfo() +** The internal, portable, but currently private ShowExtendedSystemInfo() ******************************************************************************* */ - int ShowExtendedSystemInfo(void) { WOLFSSL_VERSION_PRINTF("Extended Version and Platform Information."); @@ -324,12 +356,140 @@ int ShowExtendedSystemInfo(void) return 0; } - - -int esp_ShowExtendedSystemInfo() +WOLFSSL_LOCAL int esp_ShowExtendedSystemInfo() { return ShowExtendedSystemInfo(); } +/* Print a MATH_INT_T attribute list. + * + * Note with the right string parameters, the result can be pasted as + * initialization code. 
+ */ +WOLFSSL_LOCAL int esp_show_mp_attributes(char* c, MATH_INT_T* X) +{ + static const char* MP_TAG = "MATH_INT_T"; + int ret = 0; + if (X == NULL) { + ret = -1; + ESP_LOGV(MP_TAG, "esp_show_mp_attributes called with X == NULL"); + } + else { + ESP_LOGI(MP_TAG, ""); + ESP_LOGI(MP_TAG, "%s.used = %d;", c, X->used); +#if defined(WOLFSSL_SP_INT_NEGATIVE) || defined(USE_FAST_MATH) + ESP_LOGI(MP_TAG, "%s.sign = %d;", c, X->sign); #endif + } + return ret; +} +/* Print a MATH_INT_T value. + * + * Note with the right string parameters, the result can be pasted as + * initialization code. + */ +WOLFSSL_LOCAL int esp_show_mp(char* c, MATH_INT_T* X) +{ + static const char* MP_TAG = "MATH_INT_T"; + int ret = MP_OKAY; + int words_to_show = 0; + size_t i; + + if (X == NULL) { + ret = -1; + ESP_LOGV(MP_TAG, "esp_show_mp called with X == NULL"); + } + else { + words_to_show = X->used; + /* if too small, we'll show just 1 word */ + if (words_to_show < 1) { + ESP_LOGI(MP_TAG, "Bad word count. Adjusting from %d to %d", + words_to_show, + 1); + words_to_show = 1; + } + #ifdef MAX_WORDS_ESP_SHOW_MP + /* if too big, we'll show MAX_WORDS_ESP_SHOW_MP words */ + if (words_to_show > MAX_WORDS_ESP_SHOW_MP) { + ESP_LOGI(MP_TAG, "Limiting word count from %d to %d", + words_to_show, + MAX_WORDS_ESP_SHOW_MP); + words_to_show = MAX_WORDS_ESP_SHOW_MP; + } + #endif + ESP_LOGI(MP_TAG, "%s:",c); + esp_show_mp_attributes(c, X); + for (i = 0; i < words_to_show; i++) { + ESP_LOGI(MP_TAG, "%s.dp[%2d] = 0x%08x; /* %2d */ ", + c, /* the supplied variable name */ + i, /* the index, i for dp[%d] */ + (unsigned int)X->dp[i], /* the value */ + i /* the index, again, for comment */ + ); + } + ESP_LOGI(MP_TAG, ""); + } + return ret; +} + +/* Perform a full mp_cmp and binary compare. 
+ * (typically only used during debugging) */ +WOLFSSL_LOCAL int esp_mp_cmp(char* name_A, MATH_INT_T* A, char* name_B, MATH_INT_T* B) +{ + int ret = MP_OKAY; + int e; + + e = memcmp(A, B, sizeof(mp_int)); + if (mp_cmp(A, B) == MP_EQ) { + if (e == 0) { + /* we always want to be here: both esp_show_mp and binary equal! */ + ESP_LOGV(TAG, "fp_cmp and memcmp match for %s and %s!", + name_A, name_B); + } + else { + ret = MP_VAL; + ESP_LOGE(TAG, "fp_cmp match, memcmp mismatch for %s and %s!", + name_A, name_B); + if (A->dp[0] == 1) { + ESP_LOGE(TAG, "Both memcmp and fp_cmp fail for %s and %s!", + name_A, name_B); + } + } + } + else { + ret = MP_VAL; + if (e == 0) { + /* if mp_cmp says different, + * but memcmp says equal, that's a problem */ + ESP_LOGE(TAG, "memcmp error for %s and %s!", + name_A, name_B); + } + else { + /* in the normal case where mp_cmp and memcmp say the + * values are different, we'll optionally show details. */ + ESP_LOGI(TAG, "e = %d", e); + ESP_LOGE(TAG, "fp_cmp mismatch! memcmp " + "offset 0x%02x for %s vs %s!", + e, name_A, name_B); + if (A->dp[0] == 1) { + ESP_LOGE(TAG, "Both memcmp and fp_cmp fail for %s and %s!", + name_A, name_B); + } + } + ESP_LOGV(TAG, "Mismatch for %s and %s!", + name_A, name_B); + } + + if (ret == MP_OKAY) { + ESP_LOGV(TAG, "esp_mp_cmp equal for %s and %s!", + name_A, name_B); + } + else { +#ifdef DEBUG_WOLFSSL + esp_show_mp(name_A, A); + esp_show_mp(name_B, B); +#endif + } + return ret; +} diff --git a/wolfcrypt/src/sha.c b/wolfcrypt/src/sha.c index 603b8be6f5..6943b50b37 100644 --- a/wolfcrypt/src/sha.c +++ b/wolfcrypt/src/sha.c @@ -58,6 +58,28 @@ #include #endif +#undef WOLFSSL_USE_ESP32_CRYPT_HASH_HW +#if defined(WOLFSSL_ESP32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32_CRYPT_HASH) + /* define a single keyword for simplicity & readability + * + * by default the HW acceleration is on for ESP32-WROOM32 + * but individual components can be turned off. 
+ */ + #define WOLFSSL_USE_ESP32_CRYPT_HASH_HW + #include "wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h" + + /* Although we have hardware acceleration, + ** we may need to fall back to software */ + #define USE_SHA_SOFTWARE_IMPL + +#elif defined(WOLFSSL_USE_ESP32C3_CRYPT_HASH_HW) + /* The ESP32C3 is different; HW crypto here. Not yet implemented. + ** We'll be using software for RISC-V at this time */ +#else + #undef WOLFSSL_USE_ESP32_CRYPT_HASH_HW +#endif + #undef WOLFSSL_USE_ESP32_CRYPT_HASH_HW #if defined(WOLFSSL_ESP32_CRYPT) && \ !defined(NO_WOLFSSL_ESP32_CRYPT_HASH) @@ -775,6 +797,9 @@ int wc_ShaFinal(wc_Sha* sha, byte* hash) else { ret = esp_sha_process(sha, (const byte*)local); } + #elif defined(WOLFSSL_USE_ESP32C3_CRYPT_HASH_HW) + /* The ESP32C3 is different; SW crypto here. Not yet implemented */ + ret = XTRANSFORM(sha, (const byte*)local); #else /* ** The #if defined(WOLFSSL_USE_ESP32C3_CRYPT_HASH_HW) also falls @@ -835,6 +860,9 @@ int wc_ShaFinal(wc_Sha* sha, byte* hash) XMEMCPY(hash, (byte *)&sha->digest[0], WC_SHA_DIGEST_SIZE); + /* we'll always reset state upon exit and return the error code from above, + * which may cause fall back to SW if HW is busy. we do not return result + * of initSha here */ (void)InitSha(sha); /* reset state */ return ret; diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c index 747d3dd46f..0e78167ba8 100644 --- a/wolfcrypt/src/sha256.c +++ b/wolfcrypt/src/sha256.c @@ -109,7 +109,9 @@ on the specific device platform. 
** ** Beware of possible conflict in test.c (that one now named TEST_TAG) */ - static const char* TAG = "wc_sha256"; + #if defined(WOLFSSL_USE_ESP32_CRYPT_HASH_HW) + static const char* TAG = "wc_sha256"; + #endif #endif #if defined(WOLFSSL_TI_HASH) diff --git a/wolfcrypt/src/sha512.c b/wolfcrypt/src/sha512.c index 83af39c4b0..d8d6b16b88 100644 --- a/wolfcrypt/src/sha512.c +++ b/wolfcrypt/src/sha512.c @@ -1021,6 +1021,7 @@ static WC_INLINE int Sha512Final(wc_Sha512* sha512) ByteReverseWords64(sha512->digest, sha512->digest, WC_SHA512_DIGEST_SIZE); #endif + return 0; } @@ -1275,7 +1276,7 @@ static int InitSha384(wc_Sha384* sha384) sha384->loLen = 0; sha384->hiLen = 0; -#if defined(WOLFSSL_USE_ESP32_CRYPT_HASH_HW) +#if defined(WOLFSSL_USE_ESP32_CRYPT_HASH_HW) /* HW needs to be carefully initialized, taking into account soft copy. ** If already in use; copy may revert to SW as needed. */ esp_sha_init(&(sha384->ctx), WC_HASH_TYPE_SHA384); diff --git a/wolfcrypt/src/tfm.c b/wolfcrypt/src/tfm.c index cceb6801d6..e9bf039b16 100644 --- a/wolfcrypt/src/tfm.c +++ b/wolfcrypt/src/tfm.c @@ -52,6 +52,39 @@ #include /* will define asm MACROS or C ones */ #include /* common functions */ +#ifdef WOLFSSL_ESPIDF + #include + #include +#endif + +#if defined(WOLFSSL_ESP32_CRYPT_RSA_PRI) + static const char* TAG = "TFM"; /* esp log breadcrumb */ + #if !defined(NO_WOLFSSL_ESP32_CRYPT_RSA_PRI) + /* Each individual math HW can be turned on or off. + * Listed in order of complexity and historical difficulty. 
*/ + #define WOLFSSL_ESP32_CRYPT_RSA_PRI_MP_MUL + #define WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD + #define WOLFSSL_ESP32_CRYPT_RSA_PRI_MULMOD + #endif + + #if defined(NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_MP_MUL) + #undef WOLFSSL_ESP32_CRYPT_RSA_PRI_MP_MUL + #endif + + #if defined(NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD) + #undef WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD + #endif + + #if defined(NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_MULMOD) + #undef WOLFSSL_ESP32_CRYPT_RSA_PRI_MULMOD + #endif + + /* Note with HW there's a EPS_RSA_EXPT_XBTIS setting + * as for some small numbers, SW may be faster. + * See ESP_LOGV messages for EPS_RSA_EXPT_XBTIS values. */ + +#endif /* WOLFSSL_ESP32_CRYPT_RSA_PRI */ + #if defined(FREESCALE_LTC_TFM) #include #endif @@ -139,11 +172,44 @@ int s_fp_add(fp_int *a, fp_int *b, fp_int *c) c->used = y; t = 0; +#ifdef HONOR_MATH_USED_LENGTH + for (x = 0; x < y; x++) { + if ( (x < a->used) && (x < b->used) ) { + /* x is less than both [a].used and [b].used, so we add both */ + t += ((fp_word)a->dp[x]) + ((fp_word)b->dp[x]); + } + else { + /* Here we honor the actual [a].used and [b].used values + * and NOT assume that values beyond [used] are zero. */ + if ((x >= a->used) && (x < b->used)) { + /* x more than [a].used, [b] ok, so just add [b] */ + t += /* ((fp_word)(0)) + */ ((fp_word)b->dp[x]); + } + else { + if ((x < a->used) && (x >= b->used)) { + /* x more than [b].used, [a] ok, so just add [a] */ + t += ((fp_word)a->dp[x]) /* + (fp_word)(0) */; + } + else { + /* we should never get here, as a.used cannot be greater + * than b.used, while b.used is greater than a.used! 
*/ + /* t += 0 + 0 */ + } + } + } + c->dp[x] = (fp_digit)t; + t >>= DIGIT_BIT; + } + +#else + /* the original code */ for (x = 0; x < y; x++) { t += ((fp_word)a->dp[x]) + ((fp_word)b->dp[x]); c->dp[x] = (fp_digit)t; t >>= DIGIT_BIT; } +#endif /* HONOR_MATH_USED_LENGTH */ + if (t != 0) { if (x == FP_SIZE) return FP_VAL; @@ -229,15 +295,9 @@ void s_fp_sub(fp_int *a, fp_int *b, fp_int *c) /* c = a * b */ int fp_mul(fp_int *A, fp_int *B, fp_int *C) { - int ret = 0; + int ret = FP_OKAY; int y, yy, oldused; -#if defined(WOLFSSL_ESP32_CRYPT_RSA_PRI) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_RSA_PRI) - ret = esp_mp_mul(A, B, C); - if(ret != -2) return ret; -#endif - oldused = C->used; y = MAX(A->used, B->used); @@ -249,6 +309,36 @@ int fp_mul(fp_int *A, fp_int *B, fp_int *C) goto clean; } +#if defined(WOLFSSL_ESP32_CRYPT_RSA_PRI_MP_MUL) + if (esp_hw_validation_active()) { + ESP_LOGV(TAG, "Skipping call to esp_mp_mul " + "during active validation."); + } + else { + ret = esp_mp_mul(A, B, C); /* HW accelerated multiply */ + switch (ret) { + case MP_OKAY: + goto clean; /* success */ + break; + + case WC_HW_WAIT_E: /* MP_HW_BUSY math HW busy, fall back */ + case MP_HW_FALLBACK: /* forced fallback from HW to SW */ + case MP_HW_VALIDATION_ACTIVE: /* use SW to compare to HW */ + /* fall back to software, below */ + break; + + default: + /* Once we've failed, exit without trying to continue. + * We may have mangled operands: (e.g. Z = X * Z) + * Future implementation may consider saving operands, + * but errors should never occur. */ + goto clean; /* error */ + break; + } + } + /* fall through to software calcs */ +#endif /* WOLFSSL_ESP32_CRYPT_RSA_PRI_MP_MUL */ + /* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size of the largest input. We also want to avoid doing excess mults if the inputs are not close to the next power of two. 
That is, for example, @@ -536,6 +626,7 @@ WC_INLINE static int fp_mul_comba_mulx(fp_int *A, fp_int *B, fp_int *C) } #endif +/* C = (A * B) */ int fp_mul_comba(fp_int *A, fp_int *B, fp_int *C) { int ret = 0; @@ -602,6 +693,8 @@ int fp_mul_comba(fp_int *A, fp_int *B, fp_int *C) COMBA_FINI; dst->used = pa; + + /* warning: WOLFSSL_SP_INT_NEGATIVE may disable negative numbers */ dst->sign = A->sign ^ B->sign; fp_clamp(dst); fp_copy(dst, C); @@ -1802,7 +1895,7 @@ int fp_exptmod_nb(exptModNb_t* nb, fp_int* G, fp_int* X, fp_int* P, fp_int* Y) switch (nb->state) { case TFM_EXPTMOD_NB_INIT: - /* now setup montgomery */ + /* now setup montgomery */ if ((err = fp_montgomery_setup(P, &nb->mp)) != FP_OKAY) { nb->state = TFM_EXPTMOD_NB_INIT; return err; @@ -2158,6 +2251,7 @@ static int _fp_exptmod_ct(fp_int * G, fp_int * X, int digits, fp_int * P, #ifdef WOLFSSL_SMALL_STACK XFREE(R, NULL, DYNAMIC_TYPE_BIGINT); #endif + return err; } @@ -2993,13 +3087,11 @@ static int _fp_exptmod_base_2(fp_int * X, int digits, fp_int * P, #undef WINSIZE #endif - +/* Y = (G * X) mod P */ int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) { - -#if defined(WOLFSSL_ESP32_CRYPT_RSA_PRI) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_RSA_PRI) - int x = fp_count_bits (X); +#if defined(WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD) + int retHW = FP_OKAY; #endif /* handle modulus of zero and prevent overflows */ @@ -3019,12 +3111,37 @@ int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) return FP_OKAY; } -#if defined(WOLFSSL_ESP32_CRYPT_RSA_PRI) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_RSA_PRI) - if(x > EPS_RSA_EXPT_XBTIS) { - return esp_mp_exptmod(G, X, x, P, Y); +#if defined(WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD) + if (esp_hw_validation_active()) { + ESP_LOGV(TAG, "Skipping call to esp_mp_exptmod " + "during active validation."); } -#endif + else { + /* HW accelerated exptmod */ + retHW = esp_mp_exptmod(G, X, P, Y); + switch (retHW) { + case MP_OKAY: + /* successfully computed in HW */ + return retHW; + 
break; + + case WC_HW_WAIT_E: /* MP_HW_BUSY math HW busy, fall back */ + case MP_HW_FALLBACK: /* forced fallback from HW to SW */ + case MP_HW_VALIDATION_ACTIVE: /* use SW to compare to HW */ + /* use software calc */ + break; + + default: + /* Once we've failed, exit without trying to continue. + * We may have mangled operands: (e.g. Z = X * Z) + * Future implementation may consider saving operands, + * but hard errors should never actually occur. */ + return retHW; /* error */ + break; + } /* switch */ + } /* if validation check */ + /* fall through to software calcs */ +#endif /* WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD */ if (X->sign == FP_NEG) { #ifndef POSITIVE_EXP_ONLY /* reduce stack if assume no negatives */ @@ -3049,11 +3166,11 @@ int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) if (err == FP_OKAY) { fp_copy(X, &tmp[1]); tmp[1].sign = FP_ZPOS; -#ifdef TFM_TIMING_RESISTANT + #ifdef TFM_TIMING_RESISTANT err = _fp_exptmod_ct(&tmp[0], &tmp[1], tmp[1].used, P, Y); -#else + #else err = _fp_exptmod_nct(&tmp[0], &tmp[1], P, Y); -#endif + #endif if ((err == 0) && (P->sign == FP_NEG)) { err = fp_add(Y, P, Y); } @@ -3064,7 +3181,7 @@ int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) return err; #else return FP_VAL; -#endif +#endif /* POSITIVE_EXP_ONLY check */ } else if (G->used == 1 && G->dp[0] == 2) { return _fp_exptmod_base_2(X, X->used, P, Y); @@ -3081,10 +3198,8 @@ int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) int fp_exptmod_ex(fp_int * G, fp_int * X, int digits, fp_int * P, fp_int * Y) { - -#if defined(WOLFSSL_ESP32_CRYPT_RSA_PRI) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_RSA_PRI) - int x = fp_count_bits (X); +#if defined(WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD) + int retHW = FP_OKAY; #endif /* handle modulus of zero and prevent overflows */ @@ -3104,12 +3219,30 @@ int fp_exptmod_ex(fp_int * G, fp_int * X, int digits, fp_int * P, fp_int * Y) return FP_OKAY; } -#if defined(WOLFSSL_ESP32_CRYPT_RSA_PRI) && \ - 
!defined(NO_WOLFSSL_ESP32_CRYPT_RSA_PRI) - if(x > EPS_RSA_EXPT_XBTIS) { - return esp_mp_exptmod(G, X, x, P, Y); - } -#endif +#if defined(WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD) + retHW = esp_mp_exptmod(G, X, P, Y); + switch (retHW) { + case MP_OKAY: + /* successfully computed in HW */ + return retHW; + break; + + case WC_HW_WAIT_E: /* MP_HW_BUSY math HW busy, fall back */ + case MP_HW_FALLBACK: /* forced fallback from HW to SW */ + case MP_HW_VALIDATION_ACTIVE: /* use SW to compare to HW */ + /* use software calc */ + break; + + default: + /* Once we've failed, exit without trying to continue. + * We may have mangled operands: (e.g. Z = X * Z) + * Future implementation may consider saving operands, + * but hard errors should never actually occur. */ + return retHW; + break; + } /* HW result switch */ + /* falling through to SW: */ +#endif /* WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD */ if (X->sign == FP_NEG) { #ifndef POSITIVE_EXP_ONLY /* reduce stack if assume no negatives */ @@ -3166,9 +3299,8 @@ int fp_exptmod_ex(fp_int * G, fp_int * X, int digits, fp_int * P, fp_int * Y) int fp_exptmod_nct(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) { -#if defined(WOLFSSL_ESP32_CRYPT_RSA_PRI) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_RSA_PRI) - int x = fp_count_bits (X); +#if defined(WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD) + int retHW = FP_OKAY; #endif /* handle modulus of zero and prevent overflows */ @@ -3188,11 +3320,29 @@ int fp_exptmod_nct(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) return FP_OKAY; } -#if defined(WOLFSSL_ESP32_CRYPT_RSA_PRI) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_RSA_PRI) - if(x > EPS_RSA_EXPT_XBTIS) { - return esp_mp_exptmod(G, X, x, P, Y); +#if defined(WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD) + retHW = esp_mp_exptmod(G, X, P, Y); + switch (retHW) { + case MP_OKAY: + /* successfully computed in HW */ + return retHW; + break; + + case WC_HW_WAIT_E: /* MP_HW_BUSY math HW busy, fall back */ + case MP_HW_FALLBACK: /* forced fallback from HW to SW */ + case 
MP_HW_VALIDATION_ACTIVE: /* use SW to compare to HW */ + /* use software calc */ + break; + + default: + /* Once we've failed, exit without trying to continue. + * We may have mangled operands: (e.g. Z = X * Z) + * Future implementation may consider saving operands, + * but hard errors should never actually occur. */ + return retHW; + break; } + /* falling through to SW: */ #endif if (X->sign == FP_NEG) { @@ -4505,16 +4655,32 @@ int wolfcrypt_mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d) #endif { - #if defined(WOLFSSL_ESP32_CRYPT_RSA_PRI) && \ - !defined(NO_WOLFSSL_ESP32_CRYPT_RSA_PRI) - int A = fp_count_bits (a); - int B = fp_count_bits (b); + int ret = MP_OKAY; +#ifdef WOLFSSL_ESP32_CRYPT_RSA_PRI_MULMOD + ret = esp_mp_mulmod(a, b, c, d); + switch (ret) { + case MP_OKAY: + /* successfully computed in HW */ + break; - if( A >= ESP_RSA_MULM_BITS && B >= ESP_RSA_MULM_BITS) - return esp_mp_mulmod(a, b, c, d); - else - #endif - return fp_mulmod(a, b, c, d); + case WC_HW_WAIT_E: /* MP_HW_BUSY math HW busy, fall back */ + case MP_HW_FALLBACK: /* forced fallback from HW to SW */ + case MP_HW_VALIDATION_ACTIVE: /* use SW to compare to HW */ + /* use software calc */ + ret = fp_mulmod(a, b, c, d); + break; + + default: + /* Once we've failed, exit without trying to continue. + * We may have mangled operands: (e.g. Z = X * Z) + * Future implementation may consider saving operands, + * but hard errors should never actually occur. 
*/ + break; + } +#else /* no HW */ + ret = fp_mulmod(a, b, c, d); +#endif /* WOLFSSL_ESP32_CRYPT_RSA_PRI_MULMOD */ + return ret; } /* d = a - b (mod c) */ diff --git a/wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h b/wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h index 4bfef9ff4b..96c20f5cee 100644 --- a/wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h +++ b/wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h @@ -22,13 +22,160 @@ #define __ESP32_CRYPT_H__ -#include "wolfssl/wolfcrypt/settings.h" -#include /* for MATH_INT_T */ +#include "sdkconfig.h" /* ensure ESP-IDF settings are available everywhere */ -#include "esp_idf_version.h" -#include "esp_types.h" -#include "esp_log.h" +/* wolfSSL */ +#include /* references user_settings.h */ +#include +#include /* for MATH_INT_T */ +/* Espressif */ +#include +#include +#include + +/* exit codes to be used in tfm.c, sp_int.c, integer.c, etc. + * + * see wolfssl/wolfcrypt/error-crypt.h + * + * WC_HW_E - generic hardware failure. Consider falling back to SW. + * WC_HW_WAIT_E - waited too long for HW, fall back to SW + */ + +/* exit code only used in Espressif port */ + +/* MP_HW_FALLBACK: signal to caller to fall back to SW for math: + * algorithm not supported in HW + * known state needing only SW, (e.g. ctx copy) + * any other reason to force SW */ +#define MP_HW_FALLBACK (-108) + +/* MP_HW_VALIDATION_ACTIVE this is informative only: + * typically also means "MP_HW_FALLBACK": fall back to SW. + * optional HW validation active, so compute in SW to compare. + * fall back to SW, typically only used during debugging + */ +#define MP_HW_VALIDATION_ACTIVE (-109) + +/* +******************************************************************************* +******************************************************************************* +** +** Primary Settings: +** +** WOLFSSL_ESP32_CRYPT_RSA_PRI +** Defined in wolfSSL settings.h: this turns on or off esp32_mp math library.
+** Unless turned off, this is enabled by default for the ESP32 +** +** NO_ESP32_CRYPT +** When defined, disables all hardware acceleration on the ESP32 +** +** NO_WOLFSSL_ESP32_CRYPT_HASH +** Used to disable only hash hardware algorithms: SHA2, etc. +** +** WOLFSSL_NOSHA512_224 +** Define to disable SHA-512/224 +** +** WOLFSSL_NOSHA512_256 +** Define to disable SHA-512/256 +** +** NO_WOLFSSL_ESP32_CRYPT_AES +** Used to disable only AES hardware algorithms. Software used instead. +** +** NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_MP_MUL +** Turns off hardware acceleration esp_mp_mul() +** +** NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD +** Turns off hardware acceleration esp_mp_exptmod() +** +** NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_MULMOD +** Turns off hardware acceleration esp_mp_mulmod() +** +******************************************************************************* +** Math library settings: TFM +******************************************************************************* +** Listed in increasing order of complexity: +** +** WOLFSSL_ESP32_CRYPT_RSA_PRI_MP_MUL +** When defined, use hardware acceleration esp_mp_mul() +** for Large Number Multiplication: Z = X * Y +** +** WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD +** When defined, use hardware acceleration esp_mp_exptmod() +** for Large Number Modular Exponentiation Z = X^Y mod M +** +** WOLFSSL_ESP32_CRYPT_RSA_PRI_MULMOD +** When defined, use hardware acceleration esp_mp_mulmod() +** for Large Number Modular Multiplication: Z = X * Y mod M +** +******************************************************************************* +** Optional Settings: +******************************************************************************* +** +** WOLFSSL_HW_METRICS +** Enables metric counters for calls to HW, success, fall back, oddities. +** +** DEBUG_WOLFSSL +** Turns on development testing. Validates HW accelerated results to software +** - Automatically turns on WOLFSSL_HW_METRICS +** +** LOG_LOCAL_LEVEL +** Debugging.
Default value is ESP_LOG_DEBUG +** +** ESP_VERIFY_MEMBLOCK +** Used to re-read data from registers in esp32_mp & verify written contents +** actually match the source data. +** +** WOLFSSL_ESP32_CRYPT_DEBUG +** When defined, enables hardware cryptography debugging +** +** NO_HW_MATH_TEST +** Even if HW is enabled, do not run HW math tests. See HW_MATH_ENABLED. +** +** NO_ESP_MP_MUL_EVEN_ALT_CALC +** Used during Z = X × Y mod M +** By default, even moduli use a two step HW esp_mp_mul with SW mp_mod. +** Enable this to instead fall back to pure software mp_mulmod. +** +** NO_RECOVER_SOFTWARE_CALC +** When defined, will NOT recover software calculation result when not +** matched with hardware. Useful only during development. Needs DEBUG_WOLFSSL +** +** ESP_PROHIBIT_SMALL_X +** When set to 1 X operands less than 8 bits will fall back to SW +** +** ESP_NO_ERRATA_MITIGATION +** Disable all errata mitigation code. +** +** USE_ESP_DPORT_ACCESS_READ_BUFFER +** Sets ESP_NO_ERRATA_MITIGATION and uses esp_dport_access_read_buffer() +** +******************************************************************************* +** Settings used from +******************************************************************************* +** +** ESP_IDF_VERSION_MAJOR +** +** +******************************************************************************* +** Settings used from ESP-IDF (sdkconfig.h) +******************************************************************************* +** +** +******************************************************************************* +** +** +******************************************************************************* +** Informative settings. Not meant to be edited +******************************************************************************* +** +** HW_MATH_ENABLED +** Used to detect if any hardware math acceleration algorithms are used. +** This is typically only used to flag wolfCrypt tests to run HW tests. +** See NO_HW_MATH_TEST. 
+** +******************************************************************************* +*/ #ifdef WOLFSSL_ESP32_CRYPT_DEBUG #undef LOG_LOCAL_LEVEL #define LOG_LOCAL_LEVEL ESP_LOG_DEBUG @@ -38,17 +185,8 @@ #endif #include -#if defined(CONFIG_IDF_TARGET_ESP32C3) - /* no includes for ESP32C3 at this time (no HW implemented yet) */ -#elif defined(CONFIG_IDF_TARGET_ESP32S3) - #include "soc/dport_reg.h" - #include "soc/hwcrypto_reg.h" - #if defined(ESP_IDF_VERSION_MAJOR) && ESP_IDF_VERSION_MAJOR >= 5 - #include "esp_private/periph_ctrl.h" - #else - #include "driver/periph_ctrl.h" - #endif -#else + +#if defined(CONFIG_IDF_TARGET_ESP32) #include "soc/dport_reg.h" #include "soc/hwcrypto_reg.h" @@ -67,19 +205,72 @@ #else #include #endif - + #define ESP_PROHIBIT_SMALL_X 0 +#elif defined(CONFIG_IDF_TARGET_ESP32S2) + #include "soc/dport_reg.h" + #include "soc/hwcrypto_reg.h" + #if defined(ESP_IDF_VERSION_MAJOR) && ESP_IDF_VERSION_MAJOR >= 5 + #include "esp_private/periph_ctrl.h" + #else + #include "driver/periph_ctrl.h" + #endif + #define ESP_PROHIBIT_SMALL_X 0 +#elif defined(CONFIG_IDF_TARGET_ESP32S3) + #include "soc/dport_reg.h" + #include "soc/hwcrypto_reg.h" + #if defined(ESP_IDF_VERSION_MAJOR) && ESP_IDF_VERSION_MAJOR >= 5 + #include "esp_private/periph_ctrl.h" + #else + #include "driver/periph_ctrl.h" + #endif + #define ESP_PROHIBIT_SMALL_X 0 +#elif defined(CONFIG_IDF_TARGET_ESP32C3) + /* no includes for ESP32C3 at this time (no HW implemented yet) */ +#else + /* not yet supported. 
no HW */ #endif +#if defined(USE_ESP_DPORT_ACCESS_READ_BUFFER) + #define ESP_NO_ERRATA_MITIGATION +#endif #ifdef __cplusplus - extern "C" { +extern "C" +{ #endif -int esp_ShowExtendedSystemInfo(void); + /* + ****************************************************************************** + ** Some common esp utilities + ****************************************************************************** + */ -int esp_CryptHwMutexInit(wolfSSL_Mutex* mutex); -int esp_CryptHwMutexLock(wolfSSL_Mutex* mutex, TickType_t xBloxkTime); -int esp_CryptHwMutexUnLock(wolfSSL_Mutex* mutex); + WOLFSSL_LOCAL int esp_ShowExtendedSystemInfo(void); + + /* Compare MATH_INT_T A to MATH_INT_T B + * During debug, the strings name_A and name_B can help + * identify variable name. */ + WOLFSSL_LOCAL int esp_mp_cmp(char* name_A, MATH_INT_T* A, char* name_B, MATH_INT_T* B); + + /* Show MATH_INT_T value attributes. */ + WOLFSSL_LOCAL int esp_show_mp_attributes(char* c, MATH_INT_T* X); + + /* Show MATH_INT_T value. + * + * Calls esp_show_mp_attributes(). + * + * During debug, the string name_X can help + * identify variable name. */ + WOLFSSL_LOCAL int esp_show_mp(char* name_X, MATH_INT_T* X); + + /* To use a Mutex, it must first be initialized */ + WOLFSSL_LOCAL int esp_CryptHwMutexInit(wolfSSL_Mutex* mutex); + + /* When the HW is in use, the mutex will be locked. */ + WOLFSSL_LOCAL int esp_CryptHwMutexLock(wolfSSL_Mutex* mutex, TickType_t block_time); + + /* Release the mutex to indicate the HW is no longer in use.
*/ + WOLFSSL_LOCAL int esp_CryptHwMutexUnLock(wolfSSL_Mutex* mutex); #ifndef NO_AES @@ -89,20 +280,29 @@ int esp_CryptHwMutexUnLock(wolfSSL_Mutex* mutex); #include "rom/aes.h" #endif - typedef enum tagES32_AES_PROCESS { - ESP32_AES_LOCKHW = 1, + typedef enum tagES32_AES_PROCESS + { + ESP32_AES_LOCKHW = 1, ESP32_AES_UPDATEKEY_ENCRYPT = 2, ESP32_AES_UPDATEKEY_DECRYPT = 3, ESP32_AES_UNLOCKHW = 4 } ESP32_AESPROCESS; struct Aes; /* see aes.h */ - int wc_esp32AesCbcEncrypt(struct Aes* aes, byte* out, const byte* in, word32 sz); - int wc_esp32AesCbcDecrypt(struct Aes* aes, byte* out, const byte* in, word32 sz); - int wc_esp32AesEncrypt(struct Aes *aes, const byte* in, byte* out); - int wc_esp32AesDecrypt(struct Aes *aes, const byte* in, byte* out); - -#endif + WOLFSSL_LOCAL int wc_esp32AesSupportedKeyLenValue(int keylen); + WOLFSSL_LOCAL int wc_esp32AesSupportedKeyLen(struct Aes* aes); + WOLFSSL_LOCAL int wc_esp32AesCbcEncrypt(struct Aes* aes, + byte* out, + const byte* in, + word32 sz); + WOLFSSL_LOCAL int wc_esp32AesCbcDecrypt(struct Aes* aes, + byte* out, + const byte* in, + word32 sz); + WOLFSSL_LOCAL int wc_esp32AesEncrypt(struct Aes *aes, const byte* in, byte* out); + WOLFSSL_LOCAL int wc_esp32AesDecrypt(struct Aes *aes, const byte* in, byte* out); + +#endif /* ! 
NO_AES */ #ifdef WOLFSSL_ESP32_CRYPT_DEBUG @@ -116,14 +316,34 @@ int esp_CryptHwMutexUnLock(wolfSSL_Mutex* mutex); defined(WOLFSSL_SHA384) || defined(WOLFSSL_SHA512) \ ) - /* RAW hash function APIs are not implemented with esp32 hardware acceleration*/ + /* RAW hash function APIs are not implemented with + * esp32 hardware acceleration*/ #define WOLFSSL_NO_HASH_RAW #define SHA_CTX ETS_SHAContext #if ESP_IDF_VERSION_MAJOR >= 4 - #include "esp32/rom/sha.h" - #elif defined(CONFIG_IDF_TARGET_ESP32S3) - #include "esp32s3/rom/sha.h" + #if defined(CONFIG_IDF_TARGET_ESP32) + #include "esp32/rom/sha.h" + #define WC_ESP_SHA_TYPE enum SHA_TYPE + #elif defined(CONFIG_IDF_TARGET_ESP32C2) + #include "esp32c2/rom/sha.h" + #define WC_ESP_SHA_TYPE SHA_TYPE + #elif defined(CONFIG_IDF_TARGET_ESP32C3) + #include "esp32c3/rom/sha.h" + #define WC_ESP_SHA_TYPE SHA_TYPE + #elif defined(CONFIG_IDF_TARGET_ESP32H2) + #include "esp32h2/rom/sha.h" + #define WC_ESP_SHA_TYPE SHA_TYPE + #elif defined(CONFIG_IDF_TARGET_ESP32S2) + #include "esp32s2/rom/sha.h" + #define WC_ESP_SHA_TYPE SHA_TYPE + #elif defined(CONFIG_IDF_TARGET_ESP32S3) + #include "esp32s3/rom/sha.h" + #define WC_ESP_SHA_TYPE SHA_TYPE + #else + #include "rom/sha.h" + #define WC_ESP_SHA_TYPE SHA_TYPE + #endif #else #include "rom/sha.h" #endif @@ -154,48 +374,49 @@ int esp_CryptHwMutexUnLock(wolfSSL_Mutex* mutex); ** ** the Espressif type: SHA1, SHA256, etc. */ - enum SHA_TYPE sha_type; + + WC_ESP_SHA_TYPE sha_type; /* we'll keep track of our own locks. ** actual enable/disable only occurs for ref_counts[periph] == 0 ** ** see ref_counts[periph] in periph_ctrl.c */ - byte lockDepth:7; /* 7 bits for a small number, pack with below. */ + byte lockDepth : 7; /* 7 bits for a small number, pack with below. */ /* 0 (false) this is NOT first block. ** 1 (true ) this is first block. 
*/ - byte isfirstblock:1; /* 1 bit only for true / false */ + byte isfirstblock : 1; /* 1 bit only for true / false */ } WC_ESP32SHA; - int esp_sha_init(WC_ESP32SHA* ctx, enum wc_HashType hash_type); - int esp_sha_init_ctx(WC_ESP32SHA* ctx); - int esp_sha_try_hw_lock(WC_ESP32SHA* ctx); - int esp_sha_hw_unlock(WC_ESP32SHA* ctx); + WOLFSSL_LOCAL int esp_sha_init(WC_ESP32SHA* ctx, enum wc_HashType hash_type); + WOLFSSL_LOCAL int esp_sha_init_ctx(WC_ESP32SHA* ctx); + WOLFSSL_LOCAL int esp_sha_try_hw_lock(WC_ESP32SHA* ctx); + WOLFSSL_LOCAL int esp_sha_hw_unlock(WC_ESP32SHA* ctx); struct wc_Sha; - int esp_sha_ctx_copy(struct wc_Sha* src, struct wc_Sha* dst); - int esp_sha_digest_process(struct wc_Sha* sha, byte blockprocess); - int esp_sha_process(struct wc_Sha* sha, const byte* data); + WOLFSSL_LOCAL int esp_sha_ctx_copy(struct wc_Sha* src, struct wc_Sha* dst); + WOLFSSL_LOCAL int esp_sha_digest_process(struct wc_Sha* sha, byte blockprocess); + WOLFSSL_LOCAL int esp_sha_process(struct wc_Sha* sha, const byte* data); #ifndef NO_SHA256 - struct wc_Sha256; - int esp_sha224_ctx_copy(struct wc_Sha256* src, struct wc_Sha256* dst); - int esp_sha256_ctx_copy(struct wc_Sha256* src, struct wc_Sha256* dst); - int esp_sha256_digest_process(struct wc_Sha256* sha, byte blockprocess); - int esp_sha256_process(struct wc_Sha256* sha, const byte* data); - int esp32_Transform_Sha256_demo(struct wc_Sha256* sha256, const byte* data); - #endif + struct wc_Sha256; + WOLFSSL_LOCAL int esp_sha224_ctx_copy(struct wc_Sha256* src, struct wc_Sha256* dst); + WOLFSSL_LOCAL int esp_sha256_ctx_copy(struct wc_Sha256* src, struct wc_Sha256* dst); + WOLFSSL_LOCAL int esp_sha256_digest_process(struct wc_Sha256* sha, byte blockprocess); + WOLFSSL_LOCAL int esp_sha256_process(struct wc_Sha256* sha, const byte* data); + WOLFSSL_LOCAL int esp32_Transform_Sha256_demo(struct wc_Sha256* sha256, const byte* data); +#endif /* TODO do we really call esp_sha512_process for WOLFSSL_SHA384 ? 
*/ #if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384) - struct wc_Sha512; - int esp_sha384_ctx_copy(struct wc_Sha512* src, struct wc_Sha512* dst); - int esp_sha512_ctx_copy(struct wc_Sha512* src, struct wc_Sha512* dst); - int esp_sha512_process(struct wc_Sha512* sha); - int esp_sha512_digest_process(struct wc_Sha512* sha, byte blockproc); - #endif + struct wc_Sha512; + WOLFSSL_LOCAL int esp_sha384_ctx_copy(struct wc_Sha512* src, struct wc_Sha512* dst); + WOLFSSL_LOCAL int esp_sha512_ctx_copy(struct wc_Sha512* src, struct wc_Sha512* dst); + WOLFSSL_LOCAL int esp_sha512_process(struct wc_Sha512* sha); + WOLFSSL_LOCAL int esp_sha512_digest_process(struct wc_Sha512* sha, byte blockproc); +#endif -#endif /* NO_SHA && */ +#endif /* NO_SHA && etc */ #if !defined(NO_RSA) || defined(HAVE_ECC) @@ -204,6 +425,7 @@ int esp_CryptHwMutexUnLock(wolfSSL_Mutex* mutex); #define ESP_RSA_TIMEOUT_CNT 0x249F00 #endif +#ifndef NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD /* * The parameter names in the Espressif implementation are arbitrary. * @@ -214,25 +436,119 @@ int esp_CryptHwMutexUnLock(wolfSSL_Mutex* mutex); /* Z = (X ^ Y) mod M : Espressif generic notation */ /* Y = (G ^ X) mod P : wolfSSL DH reference notation */ - int esp_mp_exptmod(MATH_INT_T* X, /* G */ - MATH_INT_T* Y, /* X */ - word32 Xbits, /* Ys typically = mp_count_bits (X) */ - MATH_INT_T* M, /* P */ - MATH_INT_T* Z); /* Y */ + WOLFSSL_LOCAL int esp_mp_exptmod(MATH_INT_T* X, /* G */ + MATH_INT_T* Y, /* X */ + MATH_INT_T* M, /* P */ + MATH_INT_T* Z); /* Y */ + /* HW_MATH_ENABLED is typically used in wolfcrypt tests */ + #undef HW_MATH_ENABLED + #define HW_MATH_ENABLED + #endif /* ! NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_EXPTMOD */ + + #ifndef NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_MP_MUL + /* Z = X * Y */ + WOLFSSL_LOCAL int esp_mp_mul(MATH_INT_T* X, + MATH_INT_T* Y, + MATH_INT_T* Z); + /* HW_MATH_ENABLED is typically used in wolfcrypt tests */ + #undef HW_MATH_ENABLED + #define HW_MATH_ENABLED +#endif /* ! 
NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_MP_MUL */ + +#ifndef NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_MULMOD + /* Z = X * Y (mod M) */ + WOLFSSL_LOCAL int esp_mp_mulmod(MATH_INT_T* X, + MATH_INT_T* Y, + MATH_INT_T* M, + MATH_INT_T* Z); + /* HW_MATH_ENABLED is typically used in wolfcrypt tests */ + #undef HW_MATH_ENABLED + #define HW_MATH_ENABLED +#endif /* ! NO_WOLFSSL_ESP32_CRYPT_RSA_PRI_MULMOD */ - /* Z = X * Y */ - int esp_mp_mul(MATH_INT_T* X, - MATH_INT_T* Y, - MATH_INT_T* Z); +#endif /* !NO_RSA || HAVE_ECC*/ - /* Z = X * Y (mod M) */ - int esp_mp_mulmod(MATH_INT_T* X, - MATH_INT_T* Y, - MATH_INT_T* M, - MATH_INT_T* Z); + WOLFSSL_LOCAL int esp_hw_validation_active(void); -#endif /* !NO_RSA || HAVE_ECC*/ +#ifdef WOLFSSL_HW_METRICS + int esp_hw_show_mp_metrics(void); +#endif + +#define ESP_MP_HW_LOCK_MAX_DELAY ( TickType_t ) 0xffUL + +/* + * Errata Mitigation. See + * https://www.espressif.com/sites/default/files/documentation/esp32_errata_en.pdf + * https://www.espressif.com/sites/default/files/documentation/esp32-c3_errata_en.pdf + * https://www.espressif.com/sites/default/files/documentation/esp32-s3_errata_en.pdf + */ +#if defined(CONFIG_IDF_TARGET_ESP32) && !defined(ESP_NO_ERRATA_MITIGATION) + /* some of these may be tuned for specific silicon versions */ + #define ESP_EM__MP_HW_WAIT_CLEAN {__asm__ __volatile__("memw");} + #define ESP_EM__MP_HW_WAIT_DONE {__asm__ __volatile__("memw");} + #define ESP_EM__POST_SP_MP_HW_LOCK {__asm__ __volatile__("memw");} + #define ESP_EM__PRE_MP_HW_WAIT_CLEAN {__asm__ __volatile__("memw");} + #define ESP_EM__PRE_DPORT_READ {__asm__ __volatile__("memw");} + #define ESP_EM__PRE_DPORT_WRITE {__asm__ __volatile__("memw");} + + /* Non-FIFO read may not be needed in chip revision v3.0. */ + #define ESP_EM__READ_NON_FIFO_REG {DPORT_SEQUENCE_REG_READ(0x3FF40078);} + + /* When the CPU frequency is 160 MHz, add six "nop" between two consecutive + ** FIFO reads. When the CPU frequency is 240 MHz, add seven "nop" between + ** two consecutive FIFO reads.
See 3.16 */ + #if defined(CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_80) + #define ESP_EM__3_16 { \ + __asm__ __volatile__("memw"); \ + __asm__ __volatile__("nop"); /* 1 */ \ + __asm__ __volatile__("nop"); /* 2 */ \ + __asm__ __volatile__("nop"); /* 3 */ \ + __asm__ __volatile__("nop"); /* 4 */ \ + __asm__ __volatile__("nop"); /* 5 */ \ + }; + #elif defined(CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_160) + #define ESP_EM__3_16 { \ + __asm__ __volatile__("memw"); \ + __asm__ __volatile__("nop"); /* 1 */ \ + __asm__ __volatile__("nop"); /* 2 */ \ + __asm__ __volatile__("nop"); /* 3 */ \ + __asm__ __volatile__("nop"); /* 4 */ \ + __asm__ __volatile__("nop"); /* 5 */ \ + __asm__ __volatile__("nop"); /* 6 */ \ + __asm__ __volatile__("nop"); /* 7 */ \ + }; + #elif defined(CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240) + #define ESP_EM__3_16 { \ + __asm__ __volatile__("memw"); \ + __asm__ __volatile__("nop"); /* 1 */ \ + __asm__ __volatile__("nop"); /* 2 */ \ + __asm__ __volatile__("nop"); /* 3 */ \ + __asm__ __volatile__("nop"); /* 4 */ \ + __asm__ __volatile__("nop"); /* 5 */ \ + __asm__ __volatile__("nop"); /* 6 */ \ + __asm__ __volatile__("nop"); /* 7 */ \ + __asm__ __volatile__("nop"); /* 8 */ \ + __asm__ __volatile__("nop"); /* 9 */ \ + }; + #else + #define ESP_EM__3_16 {}; + #endif + + #define ESP_EM__POST_PROCESS_START { ESP_EM__3_16 }; + #define ESP_EM__DPORT_FIFO_READ { ESP_EM__3_16 }; +#else + #define ESP_EM__3_16 {}; + #define ESP_EM__MP_HW_WAIT_CLEAN {}; + #define ESP_EM__MP_HW_WAIT_DONE {}; + #define ESP_EM__POST_SP_MP_HW_LOCK {}; + #define ESP_EM__PRE_MP_HW_WAIT_CLEAN {}; + #define ESP_EM__POST_PROCESS_START {}; + #define ESP_EM__DPORT_FIFO_READ {}; + #define ESP_EM__READ_NON_FIFO_REG {}; + #define ESP_EM__PRE_DPORT_READ {}; + #define ESP_EM__PRE_DPORT_WRITE {}; +#endif /* end c++ wrapper */ #ifdef __cplusplus