From 04438ba1140f8b03baf9233e8ccbfa5dac3dfed0 Mon Sep 17 00:00:00 2001
From: stnolting <22944758+stnolting@users.noreply.github.com>
Date: Sat, 6 Jul 2024 05:55:57 +0200
Subject: [PATCH 1/6] [fpu test] add f2si corner cases

fix "random" operand generator
---
 sw/example/floating_point_test/main.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/sw/example/floating_point_test/main.c b/sw/example/floating_point_test/main.c
index c2506dc19..c1d46e7c1 100644
--- a/sw/example/floating_point_test/main.c
+++ b/sw/example/floating_point_test/main.c
@@ -264,6 +264,14 @@ int main() {
 
   neorv32_uart0_printf("\n#%u: FCVT.W.S (float to signed integer)...\n", test_cnt);
   err_cnt = 0;
+  // corner case tests (#942)
+  for (i=0;i<256;i++) {
+    opa.binary_value = i << 24;
+    res_hw.binary_value = (uint32_t)riscv_intrinsic_fcvt_ws(opa.float_value);
+    res_sw.binary_value = (uint32_t)riscv_emulate_fcvt_ws(opa.float_value);
+    err_cnt += verify_result(i, opa.binary_value, 0, res_sw.binary_value, res_hw.binary_value);
+  }
+  // regular tests
   for (i=0;i<(uint32_t)NUM_TEST_CASES; i++) {
     opa.binary_value = get_test_vector();
     res_hw.binary_value = (uint32_t)riscv_intrinsic_fcvt_ws(opa.float_value);
@@ -902,14 +910,14 @@ uint32_t get_test_vector(void) {
   // generate special value "every" ~256th time this function is called
   if ((neorv32_aux_xorshift32() & 0xff) == 0xff) {
 
-    switch((neorv32_aux_xorshift32() >> 10) & 0x3) { // random decision which special value we are taking
+    switch((neorv32_aux_xorshift32() >> 20) & 0x7) { // random decision which special value we are taking
       case  0: tmp.float_value  = +INFINITY; break;
       case  1: tmp.float_value  = -INFINITY; break;
       case  2: tmp.float_value  = +0.0f; break;
       case  3: tmp.float_value  = -0.0f; break;
       case  4: tmp.binary_value = 0x7fffffff; break;
       case  5: tmp.binary_value = 0xffffffff; break;
-      case  6: tmp.float_value  = NAN; break;
+      case  6: tmp.binary_value = 0xff000000; break;
       case  7: tmp.float_value  = NAN; break; // FIXME signaling_NAN?
       default: tmp.float_value  = NAN; break;
     }

From 735330c420049a2f0ffd40182b029063c85506da Mon Sep 17 00:00:00 2001
From: stnolting <22944758+stnolting@users.noreply.github.com>
Date: Sat, 6 Jul 2024 05:57:14 +0200
Subject: [PATCH 2/6] :bug: [rtl] fix FPU float-to-int corner case

---
 rtl/core/neorv32_cpu_cp_fpu.vhd | 4 ++--
 rtl/core/neorv32_package.vhd    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/rtl/core/neorv32_cpu_cp_fpu.vhd b/rtl/core/neorv32_cpu_cp_fpu.vhd
index 94fdc19bd..2a0aa5e46 100644
--- a/rtl/core/neorv32_cpu_cp_fpu.vhd
+++ b/rtl/core/neorv32_cpu_cp_fpu.vhd
@@ -2267,8 +2267,8 @@ begin
             -- the mantissa is not 0 (without hidden 1) then we have a true overflow.
             -- Otherwise we have a "real" 1 in the result MSB which should result in -MAX as the correct value.
             -- This captures the corner case where the number is exactly 2^-31
-            elsif ((ctrl.sign = '1') and (ctrl.over = '1') and
-                   (ctrl.result_tmp /= x"80000000") and (mantissa_i /= "00000000000000000000000")) then -- negative out-of-range
+            elsif (ctrl.sign = '1') and (ctrl.over = '1') then -- negative out-of-range
+--                and (ctrl.result_tmp /= x"80000000") and (mantissa_i /= "00000000000000000000000") then -- negative out-of-range
               ctrl.result <= x"80000000";
               -- if we had a negative out of range we are not valid but never inexact
               ctrl.flags(fp_exc_nv_c) <= '1';
diff --git a/rtl/core/neorv32_package.vhd b/rtl/core/neorv32_package.vhd
index 7f01fac0b..7908cc9b2 100644
--- a/rtl/core/neorv32_package.vhd
+++ b/rtl/core/neorv32_package.vhd
@@ -29,7 +29,7 @@ package neorv32_package is
 
   -- Architecture Constants -----------------------------------------------------------------
   -- -------------------------------------------------------------------------------------------
-  constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01100101"; -- hardware version
+  constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01100102"; -- hardware version
   constant archid_c     : natural := 19; -- official RISC-V architecture ID
   constant XLEN         : natural := 32; -- native data path width
 

From 8d2a71aa0f3368160d07f8bd5eb0fc986a8a465b Mon Sep 17 00:00:00 2001
From: stnolting <22944758+stnolting@users.noreply.github.com>
Date: Sat, 6 Jul 2024 06:09:05 +0200
Subject: [PATCH 3/6] [changelog] add v1.10.1.2

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0c60581ca..b14e319a7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ mimpid = 0x01040312 -> Version 01.04.03.12 -> v1.4.3.12
 
 | Date | Version | Comment | Ticket |
 |:----:|:-------:|:--------|:------:|
+| 06.07.2024 | 1.10.1.2 | :bug: fix corner case in FPU's float-to-signed-integer converter | [#943](https://github.com/stnolting/neorv32/pull/943) |
 | 05.07.2024 | 1.10.1.1 | minor rtl cleanups and optimizations | [#941](https://github.com/stnolting/neorv32/pull/941) |
 | 04.07.2024 | [**:rocket:1.10.1**](https://github.com/stnolting/neorv32/releases/tag/v1.10.1) | **New release** | |
 | 04.07.2024 | 1.10.0.10 | :warning: rework GPTMRM and remove capture mode | [#939](https://github.com/stnolting/neorv32/pull/939) |

From 6e33e9c2498151dc8c28915ad7aac973a1e7386b Mon Sep 17 00:00:00 2001
From: stnolting <22944758+stnolting@users.noreply.github.com>
Date: Mon, 8 Jul 2024 21:32:28 +0200
Subject: [PATCH 4/6] [zfinx intrinsics] use DYNAMIC rounding mode

rounding mode is defined by the fcsr "frm" bits
---
 .../neorv32_zfinx_extension_intrinsics.h      | 26 ++++++++++++-------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/sw/example/floating_point_test/neorv32_zfinx_extension_intrinsics.h b/sw/example/floating_point_test/neorv32_zfinx_extension_intrinsics.h
index 6c75c5e88..1bbad8a7f 100644
--- a/sw/example/floating_point_test/neorv32_zfinx_extension_intrinsics.h
+++ b/sw/example/floating_point_test/neorv32_zfinx_extension_intrinsics.h
@@ -44,11 +44,11 @@
  * @brief Also provides emulation functions for all intrinsics (functionality re-built in pure software). The functionality of the emulation
  * @brief functions is based on the RISC-V floating-point spec.
  *
- * @note All operations from this library use the default GCC "round to nearest, ties to even" rounding mode.
+ * @note All intrinsics/instructions use the DYNAMIC rounding mode (actual rounding mode is defined by the FCSR).
  *
  * @warning This library is just a temporary fall-back until the Zfinx extensions are supported by the upstream RISC-V GCC port.
  **************************************************************************/
- 
+
 #ifndef neorv32_zfinx_extension_intrinsics_h
 #define neorv32_zfinx_extension_intrinsics_h
 
@@ -112,6 +112,7 @@ float subnormal_flush(float tmp) {
 
 /**********************************************************************//**
  * Single-precision floating-point addition
+ * @note Instruction uses DYNAMIC rounding mode.
  *
  * @param[in] rs1 Source operand 1.
  * @param[in] rs2 Source operand 2.
@@ -123,13 +124,14 @@ inline float __attribute__ ((always_inline)) riscv_intrinsic_fadds(float rs1, fl
   opa.float_value = rs1;
   opb.float_value = rs2;
 
-  res.binary_value = CUSTOM_INSTR_R3_TYPE(0b0000000, opb.binary_value, opa.binary_value, 0b000, 0b1010011);
+  res.binary_value = CUSTOM_INSTR_R3_TYPE(0b0000000, opb.binary_value, opa.binary_value, 0b111, 0b1010011);
   return res.float_value;
 }
 
 
 /**********************************************************************//**
  * Single-precision floating-point subtraction
+ * @note Instruction uses DYNAMIC rounding mode.
  *
  * @param[in] rs1 Source operand 1.
  * @param[in] rs2 Source operand 2.
@@ -141,13 +143,14 @@ inline float __attribute__ ((always_inline)) riscv_intrinsic_fsubs(float rs1, fl
   opa.float_value = rs1;
   opb.float_value = rs2;
 
-  res.binary_value = CUSTOM_INSTR_R3_TYPE(0b0000100, opb.binary_value, opa.binary_value, 0b000, 0b1010011);
+  res.binary_value = CUSTOM_INSTR_R3_TYPE(0b0000100, opb.binary_value, opa.binary_value, 0b111, 0b1010011);
   return res.float_value;
 }
 
 
 /**********************************************************************//**
  * Single-precision floating-point multiplication
+ * @note Instruction uses DYNAMIC rounding mode.
  *
  * @param[in] rs1 Source operand 1.
  * @param[in] rs2 Source operand 2.
@@ -159,7 +162,7 @@ inline float __attribute__ ((always_inline)) riscv_intrinsic_fmuls(float rs1, fl
   opa.float_value = rs1;
   opb.float_value = rs2;
 
-  res.binary_value = CUSTOM_INSTR_R3_TYPE(0b0001000, opb.binary_value, opa.binary_value, 0b000, 0b1010011);
+  res.binary_value = CUSTOM_INSTR_R3_TYPE(0b0001000, opb.binary_value, opa.binary_value, 0b111, 0b1010011);
   return res.float_value;
 }
 
@@ -202,6 +205,7 @@ inline float __attribute__ ((always_inline)) riscv_intrinsic_fmaxs(float rs1, fl
 
 /**********************************************************************//**
  * Single-precision floating-point convert float to unsigned integer
+ * @note Instruction uses DYNAMIC rounding mode.
  *
  * @param[in] rs1 Source operand 1.
  * @return Result.
@@ -211,12 +215,13 @@ inline uint32_t __attribute__ ((always_inline)) riscv_intrinsic_fcvt_wus(float r
   float_conv_t opa;
   opa.float_value = rs1;
 
-  return CUSTOM_INSTR_R2_TYPE(0b1100000, 0b00001, opa.binary_value, 0b000, 0b1010011);
+  return CUSTOM_INSTR_R2_TYPE(0b1100000, 0b00001, opa.binary_value, 0b111, 0b1010011);
 }
 
 
 /**********************************************************************//**
  * Single-precision floating-point convert float to signed integer
+ * @note Instruction uses DYNAMIC rounding mode.
  *
  * @param[in] rs1 Source operand 1.
  * @return Result.
@@ -226,12 +231,13 @@ inline int32_t __attribute__ ((always_inline)) riscv_intrinsic_fcvt_ws(float rs1
   float_conv_t opa;
   opa.float_value = rs1;
 
-  return (int32_t)CUSTOM_INSTR_R2_TYPE(0b1100000, 0b00000, opa.binary_value, 0b000, 0b1010011);
+  return (int32_t)CUSTOM_INSTR_R2_TYPE(0b1100000, 0b00000, opa.binary_value, 0b111, 0b1010011);
 }
 
 
 /**********************************************************************//**
  * Single-precision floating-point convert unsigned integer to float
+ * @note Instruction uses DYNAMIC rounding mode.
  *
  * @param[in] rs1 Source operand 1.
  * @return Result.
@@ -240,13 +246,14 @@ inline float __attribute__ ((always_inline)) riscv_intrinsic_fcvt_swu(uint32_t r
 
   float_conv_t res;
 
-  res.binary_value = CUSTOM_INSTR_R2_TYPE(0b1101000, 0b00001, rs1, 0b000, 0b1010011);
+  res.binary_value = CUSTOM_INSTR_R2_TYPE(0b1101000, 0b00001, rs1, 0b111, 0b1010011);
   return res.float_value;
 }
 
 
 /**********************************************************************//**
  * Single-precision floating-point convert signed integer to float
+ * @note Instruction uses DYNAMIC rounding mode.
  *
  * @param[in] rs1 Source operand 1.
  * @return Result.
@@ -255,7 +262,7 @@ inline float __attribute__ ((always_inline)) riscv_intrinsic_fcvt_sw(int32_t rs1
 
   float_conv_t res;
 
-  res.binary_value = CUSTOM_INSTR_R2_TYPE(0b1101000, 0b00000, rs1, 0b000, 0b1010011);
+  res.binary_value = CUSTOM_INSTR_R2_TYPE(0b1101000, 0b00000, rs1, 0b111, 0b1010011);
   return res.float_value;
 }
 
@@ -1083,4 +1090,3 @@ float __attribute__ ((noinline)) riscv_emulate_fnmadds(float rs1, float rs2, flo
 
 
 #endif // neorv32_zfinx_extension_intrinsics_h
- 
\ No newline at end of file

From 0d560117c75328cd9b66cc3379d8364311cabc86 Mon Sep 17 00:00:00 2001
From: stnolting <22944758+stnolting@users.noreply.github.com>
Date: Mon, 8 Jul 2024 21:32:44 +0200
Subject: [PATCH 5/6] [fp test] setup FPU csrs

---
 sw/example/floating_point_test/main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sw/example/floating_point_test/main.c b/sw/example/floating_point_test/main.c
index c1d46e7c1..cfd8faff4 100644
--- a/sw/example/floating_point_test/main.c
+++ b/sw/example/floating_point_test/main.c
@@ -218,7 +218,8 @@ int main() {
 // ----------------------------------------------------------------------------
 // Initialize FPU hardware
 // ----------------------------------------------------------------------------
-  neorv32_cpu_csr_write(CSR_FCSR, 0); // clear exception flags and set "round to nearest"
+  neorv32_cpu_csr_write(CSR_FFLAGS, 0); // clear exception flags
+  neorv32_cpu_csr_write(CSR_FRM, 0b000); // set dynamic rounding mode "round to nearest, ties to even"
 
 
 // ----------------------------------------------------------------------------

From 9c9abf2af450fecc2ed055597bd4b2f995ad5fac Mon Sep 17 00:00:00 2001
From: stnolting <22944758+stnolting@users.noreply.github.com>
Date: Wed, 10 Jul 2024 20:03:39 +0200
Subject: [PATCH 6/6] revert version ID update

for now...
---
 CHANGELOG.md                 | 1 -
 rtl/core/neorv32_package.vhd | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index b14e319a7..0c60581ca 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,7 +29,6 @@ mimpid = 0x01040312 -> Version 01.04.03.12 -> v1.4.3.12
 
 | Date | Version | Comment | Ticket |
 |:----:|:-------:|:--------|:------:|
-| 06.07.2024 | 1.10.1.2 | :bug: fix corner case in FPU's float-to-signed-integer converter | [#943](https://github.com/stnolting/neorv32/pull/943) |
 | 05.07.2024 | 1.10.1.1 | minor rtl cleanups and optimizations | [#941](https://github.com/stnolting/neorv32/pull/941) |
 | 04.07.2024 | [**:rocket:1.10.1**](https://github.com/stnolting/neorv32/releases/tag/v1.10.1) | **New release** | |
 | 04.07.2024 | 1.10.0.10 | :warning: rework GPTMRM and remove capture mode | [#939](https://github.com/stnolting/neorv32/pull/939) |
diff --git a/rtl/core/neorv32_package.vhd b/rtl/core/neorv32_package.vhd
index 7908cc9b2..7f01fac0b 100644
--- a/rtl/core/neorv32_package.vhd
+++ b/rtl/core/neorv32_package.vhd
@@ -29,7 +29,7 @@ package neorv32_package is
 
   -- Architecture Constants -----------------------------------------------------------------
   -- -------------------------------------------------------------------------------------------
-  constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01100102"; -- hardware version
+  constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01100101"; -- hardware version
   constant archid_c     : natural := 19; -- official RISC-V architecture ID
   constant XLEN         : natural := 32; -- native data path width