Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

B ext #3667

Merged
merged 11 commits into from
Aug 13, 2024
Merged

B ext #3667

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/mill-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
config: [DefaultConfig, DefaultBufferlessConfig, DefaultRV32Config, TinyConfig, DefaultFP16Config]
config: [DefaultConfig, DefaultBufferlessConfig, DefaultRV32Config, TinyConfig, DefaultFP16Config, DefaultBConfig, DefaultRV32BConfig]
steps:
- uses: actions/checkout@v2
with:
Expand Down
16 changes: 13 additions & 3 deletions build.sc
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ object emulator extends Cross[Emulator](
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultBufferlessConfig"),
// RocketSuiteC
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.TinyConfig"),

// Unittest
("freechips.rocketchip.unittest.TestHarness", "freechips.rocketchip.unittest.AMBAUnitTestConfig"),
("freechips.rocketchip.unittest.TestHarness", "freechips.rocketchip.unittest.TLSimpleUnitTestConfig"),
Expand Down Expand Up @@ -343,6 +344,9 @@ object emulator extends Cross[Emulator](
//
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultRV32Config"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultFP16Config"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultBConfig"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultRV32BConfig"),

)

object `runnable-riscv-test` extends mill.Cross[RiscvTest](
Expand Down Expand Up @@ -404,8 +408,8 @@ object `runnable-riscv-test` extends mill.Cross[RiscvTest](
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultRV32Config", "rv32uc-v", "none"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultRV32Config", "rv32uf-p", "none"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultRV32Config", "rv32uf-v", "none"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultRV32Config", "rv32ui-p", "none"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultRV32Config", "rv32ui-v", "none"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultRV32Config", "rv32ui-p", "ma_data"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultRV32Config", "rv32ui-v", "ma_data"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultRV32Config", "rv32um-p", "none"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultRV32Config", "rv32um-v", "none"),

Expand All @@ -417,11 +421,17 @@ object `runnable-riscv-test` extends mill.Cross[RiscvTest](
// lrsc is not implemented if usingDataScratchpad
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.TinyConfig", "rv32ua-p", "lrsc"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.TinyConfig", "rv32uc-p", "none"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.TinyConfig", "rv32ui-p", "none"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.TinyConfig", "rv32ui-p", "ma_data"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.TinyConfig", "rv32um-p", "none"),

("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultFP16Config", "rv64uzfh-p", "none"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultFP16Config", "rv64uzfh-v", "none"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultBConfig", "rv64uzba-p", "none"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultBConfig", "rv64uzbb-p", "none"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultBConfig", "rv64uzbs-p", "none"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultRV32BConfig", "rv32uzba-p", "none"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultRV32BConfig", "rv32uzbb-p", "none"),
("freechips.rocketchip.system.TestHarness", "freechips.rocketchip.system.DefaultRV32BConfig", "rv32uzbs-p", "none"),
)

object `runnable-arch-test` extends mill.Cross[ArchTest](
Expand Down
4 changes: 2 additions & 2 deletions overlay.nix
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ final: prev: {
});
riscvTests = final.pkgsCross.riscv64-embedded.stdenv.mkDerivation rec {
pname = "riscv-tests";
version = "55bbcc8c06637a31cc01970881ba8072838a9121";
version = "f2f748ebb9cf8ea049103f85c4cbf7e8a2927b16";
jerryz123 marked this conversation as resolved.
Show resolved Hide resolved
src = final.fetchgit {
url = "https://github.com/riscv-software-src/riscv-tests.git";
rev = "${version}";
fetchSubmodules = true;
sha256 = "sha256-TcIU+WFQxPqAG7lvfKPgHm4CnBpTkosqe+fYOxS+J7I=";
sha256 = "sha256-E3RfrP+PFIYy9c/pY04jYPxeGpnfgWwjV8iwL5+s+9w=";
};

enableParallelBuilding = true;
Expand Down
113 changes: 88 additions & 25 deletions src/main/scala/rocket/ALU.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
package freechips.rocketchip.rocket

import chisel3._
import chisel3.util.{BitPat, Fill, Cat, Reverse}
import chisel3.util.{BitPat, Fill, Cat, Reverse, PriorityEncoderOH, PopCount, MuxLookup}
import org.chipsalliance.cde.config.Parameters
import freechips.rocketchip.tile.CoreModule
import freechips.rocketchip.util._

class ALUFN {
val SZ_ALU_FN = 4
def FN_X = BitPat("b????")
object ALU {
sequencer marked this conversation as resolved.
Show resolved Hide resolved
val SZ_ALU_FN = 5
def FN_X = BitPat("b?????")
def FN_ADD = 0.U
def FN_SL = 1.U
def FN_SEQ = 2.U
Expand All @@ -27,6 +28,20 @@ class ALUFN {
def FN_SGE = 13.U
def FN_SLTU = 14.U
def FN_SGEU = 15.U
def FN_UNARY = 16.U
def FN_ROL = 17.U
def FN_ROR = 18.U
def FN_BEXT = 19.U

def FN_ANDN = 24.U
def FN_ORN = 25.U
def FN_XNOR = 26.U

def FN_MAX = 28.U
def FN_MIN = 29.U
def FN_MAXU = 30.U
def FN_MINU = 31.U
def FN_MAXMIN = BitPat("b111??")

// Mul/div reuse some integer FNs
def FN_DIV = FN_XOR
Expand All @@ -41,21 +56,22 @@ class ALUFN {

def isMulFN(fn: UInt, cmp: UInt) = fn(1,0) === cmp(1,0)
def isSub(cmd: UInt) = cmd(3)
def isCmp(cmd: UInt) = cmd >= FN_SLT
def isCmp(cmd: UInt) = (cmd >= FN_SLT && cmd <= FN_SGEU)
def isMaxMin(cmd: UInt) = (cmd >= FN_MAX && cmd <= FN_MINU)
sequencer marked this conversation as resolved.
Show resolved Hide resolved
def cmpUnsigned(cmd: UInt) = cmd(1)
def cmpInverted(cmd: UInt) = cmd(0)
def cmpEq(cmd: UInt) = !cmd(3)
def shiftReverse(cmd: UInt) = !cmd.isOneOf(FN_SR, FN_SRA, FN_ROR, FN_BEXT)
def bwInvRs2(cmd: UInt) = cmd.isOneOf(FN_ANDN, FN_ORN, FN_XNOR)
}

object ALUFN {
def apply() = new ALUFN
}
import ALU._


abstract class AbstractALU[T <: ALUFN](val aluFn: T)(implicit p: Parameters) extends CoreModule()(p) {
abstract class AbstractALU(implicit p: Parameters) extends CoreModule()(p) {
val io = IO(new Bundle {
val dw = Input(UInt(SZ_DW.W))
val fn = Input(UInt(aluFn.SZ_ALU_FN.W))
val fn = Input(UInt(SZ_ALU_FN.W))
val in2 = Input(UInt(xLen.W))
val in1 = Input(UInt(xLen.W))
val out = Output(UInt(xLen.W))
Expand All @@ -64,50 +80,97 @@ abstract class AbstractALU[T <: ALUFN](val aluFn: T)(implicit p: Parameters) ext
})
}

class ALU(implicit p: Parameters) extends AbstractALU(new ALUFN)(p) {
class ALU(implicit p: Parameters) extends AbstractALU()(p) {
// ADD, SUB
val in2_inv = Mux(aluFn.isSub(io.fn), ~io.in2, io.in2)
val in2_inv = Mux(isSub(io.fn), ~io.in2, io.in2)
val in1_xor_in2 = io.in1 ^ in2_inv
io.adder_out := io.in1 + in2_inv + aluFn.isSub(io.fn)
val in1_and_in2 = io.in1 & in2_inv
io.adder_out := io.in1 + in2_inv + isSub(io.fn)

// SLT, SLTU
val slt =
Mux(io.in1(xLen-1) === io.in2(xLen-1), io.adder_out(xLen-1),
Mux(aluFn.cmpUnsigned(io.fn), io.in2(xLen-1), io.in1(xLen-1)))
io.cmp_out := aluFn.cmpInverted(io.fn) ^ Mux(aluFn.cmpEq(io.fn), in1_xor_in2 === 0.U, slt)
Mux(cmpUnsigned(io.fn), io.in2(xLen-1), io.in1(xLen-1)))
io.cmp_out := cmpInverted(io.fn) ^ Mux(cmpEq(io.fn), in1_xor_in2 === 0.U, slt)

// SLL, SRL, SRA
val (shamt, shin_r) =
if (xLen == 32) (io.in2(4,0), io.in1)
else {
require(xLen == 64)
val shin_hi_32 = Fill(32, aluFn.isSub(io.fn) && io.in1(31))
val shin_hi_32 = Fill(32, isSub(io.fn) && io.in1(31))
val shin_hi = Mux(io.dw === DW_64, io.in1(63,32), shin_hi_32)
val shamt = Cat(io.in2(5) & (io.dw === DW_64), io.in2(4,0))
(shamt, Cat(shin_hi, io.in1(31,0)))
}
val shin = Mux(io.fn === aluFn.FN_SR || io.fn === aluFn.FN_SRA, shin_r, Reverse(shin_r))
val shout_r = (Cat(aluFn.isSub(io.fn) & shin(xLen-1), shin).asSInt >> shamt)(xLen-1,0)
val shin = Mux(shiftReverse(io.fn), Reverse(shin_r), shin_r)
val shout_r = (Cat(isSub(io.fn) & shin(xLen-1), shin).asSInt >> shamt)(xLen-1,0)
val shout_l = Reverse(shout_r)
val shout = Mux(io.fn === aluFn.FN_SR || io.fn === aluFn.FN_SRA, shout_r, 0.U) |
Mux(io.fn === aluFn.FN_SL, shout_l, 0.U)
val shout = Mux(io.fn === FN_SR || io.fn === FN_SRA || io.fn === FN_BEXT, shout_r, 0.U) |
Mux(io.fn === FN_SL, shout_l, 0.U)

// CZEQZ, CZNEZ
val in2_not_zero = io.in2.orR
val cond_out = Option.when(usingConditionalZero)(
Mux((io.fn === aluFn.FN_CZEQZ && in2_not_zero) || (io.fn === aluFn.FN_CZNEZ && !in2_not_zero), io.in1, 0.U)
Mux((io.fn === FN_CZEQZ && in2_not_zero) || (io.fn === FN_CZNEZ && !in2_not_zero), io.in1, 0.U)
)

// AND, OR, XOR
val logic = Mux(io.fn === aluFn.FN_XOR || io.fn === aluFn.FN_OR, in1_xor_in2, 0.U) |
Mux(io.fn === aluFn.FN_OR || io.fn === aluFn.FN_AND, io.in1 & io.in2, 0.U)
val logic = Mux(io.fn === FN_XOR || io.fn === FN_OR || io.fn === FN_ORN || io.fn === FN_XNOR, in1_xor_in2, 0.U) |
Mux(io.fn === FN_OR || io.fn === FN_AND || io.fn === FN_ORN || io.fn === FN_ANDN, in1_and_in2, 0.U)

val shift_logic = (aluFn.isCmp (io.fn) && slt) | logic | shout
val bext_mask = Mux(coreParams.useZbs.B && io.fn === FN_BEXT, 1.U, ~(0.U(xLen.W)))
val shift_logic = (isCmp (io.fn) && slt) | logic | (shout & bext_mask)
val shift_logic_cond = cond_out match {
case Some(co) => shift_logic | co
case _ => shift_logic
}
val out = Mux(io.fn === aluFn.FN_ADD || io.fn === aluFn.FN_SUB, io.adder_out, shift_logic_cond)

// CLZ, CTZ, CPOP
val tz_in = MuxLookup((io.dw === DW_32) ## !io.in2(0), 0.U)(Seq(
0.U -> io.in1,
1.U -> Reverse(io.in1),
2.U -> 1.U ## io.in1(31,0),
3.U -> 1.U ## Reverse(io.in1(31,0))
))
val popc_in = Mux(io.in2(1),
Mux(io.dw === DW_32, io.in1(31,0), io.in1),
PriorityEncoderOH(1.U ## tz_in) - 1.U)(xLen-1,0)
val count = PopCount(popc_in)
val in1_bytes = io.in1.asTypeOf(Vec(xLen / 8, UInt(8.W)))
val orcb = VecInit(in1_bytes.map(b => Fill(8, b =/= 0.U))).asUInt
val rev8 = VecInit(in1_bytes.reverse).asUInt
val unary = MuxLookup(io.in2(11,0), count)(Seq(
0x287.U -> orcb,
(if (xLen == 32) 0x698 else 0x6b8).U -> rev8,
0x080.U -> io.in1(15,0),
0x604.U -> Fill(xLen-8, io.in1(7)) ## io.in1(7,0),
0x605.U -> Fill(xLen-16, io.in1(15)) ## io.in1(15,0)
))
sequencer marked this conversation as resolved.
Show resolved Hide resolved

// MAX, MIN, MAXU, MINU
val maxmin_out = Mux(io.cmp_out, io.in2, io.in1)

// ROL, ROR
val rot_shamt = Mux(io.dw === DW_32, 32.U, xLen.U) - shamt
val rotin = Mux(io.fn(0), shin_r, Reverse(shin_r))
val rotout_r = (rotin >> rot_shamt)(xLen-1,0)
val rotout_l = Reverse(rotout_r)
val rotout = Mux(io.fn(0), rotout_r, rotout_l) | Mux(io.fn(0), shout_l, shout_r)

val out = MuxLookup(io.fn, shift_logic_cond)(Seq(
FN_ADD -> io.adder_out,
FN_SUB -> io.adder_out
) ++ (if (coreParams.useZbb) Seq(
FN_UNARY -> unary,
FN_MAX -> maxmin_out,
FN_MIN -> maxmin_out,
FN_MAXU -> maxmin_out,
FN_MINU -> maxmin_out,
FN_ROL -> rotout,
FN_ROR -> rotout,
) else Nil))


io.out := out
if (xLen > 32) {
Expand Down
53 changes: 52 additions & 1 deletion src/main/scala/rocket/Configs.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,56 @@ import org.chipsalliance.diplomacy.lazymodule._

import freechips.rocketchip.prci.{SynchronousCrossing, AsynchronousCrossing, RationalCrossing, ClockCrossingType}
import freechips.rocketchip.subsystem.{TilesLocated, NumTiles, HierarchicalLocation, RocketCrossingParams, SystemBusKey, CacheBlockBytes, RocketTileAttachParams, InSubsystem, InCluster, HierarchicalElementMasterPortParams, HierarchicalElementSlavePortParams, CBUS, CCBUS, ClustersLocated, TileAttachConfig, CloneTileAttachParams}
import freechips.rocketchip.tile.{RocketTileParams, RocketTileBoundaryBufferParams}
import freechips.rocketchip.tile.{RocketTileParams, RocketTileBoundaryBufferParams, FPUParams}
import scala.reflect.ClassTag

// All the user-level bells and whistles
/** Attaches `n` identically configured Rocket tiles at `location`.
  *
  * Each tile's core enables Zba, Zbb and Zbs, has an FPU with `minFLen = 16`,
  * and a multiplier/divider with `mulUnroll = 8` and early-out on both multiply
  * and divide. The data and instruction caches are each 64 sets by 8 ways; the
  * data cache is configured with `nMSHRs = 0`.
  *
  * The new tiles are placed ahead of whatever the upstream config already
  * attached at `location`, and `NumTiles` grows by `n`.
  *
  * @param n        number of tiles to add
  * @param location hierarchy location the tiles are attached to
  * @param crossing crossing parameters shared by every added tile
  */
class WithNHugeCores(
  n: Int,
  location: HierarchicalLocation,
  crossing: RocketCrossingParams,
) extends Config((site, here, up) => {
  case TilesLocated(`location`) =>
    // Tiles contributed by configs further up the chain; kept after the new ones.
    val existingTiles = up(TilesLocated(`location`), site)
    // New tile IDs continue from the number of tiles already declared upstream.
    val firstTileId = up(NumTiles)

    // Both caches take their row width and block size from the same site-level keys.
    val cacheRowBits = site(SystemBusKey).beatBits
    val cacheLineBytes = site(CacheBlockBytes)

    val hugeMulDiv = MulDivParams(
      mulUnroll = 8,
      mulEarlyOut = true,
      divEarlyOut = true,
    )
    val hugeCore = RocketCoreParams(
      mulDiv = Some(hugeMulDiv),
      useZba = true,
      useZbb = true,
      useZbs = true,
      fpu = Some(FPUParams(minFLen = 16)))
    val hugeDCache = DCacheParams(
      nSets = 64,
      nWays = 8,
      rowBits = cacheRowBits,
      nMSHRs = 0,
      blockBytes = cacheLineBytes)
    val hugeICache = ICacheParams(
      nSets = 64,
      nWays = 8,
      rowBits = cacheRowBits,
      blockBytes = cacheLineBytes)
    val hugeTile = RocketTileParams(
      core = hugeCore,
      dcache = Some(hugeDCache),
      icache = Some(hugeICache))

    val addedTiles = List.tabulate(n) { idx =>
      RocketTileAttachParams(hugeTile.copy(tileId = idx + firstTileId), crossing)
    }
    addedTiles ++ existingTiles
  case NumTiles => up(NumTiles) + n
}) {
  /** Convenience constructor: derives the crossing from the per-location default
    * master/slave port parameters, and picks the MMIO base-address-prefix bus from
    * the location (`CBUS` in the subsystem, `CCBUS(id)` inside a cluster).
    */
  def this(n: Int, location: HierarchicalLocation = InSubsystem) = this(
    n,
    location,
    RocketCrossingParams(
      master = HierarchicalElementMasterPortParams.locationDefault(location),
      slave = HierarchicalElementSlavePortParams.locationDefault(location),
      mmioBaseAddressPrefixWhere = location match {
        case InSubsystem   => CBUS
        case InCluster(id) => CCBUS(id)
      }
    )
  )
}

class WithNBigCores(
n: Int,
Expand Down Expand Up @@ -227,6 +274,10 @@ class WithHypervisor(hext: Boolean = true) extends RocketCoreConf
// Sets the core's `haveCease` flag to `enable`.
class WithCease(enable: Boolean = true) extends RocketCoreConfig(core => core.copy(haveCease = enable))
// Sets the core's `clockGate` flag.
class WithCoreClockGatingEnabled extends RocketCoreConfig(core => core.copy(clockGate = true))
// Sets the core's `pgLevels` to `n`.
class WithPgLevels(n: Int) extends RocketCoreConfig(core => core.copy(pgLevels = n))
// Bit-manipulation switches: each one sets the matching `use*` flag on the core parameters.
class WithZba extends RocketCoreConfig(core => core.copy(useZba = true))
class WithZbb extends RocketCoreConfig(core => core.copy(useZbb = true))
class WithZbs extends RocketCoreConfig(core => core.copy(useZbs = true))
// Sets the Zba, Zbb and Zbs flags together.
class WithB extends RocketCoreConfig(core => core.copy(useZba = true, useZbb = true, useZbs = true))
// Named page-level presets built on WithPgLevels: 4 levels for SV48, 3 for SV39.
class WithSV48 extends WithPgLevels(4)
class WithSV39 extends WithPgLevels(3)

Expand Down
14 changes: 9 additions & 5 deletions src/main/scala/rocket/Consts.scala
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ trait ScalarOpConstants {
def A1_ZERO = 0.U(2.W)
def A1_RS1 = 1.U(2.W)
def A1_PC = 2.U(2.W)
def A1_RS1SHL = 3.U(2.W)

def IMM_X = BitPat("b???")
def IMM_S = 0.U(3.W)
Expand All @@ -31,11 +32,14 @@ trait ScalarOpConstants {
def IMM_I = 4.U(3.W)
def IMM_Z = 5.U(3.W)

def A2_X = BitPat("b??")
def A2_ZERO = 0.U(2.W)
def A2_SIZE = 1.U(2.W)
def A2_RS2 = 2.U(2.W)
def A2_IMM = 3.U(2.W)
def A2_X = BitPat("b???")
def A2_ZERO = 0.U(3.W)
def A2_SIZE = 1.U(3.W)
def A2_RS2 = 2.U(3.W)
def A2_IMM = 3.U(3.W)
def A2_RS2OH = 4.U(3.W)
def A2_IMMOH = 5.U(3.W)


def X = BitPat("b?")
def N = BitPat("b0")
Expand Down
Loading
Loading