vram/VRAM: finish the top-level VRAM module

Well, for now at least. It can build 112KiB and 128KiB memories that
seem to synthesize to something reasonable.
This commit is contained in:
David Anderson 2024-09-07 16:00:45 -07:00
parent dd551ce09b
commit b2b2c14009
2 changed files with 72 additions and 63 deletions

View File

@ -4,35 +4,10 @@ import VRAM::*;
import ECP5_RAM::*; import ECP5_RAM::*;
import TriState::*; import TriState::*;
(* always_enabled *)
interface Top;
method Action phi2(bit v);
method Action we(bit we);
method Action addr(UInt#(24) addr);
interface InOut#(Bit#(8)) data();
endinterface
// mkTop: synthesis top level.
// NOTE: this is a side-by-side diff rendering; on each line the left
// column is the pre-commit text and the right column the post-commit text.
// Post-commit (right column), mkTop provides the VRAM interface and its
// whole body is: instantiate mkVRAM(112) and return it.
// Pre-commit (left column, removed by this commit), mkTop wrapped
// mkByteRAMArray(8), registering port A requests in reqA and port A read
// data in respA, and passing port B straight through.
(* synthesize *) (* synthesize *)
module mkTop(Top); module mkTop(VRAM);
Reg#(PortReq) reqA <- mkRegU(); let _ret <- mkVRAM(112);
Reg#(VRAMData) respA <- mkRegU(); return _ret;
let _ret <- mkByteRAMArray(8);
rule putA;
_ret.portA.put(reqA.chip_select, reqA.write, reqA.addr, reqA.datain);
endrule
rule getA;
respA <= _ret.portA.read();
endrule
method portA_read = respA._read;
method Action portA_put(cs, w, a, d);
reqA <= PortReq{chip_select: cs, write: w, addr: a, datain: d};
endmethod
method portB_read = _ret.portB.read;
method portB_put = _ret.portB.put;
endmodule endmodule
endpackage endpackage

View File

@ -7,11 +7,19 @@ import BRAM::*;
import Vector::*; import Vector::*;
import FIFOF::*; import FIFOF::*;
import SpecialFIFOs::*; import SpecialFIFOs::*;
import Real::*;
import Printf::*;
import DelayLine::*; import DelayLine::*;
import ECP5_RAM::*; import ECP5_RAM::*;
typedef UInt#(17) VRAMAddr; export VRAMAddr;
export VRAMData;
export mkVRAM;
export VRAMRequest;
export VRAMResponse;
export VRAMServer;
export VRAM;
typedef Bit#(8) VRAMData; typedef Bit#(8) VRAMData;
@ -19,9 +27,7 @@ typedef Bit#(8) VRAMData;
// address bits. // address bits.
typedef UInt#(12) ByteAddr; typedef UInt#(12) ByteAddr;
// The difference between ByteRAM_Addr and VRAMAddr is the chip typedef UInt#(3) ChipAddr;
// select ID.
typedef UInt#(5) ChipAddr;
// ByteRAM is two EBRs glued together to make a whole-byte memory. // ByteRAM is two EBRs glued together to make a whole-byte memory.
typedef EBR#(ByteAddr, VRAMData, ByteAddr, VRAMData) ByteRAM; typedef EBR#(ByteAddr, VRAMData, ByteAddr, VRAMData) ByteRAM;
@ -30,14 +36,14 @@ typedef EBR#(ByteAddr, VRAMData, ByteAddr, VRAMData) ByteRAM;
// block. Like the underlying ECP5 EBRs, callers must bring their own // block. Like the underlying ECP5 EBRs, callers must bring their own
// flow control to read out responses one cycle after putting a read // flow control to read out responses one cycle after putting a read
// request. // request.
module mkByteRAM(UInt#(3) chip_addr, ByteRAM ifc); module mkByteRAM(ChipAddr chip_addr, ByteRAM ifc);
EBRPortConfig cfg = defaultValue; EBRPortConfig cfg = defaultValue;
cfg.chip_select_addr = chip_addr; cfg.chip_select_addr = chip_addr;
EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) upper <- mkEBRCore(cfg, cfg); EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) upper <- mkEBRCore(cfg, cfg);
EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) lower <- mkEBRCore(cfg, cfg); EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) lower <- mkEBRCore(cfg, cfg);
interface EBRPort portA; interface EBRPort portA;
// Port A request for the byte-wide RAM. The same chip_select, write flag
// and address are sent to both 4-bit EBRs; data_in is divided between
// them: `upper` receives bits 7:4 (data_in>>4, truncated to 4 bits) and
// `lower` receives bits 3:0.
// The only change in this commit (right column) is spelling the
// chip_select type as ChipAddr instead of UInt#(3).
method Action put(UInt#(3) chip_select, Bool write, ByteAddr addr, VRAMData data_in); method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in);
upper.portA.put(chip_select, write, addr, truncate(data_in>>4)); upper.portA.put(chip_select, write, addr, truncate(data_in>>4));
lower.portA.put(chip_select, write, addr, truncate(data_in)); lower.portA.put(chip_select, write, addr, truncate(data_in));
endmethod endmethod
@ -48,7 +54,7 @@ module mkByteRAM(UInt#(3) chip_addr, ByteRAM ifc);
endinterface endinterface
interface EBRPort portB; interface EBRPort portB;
// Port B request for the byte-wide RAM; mirrors port A. Both 4-bit EBRs
// get the same chip_select, write flag and address; `upper` stores bits
// 7:4 of data_in and `lower` stores bits 3:0.
// The only change in this commit (right column) is spelling the
// chip_select type as ChipAddr instead of UInt#(3).
method Action put(UInt#(3) chip_select, Bool write, ByteAddr addr, VRAMData data_in); method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in);
upper.portB.put(chip_select, write, addr, truncate(data_in>>4)); upper.portB.put(chip_select, write, addr, truncate(data_in>>4));
lower.portB.put(chip_select, write, addr, truncate(data_in)); lower.portB.put(chip_select, write, addr, truncate(data_in));
endmethod endmethod
@ -59,6 +65,10 @@ module mkByteRAM(UInt#(3) chip_addr, ByteRAM ifc);
endinterface endinterface
endmodule : mkByteRAM endmodule : mkByteRAM
// mkByteRAMArray arrays up to 8 mkByteRAMs together, using the
// hardwired chip select lines to route inputs appropriately and a mux
// tree to collect outputs. With num_chips=8, the resulting ByteRAM is
// 32768x8b.
module mkByteRAMArray(Integer num_chips, ByteRAM ifc); module mkByteRAMArray(Integer num_chips, ByteRAM ifc);
if (num_chips > 8) if (num_chips > 8)
error("mkByteRAMArray can only array 8 raw ByteRAMs"); error("mkByteRAMArray can only array 8 raw ByteRAMs");
@ -67,11 +77,11 @@ module mkByteRAMArray(Integer num_chips, ByteRAM ifc);
for (Integer i=0; i<num_chips; i=i+1) for (Integer i=0; i<num_chips; i=i+1)
blocks[i] <- mkByteRAM(fromInteger(i)); blocks[i] <- mkByteRAM(fromInteger(i));
DelayLine#(UInt#(3)) read_chip_A <- mkDelayLine(1); DelayLine#(ChipAddr) read_chip_A <- mkDelayLine(1);
DelayLine#(UInt#(3)) read_chip_B <- mkDelayLine(1); DelayLine#(ChipAddr) read_chip_B <- mkDelayLine(1);
interface EBRPort portA; interface EBRPort portA;
method Action put(UInt#(3) chip_select, Bool write, ByteAddr addr, VRAMData data_in); method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in);
for (Integer i=0; i<num_chips; i=i+1) for (Integer i=0; i<num_chips; i=i+1)
blocks[i].portA.put(chip_select, write, addr, data_in); blocks[i].portA.put(chip_select, write, addr, data_in);
if (write) if (write)
@ -89,7 +99,7 @@ module mkByteRAMArray(Integer num_chips, ByteRAM ifc);
endinterface endinterface
interface EBRPort portB; interface EBRPort portB;
method Action put(UInt#(3) chip_select, Bool write, ByteAddr addr, VRAMData data_in); method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in);
for (Integer i=0; i<num_chips; i=i+1) for (Integer i=0; i<num_chips; i=i+1)
blocks[i].portB.put(chip_select, write, addr, data_in); blocks[i].portB.put(chip_select, write, addr, data_in);
if (write) if (write)
@ -107,6 +117,10 @@ module mkByteRAMArray(Integer num_chips, ByteRAM ifc);
endinterface endinterface
endmodule endmodule
typedef UInt#(2) ArrayAddr;
typedef UInt#(17) VRAMAddr;
typedef struct { typedef struct {
VRAMAddr addr; VRAMAddr addr;
Maybe#(VRAMData) data; Maybe#(VRAMData) data;
@ -124,38 +138,58 @@ interface VRAM;
interface VRAMServer portB; interface VRAMServer portB;
endinterface endinterface
module mkVRAM(Integer num_4kB_blocks, VRAM ifc); // mkVRAM creates a dual port VRAM of the specified size, using ECP5
if (num_4kB_blocks > 32) // EBR memory primitives. The memory size must be a multiple of 4KiB,
error("maximum number of blocks is 32 (128KiB)"); // with a maximum of 128KiB.
UInt#(TAdd#(SizeOf#(VRAMAddr), 1)) max_request_addr = fromInteger((4096 * num_4kB_blocks)); //
// The returned VRAM servers implement flow control. As long as
// responses are processed as soon as they're available, each port can
// process one memory operation per cycle.
//
// The VRAM does not prevent write-write or write-read conflicts
// between the ports. The outcome of a simultaneous write to the same
// address is unspecified, as is the read output in a simultaneous
// read and write of the same address. The caller must use external
// arbitration to avoid such accesses.
module mkVRAM(Integer num_kilobytes, VRAM ifc);
if (num_kilobytes > 128)
error("maximum VRAM size is 128KiB");
let num_bytes = num_kilobytes*1024;
if (num_bytes % 4096 != 0)
error("VRAM must be a multiple of 4096b");
let num_byterams = num_bytes/4096;
let num_arrays = ceil(fromInteger(num_byterams) / 8);
// split_addr decodes a VRAMAddr into routing fields.
// NOTE: side-by-side diff; left column is the removed pre-commit version
// (returns chip/offset), right column is the post-commit version.
// Post-commit behavior:
//   1. If the memory is smaller than 128*1024 bytes, the address is
//      reduced modulo num_bytes, so out-of-range addresses wrap around.
//      (Presumably the guard exists because 128*1024 itself is not
//      representable in the 17-bit VRAMAddr - confirm.)
//   2. The packed address is split into its upper bits (`top`) and a
//      ByteAddr-sized low part (`byteaddr`).
//   3. `top` is split again into an ArrayAddr-sized upper part and a
//      ChipAddr-sized lower part.
// Returns tuple3(array index, chip select, byte address).
function Tuple2#(ChipAddr, ByteAddr) split_addr(VRAMAddr a); function Tuple3#(ArrayAddr, ChipAddr, ByteAddr) split_addr(VRAMAddr a);
UInt#(TAdd#(SizeOf#(VRAMAddr), 1)) expanded = extend(a); if (num_bytes < 128*1024)
VRAMAddr wrapped = truncate(expanded % max_request_addr); a = a % fromInteger(num_bytes);
match {.chip, .off} = split(pack(wrapped)); match {.top, .byteaddr} = split(pack(a));
return tuple2(unpack(chip), unpack(off)); Tuple2#(Bit#(SizeOf#(ArrayAddr)), Bit#(SizeOf#(ChipAddr))) route = split(top);
return tuple3(unpack(tpl_1(route)), unpack(tpl_2(route)), unpack(byteaddr));
endfunction endfunction
ByteRAM blocks[num_4kB_blocks]; ByteRAM arrays[num_arrays];
for (Integer i=0; i<num_4kB_blocks; i=i+1) for (Integer i=0; i<num_arrays; i=i+1) begin
blocks[i] <- mkByteRAM(0); let array_size = min(num_byterams - (i*8), 8);
arrays[i] <- mkByteRAMArray(array_size);
end
Reg#(Maybe#(ChipAddr)) inflight_A[2] <- mkCReg(2, tagged Invalid); Reg#(Maybe#(ArrayAddr)) inflight_A[2] <- mkCReg(2, tagged Invalid);
Reg#(Maybe#(ChipAddr)) inflight_B[2] <- mkCReg(2, tagged Invalid); Reg#(Maybe#(ArrayAddr)) inflight_B[2] <- mkCReg(2, tagged Invalid);
interface VRAMServer portA; interface VRAMServer portA;
interface Put request; interface Put request;
// Port A request. Only enabled while inflight_A[1] is Invalid, i.e. no
// port A read is waiting to be collected.
// Post-commit (right column): the address is decoded into
// (array, chip, byteaddr); the request goes to arrays[array] with `chip`
// as the chip select. req.data Valid means write (its payload is the
// data); Invalid means read, with 0 passed as the unused data value.
// For reads, the array index is recorded in inflight_A so that
// response.get knows which array's read output to return.
method Action put(VRAMRequest req) if (inflight_A[1] matches tagged Invalid); method Action put(VRAMRequest req) if (inflight_A[1] matches tagged Invalid);
match {.chip, .off} = split_addr(req.addr); match {.array, .chip, .byteaddr} = split_addr(req.addr);
blocks[chip].portA.put(0, isValid(req.data), off, fromMaybe(0, req.data)); arrays[array].portA.put(chip, isValid(req.data), byteaddr, fromMaybe(0, req.data));
if (!isValid(req.data)) if (!isValid(req.data))
inflight_A[1] <= tagged Valid chip; inflight_A[1] <= tagged Valid array;
endmethod endmethod
endinterface endinterface
interface Get response; interface Get response;
// Port A response. Only enabled while inflight_A[0] holds a Valid entry,
// i.e. a read has been issued and not yet collected.
// Clears the in-flight marker and returns the port A read output of the
// recorded array (post-commit, right column; pre-commit it indexed
// `blocks` by chip instead).
method ActionValue#(VRAMResponse) get() if (inflight_A[0] matches tagged Valid .chip); method ActionValue#(VRAMResponse) get() if (inflight_A[0] matches tagged Valid .array);
inflight_A[0] <= tagged Invalid; inflight_A[0] <= tagged Invalid;
return VRAMResponse{data: blocks[chip].portA.read()}; return VRAMResponse{data: arrays[array].portA.read()};
endmethod endmethod
endinterface endinterface
endinterface endinterface
@ -163,16 +197,16 @@ module mkVRAM(Integer num_4kB_blocks, VRAM ifc);
interface VRAMServer portB; interface VRAMServer portB;
interface Put request; interface Put request;
// Port B request. Only enabled while inflight_B[1] is Invalid, i.e. no
// port B read is waiting to be collected.
//
// The address is decoded into (array, chip, byteaddr): `array` selects
// which ByteRAM array receives the request, `chip` is the chip select
// forwarded to that array, and `byteaddr` is the address within the
// selected ByteRAM. req.data Valid means write (its payload is the data
// to store); Invalid means read, with 0 passed as the unused data value.
// For reads, the array index is recorded in inflight_B so that
// response.get knows which array's read output to return.
method Action put(VRAMRequest req) if (inflight_B[1] matches tagged Invalid);
   match {.array, .chip, .byteaddr} = split_addr(req.addr);
   // Forward the decoded chip select, exactly as portA does. A literal 0
   // here would ignore the chip bits of the address and direct every
   // port B access to chip select 0 of the chosen array.
   arrays[array].portB.put(chip, isValid(req.data), byteaddr, fromMaybe(0, req.data));
   if (!isValid(req.data))
      inflight_B[1] <= tagged Valid array;
endmethod
endinterface endinterface
interface Get response; interface Get response;
// Port B response. Only enabled while inflight_B[0] holds a Valid entry,
// i.e. a read has been issued and not yet collected.
// Clears the in-flight marker and returns the port B read output of the
// recorded array (post-commit, right column; pre-commit it indexed
// `blocks` by chip instead).
method ActionValue#(VRAMResponse) get() if (inflight_B[0] matches tagged Valid .chip); method ActionValue#(VRAMResponse) get() if (inflight_B[0] matches tagged Valid .array);
inflight_B[0] <= tagged Invalid; inflight_B[0] <= tagged Invalid;
return VRAMResponse{data: blocks[chip].portB.read()}; return VRAMResponse{data: arrays[array].portB.read()};
endmethod endmethod
endinterface endinterface
endinterface endinterface