From 1acec6d83542af67d663dfc5fb1ef79c5d1040dd Mon Sep 17 00:00:00 2001 From: David Anderson Date: Wed, 18 Sep 2024 21:31:34 -0700 Subject: [PATCH] vram/VRAMCore: fix timing bug with slow readers The internal EBR array must stall new operations if there's a pending read result that hasn't been retired yet. All that clever stuff with non-blocking DelayLines and all that? Yeah, spoiler alert, there's a reason guarded FIFOs are the preferred API for this stuff. Play unsafe games, win unsafe prizes. --- vram/VRAMCore.bsv | 242 +++++++++++++++++++++++++++-------------- vram/VRAMCore_Test.bsv | 101 ++++++++++------- 2 files changed, 226 insertions(+), 117 deletions(-) diff --git a/vram/VRAMCore.bsv b/vram/VRAMCore.bsv index 3b9db94..4a05a4e 100644 --- a/vram/VRAMCore.bsv +++ b/vram/VRAMCore.bsv @@ -4,6 +4,8 @@ import GetPut::*; import ClientServer::*; import BRAMCore::*; import Real::*; +import FIFOF::*; +import SpecialFIFOs::*; import DelayLine::*; import ECP5_RAM::*; @@ -21,9 +23,15 @@ typedef UInt#(17) VRAMAddr; typedef UInt#(2) ArrayAddr; typedef UInt#(3) ChipAddr; typedef UInt#(12) ByteAddr; +typedef struct { + ChipAddr chip; + ByteAddr addr; +} EBRAddr deriving (Bits, Eq, FShow); -// ByteRAM is two EBRs glued together to make a whole-byte memory. -typedef EBR#(ByteAddr, VRAMData, ByteAddr, VRAMData) ByteRAM; +typedef struct { + EBRAddr addr; + Maybe#(data) data; +} VRAMInternalRequest#(type data) deriving (Bits, Eq, FShow); typedef struct { VRAMAddr addr; @@ -34,6 +42,11 @@ typedef struct { VRAMData data; } VRAMResponse deriving (Bits, Eq, FShow); +interface VRAMCoreInternal#(type data); + interface Server#(VRAMInternalRequest#(data), data) portA; + interface Server#(VRAMInternalRequest#(data), data) portB; +endinterface + module mkNibbleRAM_ECP5(ChipAddr chip_addr, EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) ifc); EBRPortConfig cfg = defaultValue; cfg.chip_select_addr = chip_addr; @@ -61,43 +74,102 @@ module mkNibbleRAM_Sim(ChipAddr chip_addr, EBR#(ByteAddr, Bit#(4), ByteAddr, Bit endinterface endmodule -module mkNibbleRAM(ChipAddr chip_addr, EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) ifc); +module mkNibbleRAM(ChipAddr chip_addr, VRAMCoreInternal#(Bit#(4)) ifc); let _ret; if (genC()) _ret <- mkNibbleRAM_Sim(chip_addr); else _ret <- mkNibbleRAM_ECP5(chip_addr); - return _ret; + + interface Server portA; + interface Put request; + method Action put(req); + _ret.portA.put(req.addr.chip, isValid(req.data), req.addr.addr, fromMaybe(0, req.data)); + endmethod + endinterface + interface Get response; + method ActionValue#(Bit#(4)) get(); + return _ret.portA.read(); + endmethod + endinterface + endinterface + + interface Server portB; + interface Put request; + method Action put(req); + _ret.portB.put(req.addr.chip, isValid(req.data), req.addr.addr, fromMaybe(0, req.data)); + endmethod + endinterface + interface Get response; + method ActionValue#(Bit#(4)) get(); + return _ret.portB.read(); + endmethod + endinterface + endinterface endmodule // mkByteRAM glues two ECP5 EBRs together to make a 4096x8b memory // block. Like the underlying ECP5 EBRs, callers must bring their own // flow control to read out responses one cycle after putting a read // request. -module mkByteRAM(ChipAddr chip_addr, ByteRAM ifc); - EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) upper <- mkNibbleRAM(chip_addr); - EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) lower <- mkNibbleRAM(chip_addr); +module mkByteRAM(ChipAddr chip_addr, VRAMCoreInternal#(VRAMData) ifc); + VRAMCoreInternal#(Bit#(4)) upper <- mkNibbleRAM(chip_addr); + VRAMCoreInternal#(Bit#(4)) lower <- mkNibbleRAM(chip_addr); - interface EBRPort portA; - method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in); - upper.portA.put(chip_select, write, addr, truncate(data_in>>4)); - lower.portA.put(chip_select, write, addr, truncate(data_in)); - endmethod - - method VRAMData read(); - return (extend(upper.portA.read())<<4) | (extend(lower.portA.read())); - endmethod + interface Server portA; + interface Put request; + method Action put(req); + Maybe#(Bit#(4)) ud = tagged Invalid; + Maybe#(Bit#(4)) ld = tagged Invalid; + if (req.data matches tagged Valid .data) begin + ud = tagged Valid data[7:4]; + ld = tagged Valid data[3:0]; + end + upper.portA.request.put(VRAMInternalRequest{ + addr: req.addr, + data: ud + }); + lower.portA.request.put(VRAMInternalRequest{ + addr: req.addr, + data: ld + }); + endmethod + endinterface + interface Get response; + method ActionValue#(VRAMData) get(); + let u <- upper.portA.response.get(); + let l <- lower.portA.response.get(); + return {u, l}; + endmethod + endinterface endinterface - interface EBRPort portB; - method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in); - upper.portB.put(chip_select, write, addr, truncate(data_in>>4)); - lower.portB.put(chip_select, write, addr, truncate(data_in)); - endmethod - - method VRAMData read(); - return (extend(upper.portB.read())<<4) | (extend(lower.portB.read())); - endmethod + interface Server portB; + interface Put request; + method Action put(req); + Maybe#(Bit#(4)) ud = tagged Invalid; + Maybe#(Bit#(4)) ld = tagged Invalid; + if (req.data matches tagged Valid .data) begin + ud = tagged Valid data[7:4]; + ld = tagged Valid data[3:0]; + end + upper.portB.request.put(VRAMInternalRequest{ + addr: req.addr, + data: ud + }); + lower.portB.request.put(VRAMInternalRequest{ + addr: req.addr, + data: ld + }); + endmethod + endinterface + interface Get response; + method ActionValue#(VRAMData) get(); + let u <- upper.portB.response.get(); + let l <- lower.portB.response.get(); + return {u, l}; + endmethod + endinterface endinterface endmodule : mkByteRAM @@ -105,51 +177,55 @@ endmodule : mkByteRAM // hardwired chip select lines to route inputs appropriately and a mux // tree to collect outputs. With num_chips=8, the resulting ByteRAM is // 32768x8b. -module mkByteRAMArray(Integer num_chips, ByteRAM ifc); +// +// The returned ByteRAM _does_ provide flow control: both read() and +// put() are guarded. If reads are consumed as soon as they're +// available, the RAM can process a put() every cycle. +module mkByteRAMArray(Integer num_chips, VRAMCoreInternal#(VRAMData) ifc); if (num_chips > 8) error("mkByteRAMArray can only array 8 raw ByteRAMs"); - ByteRAM blocks[num_chips]; + VRAMCoreInternal#(VRAMData) blocks[num_chips]; for (Integer i=0; i 0); let got <- dut.response.get(); - let want <- next_value(); - dynamicAssert(got.data == want, "wrong value seen during read"); + let want <- next_value.get(); if (flags.verbose) $display("%0d: verify_read(%0d) = %0d, want %0d", cycles.all, verify_idx, got, want); + dynamicAssert(got.data == want, "wrong value seen during read"); if (verify_remaining == 1) $display("Verified %0d reads in %0d cycles", total, cycles); @@ -187,17 +193,36 @@ module mkTwoPortTest(VRAMCore dut, Stmt ret); endseq); endmodule +module mkSlowConsumerTest(VRAMCore dut, Stmt ret); + let winc <- mkIncrementingValue(); + let writer <- mkWriter(dut.portA, winc); + + let rinc <- mkIncrementingValue(); + let rinc_slow <- mkSlowReader(rinc); + let reader <- mkReader(dut.portA, rinc_slow); + + return (seq + writer.start(3000, 6000); + await(writer.done); + + reader.start(3000, 6000); + await(reader.done); + endseq); +endmodule + (* descending_urgency="simple.reader.issue_read,two_port.writer.write" *) module mkTB(Empty); let dut <- mkVRAMCore(112); let simple <- mkSimpleTest(dut); let two_port <- mkTwoPortTest(dut); + let slow_reader <- mkSlowConsumerTest(dut); runTest(100000, mkTest("VRAMCore", seq - mkTest("VRAMCore/simple", simple); - mkTest("VRAMCore/two_port", two_port); + //mkTest("VRAMCore/simple", simple); + //mkTest("VRAMCore/two_port", two_port); + mkTest("VRAMCore/slow_reader", slow_reader); endseq)); endmodule