vram/VRAMCore: fix timing bug with slow readers

The internal EBR array must stall new operations if there's a pending
read result that hasn't been retired yet. All that clever stuff with
non-blocking DelayLines and all that? Yeah, spoiler alert, there's a
reason guarded FIFOs are the preferred API for this stuff. Play unsafe
games, win unsafe prizes.
This commit is contained in:
David Anderson 2024-09-18 21:31:34 -07:00
parent af91f1bda8
commit 1acec6d835
2 changed files with 226 additions and 117 deletions

View File

@ -4,6 +4,8 @@ import GetPut::*;
import ClientServer::*; import ClientServer::*;
import BRAMCore::*; import BRAMCore::*;
import Real::*; import Real::*;
import FIFOF::*;
import SpecialFIFOs::*;
import DelayLine::*; import DelayLine::*;
import ECP5_RAM::*; import ECP5_RAM::*;
@ -21,9 +23,15 @@ typedef UInt#(17) VRAMAddr;
typedef UInt#(2) ArrayAddr; typedef UInt#(2) ArrayAddr;
typedef UInt#(3) ChipAddr; typedef UInt#(3) ChipAddr;
typedef UInt#(12) ByteAddr; typedef UInt#(12) ByteAddr;
typedef struct {
ChipAddr chip;
ByteAddr addr;
} EBRAddr deriving (Bits, Eq, FShow);
// ByteRAM is two EBRs glued together to make a whole-byte memory. typedef struct {
typedef EBR#(ByteAddr, VRAMData, ByteAddr, VRAMData) ByteRAM; EBRAddr addr;
Maybe#(data) data;
} VRAMInternalRequest#(type data) deriving (Bits, Eq, FShow);
typedef struct { typedef struct {
VRAMAddr addr; VRAMAddr addr;
@ -34,6 +42,11 @@ typedef struct {
VRAMData data; VRAMData data;
} VRAMResponse deriving (Bits, Eq, FShow); } VRAMResponse deriving (Bits, Eq, FShow);
interface VRAMCoreInternal#(type data);
interface Server#(VRAMInternalRequest#(data), data) portA;
interface Server#(VRAMInternalRequest#(data), data) portB;
endinterface
module mkNibbleRAM_ECP5(ChipAddr chip_addr, EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) ifc); module mkNibbleRAM_ECP5(ChipAddr chip_addr, EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) ifc);
EBRPortConfig cfg = defaultValue; EBRPortConfig cfg = defaultValue;
cfg.chip_select_addr = chip_addr; cfg.chip_select_addr = chip_addr;
@ -61,43 +74,102 @@ module mkNibbleRAM_Sim(ChipAddr chip_addr, EBR#(ByteAddr, Bit#(4), ByteAddr, Bit
endinterface endinterface
endmodule endmodule
module mkNibbleRAM(ChipAddr chip_addr, EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) ifc); module mkNibbleRAM(ChipAddr chip_addr, VRAMCoreInternal#(Bit#(4)) ifc);
let _ret; let _ret;
if (genC()) if (genC())
_ret <- mkNibbleRAM_Sim(chip_addr); _ret <- mkNibbleRAM_Sim(chip_addr);
else else
_ret <- mkNibbleRAM_ECP5(chip_addr); _ret <- mkNibbleRAM_ECP5(chip_addr);
return _ret;
interface Server portA;
interface Put request;
method Action put(req);
_ret.portA.put(req.addr.chip, isValid(req.data), req.addr.addr, fromMaybe(0, req.data));
endmethod
endinterface
interface Get response;
method ActionValue#(Bit#(4)) get();
return _ret.portA.read();
endmethod
endinterface
endinterface
interface Server portB;
interface Put request;
method Action put(req);
_ret.portB.put(req.addr.chip, isValid(req.data), req.addr.addr, fromMaybe(0, req.data));
endmethod
endinterface
interface Get response;
method ActionValue#(Bit#(4)) get();
return _ret.portB.read();
endmethod
endinterface
endinterface
endmodule endmodule
// mkByteRAM glues two ECP5 EBRs together to make a 4096x8b memory // mkByteRAM glues two ECP5 EBRs together to make a 4096x8b memory
// block. Like the underlying ECP5 EBRs, callers must bring their own // block. Like the underlying ECP5 EBRs, callers must bring their own
// flow control to read out responses one cycle after putting a read // flow control to read out responses one cycle after putting a read
// request. // request.
module mkByteRAM(ChipAddr chip_addr, ByteRAM ifc); module mkByteRAM(ChipAddr chip_addr, VRAMCoreInternal#(VRAMData) ifc);
EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) upper <- mkNibbleRAM(chip_addr); VRAMCoreInternal#(Bit#(4)) upper <- mkNibbleRAM(chip_addr);
EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) lower <- mkNibbleRAM(chip_addr); VRAMCoreInternal#(Bit#(4)) lower <- mkNibbleRAM(chip_addr);
interface EBRPort portA; interface Server portA;
method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in); interface Put request;
upper.portA.put(chip_select, write, addr, truncate(data_in>>4)); method Action put(req);
lower.portA.put(chip_select, write, addr, truncate(data_in)); Maybe#(Bit#(4)) ud = tagged Invalid;
endmethod Maybe#(Bit#(4)) ld = tagged Invalid;
if (req.data matches tagged Valid .data) begin
method VRAMData read(); ud = tagged Valid data[7:4];
return (extend(upper.portA.read())<<4) | (extend(lower.portA.read())); ld = tagged Valid data[3:0];
endmethod end
upper.portA.request.put(VRAMInternalRequest{
addr: req.addr,
data: ud
});
lower.portA.request.put(VRAMInternalRequest{
addr: req.addr,
data: ld
});
endmethod
endinterface
interface Get response;
method ActionValue#(VRAMData) get();
let u <- upper.portA.response.get();
let l <- lower.portA.response.get();
return {u, l};
endmethod
endinterface
endinterface endinterface
interface EBRPort portB; interface Server portB;
method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in); interface Put request;
upper.portB.put(chip_select, write, addr, truncate(data_in>>4)); method Action put(req);
lower.portB.put(chip_select, write, addr, truncate(data_in)); Maybe#(Bit#(4)) ud = tagged Invalid;
endmethod Maybe#(Bit#(4)) ld = tagged Invalid;
if (req.data matches tagged Valid .data) begin
method VRAMData read(); ud = tagged Valid data[7:4];
return (extend(upper.portB.read())<<4) | (extend(lower.portB.read())); ld = tagged Valid data[3:0];
endmethod end
upper.portB.request.put(VRAMInternalRequest{
addr: req.addr,
data: ud
});
lower.portB.request.put(VRAMInternalRequest{
addr: req.addr,
data: ld
});
endmethod
endinterface
interface Get response;
method ActionValue#(VRAMData) get();
let u <- upper.portB.response.get();
let l <- lower.portB.response.get();
return {u, l};
endmethod
endinterface
endinterface endinterface
endmodule : mkByteRAM endmodule : mkByteRAM
@ -105,51 +177,55 @@ endmodule : mkByteRAM
// hardwired chip select lines to route inputs appropriately and a mux // hardwired chip select lines to route inputs appropriately and a mux
// tree to collect outputs. With num_chips=8, the resulting ByteRAM is // tree to collect outputs. With num_chips=8, the resulting ByteRAM is
// 32768x8b. // 32768x8b.
module mkByteRAMArray(Integer num_chips, ByteRAM ifc); //
// The returned ByteRAM _does_ provide flow control: both read() and
// put() are guarded. If reads are consumed as soon as they're
// available, the RAM can process a put() every cycle.
module mkByteRAMArray(Integer num_chips, VRAMCoreInternal#(VRAMData) ifc);
if (num_chips > 8) if (num_chips > 8)
error("mkByteRAMArray can only array 8 raw ByteRAMs"); error("mkByteRAMArray can only array 8 raw ByteRAMs");
ByteRAM blocks[num_chips]; VRAMCoreInternal#(VRAMData) blocks[num_chips];
for (Integer i=0; i<num_chips; i=i+1) for (Integer i=0; i<num_chips; i=i+1)
blocks[i] <- mkByteRAM(fromInteger(i)); blocks[i] <- mkByteRAM(fromInteger(i));
DelayLine#(ChipAddr) read_chip_A <- mkDelayLine(1); FIFOF#(ChipAddr) read_chip_A <- mkPipelineFIFOF();
DelayLine#(ChipAddr) read_chip_B <- mkDelayLine(1); FIFOF#(ChipAddr) read_chip_B <- mkPipelineFIFOF();
interface EBRPort portA; interface Server portA;
method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in); interface Put request;
for (Integer i=0; i<num_chips; i=i+1) method Action put(req) if (read_chip_A.notFull);
blocks[i].portA.put(chip_select, write, addr, data_in); for (Integer i=0; i<num_chips; i=i+1)
if (!write) blocks[i].portA.request.put(req);
read_chip_A <= chip_select; if (!isValid(req.data))
endmethod read_chip_A.enq(req.addr.chip);
method VRAMData read(); endmethod
if (read_chip_A.ready) endinterface
if (read_chip_A <= fromInteger(num_chips-1)) interface Get response;
return blocks[read_chip_A].portA.read(); method ActionValue#(VRAMData) get();
else read_chip_A.deq();
return 0; let res <- blocks[read_chip_A.first].portA.response.get();
else return res;
return 0; endmethod
endmethod endinterface
endinterface endinterface
interface EBRPort portB; interface Server portB;
method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in); interface Put request;
for (Integer i=0; i<num_chips; i=i+1) method Action put(req) if (read_chip_B.notFull);
blocks[i].portB.put(chip_select, write, addr, data_in); for (Integer i=0; i<num_chips; i=i+1)
if (!write) blocks[i].portB.request.put(req);
read_chip_B <= chip_select; if (!isValid(req.data))
endmethod read_chip_B.enq(req.addr.chip);
method VRAMData read(); endmethod
if (read_chip_B.ready) endinterface
if (read_chip_B <= fromInteger(num_chips-1)) interface Get response;
return blocks[read_chip_B].portB.read(); method ActionValue#(VRAMData) get();
else read_chip_B.deq();
return 0; let res <- blocks[read_chip_B.first].portB.response.get();
else return res;
return 0; endmethod
endmethod endinterface
endinterface endinterface
endmodule endmodule
@ -180,49 +256,57 @@ module mkVRAMCore(Integer num_kilobytes, VRAMCore ifc);
let num_byterams = num_bytes/4096; let num_byterams = num_bytes/4096;
let num_arrays = ceil(fromInteger(num_byterams) / 8); let num_arrays = ceil(fromInteger(num_byterams) / 8);
function Tuple3#(ArrayAddr, ChipAddr, ByteAddr) split_addr(VRAMAddr a); function Tuple2#(ArrayAddr, EBRAddr) split_addr(VRAMAddr a);
return unpack(pack(a)); return unpack(pack(a));
endfunction endfunction
ByteRAM arrays[num_arrays]; VRAMCoreInternal#(VRAMData) arrays[num_arrays];
for (Integer i=0; i<num_arrays; i=i+1) begin for (Integer i=0; i<num_arrays; i=i+1) begin
let array_size = min(num_byterams - (i*8), 8); let array_size = min(num_byterams - (i*8), 8);
arrays[i] <- mkByteRAMArray(array_size); arrays[i] <- mkByteRAMArray(array_size);
end end
Reg#(Maybe#(ArrayAddr)) inflight_A[2] <- mkCReg(2, tagged Invalid); FIFOF#(ArrayAddr) array_addr_A <- mkPipelineFIFOF();
Reg#(Maybe#(ArrayAddr)) inflight_B[2] <- mkCReg(2, tagged Invalid); FIFOF#(ArrayAddr) array_addr_B <- mkPipelineFIFOF();
interface Server portA; interface Server portA;
interface Put request; interface Put request;
method Action put(VRAMRequest req) if (inflight_A[1] matches tagged Invalid); method Action put(req);
match {.array, .chip, .byteaddr} = split_addr(req.addr); match {.array, .addr} = split_addr(req.addr);
arrays[array].portA.put(chip, isValid(req.data), byteaddr, fromMaybe(0, req.data)); arrays[array].portA.request.put(VRAMInternalRequest{
addr: addr,
data: req.data
});
if (!isValid(req.data)) if (!isValid(req.data))
inflight_A[1] <= tagged Valid array; array_addr_A.enq(array);
endmethod endmethod
endinterface endinterface
interface Get response; interface Get response;
method ActionValue#(VRAMResponse) get() if (inflight_A[0] matches tagged Valid .array); method ActionValue#(VRAMResponse) get();
inflight_A[0] <= tagged Invalid; array_addr_A.deq();
return VRAMResponse{data: arrays[array].portA.read()}; let ret <- arrays[array_addr_A.first].portA.response.get();
return VRAMResponse{data: ret};
endmethod endmethod
endinterface endinterface
endinterface endinterface
interface Server portB; interface Server portB;
interface Put request; interface Put request;
method Action put(VRAMRequest req) if (inflight_B[1] matches tagged Invalid); method Action put(req);
match {.array, .chip, .byteaddr} = split_addr(req.addr); match {.array, .addr} = split_addr(req.addr);
arrays[array].portB.put(0, isValid(req.data), byteaddr, fromMaybe(0, req.data)); arrays[array].portB.request.put(VRAMInternalRequest{
addr: addr,
data: req.data
});
if (!isValid(req.data)) if (!isValid(req.data))
inflight_B[1] <= tagged Valid array; array_addr_B.enq(array);
endmethod endmethod
endinterface endinterface
interface Get response; interface Get response;
method ActionValue#(VRAMResponse) get() if (inflight_B[0] matches tagged Valid .array); method ActionValue#(VRAMResponse) get();
inflight_B[0] <= tagged Invalid; array_addr_B.deq();
return VRAMResponse{data: arrays[array].portB.read()}; let ret <- arrays[array_addr_B.first].portB.response.get();
return VRAMResponse{data: ret};
endmethod endmethod
endinterface endinterface
endinterface endinterface

View File

@ -21,40 +21,46 @@ function ActionValue#(Bool) verbose();
endactionvalue); endactionvalue);
endfunction endfunction
typedef (function ActionValue#(Bit#(8)) next()) ValFn; module mkConstantValue(Integer cnst, Get#(Bit#(8)) ifc);
method ActionValue#(Bit#(8)) get();
function ValFn constant_value(Integer cnst); return fromInteger(cnst);
function ActionValue#(Bit#(8)) next(); endmethod
return (actionvalue
return fromInteger(cnst);
endactionvalue);
endfunction
return next;
endfunction
module mkIncrementingValue(ValFn);
Reg#(Bit#(8)) val <- mkReg(0);
function ActionValue#(Bit#(8)) next();
return (actionvalue
// Cycle through 101 values. 101 is prime, so the
// pattern it generates doesn't align to a power of
// two and should detect any memory mapping errors.
if (val == 100)
val <= 0;
else
val <= val+1;
// Add another number to get all nonzero values, to
// detect writes that don't stick.
return 23+val;
endactionvalue);
endfunction
return next;
endmodule endmodule
module mkWriter(Server#(VRAMRequest, VRAMResponse) dut, ValFn next_value, Machine ifc); module mkIncrementingValue(Get#(Bit#(8)));
Reg#(Bit#(8)) val <- mkReg(0);
method ActionValue#(Bit#(8)) get();
// Cycle through 101 values. 101 is prime, so the pattern it
// generates doesn't align to a power of two and should detect
// any memory mapping errors.
if (val == 100)
val <= 0;
else
val <= val+1;
// Add another number to get all nonzero values, to detect
// writes that don't stick.
return 23+val;
endmethod
endmodule
module mkSlowReader(Get#(Bit#(8)) inner, Get#(Bit#(8)) ifc);
Reg#(Bool) delay <- mkReg(True);
(* no_implicit_conditions,fire_when_enabled *)
rule clear_delay (delay);
delay <= False;
endrule
method ActionValue#(Bit#(8)) get() if (!delay);
delay <= True;
let ret <- inner.get();
return ret;
endmethod
endmodule
module mkWriter(Server#(VRAMRequest, VRAMResponse) dut, Get#(Bit#(8)) next_value, Machine ifc);
let flags <- mkTestFlags(); let flags <- mkTestFlags();
let cycles <- mkCycleCounter(); let cycles <- mkCycleCounter();
let write_cycle_time <- mkCycleCounter(); let write_cycle_time <- mkCycleCounter();
@ -67,7 +73,7 @@ module mkWriter(Server#(VRAMRequest, VRAMResponse) dut, ValFn next_value, Machin
dynamicAssert(write_cycle_time == 1, "write didn't happen every cycle"); dynamicAssert(write_cycle_time == 1, "write didn't happen every cycle");
write_cycle_time.reset(); write_cycle_time.reset();
let data <- next_value(); let data <- next_value.get();
let req = VRAMRequest{ let req = VRAMRequest{
addr: idx, addr: idx,
data: tagged Valid data data: tagged Valid data
@ -96,7 +102,7 @@ module mkWriter(Server#(VRAMRequest, VRAMResponse) dut, ValFn next_value, Machin
endmethod endmethod
endmodule endmodule
module mkReader(Server#(VRAMRequest, VRAMResponse) dut, ValFn next_value, Machine ifc); module mkReader(Server#(VRAMRequest, VRAMResponse) dut, Get#(Bit#(8)) next_value, Machine ifc);
let flags <- mkTestFlags(); let flags <- mkTestFlags();
let cycles <- mkCycleCounter(); let cycles <- mkCycleCounter();
@ -126,11 +132,11 @@ module mkReader(Server#(VRAMRequest, VRAMResponse) dut, ValFn next_value, Machin
rule verify_read (verify_remaining > 0); rule verify_read (verify_remaining > 0);
let got <- dut.response.get(); let got <- dut.response.get();
let want <- next_value(); let want <- next_value.get();
dynamicAssert(got.data == want, "wrong value seen during read");
if (flags.verbose) if (flags.verbose)
$display("%0d: verify_read(%0d) = %0d, want %0d", cycles.all, verify_idx, got, want); $display("%0d: verify_read(%0d) = %0d, want %0d", cycles.all, verify_idx, got, want);
dynamicAssert(got.data == want, "wrong value seen during read");
if (verify_remaining == 1) if (verify_remaining == 1)
$display("Verified %0d reads in %0d cycles", total, cycles); $display("Verified %0d reads in %0d cycles", total, cycles);
@ -187,17 +193,36 @@ module mkTwoPortTest(VRAMCore dut, Stmt ret);
endseq); endseq);
endmodule endmodule
module mkSlowConsumerTest(VRAMCore dut, Stmt ret);
let winc <- mkIncrementingValue();
let writer <- mkWriter(dut.portA, winc);
let rinc <- mkIncrementingValue();
let rinc_slow <- mkSlowReader(rinc);
let reader <- mkReader(dut.portA, rinc_slow);
return (seq
writer.start(3000, 6000);
await(writer.done);
reader.start(3000, 6000);
await(reader.done);
endseq);
endmodule
(* descending_urgency="simple.reader.issue_read,two_port.writer.write" *) (* descending_urgency="simple.reader.issue_read,two_port.writer.write" *)
module mkTB(Empty); module mkTB(Empty);
let dut <- mkVRAMCore(112); let dut <- mkVRAMCore(112);
let simple <- mkSimpleTest(dut); let simple <- mkSimpleTest(dut);
let two_port <- mkTwoPortTest(dut); let two_port <- mkTwoPortTest(dut);
let slow_reader <- mkSlowConsumerTest(dut);
runTest(100000, runTest(100000,
mkTest("VRAMCore", seq mkTest("VRAMCore", seq
mkTest("VRAMCore/simple", simple); //mkTest("VRAMCore/simple", simple);
mkTest("VRAMCore/two_port", two_port); //mkTest("VRAMCore/two_port", two_port);
mkTest("VRAMCore/slow_reader", slow_reader);
endseq)); endseq));
endmodule endmodule