vram/VRAMCore: fix timing bug with slow readers

The internal EBR array must stall new operations if there's a pending
read result that hasn't been retired yet. All that clever stuff with
non-blocking DelayLines and all that? Yeah, spoiler alert, there's a
reason guarded FIFOs are the preferred API for this stuff. Play unsafe
games, win unsafe prizes.
This commit is contained in:
David Anderson 2024-09-18 21:31:34 -07:00
parent af91f1bda8
commit 1acec6d835
2 changed files with 226 additions and 117 deletions

View File

@ -4,6 +4,8 @@ import GetPut::*;
import ClientServer::*;
import BRAMCore::*;
import Real::*;
import FIFOF::*;
import SpecialFIFOs::*;
import DelayLine::*;
import ECP5_RAM::*;
@ -21,9 +23,15 @@ typedef UInt#(17) VRAMAddr;
typedef UInt#(2) ArrayAddr;
typedef UInt#(3) ChipAddr;
typedef UInt#(12) ByteAddr;
typedef struct {
ChipAddr chip;
ByteAddr addr;
} EBRAddr deriving (Bits, Eq, FShow);
// ByteRAM is two EBRs glued together to make a whole-byte memory.
typedef EBR#(ByteAddr, VRAMData, ByteAddr, VRAMData) ByteRAM;
typedef struct {
EBRAddr addr;
Maybe#(data) data;
} VRAMInternalRequest#(type data) deriving (Bits, Eq, FShow);
typedef struct {
VRAMAddr addr;
@ -34,6 +42,11 @@ typedef struct {
VRAMData data;
} VRAMResponse deriving (Bits, Eq, FShow);
interface VRAMCoreInternal#(type data);
interface Server#(VRAMInternalRequest#(data), data) portA;
interface Server#(VRAMInternalRequest#(data), data) portB;
endinterface
module mkNibbleRAM_ECP5(ChipAddr chip_addr, EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) ifc);
EBRPortConfig cfg = defaultValue;
cfg.chip_select_addr = chip_addr;
@ -61,43 +74,102 @@ module mkNibbleRAM_Sim(ChipAddr chip_addr, EBR#(ByteAddr, Bit#(4), ByteAddr, Bit
endinterface
endmodule
module mkNibbleRAM(ChipAddr chip_addr, EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) ifc);
module mkNibbleRAM(ChipAddr chip_addr, VRAMCoreInternal#(Bit#(4)) ifc);
let _ret;
if (genC())
_ret <- mkNibbleRAM_Sim(chip_addr);
else
_ret <- mkNibbleRAM_ECP5(chip_addr);
return _ret;
interface Server portA;
interface Put request;
method Action put(req);
_ret.portA.put(req.addr.chip, isValid(req.data), req.addr.addr, fromMaybe(0, req.data));
endmethod
endinterface
interface Get response;
method ActionValue#(Bit#(4)) get();
return _ret.portA.read();
endmethod
endinterface
endinterface
interface Server portB;
interface Put request;
method Action put(req);
_ret.portB.put(req.addr.chip, isValid(req.data), req.addr.addr, fromMaybe(0, req.data));
endmethod
endinterface
interface Get response;
method ActionValue#(Bit#(4)) get();
return _ret.portB.read();
endmethod
endinterface
endinterface
endmodule
// mkByteRAM glues two ECP5 EBRs together to make a 4096x8b memory
// block. Like the underlying ECP5 EBRs, callers must bring their own
// flow control to read out responses one cycle after putting a read
// request.
module mkByteRAM(ChipAddr chip_addr, ByteRAM ifc);
EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) upper <- mkNibbleRAM(chip_addr);
EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) lower <- mkNibbleRAM(chip_addr);
module mkByteRAM(ChipAddr chip_addr, VRAMCoreInternal#(VRAMData) ifc);
VRAMCoreInternal#(Bit#(4)) upper <- mkNibbleRAM(chip_addr);
VRAMCoreInternal#(Bit#(4)) lower <- mkNibbleRAM(chip_addr);
interface EBRPort portA;
method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in);
upper.portA.put(chip_select, write, addr, truncate(data_in>>4));
lower.portA.put(chip_select, write, addr, truncate(data_in));
endmethod
method VRAMData read();
return (extend(upper.portA.read())<<4) | (extend(lower.portA.read()));
endmethod
interface Server portA;
interface Put request;
method Action put(req);
Maybe#(Bit#(4)) ud = tagged Invalid;
Maybe#(Bit#(4)) ld = tagged Invalid;
if (req.data matches tagged Valid .data) begin
ud = tagged Valid data[7:4];
ld = tagged Valid data[3:0];
end
upper.portA.request.put(VRAMInternalRequest{
addr: req.addr,
data: ud
});
lower.portA.request.put(VRAMInternalRequest{
addr: req.addr,
data: ld
});
endmethod
endinterface
interface Get response;
method ActionValue#(VRAMData) get();
let u <- upper.portA.response.get();
let l <- lower.portA.response.get();
return {u, l};
endmethod
endinterface
endinterface
interface EBRPort portB;
method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in);
upper.portB.put(chip_select, write, addr, truncate(data_in>>4));
lower.portB.put(chip_select, write, addr, truncate(data_in));
endmethod
method VRAMData read();
return (extend(upper.portB.read())<<4) | (extend(lower.portB.read()));
endmethod
interface Server portB;
interface Put request;
method Action put(req);
Maybe#(Bit#(4)) ud = tagged Invalid;
Maybe#(Bit#(4)) ld = tagged Invalid;
if (req.data matches tagged Valid .data) begin
ud = tagged Valid data[7:4];
ld = tagged Valid data[3:0];
end
upper.portB.request.put(VRAMInternalRequest{
addr: req.addr,
data: ud
});
lower.portB.request.put(VRAMInternalRequest{
addr: req.addr,
data: ld
});
endmethod
endinterface
interface Get response;
method ActionValue#(VRAMData) get();
let u <- upper.portB.response.get();
let l <- lower.portB.response.get();
return {u, l};
endmethod
endinterface
endinterface
endmodule : mkByteRAM
@ -105,51 +177,55 @@ endmodule : mkByteRAM
// hardwired chip select lines to route inputs appropriately and a mux
// tree to collect outputs. With num_chips=8, the resulting ByteRAM is
// 32768x8b.
module mkByteRAMArray(Integer num_chips, ByteRAM ifc);
//
// The returned ByteRAM _does_ provide flow control: both read() and
// put() are guarded. If reads are consumed as soon as they're
// available, the RAM can process a put() every cycle.
module mkByteRAMArray(Integer num_chips, VRAMCoreInternal#(VRAMData) ifc);
if (num_chips > 8)
error("mkByteRAMArray can only array 8 raw ByteRAMs");
ByteRAM blocks[num_chips];
VRAMCoreInternal#(VRAMData) blocks[num_chips];
for (Integer i=0; i<num_chips; i=i+1)
blocks[i] <- mkByteRAM(fromInteger(i));
DelayLine#(ChipAddr) read_chip_A <- mkDelayLine(1);
DelayLine#(ChipAddr) read_chip_B <- mkDelayLine(1);
FIFOF#(ChipAddr) read_chip_A <- mkPipelineFIFOF();
FIFOF#(ChipAddr) read_chip_B <- mkPipelineFIFOF();
interface EBRPort portA;
method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in);
for (Integer i=0; i<num_chips; i=i+1)
blocks[i].portA.put(chip_select, write, addr, data_in);
if (!write)
read_chip_A <= chip_select;
endmethod
method VRAMData read();
if (read_chip_A.ready)
if (read_chip_A <= fromInteger(num_chips-1))
return blocks[read_chip_A].portA.read();
else
return 0;
else
return 0;
endmethod
interface Server portA;
interface Put request;
method Action put(req) if (read_chip_A.notFull);
for (Integer i=0; i<num_chips; i=i+1)
blocks[i].portA.request.put(req);
if (!isValid(req.data))
read_chip_A.enq(req.addr.chip);
endmethod
endinterface
interface Get response;
method ActionValue#(VRAMData) get();
read_chip_A.deq();
let res <- blocks[read_chip_A.first].portA.response.get();
return res;
endmethod
endinterface
endinterface
interface EBRPort portB;
method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in);
for (Integer i=0; i<num_chips; i=i+1)
blocks[i].portB.put(chip_select, write, addr, data_in);
if (!write)
read_chip_B <= chip_select;
endmethod
method VRAMData read();
if (read_chip_B.ready)
if (read_chip_B <= fromInteger(num_chips-1))
return blocks[read_chip_B].portB.read();
else
return 0;
else
return 0;
endmethod
interface Server portB;
interface Put request;
method Action put(req) if (read_chip_B.notFull);
for (Integer i=0; i<num_chips; i=i+1)
blocks[i].portB.request.put(req);
if (!isValid(req.data))
read_chip_B.enq(req.addr.chip);
endmethod
endinterface
interface Get response;
method ActionValue#(VRAMData) get();
read_chip_B.deq();
let res <- blocks[read_chip_B.first].portB.response.get();
return res;
endmethod
endinterface
endinterface
endmodule
@ -180,49 +256,57 @@ module mkVRAMCore(Integer num_kilobytes, VRAMCore ifc);
let num_byterams = num_bytes/4096;
let num_arrays = ceil(fromInteger(num_byterams) / 8);
function Tuple3#(ArrayAddr, ChipAddr, ByteAddr) split_addr(VRAMAddr a);
function Tuple2#(ArrayAddr, EBRAddr) split_addr(VRAMAddr a);
return unpack(pack(a));
endfunction
ByteRAM arrays[num_arrays];
VRAMCoreInternal#(VRAMData) arrays[num_arrays];
for (Integer i=0; i<num_arrays; i=i+1) begin
let array_size = min(num_byterams - (i*8), 8);
arrays[i] <- mkByteRAMArray(array_size);
end
Reg#(Maybe#(ArrayAddr)) inflight_A[2] <- mkCReg(2, tagged Invalid);
Reg#(Maybe#(ArrayAddr)) inflight_B[2] <- mkCReg(2, tagged Invalid);
FIFOF#(ArrayAddr) array_addr_A <- mkPipelineFIFOF();
FIFOF#(ArrayAddr) array_addr_B <- mkPipelineFIFOF();
interface Server portA;
interface Put request;
method Action put(VRAMRequest req) if (inflight_A[1] matches tagged Invalid);
match {.array, .chip, .byteaddr} = split_addr(req.addr);
arrays[array].portA.put(chip, isValid(req.data), byteaddr, fromMaybe(0, req.data));
method Action put(req);
match {.array, .addr} = split_addr(req.addr);
arrays[array].portA.request.put(VRAMInternalRequest{
addr: addr,
data: req.data
});
if (!isValid(req.data))
inflight_A[1] <= tagged Valid array;
array_addr_A.enq(array);
endmethod
endinterface
interface Get response;
method ActionValue#(VRAMResponse) get() if (inflight_A[0] matches tagged Valid .array);
inflight_A[0] <= tagged Invalid;
return VRAMResponse{data: arrays[array].portA.read()};
method ActionValue#(VRAMResponse) get();
array_addr_A.deq();
let ret <- arrays[array_addr_A.first].portA.response.get();
return VRAMResponse{data: ret};
endmethod
endinterface
endinterface
interface Server portB;
interface Put request;
method Action put(VRAMRequest req) if (inflight_B[1] matches tagged Invalid);
match {.array, .chip, .byteaddr} = split_addr(req.addr);
arrays[array].portB.put(0, isValid(req.data), byteaddr, fromMaybe(0, req.data));
method Action put(req);
match {.array, .addr} = split_addr(req.addr);
arrays[array].portB.request.put(VRAMInternalRequest{
addr: addr,
data: req.data
});
if (!isValid(req.data))
inflight_B[1] <= tagged Valid array;
array_addr_B.enq(array);
endmethod
endinterface
interface Get response;
method ActionValue#(VRAMResponse) get() if (inflight_B[0] matches tagged Valid .array);
inflight_B[0] <= tagged Invalid;
return VRAMResponse{data: arrays[array].portB.read()};
method ActionValue#(VRAMResponse) get();
array_addr_B.deq();
let ret <- arrays[array_addr_B.first].portB.response.get();
return VRAMResponse{data: ret};
endmethod
endinterface
endinterface

View File

@ -21,40 +21,46 @@ function ActionValue#(Bool) verbose();
endactionvalue);
endfunction
typedef (function ActionValue#(Bit#(8)) next()) ValFn;
function ValFn constant_value(Integer cnst);
function ActionValue#(Bit#(8)) next();
return (actionvalue
return fromInteger(cnst);
endactionvalue);
endfunction
return next;
endfunction
module mkIncrementingValue(ValFn);
Reg#(Bit#(8)) val <- mkReg(0);
function ActionValue#(Bit#(8)) next();
return (actionvalue
// Cycle through 101 values. 101 is prime, so the
// pattern it generates doesn't align to a power of
// two and should detect any memory mapping errors.
if (val == 100)
val <= 0;
else
val <= val+1;
// Add another number to get all nonzero values, to
// detect writes that don't stick.
return 23+val;
endactionvalue);
endfunction
return next;
module mkConstantValue(Integer cnst, Get#(Bit#(8)) ifc);
method ActionValue#(Bit#(8)) get();
return fromInteger(cnst);
endmethod
endmodule
module mkWriter(Server#(VRAMRequest, VRAMResponse) dut, ValFn next_value, Machine ifc);
module mkIncrementingValue(Get#(Bit#(8)));
Reg#(Bit#(8)) val <- mkReg(0);
method ActionValue#(Bit#(8)) get();
// Cycle through 101 values. 101 is prime, so the pattern it
// generates doesn't align to a power of two and should detect
// any memory mapping errors.
if (val == 100)
val <= 0;
else
val <= val+1;
// Add another number to get all nonzero values, to detect
// writes that don't stick.
return 23+val;
endmethod
endmodule
module mkSlowReader(Get#(Bit#(8)) inner, Get#(Bit#(8)) ifc);
Reg#(Bool) delay <- mkReg(True);
(* no_implicit_conditions,fire_when_enabled *)
rule clear_delay (delay);
delay <= False;
endrule
method ActionValue#(Bit#(8)) get() if (!delay);
delay <= True;
let ret <- inner.get();
return ret;
endmethod
endmodule
module mkWriter(Server#(VRAMRequest, VRAMResponse) dut, Get#(Bit#(8)) next_value, Machine ifc);
let flags <- mkTestFlags();
let cycles <- mkCycleCounter();
let write_cycle_time <- mkCycleCounter();
@ -67,7 +73,7 @@ module mkWriter(Server#(VRAMRequest, VRAMResponse) dut, ValFn next_value, Machin
dynamicAssert(write_cycle_time == 1, "write didn't happen every cycle");
write_cycle_time.reset();
let data <- next_value();
let data <- next_value.get();
let req = VRAMRequest{
addr: idx,
data: tagged Valid data
@ -96,7 +102,7 @@ module mkWriter(Server#(VRAMRequest, VRAMResponse) dut, ValFn next_value, Machin
endmethod
endmodule
module mkReader(Server#(VRAMRequest, VRAMResponse) dut, ValFn next_value, Machine ifc);
module mkReader(Server#(VRAMRequest, VRAMResponse) dut, Get#(Bit#(8)) next_value, Machine ifc);
let flags <- mkTestFlags();
let cycles <- mkCycleCounter();
@ -126,11 +132,11 @@ module mkReader(Server#(VRAMRequest, VRAMResponse) dut, ValFn next_value, Machin
rule verify_read (verify_remaining > 0);
let got <- dut.response.get();
let want <- next_value();
dynamicAssert(got.data == want, "wrong value seen during read");
let want <- next_value.get();
if (flags.verbose)
$display("%0d: verify_read(%0d) = %0d, want %0d", cycles.all, verify_idx, got, want);
dynamicAssert(got.data == want, "wrong value seen during read");
if (verify_remaining == 1)
$display("Verified %0d reads in %0d cycles", total, cycles);
@ -187,17 +193,36 @@ module mkTwoPortTest(VRAMCore dut, Stmt ret);
endseq);
endmodule
module mkSlowConsumerTest(VRAMCore dut, Stmt ret);
let winc <- mkIncrementingValue();
let writer <- mkWriter(dut.portA, winc);
let rinc <- mkIncrementingValue();
let rinc_slow <- mkSlowReader(rinc);
let reader <- mkReader(dut.portA, rinc_slow);
return (seq
writer.start(3000, 6000);
await(writer.done);
reader.start(3000, 6000);
await(reader.done);
endseq);
endmodule
(* descending_urgency="simple.reader.issue_read,two_port.writer.write" *)
module mkTB(Empty);
let dut <- mkVRAMCore(112);
let simple <- mkSimpleTest(dut);
let two_port <- mkTwoPortTest(dut);
let slow_reader <- mkSlowConsumerTest(dut);
runTest(100000,
mkTest("VRAMCore", seq
mkTest("VRAMCore/simple", simple);
mkTest("VRAMCore/two_port", two_port);
//mkTest("VRAMCore/simple", simple);
//mkTest("VRAMCore/two_port", two_port);
mkTest("VRAMCore/slow_reader", slow_reader);
endseq));
endmodule