vram/VRAMCore: fix timing bug with slow readers
The internal EBR array must stall new operations if there's a pending read result that hasn't been retired yet. All that clever stuff with non-blocking DelayLines? Yeah, spoiler alert: there's a reason guarded FIFOs are the preferred API for this stuff. Play unsafe games, win unsafe prizes.
This commit is contained in:
parent
af91f1bda8
commit
1acec6d835
|
@ -4,6 +4,8 @@ import GetPut::*;
|
|||
import ClientServer::*;
|
||||
import BRAMCore::*;
|
||||
import Real::*;
|
||||
import FIFOF::*;
|
||||
import SpecialFIFOs::*;
|
||||
|
||||
import DelayLine::*;
|
||||
import ECP5_RAM::*;
|
||||
|
@ -21,9 +23,15 @@ typedef UInt#(17) VRAMAddr;
|
|||
typedef UInt#(2) ArrayAddr;
|
||||
typedef UInt#(3) ChipAddr;
|
||||
typedef UInt#(12) ByteAddr;
|
||||
// EBRAddr addresses one byte within an array of byte RAMs: `chip`
// selects which RAM in the array, and `addr` is the byte offset inside
// that RAM. Packed, it is 3+12 = 15 bits wide (ChipAddr is UInt#(3),
// ByteAddr is UInt#(12)).
typedef struct {
|
||||
ChipAddr chip;
|
||||
ByteAddr addr;
|
||||
} EBRAddr deriving (Bits, Eq, FShow);
|
||||
|
||||
// ByteRAM is two EBRs glued together to make a whole-byte memory.
|
||||
typedef EBR#(ByteAddr, VRAMData, ByteAddr, VRAMData) ByteRAM;
|
||||
// VRAMInternalRequest is a single read or write operation against the
// internal RAM hierarchy, parameterized on the width of the stored
// data. A Valid `data` makes the request a write of that value; an
// Invalid `data` makes it a read.
typedef struct {
|
||||
EBRAddr addr;
|
||||
Maybe#(data) data;
|
||||
} VRAMInternalRequest#(type data) deriving (Bits, Eq, FShow);
|
||||
|
||||
typedef struct {
|
||||
VRAMAddr addr;
|
||||
|
@ -34,6 +42,11 @@ typedef struct {
|
|||
VRAMData data;
|
||||
} VRAMResponse deriving (Bits, Eq, FShow);
|
||||
|
||||
// VRAMCoreInternal is the two-port interface shared by every layer of
// the internal RAM hierarchy. Each port is a Server that accepts
// VRAMInternalRequests and hands back read data of type `data`.
interface VRAMCoreInternal#(type data);
|
||||
interface Server#(VRAMInternalRequest#(data), data) portA;
|
||||
interface Server#(VRAMInternalRequest#(data), data) portB;
|
||||
endinterface
|
||||
|
||||
module mkNibbleRAM_ECP5(ChipAddr chip_addr, EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) ifc);
|
||||
EBRPortConfig cfg = defaultValue;
|
||||
cfg.chip_select_addr = chip_addr;
|
||||
|
@ -61,96 +74,159 @@ module mkNibbleRAM_Sim(ChipAddr chip_addr, EBR#(ByteAddr, Bit#(4), ByteAddr, Bit
|
|||
endinterface
|
||||
endmodule
|
||||
|
||||
module mkNibbleRAM(ChipAddr chip_addr, EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) ifc);
|
||||
module mkNibbleRAM(ChipAddr chip_addr, VRAMCoreInternal#(Bit#(4)) ifc);
|
||||
let _ret;
|
||||
if (genC())
|
||||
_ret <- mkNibbleRAM_Sim(chip_addr);
|
||||
else
|
||||
_ret <- mkNibbleRAM_ECP5(chip_addr);
|
||||
return _ret;
|
||||
|
||||
interface Server portA;
|
||||
interface Put request;
|
||||
method Action put(req);
|
||||
_ret.portA.put(req.addr.chip, isValid(req.data), req.addr.addr, fromMaybe(0, req.data));
|
||||
endmethod
|
||||
endinterface
|
||||
interface Get response;
|
||||
method ActionValue#(Bit#(4)) get();
|
||||
return _ret.portA.read();
|
||||
endmethod
|
||||
endinterface
|
||||
endinterface
|
||||
|
||||
interface Server portB;
|
||||
interface Put request;
|
||||
method Action put(req);
|
||||
_ret.portB.put(req.addr.chip, isValid(req.data), req.addr.addr, fromMaybe(0, req.data));
|
||||
endmethod
|
||||
endinterface
|
||||
interface Get response;
|
||||
method ActionValue#(Bit#(4)) get();
|
||||
return _ret.portB.read();
|
||||
endmethod
|
||||
endinterface
|
||||
endinterface
|
||||
endmodule
|
||||
|
||||
// mkByteRAM glues two ECP5 EBRs together to make a 4096x8b memory
|
||||
// block. Like the underlying ECP5 EBRs, callers must bring their own
|
||||
// flow control to read out responses one cycle after putting a read
|
||||
// request.
|
||||
module mkByteRAM(ChipAddr chip_addr, ByteRAM ifc);
|
||||
EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) upper <- mkNibbleRAM(chip_addr);
|
||||
EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) lower <- mkNibbleRAM(chip_addr);
|
||||
module mkByteRAM(ChipAddr chip_addr, VRAMCoreInternal#(VRAMData) ifc);
|
||||
VRAMCoreInternal#(Bit#(4)) upper <- mkNibbleRAM(chip_addr);
|
||||
VRAMCoreInternal#(Bit#(4)) lower <- mkNibbleRAM(chip_addr);
|
||||
|
||||
interface EBRPort portA;
|
||||
method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in);
|
||||
upper.portA.put(chip_select, write, addr, truncate(data_in>>4));
|
||||
lower.portA.put(chip_select, write, addr, truncate(data_in));
|
||||
endmethod
|
||||
|
||||
method VRAMData read();
|
||||
return (extend(upper.portA.read())<<4) | (extend(lower.portA.read()));
|
||||
interface Server portA;
|
||||
interface Put request;
|
||||
method Action put(req);
|
||||
Maybe#(Bit#(4)) ud = tagged Invalid;
|
||||
Maybe#(Bit#(4)) ld = tagged Invalid;
|
||||
if (req.data matches tagged Valid .data) begin
|
||||
ud = tagged Valid data[7:4];
|
||||
ld = tagged Valid data[3:0];
|
||||
end
|
||||
upper.portA.request.put(VRAMInternalRequest{
|
||||
addr: req.addr,
|
||||
data: ud
|
||||
});
|
||||
lower.portA.request.put(VRAMInternalRequest{
|
||||
addr: req.addr,
|
||||
data: ld
|
||||
});
|
||||
endmethod
|
||||
endinterface
|
||||
|
||||
interface EBRPort portB;
|
||||
method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in);
|
||||
upper.portB.put(chip_select, write, addr, truncate(data_in>>4));
|
||||
lower.portB.put(chip_select, write, addr, truncate(data_in));
|
||||
interface Get response;
|
||||
method ActionValue#(VRAMData) get();
|
||||
let u <- upper.portA.response.get();
|
||||
let l <- lower.portA.response.get();
|
||||
return {u, l};
|
||||
endmethod
|
||||
endinterface
|
||||
endinterface
|
||||
|
||||
method VRAMData read();
|
||||
return (extend(upper.portB.read())<<4) | (extend(lower.portB.read()));
|
||||
interface Server portB;
|
||||
interface Put request;
|
||||
method Action put(req);
|
||||
Maybe#(Bit#(4)) ud = tagged Invalid;
|
||||
Maybe#(Bit#(4)) ld = tagged Invalid;
|
||||
if (req.data matches tagged Valid .data) begin
|
||||
ud = tagged Valid data[7:4];
|
||||
ld = tagged Valid data[3:0];
|
||||
end
|
||||
upper.portB.request.put(VRAMInternalRequest{
|
||||
addr: req.addr,
|
||||
data: ud
|
||||
});
|
||||
lower.portB.request.put(VRAMInternalRequest{
|
||||
addr: req.addr,
|
||||
data: ld
|
||||
});
|
||||
endmethod
|
||||
endinterface
|
||||
interface Get response;
|
||||
method ActionValue#(VRAMData) get();
|
||||
let u <- upper.portB.response.get();
|
||||
let l <- lower.portB.response.get();
|
||||
return {u, l};
|
||||
endmethod
|
||||
endinterface
|
||||
endinterface
|
||||
endmodule : mkByteRAM
|
||||
|
||||
// mkByteRAMArray arrays up to 8 mkByteRAMs together, using the
|
||||
// hardwired chip select lines to route inputs appropriately and a mux
|
||||
// tree to collect outputs. With num_chips=8, the resulting ByteRAM is
|
||||
// 32768x8b.
|
||||
module mkByteRAMArray(Integer num_chips, ByteRAM ifc);
|
||||
//
|
||||
// The returned interface _does_ provide flow control: both
|
||||
// response.get() and request.put() are guarded. If reads are consumed as
|
||||
// soon as they're available, the RAM can process a put() every cycle.
|
||||
module mkByteRAMArray(Integer num_chips, VRAMCoreInternal#(VRAMData) ifc);
|
||||
if (num_chips > 8)
|
||||
error("mkByteRAMArray can only array 8 raw ByteRAMs");
|
||||
|
||||
ByteRAM blocks[num_chips];
|
||||
VRAMCoreInternal#(VRAMData) blocks[num_chips];
|
||||
for (Integer i=0; i<num_chips; i=i+1)
|
||||
blocks[i] <- mkByteRAM(fromInteger(i));
|
||||
|
||||
DelayLine#(ChipAddr) read_chip_A <- mkDelayLine(1);
|
||||
DelayLine#(ChipAddr) read_chip_B <- mkDelayLine(1);
|
||||
FIFOF#(ChipAddr) read_chip_A <- mkPipelineFIFOF();
|
||||
FIFOF#(ChipAddr) read_chip_B <- mkPipelineFIFOF();
|
||||
|
||||
interface EBRPort portA;
|
||||
method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in);
|
||||
interface Server portA;
|
||||
interface Put request;
|
||||
method Action put(req) if (read_chip_A.notFull);
|
||||
for (Integer i=0; i<num_chips; i=i+1)
|
||||
blocks[i].portA.put(chip_select, write, addr, data_in);
|
||||
if (!write)
|
||||
read_chip_A <= chip_select;
|
||||
endmethod
|
||||
method VRAMData read();
|
||||
if (read_chip_A.ready)
|
||||
if (read_chip_A <= fromInteger(num_chips-1))
|
||||
return blocks[read_chip_A].portA.read();
|
||||
else
|
||||
return 0;
|
||||
else
|
||||
return 0;
|
||||
blocks[i].portA.request.put(req);
|
||||
if (!isValid(req.data))
|
||||
read_chip_A.enq(req.addr.chip);
|
||||
endmethod
|
||||
endinterface
|
||||
interface Get response;
|
||||
method ActionValue#(VRAMData) get();
|
||||
read_chip_A.deq();
|
||||
let res <- blocks[read_chip_A.first].portA.response.get();
|
||||
return res;
|
||||
endmethod
|
||||
endinterface
|
||||
endinterface
|
||||
|
||||
interface EBRPort portB;
|
||||
method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in);
|
||||
interface Server portB;
|
||||
interface Put request;
|
||||
method Action put(req) if (read_chip_B.notFull);
|
||||
for (Integer i=0; i<num_chips; i=i+1)
|
||||
blocks[i].portB.put(chip_select, write, addr, data_in);
|
||||
if (!write)
|
||||
read_chip_B <= chip_select;
|
||||
blocks[i].portB.request.put(req);
|
||||
if (!isValid(req.data))
|
||||
read_chip_B.enq(req.addr.chip);
|
||||
endmethod
|
||||
method VRAMData read();
|
||||
if (read_chip_B.ready)
|
||||
if (read_chip_B <= fromInteger(num_chips-1))
|
||||
return blocks[read_chip_B].portB.read();
|
||||
else
|
||||
return 0;
|
||||
else
|
||||
return 0;
|
||||
endinterface
|
||||
interface Get response;
|
||||
method ActionValue#(VRAMData) get();
|
||||
read_chip_B.deq();
|
||||
let res <- blocks[read_chip_B.first].portB.response.get();
|
||||
return res;
|
||||
endmethod
|
||||
endinterface
|
||||
endinterface
|
||||
endmodule
|
||||
|
||||
interface VRAMCore;
|
||||
|
@ -180,49 +256,57 @@ module mkVRAMCore(Integer num_kilobytes, VRAMCore ifc);
|
|||
let num_byterams = num_bytes/4096;
|
||||
let num_arrays = ceil(fromInteger(num_byterams) / 8);
|
||||
|
||||
function Tuple3#(ArrayAddr, ChipAddr, ByteAddr) split_addr(VRAMAddr a);
|
||||
function Tuple2#(ArrayAddr, EBRAddr) split_addr(VRAMAddr a);
|
||||
return unpack(pack(a));
|
||||
endfunction
|
||||
|
||||
ByteRAM arrays[num_arrays];
|
||||
VRAMCoreInternal#(VRAMData) arrays[num_arrays];
|
||||
for (Integer i=0; i<num_arrays; i=i+1) begin
|
||||
let array_size = min(num_byterams - (i*8), 8);
|
||||
arrays[i] <- mkByteRAMArray(array_size);
|
||||
end
|
||||
|
||||
Reg#(Maybe#(ArrayAddr)) inflight_A[2] <- mkCReg(2, tagged Invalid);
|
||||
Reg#(Maybe#(ArrayAddr)) inflight_B[2] <- mkCReg(2, tagged Invalid);
|
||||
FIFOF#(ArrayAddr) array_addr_A <- mkPipelineFIFOF();
|
||||
FIFOF#(ArrayAddr) array_addr_B <- mkPipelineFIFOF();
|
||||
|
||||
interface Server portA;
|
||||
interface Put request;
|
||||
method Action put(VRAMRequest req) if (inflight_A[1] matches tagged Invalid);
|
||||
match {.array, .chip, .byteaddr} = split_addr(req.addr);
|
||||
arrays[array].portA.put(chip, isValid(req.data), byteaddr, fromMaybe(0, req.data));
|
||||
method Action put(req);
|
||||
match {.array, .addr} = split_addr(req.addr);
|
||||
arrays[array].portA.request.put(VRAMInternalRequest{
|
||||
addr: addr,
|
||||
data: req.data
|
||||
});
|
||||
if (!isValid(req.data))
|
||||
inflight_A[1] <= tagged Valid array;
|
||||
array_addr_A.enq(array);
|
||||
endmethod
|
||||
endinterface
|
||||
interface Get response;
|
||||
method ActionValue#(VRAMResponse) get() if (inflight_A[0] matches tagged Valid .array);
|
||||
inflight_A[0] <= tagged Invalid;
|
||||
return VRAMResponse{data: arrays[array].portA.read()};
|
||||
method ActionValue#(VRAMResponse) get();
|
||||
array_addr_A.deq();
|
||||
let ret <- arrays[array_addr_A.first].portA.response.get();
|
||||
return VRAMResponse{data: ret};
|
||||
endmethod
|
||||
endinterface
|
||||
endinterface
|
||||
|
||||
interface Server portB;
|
||||
interface Put request;
|
||||
method Action put(VRAMRequest req) if (inflight_B[1] matches tagged Invalid);
|
||||
match {.array, .chip, .byteaddr} = split_addr(req.addr);
|
||||
arrays[array].portB.put(0, isValid(req.data), byteaddr, fromMaybe(0, req.data));
|
||||
method Action put(req);
|
||||
match {.array, .addr} = split_addr(req.addr);
|
||||
arrays[array].portB.request.put(VRAMInternalRequest{
|
||||
addr: addr,
|
||||
data: req.data
|
||||
});
|
||||
if (!isValid(req.data))
|
||||
inflight_B[1] <= tagged Valid array;
|
||||
array_addr_B.enq(array);
|
||||
endmethod
|
||||
endinterface
|
||||
interface Get response;
|
||||
method ActionValue#(VRAMResponse) get() if (inflight_B[0] matches tagged Valid .array);
|
||||
inflight_B[0] <= tagged Invalid;
|
||||
return VRAMResponse{data: arrays[array].portB.read()};
|
||||
method ActionValue#(VRAMResponse) get();
|
||||
array_addr_B.deq();
|
||||
let ret <- arrays[array_addr_B.first].portB.response.get();
|
||||
return VRAMResponse{data: ret};
|
||||
endmethod
|
||||
endinterface
|
||||
endinterface
|
||||
|
|
|
@ -21,40 +21,46 @@ function ActionValue#(Bool) verbose();
|
|||
endactionvalue);
|
||||
endfunction
|
||||
|
||||
typedef (function ActionValue#(Bit#(8)) next()) ValFn;
|
||||
|
||||
function ValFn constant_value(Integer cnst);
|
||||
function ActionValue#(Bit#(8)) next();
|
||||
return (actionvalue
|
||||
module mkConstantValue(Integer cnst, Get#(Bit#(8)) ifc);
|
||||
method ActionValue#(Bit#(8)) get();
|
||||
return fromInteger(cnst);
|
||||
endactionvalue);
|
||||
endfunction
|
||||
endmethod
|
||||
endmodule
|
||||
|
||||
return next;
|
||||
endfunction
|
||||
|
||||
module mkIncrementingValue(ValFn);
|
||||
module mkIncrementingValue(Get#(Bit#(8)));
|
||||
Reg#(Bit#(8)) val <- mkReg(0);
|
||||
|
||||
function ActionValue#(Bit#(8)) next();
|
||||
return (actionvalue
|
||||
// Cycle through 101 values. 101 is prime, so the
|
||||
// pattern it generates doesn't align to a power of
|
||||
// two and should detect any memory mapping errors.
|
||||
method ActionValue#(Bit#(8)) get();
|
||||
// Cycle through 101 values. 101 is prime, so the pattern it
|
||||
// generates doesn't align to a power of two and should detect
|
||||
// any memory mapping errors.
|
||||
if (val == 100)
|
||||
val <= 0;
|
||||
else
|
||||
val <= val+1;
|
||||
// Add another number to get all nonzero values, to
|
||||
// detect writes that don't stick.
|
||||
return 23+val;
|
||||
endactionvalue);
|
||||
endfunction
|
||||
|
||||
return next;
|
||||
// Add another number to get all nonzero values, to detect
|
||||
// writes that don't stick.
|
||||
return 23+val;
|
||||
endmethod
|
||||
endmodule
|
||||
|
||||
module mkWriter(Server#(VRAMRequest, VRAMResponse) dut, ValFn next_value, Machine ifc);
|
||||
// mkSlowReader wraps another Get and throttles it: get() is only
// enabled when `delay` is False, and every successful get() sets
// `delay` again, so the wrapper cannot be read on two consecutive
// cycles. Values are passed through from `inner` unmodified.
module mkSlowReader(Get#(Bit#(8)) inner, Get#(Bit#(8)) ifc);
|
||||
// Starts out True, so get() is blocked until clear_delay has fired once.
Reg#(Bool) delay <- mkReg(True);
|
||||
|
||||
(* no_implicit_conditions,fire_when_enabled *)
|
||||
// Re-arms get() whenever a delay is pending.
rule clear_delay (delay);
|
||||
delay <= False;
|
||||
endrule
|
||||
|
||||
// Guarded on !delay; taking a value sets delay so that clear_delay
// has to fire before the next get() is allowed.
method ActionValue#(Bit#(8)) get() if (!delay);
|
||||
delay <= True;
|
||||
let ret <- inner.get();
|
||||
return ret;
|
||||
endmethod
|
||||
endmodule
|
||||
|
||||
module mkWriter(Server#(VRAMRequest, VRAMResponse) dut, Get#(Bit#(8)) next_value, Machine ifc);
|
||||
let flags <- mkTestFlags();
|
||||
let cycles <- mkCycleCounter();
|
||||
let write_cycle_time <- mkCycleCounter();
|
||||
|
@ -67,7 +73,7 @@ module mkWriter(Server#(VRAMRequest, VRAMResponse) dut, ValFn next_value, Machin
|
|||
dynamicAssert(write_cycle_time == 1, "write didn't happen every cycle");
|
||||
write_cycle_time.reset();
|
||||
|
||||
let data <- next_value();
|
||||
let data <- next_value.get();
|
||||
let req = VRAMRequest{
|
||||
addr: idx,
|
||||
data: tagged Valid data
|
||||
|
@ -96,7 +102,7 @@ module mkWriter(Server#(VRAMRequest, VRAMResponse) dut, ValFn next_value, Machin
|
|||
endmethod
|
||||
endmodule
|
||||
|
||||
module mkReader(Server#(VRAMRequest, VRAMResponse) dut, ValFn next_value, Machine ifc);
|
||||
module mkReader(Server#(VRAMRequest, VRAMResponse) dut, Get#(Bit#(8)) next_value, Machine ifc);
|
||||
let flags <- mkTestFlags();
|
||||
let cycles <- mkCycleCounter();
|
||||
|
||||
|
@ -126,11 +132,11 @@ module mkReader(Server#(VRAMRequest, VRAMResponse) dut, ValFn next_value, Machin
|
|||
|
||||
rule verify_read (verify_remaining > 0);
|
||||
let got <- dut.response.get();
|
||||
let want <- next_value();
|
||||
dynamicAssert(got.data == want, "wrong value seen during read");
|
||||
let want <- next_value.get();
|
||||
|
||||
if (flags.verbose)
|
||||
$display("%0d: verify_read(%0d) = %0d, want %0d", cycles.all, verify_idx, got, want);
|
||||
dynamicAssert(got.data == want, "wrong value seen during read");
|
||||
|
||||
if (verify_remaining == 1)
|
||||
$display("Verified %0d reads in %0d cycles", total, cycles);
|
||||
|
@ -187,17 +193,36 @@ module mkTwoPortTest(VRAMCore dut, Stmt ret);
|
|||
endseq);
|
||||
endmodule
|
||||
|
||||
// mkSlowConsumerTest builds a test sequence that runs a writer on port
// A of `dut` to completion, then runs a reader on the same port whose
// expected-value source is wrapped in mkSlowReader. The writer and the
// reader each get their own mkIncrementingValue instance, so both
// start the pattern from the beginning.
module mkSlowConsumerTest(VRAMCore dut, Stmt ret);
|
||||
let winc <- mkIncrementingValue();
|
||||
let writer <- mkWriter(dut.portA, winc);
|
||||
|
||||
let rinc <- mkIncrementingValue();
|
||||
// Throttle the expected-value stream handed to the reader.
// NOTE(review): this presumably stalls the reader's response
// consumption too, since it pulls expected values from this Get --
// confirm against mkReader.
let rinc_slow <- mkSlowReader(rinc);
|
||||
let reader <- mkReader(dut.portA, rinc_slow);
|
||||
|
||||
// NOTE(review): the meaning of the two start() arguments is defined
// by the Machine interface, which is not visible here -- confirm.
return (seq
|
||||
writer.start(3000, 6000);
|
||||
await(writer.done);
|
||||
|
||||
reader.start(3000, 6000);
|
||||
await(reader.done);
|
||||
endseq);
|
||||
endmodule
|
||||
|
||||
(* descending_urgency="simple.reader.issue_read,two_port.writer.write" *)
|
||||
module mkTB(Empty);
|
||||
let dut <- mkVRAMCore(112);
|
||||
|
||||
let simple <- mkSimpleTest(dut);
|
||||
let two_port <- mkTwoPortTest(dut);
|
||||
let slow_reader <- mkSlowConsumerTest(dut);
|
||||
|
||||
runTest(100000,
|
||||
mkTest("VRAMCore", seq
|
||||
mkTest("VRAMCore/simple", simple);
|
||||
mkTest("VRAMCore/two_port", two_port);
|
||||
//mkTest("VRAMCore/simple", simple);
|
||||
//mkTest("VRAMCore/two_port", two_port);
|
||||
mkTest("VRAMCore/slow_reader", slow_reader);
|
||||
endseq));
|
||||
endmodule
|
||||
|
||||
|
|
Loading…
Reference in New Issue