diff --git a/experiments/primitive_ram/Top.bsv b/experiments/primitive_ram/Top.bsv index 14993b3..8af2445 100644 --- a/experiments/primitive_ram/Top.bsv +++ b/experiments/primitive_ram/Top.bsv @@ -2,22 +2,25 @@ package Top; import ECP5_RAM::*; -(* always_enabled *) +//(* always_enabled *) interface Top; - method Action put(UInt#(3) select, Bool write, Bit#(12) address, Bit#(4) data); - method Bit#(4) read(); + interface EBRPort#(Bit#(12), Bit#(4)) ram1; + //interface EBRPort#(Bit#(14), Bit#(1)) ram2; + interface EBRPort#(void, void) ram2; endinterface (* synthesize *) -module mkTop(Clock extClk, Reset extRst, Top ifc); - ECP5_EBRPortConfig cfgA = defaultValue; - cfgA.clk = tagged Valid extClk; - cfgA.rstN = tagged Valid extRst; - ECP5_EBRPortConfig cfgB = defaultValue; - ECP5_EBRCore#(Bit#(12), Bit#(4), UInt#(12), UInt#(4)) ram <- mkECP5_EBRCore(cfgA, cfgB); +module mkTop(Clock clk2, Reset rst2, Top ifc); + EBRPortConfig cfgA = defaultValue; + cfgA.write_mode = Normal; + EBRPortConfig cfgB = defaultValue; + cfgB.clk = tagged Valid clk2; + cfgB.rstN = tagged Valid rst2; + cfgB.register_output = True; + let r <- mkEBR(cfgA, cfgB); - method put = ram.portA.put; - method read = ram.portA.read; + interface EBRPort ram1 = r.portA; + interface EBRPort ram2 = r.portB; endmodule endpackage diff --git a/lib/ECP5_RAM.bsv b/lib/ECP5_RAM.bsv index adf703a..c9ff54c 100644 --- a/lib/ECP5_RAM.bsv +++ b/lib/ECP5_RAM.bsv @@ -1,204 +1,425 @@ +//////////////////////////////////////////////////////////// package ECP5_RAM; +import DReg::*; import Printf::*; +import ToString::*; +import StmtFSM::*; -// ECP5_EBRWriteMode specifies what the EBR outputs on a write cycle. 
+export EBRWriteMode(..); +export EBRPortConfig(..); +export EBRPort(..); +export EBR(..); +export mkEBRCore; +export mkEBR; + +//////////////////////////////////////////////////////////// +// Configuration types +// +// The exported block RAMs in this package have one or more ports, +// where each port is independently configurable. Not all parameters +// are exposed, notably reset behavior is hardcoded to synchronous +// reset and release. This is purely because I don't yet understand +// Bluespec's reset semantics well enough to be confident in exposing +// async reset without messing it up. +// +// The exported EBRPortConfig type is internally expanded into an +// EBRPortConfig_Resolved. This expansion process resolves defaults, +// (e.g. assigning a default clock if none was provided), derives some +// additional values that implementations need (e.g. the widths of the +// data and address I/Os as regular integers), and checks the +// configuration for consistency errors (e.g. an address type larger +// than what the hardware can support). + +// EBRWriteMode specifies an EBR port's output for a write operation, +// if any. typedef enum { - // In Normal mode, the EBR's output on a write cycle is undefined. + // In Normal mode, write operations do not output a value. Normal, - // In WriteThrough mode, the EBR outputs the new value at the - // written address. + // In WriteThrough mode, write operations output the value that was + // written. WriteThrough, - // In ReadBeforeWrite mode, the EBR outputs the prior value of the - // written address. ReadBeforeWrite is only available on 9 and 18 - // bit ports. + // In ReadBeforeWrite mode, write operations output the value that + // was overwritten. This mode is only available on 9-bit and 18-bit + // EBR configurations. ReadBeforeWrite -} ECP5_EBRWriteMode deriving (Bits, Eq); +} EBRWriteMode deriving (Bits, Eq); -// ECP5_EBRPortConfig is the static configuration of an EBR port. 
+// EBRPortConfig is the configuration of an EBR port. typedef struct { // clk, if specified, is the Clock to use for the port. If - // unspecified, uses the module default clock. + // unspecified, uses the module's default clock. Maybe#(Clock) clk; // rstN, if specified, is the Reset to use for the port. If - // unspecified, uses the module default reset. + // unspecified, uses the module's default reset. Maybe#(Reset) rstN; - // By default, ECP5 EBRs only register the input address and write - // data, giving a 1-cycle latency for operations. If - // registered_output is true, the output value is also registered, - // resulting in 2 cycles of latency but shorter datapaths. - Bool registered_output; - // chip_select_addr is the chip address of this EBR port. put - // method invocations whose select argument don't match this - // address are ignored. + // Whether to register the output of the EBR port. + // + // EBR ports always register their inputs, to present predictable + // signals to the memory circuitry. Ports can optionally also + // enable an output register, which adds latency to operations but + // decouples the memory's internal latency from the logic connected + // to the output. This may allow designs to run at higher clock + // speeds, outweighing the added cycle overhead. + // + // With non-registered output, EBR operations have a latency of 1 + // cycle. Registering the output increases that to 2 cycles. By + // default, the output is not registered. + Bool register_output; + // chip_select_addr is the port's chip select address. The port + // ignores put operations that don't provide a matching chip_select + // argument. + // + // This is intended to make it easier to construct larger memories + // out of multiple EBR ports: by configuring different chip + // addresses for each port, the inputs to the overall memory can be + // routed directly to all EBR ports, rather than having to provide + // your own address decoding and routing logic. 
UInt#(3) chip_select_addr; - // write_mode specifies the output's behavior for write operations. - ECP5_EBRWriteMode write_mode; -} ECP5_EBRPortConfig; + // write_mode specifies what the EBR port outputs for write + // operations. In the default Normal mode, write operations do not + // produce any output. + EBRWriteMode write_mode; +} EBRPortConfig deriving (Eq); -instance DefaultValue#(ECP5_EBRPortConfig); - defaultValue = ECP5_EBRPortConfig{ +instance DefaultValue#(EBRPortConfig); + defaultValue = EBRPortConfig{ clk: defaultValue, rstN: defaultValue, - registered_output: False, + register_output: False, chip_select_addr: 0, write_mode: Normal }; endinstance -(* always_ready *) -interface ECP5_EBRCoreInnerPort; - // Put starts a read or write operation, if select's value matches - // the port's configured chip_select_addr. - method Action put(UInt#(3) select, Bool write, Bit#(14) address, Bit#(18) data); - // Read returns the value on the EBR's output port. The output - // value is only defined when the read follows a put with the - // correct number of latency cycles for the port's configuration. - method Bit#(18) read(); -endinterface +// EBRPortConfig_Resolved is an elaborated version of EBRPortConfig, +// with all defaults and overrides resolved to their concrete values, +// port widths made explicit and verified. +typedef struct { + // These fields are the same as in EBRPortConfig. If the port is + // not in use, they are tied to default values that avoid any logic + // or wires being generated outside of the EBR. + Clock clk; + Reset rstN; + Bool register_output; + UInt#(3) chip_select_addr; + EBRWriteMode write_mode; -interface ECP5_EBRCoreInner; - interface ECP5_EBRCoreInnerPort portA; - interface ECP5_EBRCoreInnerPort portB; -endinterface + // These are values derived by resolvePortCfg from an EBRPortConfig + // and other contextual information from a module + // instantiation. 
These are values that modules need to derive, so + // we derive them all once here instead of forcing each module to + // do so. -// mkECP5_EBRCoreInner instantiates an ECP5 EBR primitive with the -// given configuration. The returned interface has full-width I/O -// ports -import "BVI" ECP5_RAM = - module mkECP5_EBRCoreInner#(ECP5_EBRPortConfig port_a, - ECP5_EBRPortConfig port_b, - Integer portA_width, - Integer portB_width) - (ECP5_EBRCoreInner); + // enabled is whether the port is in use at all. Modules omit all + // glue logic and wiring for disabled ports, resulting in zero + // burden during synthesis (other than consuming an EBR primitive, + // but presumably you're using the other port still). + // + // Enabled is true if the memory's type for values is a non-zero + // number of bits. In particular, enabled=False if the caller uses + // 'void' as the port's data type. + Bool enabled; + // addr_width is the bit width of addresses. resolvePortCfg ensures + // that it is less than or equal to the maximum address width that + // makes sense for data_width. + Integer addr_width; + // data_width is the bit width of input and output values. It is + // always one of the valid values for the EBR primitive: 1, 2, 4, 9 + // or 18. + Integer data_width; + // write_outputs_data is whether write_mode is one of the modes + // where write operations output a value. Modules use this to + // generate the appropriate conditions for port reads. + Bool write_outputs_data; + // operation_latency is how many cycles elapse between put() + // executing to read() being ready. It is used to generate the + // appropriate conditions for port reads. + // + // Operation latency on enabled ports is 2 if the output is + // registered, or 1 for unregistered output. Disabled ports have 0 + // latency, meaning no timing logic is needed. 
+ Integer operation_latency; + // chip_select_addr_str is the string encoding of chip_select_addr + // that the EBR hardware primitive wants for its configuration + // parameter. + String chip_select_addr_str; + // write_mode_str is the string encoding of write_mode that the EBR + // hardware primitive wants for its configuration parameter. + String write_mode_str; + // register_output_str is the string encoding of register_output + // that the EBR hardware primitive wants for its configuration + // parameter. + String register_output_str; +} EBRPortConfig_Resolved; - let defClk <- exposeCurrentClock; - let defRstN <- exposeCurrentReset; - - let portA_bsv_clock = case (port_a.clk) matches - tagged Invalid: defClk; - tagged Valid .clk: clk; - endcase; - let portA_bsv_rstN = case (port_a.rstN) matches - tagged Invalid: defRstN; - tagged Valid .rstN: rstN; - endcase; - let portB_bsv_clock = case (port_b.clk) matches - tagged Invalid: defClk; - tagged Valid .clk: clk; - endcase; - let portB_bsv_rstN = case (port_b.rstN) matches - tagged Invalid: defRstN; - tagged Valid .rstN: rstN; - endcase; - - default_clock no_clock; - default_reset no_reset; - - input_clock portA_clk(CLKA, (* unused *)CLKA_GATE) = portA_bsv_clock; - input_reset portA_rstN(RSTA) clocked_by(portA_clk) = portA_bsv_rstN; - - input_clock portB_clk(CLKB, (* unused *)CLKB_GATE) = portB_bsv_clock; - input_reset portB_rstN(RSTB) clocked_by(portB_clk) = portB_bsv_rstN; - - parameter DATA_WIDTH_A = portA_width; - parameter REGMODE_A = port_a.registered_output ? "OUTREG" : "NOREG"; - parameter CSDECODE_A = "0b000"; //$format("0b%b", port_a.chip_select_addr); - parameter WRITEMODE_A = case (port_a.write_mode) matches - Normal: "NORMAL"; - WriteThrough: "WRITETHROUGH"; - ReadBeforeWrite: "READBEFOREWRITE"; - endcase; - - parameter DATA_WIDTH_B = portB_width; - parameter REGMODE_B = port_b.registered_output ? 
"OUTREG" : "NOREG"; - parameter CSDECODE_B = "0b000"; //$format("0b%b", port_b.chip_select_addr); - parameter WRITEMODE_B = case (port_b.write_mode) matches - Normal: "NORMAL"; - WriteThrough: "WRITETHROUGH"; - ReadBeforeWrite: "READBEFOREWRITE"; - endcase; - - port OCEA = True; - port OCEB = True; - - interface ECP5_EBRCoreInnerPort portA; - method put((*reg*)CSA, (*reg*)WEA, (*reg*)ADA, (*reg*)DIA) enable(CEA) clocked_by(portA_clk) reset_by(portA_rstN); - method DOA read() clocked_by(portA_clk) reset_by(portA_rstN); - endinterface - interface ECP5_EBRCoreInnerPort portB; - method put((*reg*)CSB, (*reg*)WEB, (*reg*)ADB, (*reg*)DIB) enable(CEB) clocked_by(portB_clk) reset_by(portB_rstN); - method DOB read() clocked_by(portB_clk) reset_by(portB_rstN); - endinterface - - schedule (portA.read) CF (portA.read, portA.put); - schedule (portA.put) C (portA.put); - schedule (portB.read) CF (portB.read, portB.put); - schedule (portB.put) C (portB.put); - endmodule : mkECP5_EBRCoreInner - -module checkSizes#(addr a, data d, String module_name, String port_name)(Empty) +function EBRPortConfig_Resolved resolvePortCfg(String module_name, String port_name, addr a, data d, EBRPortConfig cfg, Clock defaultClk, Reset defaultRstN) provisos (Bits#(addr, addr_sz), Bits#(data, data_sz)); - - let data_sz = valueOf(data_sz); let addr_sz = valueOf(addr_sz); + let data_sz = valueOf(data_sz); let addr_max = case (data_sz) matches + 0: 0; 1: 14; 2: 13; 4: 12; 9: 11; 18: 10; - default: error(sprintf("invalid data width %d for port, must be one of 1,2,4,9,18", data_sz)); + default: error(sprintf("invalid data width %d for %s port %s, must be one of 0,1,2,4,9,18", data_sz, module_name, port_name)); endcase; + let enabled = data_sz != 0; + let ret = ?; + if (enabled) + ret = EBRPortConfig_Resolved{ + enabled: True, + clk: cfg.clk matches tagged Valid .clk ? clk : defaultClk, + rstN: cfg.rstN matches tagged Valid .rstN ? 
rstN : defaultRstN, + addr_width: addr_sz, + data_width: data_sz, + register_output: cfg.register_output, + chip_select_addr: cfg.chip_select_addr, + write_mode: cfg.write_mode, + write_outputs_data: cfg.write_mode != Normal, + operation_latency: cfg.register_output ? 2 : 1, + chip_select_addr_str: sprintf("0b%03b", cfg.chip_select_addr), + write_mode_str: case (cfg.write_mode) matches + Normal: "NORMAL"; + WriteThrough: "WRITETHROUGH"; + ReadBeforeWrite: "READBEFOREWRITE"; + endcase, + register_output_str: cfg.register_output ? "OUTREG": "NOREG" + }; + else + ret = EBRPortConfig_Resolved{ + enabled: False, + clk: noClock, + rstN: noReset, + addr_width: 14, + data_width: 18, + register_output: False, + chip_select_addr: 0, + write_mode: Normal, + write_outputs_data: False, + operation_latency: 0, + chip_select_addr_str: "0b000", + write_mode_str: "NORMAL", + register_output_str: "NOREG" + }; if (addr_sz > addr_max) begin addr dummy = ?; - errorM(sprintf("The address type for port %s of %s is wider than the hardware can implement. "+ - "Address type %s has %d bits, maximum is %d", - port_name, module_name, - printType(typeOf(dummy)), - addr_sz, - addr_max)); + ret = error(sprintf("The address type for port %s of %s is wider than the hardware can implement. "+ + "Address type %s has %d bits, maximum is %d", + port_name, module_name, + printType(typeOf(dummy)), + addr_sz, + addr_max)); end -endmodule + return ret; +endfunction -// ECP5_EBRCorePort is the raw interface to one port of an ECP5 EBR -// memory block. +//////////////////////////////////////////////////////////// +// Exported interfaces // -// The port has no implicit conditions, it is the caller's -// responsibility to wait the correct number of cycles after a put() -// before capturing data with read(). The caller must wait 1 cycle for -// unregistered ports, and 2 cycles for registered ports. When invoked -// at other times, read() returns an unspecified arbitrary value. 
-interface ECP5_EBRCorePort#(type addr, type data); + +// EBRPort is a port of an EBR memory. +interface EBRPort#(type addr, type data); method Action put(UInt#(3) chip_select, Bool write, addr address, data datain); method data read(); endinterface -// ECP5_EBRCore is the raw interface to an ECP5 EBR memory block. -// -// The ports have no implicit conditions, the caller must wait the -// correct number of latency cycles to get valid data. -// -// It is the caller's responsibility to enforce synchronization -// between the ports, as specified in Lattice Technical Note 02204: -// the two ports must not issue concurrent writes to the same address, -// or a write concurrent with a read of the same address. If the two -// ports are being operated from different clock domains, the caller -// must implement appropriate synchronization to ensure that no -// read-during-write or write-during-write races occur. -interface ECP5_EBRCore#(type portA_addr, type portA_data, type portB_addr, type portB_data); - interface ECP5_EBRCorePort#(portA_addr, portA_data) portA; - interface ECP5_EBRCorePort#(portB_addr, portB_data) portB; +// EBR is an EBR memory. +interface EBR#(type portA_addr, type portA_data, type portB_addr, type portB_data); + interface EBRPort#(portA_addr, portA_data) portA; + interface EBRPort#(portB_addr, portB_data) portB; endinterface -// mkECP5_EBRCore instantiates an ECP5 EBR memory primitive with the -// given configuration. This memory has no implicit or explicit -// conditions, the caller is responsible for upholding the primitive's -// timing and synchronization requirements. -module mkECP5_EBRCore#(ECP5_EBRPortConfig port_a, - ECP5_EBRPortConfig port_b) - (ECP5_EBRCore#(addr_a, data_a, addr_b, data_b)) +//////////////////////////////////////////////////////////// +// Verilog import +// +// The raw primitive for EBR is called DP16KD. 
However, Lattice and +// Yosys both expose it with the I/O ports exploded out into +// individual bit signals, which is pretty horrible to plumb up here. +// +// Instead, ECP5_RAM.v defines a tiny Verilog wrapper, whose only +// purpose is to group those individual bit signals back into +// multi-bit ports that Bluespec can manipulate more elegantly. +// +// This wrapper exposes all the I/O ports with their maximum bit +// width, even though there is no configuration that can use all the +// bits. For example if you use all 14 address bits, you're only using +// 1 data bit (16384x1b configuration). If you're using all 18 bits of +// data, you're only using 10 address bits (1024x18b +// configuration). We do this because we want to drive unused signals +// to defined values, so we have to be able to see all of them. +// +// The exported wrapper modules defined further down translate these +// large raw ports into proper Bluespec types, and handle the +// necessary padding and truncation. + +(* always_ready *) +interface V_EBRPort; + // Put starts an operation, if select's value matches the port's + // configured chip_select_addr. + method Action put(UInt#(3) select, Bool write, Bit#(14) address, Bit#(18) data); + // Read provides the EBR's output value. At this raw layer, read + // always returns a value, but that value is undefined unless a put + // which generates output happened N cycles prior, where N is the + // port's configured latency (see EBRPortConfig). + // + // It is the caller's responsibility to time reads correctly + // relative to puts. + method Bit#(18) read(); +endinterface + +interface V_EBR; + interface V_EBRPort portA; + interface V_EBRPort portB; +endinterface + +// vMkEBRCore instantiates a raw EBR primitive with the given +// configuration. +// +// The returned interface has maximally wide types on all I/O, and +// uses plain bit arrays. 
It also has no conditions on any methods, +// it's the caller's responsibility to time method calls appropriately. +// +// Nothing should use this module directly, except for mkEBRCore +// below. mkEBRCore wraps the Verilog primitive in stronger types and +// handles configuration edge cases (detecting invalid configs, tying +// off unused ports), but otherwise presents the same "raw" primitive +// from a semantic perspective. Anything you can build using +// vMkEBRCore, you can build better with mkEBRCore. +import "BVI" ECP5_RAM = + module vMkEBRCore#(EBRPortConfig_Resolved cfgA, + EBRPortConfig_Resolved cfgB) + (V_EBR); + + // EBRs are dual-port with independent clocks and resets on each + // port, so we need to be careful to map things correctly. Unset + // the default clock and reset entirely, so that the compiler + // complains loudly if we forget to explicitly specify the + // clocking/reset on a signal. + default_clock no_clock; + default_reset no_reset; + + input_clock portA_clk(CLKA, (* unused *)CLKA_GATE) = cfgA.clk; + input_reset portA_rstN(RSTA) clocked_by(portA_clk) = cfgA.rstN; + + input_clock portB_clk(CLKB, (* unused *)CLKB_GATE) = cfgB.clk; + input_reset portB_rstN(RSTB) clocked_by(portB_clk) = cfgB.rstN; + + parameter DATA_WIDTH_A = cfgA.data_width; + parameter REGMODE_A = cfgA.register_output ? "OUTREG" : "NOREG"; + parameter CSDECODE_A = cfgA.chip_select_addr_str; + parameter WRITEMODE_A = cfgA.write_mode_str; + + parameter DATA_WIDTH_B = cfgB.data_width; + parameter REGMODE_B = cfgB.register_output ? "OUTREG" : "NOREG"; + parameter CSDECODE_B = cfgB.chip_select_addr_str; + parameter WRITEMODE_B = cfgB.write_mode_str; + + // The outputs of EBR ports also have an enable signal. It's + // unclear why you'd want to suppress the output of things you + // asked the memory to give you. Since I can't think of any use + // for them, leave them always enabled if the corresponding port + // is active. 
+ port OCEA = cfgA.enabled; + port OCEB = cfgB.enabled; + + interface V_EBRPort portA; + method put((*reg*)CSA, (*reg*)WEA, (*reg*)ADA, (*reg*)DIA) enable(CEA) clocked_by(portA_clk) reset_by(portA_rstN); + method DOA read() clocked_by(portA_clk) reset_by(portA_rstN); + endinterface + interface V_EBRPort portB; + method put((*reg*)CSB, (*reg*)WEB, (*reg*)ADB, (*reg*)DIB) enable(CEB) clocked_by(portB_clk) reset_by(portB_rstN); + method DOB read() clocked_by(portB_clk) reset_by(portB_rstN); + endinterface + + // A quick crash course on Bluespec's scheduling instructions. + // + // Bluespec's fundamental property is that rule execution is + // serializable: all designs behave as if they execute a single + // rule at a time, in some order. In the actual hardware + // typically many rules execute in parallel on every cycle, but + // that's just an optimization: the observed behavior of the + // system must always be explainable by executing rules one at a + // time, where each rule sees the effects of all previously + // executed rules. + // + // When pulling Verilog modules into a Bluespec universe, the + // compiler must be told explicitly what orders of execution are + // valid, given the hardware's behavior. The canonical example + // is a read of a register's value and a write to the same + // register. Those two actions produce different system states + // depending on which one executes first: if read-before-write, + // the read sees the register's old value. In write-before-read, + // the read sees the updated value. + // + // That's why, if you go digging into the low level Bluespec + // definition of what a register is, you'll find a scheduling + // annotation which says that if a read and a write both want to + // happen (both methods are "enabled" in a clock cycle), the + // read must execute before the write. 
When translated into + // hardware, this matches familiar synchronous logic: on a given + // cycle, your logic sees the previous cycle's value, and all + // writes to registers happen at the "end" of the cycle. + // + // And so we come to the scheduling rules. Our annotations tell + // the compiler how the memory's methods can be called, if + // several of them are able to execute. Each scheduling + // annotation is written as: + // + // schedule (A) ORDERING (B); + // + // This means: assuming that method(s) A and method(s) B both + // want to execute, can both be executed without issues? And + // if yes, do they need to execute in a specific order? + // + // The orderings you can specify are: + // + // - C : "conflict". The scheduler must pick a single one of A + // or B to execute. + // - CF : "conflict-free". A and B can both execute, and the + // outcome is the same regardless of which executes first. + // - SB : "schedule before". A and B can both execute, but A + // must execute first to get correct results. + // - SBR: "schedule before (restricted)". Same as SB, but A + // and B must also execute from different rules. + // + // With that, here are the scheduling annotations for + // vMkEBRCore. + + // read is SB put on the same port, matching register semantics: + // a read in the same cycle as a put logically executes first. + schedule (portA.read) CF (portA.read); + schedule (portA.read) SB (portA.put); + schedule (portA.put) C (portA.put); + schedule (portB.read) CF (portB.read); + schedule (portB.read) SB (portB.put); + schedule (portB.put) C (portB.put); + endmodule : vMkEBRCore + +//////////////////////////////////////////////////////////// +// Exported modules + +// mkEBRCore instantiates one EBR memory block with the given +// configuration. +// +// The returned ports have no implicit conditions. The caller is +// responsible for upholding the block's timing and synchronization +// requirements, following Lattice TN 02204. 
+// +// read() yields valid data 1 cycle after put() for ports configured +// with unregistered output, or 2 cycles for registered outputs. At +// all other times, the returned value is undefined. +// +// portA and portB must not concurrently write the same bits, or read +// bits while the other is writing them. The stored value in a +// write-write race is undefined, as is the read value in a write-read +// race. +module mkEBRCore#(EBRPortConfig cfgA, + EBRPortConfig cfgB) + (EBR#(addr_a, data_a, addr_b, data_b)) provisos (Bits#(addr_a, addr_sz_a), Bits#(data_a, data_sz_a), Bits#(addr_b, addr_sz_b), @@ -208,64 +429,171 @@ module mkECP5_EBRCore#(ECP5_EBRPortConfig port_a, Add#(addr_b_pad, addr_sz_b, 14), Add#(data_b_pad, data_sz_b, 18)); - checkSizes(addr_a ' (?), data_a ' (?), "mkECP5_EBRCore", "A"); - checkSizes(addr_b ' (?), data_b ' (?), "mkECP5_EBRCore", "B"); + let defaultClk <- exposeCurrentClock; + let defaultRstN <- exposeCurrentReset; + let rcfgA = resolvePortCfg("mkEBRCore", "A", addr_a ' (?), data_a ' (?), cfgA, defaultClk, defaultRstN); + let rcfgB = resolvePortCfg("mkEBRCore", "B", addr_b ' (?), data_b ' (?), cfgB, defaultClk, defaultRstN); - let inner <- mkECP5_EBRCoreInner(port_a, port_b, valueOf(data_sz_a), valueOf(data_sz_b)); + let vEBR <- vMkEBRCore(rcfgA, rcfgB); - interface ECP5_EBRCorePort portA; + interface EBRPort portA; method Action put(UInt#(3) chip_select, Bool write, addr_a address, data_a datain); - inner.portA.put(chip_select, write, zeroExtend(pack(address)), zeroExtend(pack(datain))); + if (!rcfgA.enabled) + noAction; + else + vEBR.portA.put(chip_select, write, zeroExtend(pack(address)), zeroExtend(pack(datain))); endmethod method data_a read(); - return unpack(truncate(inner.portA.read())); + if (!rcfgA.enabled) + return ?; + else + return unpack(truncate(vEBR.portA.read())); endmethod endinterface - interface ECP5_EBRCorePort portB; + interface EBRPort portB; method Action put(UInt#(3) chip_select, Bool write, addr_b address, 
data_b datain); - inner.portB.put(chip_select, write, zeroExtend(pack(address)), zeroExtend(pack(datain))); + if (!rcfgB.enabled) + noAction; + else + vEBR.portB.put(chip_select, write, zeroExtend(pack(address)), zeroExtend(pack(datain))); endmethod method data_b read(); - return unpack(truncate(inner.portB.read())); + if (!rcfgB.enabled) + return ?; + else + return unpack(truncate(vEBR.portB.read())); endmethod endinterface endmodule -module mkECP5_EBRCoreByte#(ECP5_EBRPortConfig port_a, - ECP5_EBRPortConfig port_b) - (ECP5_EBRCore#(addr_a, data_a, addr_b, data_b)) - provisos (Bits#(addr_a, 12), - Bits#(data_a, 8), - Bits#(addr_b, 12), - Bits#(data_b, 8)); +// mkEBR instantiates one EBR memory block with the given +// configuration. +// +// This module includes flow control for reads, but unlike the +// standard library BRAM servers there is no flow control on puts. Put +// is always_ready, and read behaves like a Wire: the result of each +// put is available for a single cycle, and is lost if not read at +// that time. 
+//
+// A put only arms read() when the hardware will actually produce
+// output for it: the chip_select argument must match the port's
+// configured chip_select_addr, and the operation must be a read or
+// a write in an output-producing write mode. Puts addressed to a
+// different chip select are forwarded to the primitive (which
+// ignores them) but never make read() ready.
+module mkEBR#(EBRPortConfig cfgA,
+              EBRPortConfig cfgB)
+   (EBR#(addr_a, data_a, addr_b, data_b))
+   provisos (Bits#(addr_a, addr_sz_a),
+             Bits#(data_a, data_sz_a),
+             Bits#(addr_b, addr_sz_b),
+             Bits#(data_b, data_sz_b),
+             Add#(addr_a_pad, addr_sz_a, 14),
+             Add#(data_a_pad, data_sz_a, 18),
+             Add#(addr_b_pad, addr_sz_b, 14),
+             Add#(data_b_pad, data_sz_b, 18));
-   let ebr1 <- mkECP5_EBRCore(port_a, port_b);
-   let ebr2 <- mkECP5_EBRCore(port_a, port_b);
+   let defaultClk <- exposeCurrentClock;
+   let defaultRstN <- exposeCurrentReset;
+   let rcfgA = resolvePortCfg("mkEBR", "A", addr_a ' (?), data_a ' (?), cfgA, defaultClk, defaultRstN);
+   let rcfgB = resolvePortCfg("mkEBR", "B", addr_b ' (?), data_b ' (?), cfgB, defaultClk, defaultRstN);
-   interface ECP5_EBRCorePort portA;
+   let mem <- mkEBRCore(cfgA, cfgB);
+
+   // start_op is pulsed by put(); op_complete goes high for one cycle
+   // once the port's configured latency has elapsed.
+   WriteOnly#(Bool) portA_start_op = ?;
+   ReadOnly#(Bool) portA_op_complete = ?;
+   WriteOnly#(Bool) portB_start_op = ?;
+   ReadOnly#(Bool) portB_op_complete = ?;
+
+   // TODO: this variable-depth register chain should be pulled into a
+   // separate "delay line" module.
+   if (!rcfgA.enabled) begin
+      portA_start_op = discardingWriteOnly;
+      portA_op_complete = constToReadOnly(False);
+   end
+   else if (rcfgA.register_output) begin
+      let syncA1 <- mkDReg(False, clocked_by(rcfgA.clk), reset_by(rcfgA.rstN));
+      let syncA2 <- mkReg(False, clocked_by(rcfgA.clk), reset_by(rcfgA.rstN));
+      portA_start_op = regToWriteOnly(syncA1);
+      portA_op_complete = regToReadOnly(syncA2);
+
+      (* no_implicit_conditions, fire_when_enabled *)
+      rule syncA1_to_syncA2;
+         syncA2 <= syncA1;
+      endrule
+   end
+   else begin
+      let syncA <- mkDReg(False, clocked_by(rcfgA.clk), reset_by(rcfgA.rstN));
+      portA_start_op = regToWriteOnly(syncA);
+      portA_op_complete = regToReadOnly(syncA);
+   end
+
+   if (!rcfgB.enabled) begin
+      portB_start_op = discardingWriteOnly;
+      portB_op_complete = constToReadOnly(False);
+   end
+   else if (rcfgB.register_output) begin
+      let syncB1 <- mkDReg(False, clocked_by(rcfgB.clk), reset_by(rcfgB.rstN));
+      let syncB2 <- mkReg(False, clocked_by(rcfgB.clk), reset_by(rcfgB.rstN));
+      portB_start_op = regToWriteOnly(syncB1);
+      portB_op_complete = regToReadOnly(syncB2);
+
+      (* no_implicit_conditions, fire_when_enabled *)
+      rule syncB1_to_syncB2;
+         syncB2 <= syncB1;
+      endrule
+   end
+   else begin
+      let syncB <- mkDReg(False, clocked_by(rcfgB.clk), reset_by(rcfgB.rstN));
+      portB_start_op = regToWriteOnly(syncB);
+      portB_op_complete = regToReadOnly(syncB);
+   end
+
+   interface EBRPort portA;
       method Action put(UInt#(3) chip_select, Bool write, addr_a address, data_a datain);
-         let data_bits = pack(datain);
-         ebr1.portA.put(chip_select, write, address, data_bits[7:4]);
-         ebr2.portA.put(chip_select, write, address, data_bits[3:0]);
+         mem.portA.put(chip_select, write, address, datain);
+         // Only announce a result for operations this port accepts.
+         if (chip_select == rcfgA.chip_select_addr && (rcfgA.write_outputs_data || !write))
+            portA_start_op <= True;
       endmethod
-
-      method data_a read();
-         return unpack({ebr1.portA.read(), ebr2.portA.read});
+      method data_a read() if (rcfgA.enabled && portA_op_complete == True);
+         return mem.portA.read();
       endmethod
    endinterface
-   interface ECP5_EBRCorePort portB;
+   interface EBRPort portB;
       method Action put(UInt#(3) chip_select, Bool write, addr_b address, data_b datain);
-         let data_bits = pack(datain);
-         ebr1.portB.put(chip_select, write, address, data_bits[7:4]);
-         ebr2.portB.put(chip_select, write, address, data_bits[3:0]);
+         mem.portB.put(chip_select, write, address, datain);
+         // Only announce a result for operations this port accepts.
+         if (chip_select == rcfgB.chip_select_addr && (rcfgB.write_outputs_data || !write))
+            portB_start_op <= True;
       endmethod
-
-      method data_b read();
-         return unpack({ebr1.portB.read(), ebr2.portB.read});
+      method data_b read() if (rcfgB.enabled && portB_op_complete == True);
+         return mem.portB.read();
       endmethod
    endinterface
-endmodule
+endmodule : mkEBR
+
+////////////////////////////////////////////////////////////
+// Utilities
+//
+// These are little helpers that I expected to find in the stdlib, but
+// aren't there. Thankfully, they are easy to write by following the
+// examples of similar helpers.
+
+function WriteOnly#(a) discardingWriteOnly();
+   return (interface WriteOnly
+              method Action _write(a x);
+                 noAction;
+              endmethod
+           endinterface);
+endfunction
+
+function WriteOnly#(a) regToWriteOnly(Reg#(a) r);
+   return (interface WriteOnly
+              method _write = r._write;
+           endinterface);
+endfunction
+
+function ReadOnly#(a) constToReadOnly(a x);
+   return (interface ReadOnly
+              method _read;
+                 return x;
+              endmethod
+           endinterface);
+endfunction
 endpackage
+