////////////////////////////////////////////////////////////
package ECP5_RAM;

// DefaultValue supplies the DefaultValue typeclass that is
// instantiated for EBRPortConfig below.
import DefaultValue::*;
import DReg::*;
import Printf::*;
import ToString::*;
import StmtFSM::*;
import DelayLine::*;

export EBRWriteMode(..);
export EBRPortConfig(..);
export EBRPort(..);
export EBR(..);
export mkEBRCore;
export mkEBR;

////////////////////////////////////////////////////////////
// Configuration types
//
// The exported block RAMs in this package have one or more ports,
// where each port is independently configurable. Not all parameters
// are exposed, notably reset behavior is hardcoded to synchronous
// reset and release. This is purely because I don't yet understand
// Bluespec's reset semantics well enough to be confident in exposing
// async reset without messing it up.
//
// The exported EBRPortConfig type is internally expanded into an
// EBRPortConfig_Resolved. This expansion process resolves defaults,
// (e.g. assigning a default clock if none was provided), derives some
// additional values that implementations need (e.g. the widths of the
// data and address I/Os as regular integers), and checks the
// configuration for consistency errors (e.g. an address type larger
// than what the hardware can support).

// EBRWriteMode specifies an EBR port's output for a write operation,
// if any.
typedef enum {
   // In Normal mode, write operations do not output a value.
   Normal,
   // In WriteThrough mode, write operations output the value that was
   // written.
   WriteThrough,
   // In ReadBeforeWrite mode, write operations output the value that
   // was overwritten. This mode is only available on 9-bit and 18-bit
   // EBR configurations; resolvePortCfg rejects it on narrower ones.
   ReadBeforeWrite
} EBRWriteMode deriving (Bits, Eq);

// EBRPortConfig is the configuration of an EBR port.
typedef struct {
   // clk, if specified, is the Clock to use for the port. If
   // unspecified, uses the module's default clock.
   Maybe#(Clock) clk;

   // rstN, if specified, is the Reset to use for the port. If
   // unspecified, uses the module's default reset.
   Maybe#(Reset) rstN;

   // Whether to register the output of the EBR port.
   //
   // EBR ports always register their inputs, to present predictable
   // signals to the memory circuitry. Ports can optionally also
   // enable an output register, which adds latency to operations but
   // decouples the memory's internal latency from the logic connected
   // to the output. This may allow designs to run at higher clock
   // speeds, outweighing the added cycle overhead.
   //
   // With non-registered output, EBR operations have a latency of 1
   // cycle. Registering the output increases that to 2 cycles. By
   // default, the output is not registered.
   Bool register_output;

   // chip_select_addr is the port's chip select address. The port
   // ignores put operations that don't provide a matching chip_select
   // argument.
   //
   // This is intended to make it easier to construct larger memories
   // out of multiple EBR ports: by configuring different chip
   // addresses for each port, the inputs to the overall memory can be
   // routed directly to all EBR ports, rather than having to provide
   // your own address decoding and routing logic.
   UInt#(3) chip_select_addr;

   // write_mode specifies what the EBR port outputs for write
   // operations. In the default Normal mode, write operations do not
   // produce any output.
   EBRWriteMode write_mode;
} EBRPortConfig deriving (Eq);

instance DefaultValue#(EBRPortConfig);
   defaultValue = EBRPortConfig{
      clk: defaultValue,
      rstN: defaultValue,
      register_output: False,
      chip_select_addr: 0,
      write_mode: Normal
   };
endinstance

// EBRPortConfig_Resolved is an elaborated version of EBRPortConfig,
// with all defaults and overrides resolved to their concrete values,
// port widths made explicit and verified.
typedef struct {
   // These fields are the same as in EBRPortConfig. If the port is
   // not in use, they are tied to default values that avoid any logic
   // or wires being generated outside of the EBR.
   Clock clk;
   Reset rstN;
   Bool register_output;
   UInt#(3) chip_select_addr;
   EBRWriteMode write_mode;

   // These are values derived by resolvePortCfg from an EBRPortConfig
   // and other contextual information from a module
   // instantiation. These are values that modules need to derive, so
   // we derive them all once here instead of forcing each module to
   // do so.

   // enabled is whether the port is in use at all. Modules omit all
   // glue logic and wiring for disabled ports, resulting in zero
   // burden during synthesis (other than consuming an EBR primitive,
   // but presumably you're using the other port still).
   //
   // Enabled is true if the memory's type for values is a non-zero
   // number of bits. In particular, enabled=False if the caller uses
   // 'void' as the port's data type.
   Bool enabled;

   // addr_width is the bit width of addresses. resolvePortCfg ensures
   // that it is less than or equal to the maximum address width that
   // makes sense for data_width.
   Integer addr_width;

   // data_width is the bit width of input and output values at the
   // primitive hardware layer. The data width exported from the
   // wrapper types can be narrower. This value is always one of the
   // valid values for the EBR primitive: 1, 2, 4, 9 or 18.
   Integer data_width;

   // write_outputs_data is whether write_mode is one of the modes
   // where write operations output a value. Modules use this to
   // generate the appropriate conditions for port reads.
   Bool write_outputs_data;

   // operation_latency is how many cycles elapse between put()
   // executing to read() being ready. It is used to generate the
   // appropriate conditions for port reads.
   //
   // Operation latency on enabled ports is 2 if the output is
   // registered, or 1 for unregistered output. Disabled ports have 0
   // latency, meaning no timing logic is needed.
   Integer operation_latency;

   // chip_select_addr_str is the string encoding of chip_select_addr
   // that the EBR hardware primitive wants for its configuration
   // parameter.
   String chip_select_addr_str;

   // write_mode_str is the string encoding of write_mode that the EBR
   // hardware primitive wants for its configuration parameter.
   String write_mode_str;

   // register_output_str is the string encoding of register_output
   // that the EBR hardware primitive wants for its configuration
   // parameter.
   String register_output_str;
} EBRPortConfig_Resolved;

// resolvePortCfg expands an EBRPortConfig into an
// EBRPortConfig_Resolved for one port of a memory.
//
// module_name and port_name are only used to build error messages.
// The a and d arguments are never inspected: they exist solely to
// carry the port's address and data types, whose bit widths drive
// the resolution. defaultClk and defaultRstN are substituted when
// cfg does not name a clock or reset.
//
// The data type's width is rounded up to the nearest hardware width
// (1, 2, 4, 9 or 18 bits). A zero-width data type yields a disabled
// port.
//
// Elaboration fails with an error if:
//  - the data type is wider than 18 bits,
//  - the address type is wider than the hardware width allows,
//  - ReadBeforeWrite is requested on an enabled port whose hardware
//    data width is narrower than 9 bits.
function EBRPortConfig_Resolved resolvePortCfg(String module_name,
                                               String port_name,
                                               addr a,
                                               data d,
                                               EBRPortConfig cfg,
                                               Clock defaultClk,
                                               Reset defaultRstN)
   provisos (Bits#(addr, addr_sz),
             Bits#(data, data_sz));

   let addr_sz = valueOf(addr_sz);
   let data_sz = valueOf(data_sz);

   // Round the requested data width up to a width the primitive
   // supports. 0 means "port not in use".
   let data_hw_sz = 0;
   if (data_sz > 18)
      data_hw_sz = error(sprintf("invalid data width %d for %s port %s, must be 0..18 bits", data_sz, module_name, port_name));
   else if (data_sz > 9)
      data_hw_sz = 18;
   else if (data_sz > 4)
      data_hw_sz = 9;
   else if (data_sz > 2)
      data_hw_sz = 4;
   else if (data_sz > 1)
      data_hw_sz = 2;
   else if (data_sz > 0)
      data_hw_sz = 1;

   // Widest address usable with each hardware data width.
   let addr_max = case (data_hw_sz)
                     0: 0;
                     1: 14;
                     2: 13;
                     4: 12;
                     9: 11;
                     18: 10;
                     default: error("unreachable");
                  endcase;

   let enabled = data_hw_sz != 0;

   let ret = ?;
   if (enabled)
      ret = EBRPortConfig_Resolved{
         enabled: True,
         clk: cfg.clk matches tagged Valid .clk ? clk : defaultClk,
         rstN: cfg.rstN matches tagged Valid .rstN ? rstN : defaultRstN,
         addr_width: addr_sz,
         data_width: data_hw_sz,
         register_output: cfg.register_output,
         chip_select_addr: cfg.chip_select_addr,
         write_mode: cfg.write_mode,
         write_outputs_data: cfg.write_mode != Normal,
         operation_latency: cfg.register_output ? 2 : 1,
         chip_select_addr_str: sprintf("0b%03b", cfg.chip_select_addr),
         write_mode_str: case (cfg.write_mode) matches
                            Normal: "NORMAL";
                            WriteThrough: "WRITETHROUGH";
                            ReadBeforeWrite: "READBEFOREWRITE";
                         endcase,
         register_output_str: cfg.register_output ? "OUTREG": "NOREG"
      };
   else
      // Disabled port: tie everything to inert values so that no
      // logic or wiring is generated for it.
      ret = EBRPortConfig_Resolved{
         enabled: False,
         clk: noClock,
         rstN: noReset,
         addr_width: 14,
         data_width: 18,
         register_output: False,
         chip_select_addr: 0,
         write_mode: Normal,
         write_outputs_data: False,
         operation_latency: 0,
         chip_select_addr_str: "0b000",
         write_mode_str: "NORMAL",
         register_output_str: "NOREG"
      };

   // ReadBeforeWrite only exists on the 9-bit and 18-bit
   // configurations (see EBRWriteMode). Reject it elsewhere instead
   // of handing an invalid parameter set to the primitive.
   if (enabled && cfg.write_mode == ReadBeforeWrite && data_hw_sz < 9)
      ret = error(sprintf("ReadBeforeWrite mode is not available for %s port %s: "+
                          "it requires a 9-bit or 18-bit hardware data width, but the port's %d-bit data type maps to a %d-bit configuration",
                          module_name, port_name, data_sz, data_hw_sz));

   if (addr_sz > addr_max) begin
      // dummy only exists so that the address type can be printed.
      addr dummy = ?;
      ret = error(sprintf("The address type for port %s of %s is wider than the hardware can implement. "+
                          "Address type %s has %d bits, maximum is %d",
                          port_name, module_name, printType(typeOf(dummy)), addr_sz, addr_max));
   end

   return ret;
endfunction

////////////////////////////////////////////////////////////
// Exported interfaces
//

// EBRPort is a port of an EBR memory.
interface EBRPort#(type addr, type data);
   method Action put(UInt#(3) chip_select, Bool write, addr address, data datain);
   method data read();
endinterface

// EBR is an EBR memory.
interface EBR#(type portA_addr, type portA_data, type portB_addr, type portB_data);
   interface EBRPort#(portA_addr, portA_data) portA;
   interface EBRPort#(portB_addr, portB_data) portB;
endinterface

////////////////////////////////////////////////////////////
// Verilog import
//
// The raw primitive for EBR is called DP16KD. However, Lattice and
// Yosys both expose it with the I/O ports exploded out into
// individual bit signals, which is pretty horrible to plumb up here.
//
// Instead, ECP5_RAM.v defines a tiny Verilog wrapper, whose only
// purpose is to group those individual bit signals back into
// multi-bit ports that Bluespec can manipulate more elegantly.
//
// This wrapper exposes all the I/O ports with their maximum bit
// width, even though there is no configuration that can use all the
// bits. For example if you use all 14 address bits, you're only using
// 1 data bit (16384x1b configuration). If you're using all 18 bits of
// data, you're only using 10 address bits (1024x18b
// configuration).
// We do this because we want to drive unused signals
// to defined values, so we have to be able to see all of them.
//
// The exported wrapper modules defined further down translate these
// large raw ports into proper Bluespec types, and handle the
// necessary padding and truncation.

(* always_ready *)
interface V_EBRPort;
   // Put starts an operation, if select's value matches the port's
   // configured chip_select_addr.
   method Action put(UInt#(3) select, Bool write, Bit#(14) address, Bit#(18) data);

   // Read provides the EBR's output value. At this raw layer, read
   // always returns a value, but that value is undefined unless a put
   // which generates output happened N cycles prior, where N is the
   // port's configured latency (see EBRPortConfig).
   //
   // It is the caller's responsibility to time reads correctly
   // relative to puts.
   method Bit#(18) read();
endinterface

interface V_EBR;
   interface V_EBRPort portA;
   interface V_EBRPort portB;
endinterface

// vMkEBRCore instantiates a raw EBR primitive with the given
// configuration.
//
// The returned interface has maximally wide types on all I/O, and
// uses plain bit arrays. It also has no conditions on any methods,
// it's the caller's responsibility to time method calls appropriately.
//
// Nothing should use this module directly, except for mkEBRCore
// below. mkEBRCore wraps the Verilog primitive in stronger types and
// handles configuration edge cases (detecting invalid configs, tying
// off unused ports), but otherwise presents the same "raw" primitive
// from a semantic perspective. Anything you can build using
// vMkEBRCore, you can build better with mkEBRCore.
import "BVI" ECP5_RAM =
module vMkEBRCore#(EBRPortConfig_Resolved cfgA, EBRPortConfig_Resolved cfgB) (V_EBR);
   // EBRs are dual-port with independent clocks and resets on each
   // port, so we need to be careful to map things correctly. Unset
   // the default clock and reset entirely, so that the compiler
   // complains loudly if we forget to explicitly specify the
   // clocking/reset on a signal.
   default_clock no_clock;
   default_reset no_reset;

   input_clock portA_clk(CLKA, (* unused *)CLKA_GATE) = cfgA.clk;
   input_reset portA_rstN(RSTA) clocked_by(portA_clk) = cfgA.rstN;

   input_clock portB_clk(CLKB, (* unused *)CLKB_GATE) = cfgB.clk;
   input_reset portB_rstN(RSTB) clocked_by(portB_clk) = cfgB.rstN;

   // All string-valued parameters come pre-rendered from the
   // resolved configuration, so that the encoding lives in exactly
   // one place (resolvePortCfg).
   parameter DATA_WIDTH_A = cfgA.data_width;
   parameter REGMODE_A = cfgA.register_output_str;
   parameter CSDECODE_A = cfgA.chip_select_addr_str;
   parameter WRITEMODE_A = cfgA.write_mode_str;

   parameter DATA_WIDTH_B = cfgB.data_width;
   parameter REGMODE_B = cfgB.register_output_str;
   parameter CSDECODE_B = cfgB.chip_select_addr_str;
   parameter WRITEMODE_B = cfgB.write_mode_str;

   // The outputs of EBR ports also have an enable signal. It's
   // unclear why you'd want to suppress the output of things you
   // asked the memory to give you. Since I can't think of any use
   // for them, leave them always enabled if the corresponding port
   // is active.
   port OCEA = cfgA.enabled;
   port OCEB = cfgB.enabled;

   interface V_EBRPort portA;
      method put((*reg*)CSA, (*reg*)WEA, (*reg*)ADA, (*reg*)DIA) enable(CEA) clocked_by(portA_clk) reset_by(portA_rstN);
      method DOA read() clocked_by(portA_clk) reset_by(portA_rstN);
   endinterface

   interface V_EBRPort portB;
      method put((*reg*)CSB, (*reg*)WEB, (*reg*)ADB, (*reg*)DIB) enable(CEB) clocked_by(portB_clk) reset_by(portB_rstN);
      method DOB read() clocked_by(portB_clk) reset_by(portB_rstN);
   endinterface

   // A quick crash course on Bluespec's scheduling instructions.
   //
   // Bluespec's fundamental property is that rule execution is
   // serializable: all designs behave as if they execute a single
   // rule at a time, in some order. In the actual hardware
   // typically many rules execute in parallel on every cycle, but
   // that's just an optimization: the observed behavior of the
   // system must always be explainable by executing rules one at a
   // time, where each rule sees the effects of all previously
   // executed rules.
   //
   // When pulling Verilog modules into a Bluespec universe, the
   // compiler must be told explicitly what orders of execution are
   // valid, given the hardware's behavior. The canonical example
   // is a read of a register's value and a write to the same
   // register. Those two actions produce different system states
   // depending on which one executes first: if read-before-write,
   // the read sees the register's old value. In write-before-read,
   // the read sees the updated value.
   //
   // That's why, if you go digging into the low level Bluespec
   // definition of what a register is, you'll find a scheduling
   // annotation which says that if a read and a write both want to
   // happen (both methods are "enabled" in a clock cycle), the
   // read must execute before the write. When translated into
   // hardware, this matches familiar synchronous logic: on a given
   // cycle, your logic sees the previous cycle's value, and all
   // writes to registers happen at the "end" of the cycle.
   //
   // And so we come to the scheduling rules. Our annotations tell
   // the compiler how the memory's methods can be called, if
   // several of them are able to execute. Each scheduling
   // annotation is written as:
   //
   //     schedule (A) ORDERING (B)
   //
   // This means: assuming that method(s) A and method(s) B both
   // want to execute, can both be executed without issues? And
   // if yes, do they need to execute in a specific order?
   //
   // The orderings you can specify are:
   //
   //  - C  : "conflict". The scheduler must pick a single one of A
   //         or B to execute.
   //  - CF : "conflict-free". A and B can both execute, and the
   //         outcome is the same regardless of which executes first.
   //  - SB : "schedule before". A and B can both execute, but A
   //         must execute first to get correct results.
   //  - SBR: "schedule before (restricted)". Same as SB, but A
   //         and B must also execute from different rules.
   //
   // With that, here are the scheduling annotations for
   // vMkEBRCore.
   //
   // On each port: reads never interfere with each other (CF), a
   // read is ordered before a put in the same cycle (SB, mirroring
   // the register read-before-write convention described above),
   // and only one put can be issued per cycle (C).
   schedule (portA.read) CF (portA.read);
   schedule (portA.read) SB (portA.put);
   schedule (portA.put) C (portA.put);

   schedule (portB.read) CF (portB.read);
   schedule (portB.read) SB (portB.put);
   schedule (portB.put) C (portB.put);
endmodule : vMkEBRCore

////////////////////////////////////////////////////////////
// Exported modules

// mkEBRCore instantiates one EBR memory block with the given
// configuration.
//
// The returned ports have no implicit conditions. The caller is
// responsible for upholding the block's timing and synchronization
// requirements, following Lattice TN 02204.
//
// read() yields valid data 1 cycle after put() for ports configured
// with unregistered output, or 2 cycles for registered outputs. At
// all other times, the returned value is undefined.
//
// portA and portB must not concurrently write the same bits, or read
// bits while the other is writing them. The stored value in a
// write-write race is undefined, as is the read value in a write-read
// race.
module mkEBRCore#(EBRPortConfig cfgA, EBRPortConfig cfgB)
                 (EBR#(addr_a, data_a, addr_b, data_b))
   provisos (Bits#(addr_a, addr_sz_a),
             Bits#(data_a, data_sz_a),
             Bits#(addr_b, addr_sz_b),
             Bits#(data_b, data_sz_b),
             Add#(addr_a_pad, addr_sz_a, 14),
             Add#(data_a_pad, data_sz_a, 18),
             Add#(addr_b_pad, addr_sz_b, 14),
             Add#(data_b_pad, data_sz_b, 18));

   // The surrounding clock and reset act as fallbacks for ports that
   // do not name their own.
   Clock curClk <- exposeCurrentClock;
   Reset curRstN <- exposeCurrentReset;

   // Elaborate both port configurations. The "type ' (?)" arguments
   // only convey the address and data types to resolvePortCfg.
   EBRPortConfig_Resolved resolvedA =
      resolvePortCfg("mkEBRCore", "A", addr_a ' (?), data_a ' (?), cfgA, curClk, curRstN);
   EBRPortConfig_Resolved resolvedB =
      resolvePortCfg("mkEBRCore", "B", addr_b ' (?), data_b ' (?), cfgB, curClk, curRstN);

   V_EBR prim <- vMkEBRCore(resolvedA, resolvedB);

   interface EBRPort portA;
      // Forwards the operation to the primitive, widening the address
      // and data to the raw port widths. Does nothing on a disabled
      // port.
      method Action put(UInt#(3) chip_select, Bool write, addr_a address, data_a datain);
         if (resolvedA.enabled) begin
            Bit#(14) rawAddr = zeroExtend(pack(address));
            Bit#(18) rawData = zeroExtend(pack(datain));
            prim.portA.put(chip_select, write, rawAddr, rawData);
         end
      endmethod

      // Narrows the primitive's raw output back to the port's data
      // type. A disabled port yields a don't-care value.
      method data_a read();
         if (resolvedA.enabled) begin
            Bit#(18) rawOut = prim.portA.read();
            return unpack(truncate(rawOut));
         end
         else
            return ?;
      endmethod
   endinterface

   interface EBRPort portB;
      // Same as portA.put, for port B.
      method Action put(UInt#(3) chip_select, Bool write, addr_b address, data_b datain);
         if (resolvedB.enabled) begin
            Bit#(14) rawAddr = zeroExtend(pack(address));
            Bit#(18) rawData = zeroExtend(pack(datain));
            prim.portB.put(chip_select, write, rawAddr, rawData);
         end
      endmethod

      // Same as portA.read, for port B.
      method data_b read();
         if (resolvedB.enabled) begin
            Bit#(18) rawOut = prim.portB.read();
            return unpack(truncate(rawOut));
         end
         else
            return ?;
      endmethod
   endinterface
endmodule

// mkEBR instantiates one EBR memory block with the given
// configuration.
//
// This module includes flow control for reads, but unlike the
// standard library BRAM servers there is no flow control on puts. Put
// is always_ready, and read behaves like a Wire: the result of each
// put is available for a single cycle, and is lost if not read at
// that time.
module mkEBR#(EBRPortConfig cfgA, EBRPortConfig cfgB)
             (EBR#(addr_a, data_a, addr_b, data_b))
   provisos (Bits#(addr_a, addr_sz_a),
             Bits#(data_a, data_sz_a),
             Bits#(addr_b, addr_sz_b),
             Bits#(data_b, data_sz_b),
             Add#(addr_a_pad, addr_sz_a, 14),
             Add#(data_a_pad, data_sz_a, 18),
             Add#(addr_b_pad, addr_sz_b, 14),
             Add#(data_b_pad, data_sz_b, 18));

   let defaultClk <- exposeCurrentClock;
   let defaultRstN <- exposeCurrentReset;

   // The resolved configurations are needed here for the per-port
   // clock/reset, latency, chip select address and write mode.
   // mkEBRCore resolves the raw configurations again for its own use.
   let rcfgA = resolvePortCfg("mkEBR", "A", addr_a ' (?), data_a ' (?), cfgA, defaultClk, defaultRstN);
   let rcfgB = resolvePortCfg("mkEBR", "B", addr_b ' (?), data_b ' (?), cfgB, defaultClk, defaultRstN);

   let mem <- mkEBRCore(cfgA, cfgB);

   // One delay line per port, in that port's clock domain. Each one
   // is written when a put is expected to produce output, and its
   // ready signal gates the matching read.
   DelayLine#(void) latencyA <- mkDelayLine(rcfgA.operation_latency, clocked_by(rcfgA.clk), reset_by(rcfgA.rstN));
   DelayLine#(void) latencyB <- mkDelayLine(rcfgB.operation_latency, clocked_by(rcfgB.clk), reset_by(rcfgB.rstN));

   interface EBRPort portA;
      method Action put(UInt#(3) chip_select, Bool write, addr_a address, data_a datain);
         mem.portA.put(chip_select, write, address, datain);
         // Only announce a result for operations that the port will
         // actually execute AND that produce output. The port ignores
         // puts whose chip_select does not match its configured
         // address, so those must not make read() ready.
         if (chip_select == rcfgA.chip_select_addr && (rcfgA.write_outputs_data || !write))
            latencyA <= ?;
      endmethod

      // Ready only on the cycle where the output of an earlier put
      // becomes available; never ready on a disabled port.
      method data_a read() if (rcfgA.enabled && latencyA.ready);
         return mem.portA.read();
      endmethod
   endinterface

   interface EBRPort portB;
      method Action put(UInt#(3) chip_select, Bool write, addr_b address, data_b datain);
         mem.portB.put(chip_select, write, address, datain);
         // Same gating as port A: skip puts the port will ignore and
         // writes that produce no output.
         if (chip_select == rcfgB.chip_select_addr && (rcfgB.write_outputs_data || !write))
            latencyB <= ?;
      endmethod

      // Same readiness rule as port A.
      method data_b read() if (rcfgB.enabled && latencyB.ready);
         return mem.portB.read();
      endmethod
   endinterface
endmodule : mkEBR

////////////////////////////////////////////////////////////
// Utilities
//
// These are little helpers that I expected to find in the stdlib, but
// aren't there. Thankfully, they are easy to write by following the
// examples of similar helpers.
// discardingWriteOnly returns a WriteOnly interface whose writes have
// no effect.
function WriteOnly#(a) discardingWriteOnly();
   WriteOnly#(a) sink = (interface WriteOnly
                            method Action _write(a x);
                               noAction;
                            endmethod
                         endinterface);
   return sink;
endfunction

// regToWriteOnly returns a WriteOnly interface whose writes go to
// the register r.
function WriteOnly#(a) regToWriteOnly(Reg#(a) r);
   WriteOnly#(a) writer = (interface WriteOnly
                              method Action _write(a x);
                                 r <= x;
                              endmethod
                           endinterface);
   return writer;
endfunction

// constToReadOnly returns a ReadOnly interface that always reads as
// the value x.
function ReadOnly#(a) constToReadOnly(a x);
   ReadOnly#(a) reader = (interface ReadOnly
                             method a _read();
                                return x;
                             endmethod
                          endinterface);
   return reader;
endfunction

endpackage