Compare commits

...

2 Commits

Author SHA1 Message Date
David Anderson 8d2261e245 lib: initial implementation of an ECP5 EBR primitive
Only the core unconditioned primitive right now, and still needs refining.
2024-08-17 15:43:36 -07:00
David Anderson db30e4a23f tasks.py: prettify output, support running partial synthesis
Partial synth is handy when writing gnarly Bluespec modules, because it
lets you inspect the Verilog output of the Bluespec compiler as well as
Yosys's compile output at various stages of synthesis, to see if things
are being produced the way you expect.
2024-08-17 15:41:21 -07:00
4 changed files with 499 additions and 15 deletions

View File

@ -0,0 +1,17 @@
package Top;
import ECP5_RAM::*;
(* synthesize *)
module mkTop(ECP5_EBRCorePort#(Bit#(12), Bit#(8)));
let clk <- exposeCurrentClock;
let rstN <- exposeCurrentReset;
ECP5_EBRPortConfig cfg = defaultValue;
ECP5_EBRCore#(Bit#(12), Bit#(8), UInt#(12), UInt#(8)) ram <- mkECP5_EBRCoreByte(cfg, cfg);
method put = ram.portA.put;
method read = ram.portA.read;
endmodule
endpackage

247
lib/ECP5_RAM.bsv Normal file
View File

@ -0,0 +1,247 @@
package ECP5_RAM;
import Printf::*;
// ECP5_EBRWriteMode specifies what the EBR outputs on a write cycle.
typedef enum {
// In Normal mode, the EBR's output on a write cycle is undefined.
Normal,
// In WriteThrough mode, the EBR outputs the new value at the
// written address.
WriteThrough,
// In ReadBeforeWrite mode, the EBR outputs the prior value of the
// written address. ReadBeforeWrite is only available on 9 and 18
// bit ports.
ReadBeforeWrite
} ECP5_EBRWriteMode deriving (Bits, Eq);
// ECP5_EBRPortConfig is the static configuration of an EBR port.
typedef struct {
Clock clk;
Reset rstN;
// By default, ECP5 EBRs only register the input address and write
// data, giving a 1-cycle latency for operations. If
// registered_output is true, the output value is also registered,
// resulting in 2 cycles of latency but shorter datapaths.
Bool registered_output;
// chip_select_addr is the chip address of this EBR port. put
// method invocations whose select argument don't match this
// address are ignored.
UInt#(3) chip_select_addr;
// write_mode specifies the output's behavior for write operations.
ECP5_EBRWriteMode write_mode;
} ECP5_EBRPortConfig;
instance DefaultValue#(ECP5_EBRPortConfig);
defaultValue = ECP5_EBRPortConfig{
clk: noClock,
rstN: noReset,
registered_output: False,
chip_select_addr: 0,
write_mode: Normal
};
endinstance
(* always_ready *)
interface ECP5_EBRCoreInnerPort;
// Put starts a read or write operation, if select's value matches
// the port's configured chip_select_addr.
method Action put(UInt#(3) select, Bool write, Bit#(14) address, Bit#(18) data);
// Read returns the value on the EBR's output port. The output
// value is only defined when the read follows a put with the
// correct number of latency cycles for the port's configuration.
method Bit#(18) read();
endinterface
interface ECP5_EBRCoreInner;
interface ECP5_EBRCoreInnerPort portA;
interface ECP5_EBRCoreInnerPort portB;
endinterface
// mkECP5_EBRCoreInner instantiates an ECP5 EBR primitive with the
// given configuration. The returned interface has full-width I/O
// ports
import "BVI" ECP5_RAM =
module mkECP5_EBRCoreInner#(ECP5_EBRPortConfig port_a,
ECP5_EBRPortConfig port_b,
Integer portA_width,
Integer portB_width)
(ECP5_EBRCoreInner);
default_clock no_clock;
default_reset no_reset;
input_clock portA_clk(CLKA, (* unused *)CLKA_GATE) = port_a.clk;
input_reset portA_rstN(RSTA) clocked_by(portA_clk) = port_a.rstN;
input_clock portB_clk(CLKB, (* unused *)CLKB_GATE) = port_b.clk;
input_reset portB_rstN(RSTB) clocked_by(portB_clk) = port_b.rstN;
parameter DATA_WIDTH_A = portA_width;
parameter REGMODE_A = port_a.registered_output ? "OUTREG" : "NOREG";
parameter CSDECODE_A = "0b000"; //$format("0b%b", port_a.chip_select_addr);
parameter WRITEMODE_A = case (port_a.write_mode) matches
Normal: "NORMAL";
WriteThrough: "WRITETHROUGH";
ReadBeforeWrite: "READBEFOREWRITE";
endcase;
parameter DATA_WIDTH_B = portB_width;
parameter REGMODE_B = port_b.registered_output ? "OUTREG" : "NOREG";
parameter CSDECODE_B = "0b000"; //$format("0b%b", port_b.chip_select_addr);
parameter WRITEMODE_B = case (port_b.write_mode) matches
Normal: "NORMAL";
WriteThrough: "WRITETHROUGH";
ReadBeforeWrite: "READBEFOREWRITE";
endcase;
port OCEA = True;
port OCEB = True;
interface ECP5_EBRCoreInnerPort portA;
method put((*reg*)CSA, (*reg*)WEA, (*reg*)ADA, (*reg*)DIA) enable(CEA) clocked_by(portA_clk) reset_by(portA_rstN);
method DOA read() clocked_by(portA_clk) reset_by(portA_rstN);
endinterface
interface ECP5_EBRCoreInnerPort portB;
method put((*reg*)CSB, (*reg*)WEB, (*reg*)ADB, (*reg*)DIB) enable(CEB) clocked_by(portB_clk) reset_by(portB_rstN);
method DOB read() clocked_by(portB_clk) reset_by(portB_rstN);
endinterface
schedule (portA.read) CF (portA.read, portA.put);
schedule (portA.put) C (portA.put);
schedule (portB.read) CF (portB.read, portB.put);
schedule (portB.put) C (portB.put);
endmodule : mkECP5_EBRCoreInner
module checkSizes#(addr a, data d, String module_name, String port_name)(Empty)
provisos (Bits#(addr, addr_sz),
Bits#(data, data_sz));
let data_sz = valueOf(data_sz);
let addr_sz = valueOf(addr_sz);
let addr_max = case (data_sz) matches
1: 14;
2: 13;
4: 12;
9: 11;
18: 10;
default: error(sprintf("invalid data width %d for port, must be one of 1,2,4,9,18", data_sz));
endcase;
if (addr_sz > addr_max) begin
addr dummy = ?;
errorM(sprintf("The address type for port %s of %s is wider than the hardware can implement. "+
"Address type %s has %d bits, maximum is %d",
port_name, module_name,
printType(typeOf(dummy)),
addr_sz,
addr_max));
end
endmodule
// ECP5_EBRCorePort is the raw interface to one port of an ECP5 EBR
// memory block.
//
// The port has no implicit conditions, it is the caller's
// responsibility to wait the correct number of cycles after a put()
// before capturing data with read(). The caller must wait 1 cycle for
// unregistered ports, and 2 cycles for registered ports. When invoked
// at other times, read() returns an unspecified arbitrary value.
interface ECP5_EBRCorePort#(type addr, type data);
method Action put(UInt#(3) chip_select, Bool write, addr address, data datain);
method data read();
endinterface
// ECP5_EBRCore is the raw interface to an ECP5 EBR memory block.
//
// The ports have no implicit conditions, the caller must wait the
// correct number of latency cycles to get valid data.
//
// It is the caller's responsibility to enforce synchronization
// between the ports, as specified in Lattice Technical Note 02204:
// the two ports must not issue concurrent writes to the same address,
// or a write concurrent with a read of the same address. If the two
// ports are being operated from different clock domains, the caller
// must implement appropriate synchronization to ensure that no
// read-during-write or write-during-write races occur.
interface ECP5_EBRCore#(type portA_addr, type portA_data, type portB_addr, type portB_data);
interface ECP5_EBRCorePort#(portA_addr, portA_data) portA;
interface ECP5_EBRCorePort#(portB_addr, portB_data) portB;
endinterface
// mkECP5_EBRCore instantiates an ECP5 EBR memory primitive with the
// given configuration. This memory has no implicit or explicit
// conditions, the caller is responsible for upholding the primitive's
// timing and synchronization requirements.
module mkECP5_EBRCore#(ECP5_EBRPortConfig port_a,
ECP5_EBRPortConfig port_b)
(ECP5_EBRCore#(addr_a, data_a, addr_b, data_b))
provisos (Bits#(addr_a, addr_sz_a),
Bits#(data_a, data_sz_a),
Bits#(addr_b, addr_sz_b),
Bits#(data_b, data_sz_b),
Add#(addr_a_pad, addr_sz_a, 14),
Add#(data_a_pad, data_sz_a, 18),
Add#(addr_b_pad, addr_sz_b, 14),
Add#(data_b_pad, data_sz_b, 18));
checkSizes(addr_a ' (?), data_a ' (?), "mkECP5_EBRCore", "A");
checkSizes(addr_b ' (?), data_b ' (?), "mkECP5_EBRCore", "B");
let inner <- mkECP5_EBRCoreInner(port_a, port_b, valueOf(data_sz_a), valueOf(data_sz_b));
interface ECP5_EBRCorePort portA;
method Action put(UInt#(3) chip_select, Bool write, addr_a address, data_a datain);
inner.portA.put(chip_select, write, zeroExtend(pack(address)), zeroExtend(pack(datain)));
endmethod
method data_a read();
return unpack(truncate(inner.portA.read()));
endmethod
endinterface
interface ECP5_EBRCorePort portB;
method Action put(UInt#(3) chip_select, Bool write, addr_b address, data_b datain);
inner.portB.put(chip_select, write, zeroExtend(pack(address)), zeroExtend(pack(datain)));
endmethod
method data_b read();
return unpack(truncate(inner.portB.read()));
endmethod
endinterface
endmodule
module mkECP5_EBRCoreByte#(ECP5_EBRPortConfig port_a,
ECP5_EBRPortConfig port_b)
(ECP5_EBRCore#(addr_a, data_a, addr_b, data_b))
provisos (Bits#(addr_a, 12),
Bits#(data_a, 8),
Bits#(addr_b, 12),
Bits#(data_b, 8));
let ebr1 <- mkECP5_EBRCore(port_a, port_b);
let ebr2 <- mkECP5_EBRCore(port_a, port_b);
interface ECP5_EBRCorePort portA;
method Action put(UInt#(3) chip_select, Bool write, addr_a address, data_a datain);
let data_bits = pack(datain);
ebr1.portA.put(chip_select, write, address, data_bits[7:4]);
ebr2.portA.put(chip_select, write, address, data_bits[3:0]);
endmethod
method data_a read();
return unpack({ebr1.portA.read(), ebr2.portA.read});
endmethod
endinterface
interface ECP5_EBRCorePort portB;
method Action put(UInt#(3) chip_select, Bool write, addr_b address, data_b datain);
let data_bits = pack(datain);
ebr1.portB.put(chip_select, write, address, data_bits[7:4]);
ebr2.portB.put(chip_select, write, address, data_bits[3:0]);
endmethod
method data_b read();
return unpack({ebr1.portB.read(), ebr2.portB.read});
endmethod
endinterface
endmodule
endpackage

182
lib/ECP5_RAM.v Normal file
View File

@ -0,0 +1,182 @@
module ECP5_RAM(CLKA,
CEA,
OCEA,
WEA,
DIA,
ADA,
CSA,
RSTA,
DOA,
CLKB,
CEB,
OCEB,
WEB,
DIB,
ADB,
CSB,
RSTB,
DOB);
parameter GSR = "AUTO";
parameter RESETMODE = "SYNC";
parameter ASYNC_RESET_RELEASE = "SYNC";
parameter DATA_WIDTH_A = 18;
parameter REGMODE_A = "NOREG";
parameter WRITEMODE_A = "NORMAL";
parameter CSDECODE_A = "0b000";
parameter DATA_WIDTH_B = 18;
parameter REGMODE_B = "NOREG";
parameter WRITEMODE_B = "NORMAL";
parameter CSDECODE_B = "0b000";
input CLKA;
input CEA;
input OCEA;
input WEA;
input [17:0] DIA;
input [13:0] ADA;
input [2:0] CSA;
input RSTA;
output [17:0] DOA;
input CLKB;
input CEB;
input OCEB;
input WEB;
input [17:0] DIB;
input [13:0] ADB;
input [2:0] CSB;
input RSTB;
output [17:0] DOB;
DP16KD#(.GSR(GSR),
.RESETMODE(RESETMODE),
.ASYNC_RESET_RELEASE(ASYNC_RESET_RELEASE),
.DATA_WIDTH_A(DATA_WIDTH_A),
.REGMODE_A(REGMODE_A),
.WRITEMODE_A(WRITEMODE_A),
.CSDECODE_A(CSDECODE_A),
.DATA_WIDTH_B(DATA_WIDTH_B),
.REGMODE_B(REGMODE_B),
.WRITEMODE_B(WRITEMODE_B),
.CSDECODE_B(CSDECODE_B)) ram(.CLKA(CLKA),
.CEA(CEA),
.OCEA(OCEA),
.WEA(WEA),
.DIA17(DIA[17]),
.DIA16(DIA[16]),
.DIA15(DIA[15]),
.DIA14(DIA[14]),
.DIA13(DIA[13]),
.DIA12(DIA[12]),
.DIA11(DIA[11]),
.DIA10(DIA[10]),
.DIA9(DIA[9]),
.DIA8(DIA[8]),
.DIA7(DIA[7]),
.DIA6(DIA[6]),
.DIA5(DIA[5]),
.DIA4(DIA[4]),
.DIA3(DIA[3]),
.DIA2(DIA[2]),
.DIA1(DIA[1]),
.DIA0(DIA[0]),
.ADA13(ADA[13]),
.ADA12(ADA[12]),
.ADA11(ADA[11]),
.ADA10(ADA[10]),
.ADA9(ADA[9]),
.ADA8(ADA[8]),
.ADA7(ADA[7]),
.ADA6(ADA[6]),
.ADA5(ADA[5]),
.ADA4(ADA[4]),
.ADA3(ADA[3]),
.ADA2(ADA[2]),
.ADA1(ADA[1]),
.ADA0(ADA[0]),
.CSA2(CSA[2]),
.CSA1(CSA[1]),
.CSA0(CSA[0]),
.RSTA(RSTA),
.DOA17(DOA[17]),
.DOA16(DOA[16]),
.DOA15(DOA[15]),
.DOA14(DOA[14]),
.DOA13(DOA[13]),
.DOA12(DOA[12]),
.DOA11(DOA[11]),
.DOA10(DOA[10]),
.DOA9(DOA[9]),
.DOA8(DOA[8]),
.DOA7(DOA[7]),
.DOA6(DOA[6]),
.DOA5(DOA[5]),
.DOA4(DOA[4]),
.DOA3(DOA[3]),
.DOA2(DOA[2]),
.DOA1(DOA[1]),
.DOA0(DOA[0]),
.CLKB(CLKB),
.CEB(CEB),
.OCEB(OCEB),
.WEB(WEB),
.DIB17(DIB[17]),
.DIB16(DIB[16]),
.DIB15(DIB[15]),
.DIB14(DIB[14]),
.DIB13(DIB[13]),
.DIB12(DIB[12]),
.DIB11(DIB[11]),
.DIB10(DIB[10]),
.DIB9(DIB[9]),
.DIB8(DIB[8]),
.DIB7(DIB[7]),
.DIB6(DIB[6]),
.DIB5(DIB[5]),
.DIB4(DIB[4]),
.DIB3(DIB[3]),
.DIB2(DIB[2]),
.DIB1(DIB[1]),
.DIB0(DIB[0]),
.ADB13(ADB[13]),
.ADB12(ADB[12]),
.ADB11(ADB[11]),
.ADB10(ADB[10]),
.ADB9(ADB[9]),
.ADB8(ADB[8]),
.ADB7(ADB[7]),
.ADB6(ADB[6]),
.ADB5(ADB[5]),
.ADB4(ADB[4]),
.ADB3(ADB[3]),
.ADB2(ADB[2]),
.ADB1(ADB[1]),
.ADB0(ADB[0]),
.CSA2(CSA[2]),
.CSA1(CSA[1]),
.CSA0(CSA[0]),
.RSTB(RSTA),
.DOB17(DOB[17]),
.DOB16(DOB[16]),
.DOB15(DOB[15]),
.DOB14(DOB[14]),
.DOB13(DOB[13]),
.DOB12(DOB[12]),
.DOB11(DOB[11]),
.DOB10(DOB[10]),
.DOB9(DOB[9]),
.DOB8(DOB[8]),
.DOB7(DOB[7]),
.DOB6(DOB[6]),
.DOB5(DOB[5]),
.DOB4(DOB[4]),
.DOB3(DOB[3]),
.DOB2(DOB[2]),
.DOB1(DOB[1]),
.DOB0(DOB[0]));
endmodule

View File

@ -74,49 +74,86 @@ def expand_test_target(target):
else:
raise ValueError(f"Unknown target type {t}")
def phase(name):
print("")
print("*"*(len(name)+6))
print(f"** {name} **")
print("*"*(len(name)+6))
print("")
@task
def build(c, target="."):
phase("Compile Bluespec")
verilog_files = []
for target in expand_build_target(target):
out_info, out_verilog, out_bsc = ensure_build_dirs(target, "info", "verilog", "bsc")
print(f"Building {target}")
c.run(f"bsc -aggressive-conditions -check-assert -u -verilog -info-dir {out_info} -vdir {out_verilog} -bdir {out_bsc} -p {target.parent}:lib:%/Libraries -show-module-use -show-compiles {target}")
module_name = Path(f"mk{target.stem}")
verilog_files.append(out_verilog / module_name.with_suffix(".v"))
with open(out_verilog / module_name.with_suffix(".use")) as f:
verilog_files.extend(find_verilog_modules(c, f.read().splitlines()))
print("\nVerilog files for synthesis:")
for v in verilog_files:
print(" "+str(v))
return verilog_files
@task
def synth(c, target):
target = resolve_synth_target(target)
pin_map = target.parent / "pin_map.lpf"
if not pin_map.is_file():
raise ArgumentError(f"Pin mapping {pin_map} not found")
module_name = Path(f"mk{target.stem}")
out_info, out_verilog, out_bsc, out_yosys, out_nextpnr = ensure_build_dirs(target, "info", "verilog", "bsc", "yosys", "nextpnr")
build(c, target)
verilog_files = [out_verilog / module_name.with_suffix(".v")]
with open(out_verilog / module_name.with_suffix(".use")) as f:
verilog_files.extend(find_verilog_modules(c, f.read().splitlines()))
module_name = Path(f"mk{target.stem}")
verilog_files = build(c, target)
phase("Logic synthesis")
yosys_script = out_yosys / "script.ys"
yosys_json = out_yosys / module_name.with_suffix(".json")
yosys_report = out_yosys / module_name.with_suffix(".stats")
yosys_log = out_yosys / module_name.with_suffix(".log")
yosys_preprocessed = out_yosys / module_name.with_suffix(".v")
yosys_compiled = out_yosys / f"{module_name.stem}_compiled.v"
with open(yosys_script, "w", encoding="UTF-8") as f:
f.write(dedent(f"""\
read_verilog -sv -defer {' '.join(str(f) for f in verilog_files)}
hierarchy -top {module_name}
synth_ecp5 -top {module_name} -json {yosys_json}
synth_ecp5 -top {module_name} -run :map_ram
write_verilog -sv {yosys_preprocessed}
synth_ecp5 -run map_ram: -json {yosys_json}
write_verilog -sv {yosys_compiled}
tee -o {yosys_report} stat
"""))
c.run(f"yosys -q -L {yosys_log} -T {yosys_script}")
with open(yosys_report) as f:
print(f.read())
ls = f.readlines()[3:]
print("".join(ls))
print(f" JSON : {yosys_json}")
print(f" Log : {yosys_log}")
print(f" Flat : {yosys_preprocessed}")
print(f"Compiled : {yosys_compiled}")
pin_map = target.parent / "pin_map.lpf"
if not pin_map.is_file():
print(f"WARNING: no pin map at {pin_map}, skipping place&route and bitstream generation")
return
phase("Place and route")
nextpnr_out = out_nextpnr / module_name.with_suffix(".pnr")
out = c.run(f"nextpnr-ecp5 --85k --detailed-timing-report --report {out_nextpnr / "timing.json"} --json {yosys_json} --lpf {pin_map} --package=CABGA381 --textcfg {nextpnr_out}", hide='stderr')
print_filtered_paragraphs(out.stderr, "Device utilization", "Critical path", common_prefix="Info: ")
nextpnr_log = out_nextpnr / module_name.with_suffix(".log")
nextpnr_timing = out_nextpnr / module_name.with_suffix(".log")
out = c.run(f"nextpnr-ecp5 --85k --detailed-timing-report -l {nextpnr_log} --report {nextpnr_timing} --json {yosys_json} --lpf {pin_map} --package=CABGA381 --textcfg {nextpnr_out}") #, hide='stderr')
print_filtered_paragraphs(out.stderr, "Device utilisation", "Critical path", "Max frequency", "Max delay", common_prefix="Info: ")
print(f" PNR : {nextpnr_out}")
print(f" Log : {nextpnr_log}")
print(f"Timing : {nextpnr_timing}")
phase("Bitstream generation")
bitstream = nextpnr_out.with_suffix(".bit")
c.run(f"ecppack {nextpnr_out} {bitstream}")
print(f"Wrote bitstream to {bitstream}")
@ -139,4 +176,5 @@ def test(c, target="."):
@task
def clean(c):
if Path("out").is_dir():
shutil.rmtree("out")