// gary/vram/VRAM.bsv

package VRAM;

import GetPut::*;
import ClientServer::*;
import DReg::*;
import BRAM::*;
import Vector::*;
import FIFOF::*;
import SpecialFIFOs::*;
import Real::*;
import Printf::*;

import DelayLine::*;
import ECP5_RAM::*;

export VRAMAddr;
export VRAMData;
export mkVRAM;
export VRAMRequest;
export VRAMResponse;
export VRAMServer;
export VRAM;

// VRAMData is the unit of storage of the VRAM: one 8-bit byte.
typedef Bit#(8) VRAMData;

// Each byte RAM we build below can address 4096 bytes, which is 12
// address bits.
typedef UInt#(12) ByteAddr;

// ChipAddr is the 3-bit chip select value used to pick one byte RAM
// out of an array of up to 8 (see mkByteRAMArray).
typedef UInt#(3) ChipAddr;

// ByteRAM is two EBRs glued together to make a whole-byte memory.
// Both of its ports take ByteAddr addresses and carry VRAMData.
typedef EBR#(ByteAddr, VRAMData, ByteAddr, VRAMData) ByteRAM;

// mkByteRAM builds a 4096x8b memory block out of two 4-bit-wide ECP5
// EBRs: 'upper' stores bits 7:4 of every byte and 'lower' stores
// bits 3:0. Both EBRs are configured with the same chip select
// address (chip_addr) on both ports, and every request is forwarded
// to both of them in lockstep.
//
// As with the raw EBRs, there is no flow control here: the caller is
// responsible for reading the response one cycle after putting a
// read request.
module mkByteRAM(ChipAddr chip_addr, ByteRAM ifc);
   // One shared port configuration, used for ports A and B of both
   // nibble memories.
   EBRPortConfig port_cfg = defaultValue;
   port_cfg.chip_select_addr = chip_addr;

   EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) upper <- mkEBRCore(port_cfg, port_cfg);
   EBR#(ByteAddr, Bit#(4), ByteAddr, Bit#(4)) lower <- mkEBRCore(port_cfg, port_cfg);

   interface EBRPort portA;
      method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in);
         // Split the byte into its two nibbles, one per EBR.
         Bit#(4) hi_nibble = data_in[7:4];
         Bit#(4) lo_nibble = data_in[3:0];
         upper.portA.put(chip_select, write, addr, hi_nibble);
         lower.portA.put(chip_select, write, addr, lo_nibble);
      endmethod

      method VRAMData read();
         // Reassemble the byte: upper nibble in bits 7:4, lower in 3:0.
         return {upper.portA.read(), lower.portA.read()};
      endmethod
   endinterface

   interface EBRPort portB;
      method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in);
         // Split the byte into its two nibbles, one per EBR.
         Bit#(4) hi_nibble = data_in[7:4];
         Bit#(4) lo_nibble = data_in[3:0];
         upper.portB.put(chip_select, write, addr, hi_nibble);
         lower.portB.put(chip_select, write, addr, lo_nibble);
      endmethod

      method VRAMData read();
         // Reassemble the byte: upper nibble in bits 7:4, lower in 3:0.
         return {upper.portB.read(), lower.portB.read()};
      endmethod
   endinterface
endmodule : mkByteRAM

// mkByteRAMArray arrays between 1 and 8 mkByteRAMs together, using
// the hardwired chip select lines to route inputs appropriately and
// a mux tree to collect outputs. With num_chips=8, the resulting
// ByteRAM is 32768x8b.
//
// Every request is broadcast to all blocks; each block was created
// with its own index as chip select address, so the chip_select
// value of the request determines which block it is meant for.
//
// For reads, the selected chip is remembered for one cycle so that
// read() can pick the output of the block that was addressed. read()
// returns 0 if no read was issued on that port in the previous
// cycle, or if the read addressed a chip that does not exist in this
// array (chip_select >= num_chips).
//
// Like the underlying memories, this module has no flow control:
// callers must call read() one cycle after putting a read request.
module mkByteRAMArray(Integer num_chips, ByteRAM ifc);
   if (num_chips < 1)
      error("mkByteRAMArray needs at least 1 raw ByteRAM");
   if (num_chips > 8)
      error("mkByteRAMArray can only array 8 raw ByteRAMs");

   // Block i answers to chip select address i.
   ByteRAM blocks[num_chips];
   for (Integer i=0; i<num_chips; i=i+1)
      blocks[i] <- mkByteRAM(fromInteger(i));

   // Chip addressed by the read issued in the previous cycle, one
   // delay line per port. Used as the select input of the output mux.
   DelayLine#(ChipAddr) read_chip_A <- mkDelayLine(1);
   DelayLine#(ChipAddr) read_chip_B <- mkDelayLine(1);

   interface EBRPort portA;
      method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in);
         for (Integer i=0; i<num_chips; i=i+1)
            blocks[i].portA.put(chip_select, write, addr, data_in);
         // Only reads produce an output that has to be muxed next
         // cycle, so only reads record which chip was addressed.
         if (!write)
            read_chip_A <= chip_select;
      endmethod
      method VRAMData read();
         if (read_chip_A.ready)
            // Guard against chip selects beyond the blocks that
            // actually exist in this (possibly partial) array.
            if (read_chip_A <= fromInteger(num_chips-1))
               return blocks[read_chip_A].portA.read();
            else
               return 0;
         else
            return 0;
      endmethod
   endinterface

   interface EBRPort portB;
      method Action put(ChipAddr chip_select, Bool write, ByteAddr addr, VRAMData data_in);
         for (Integer i=0; i<num_chips; i=i+1)
            blocks[i].portB.put(chip_select, write, addr, data_in);
         // Only reads produce an output that has to be muxed next
         // cycle, so only reads record which chip was addressed.
         if (!write)
            read_chip_B <= chip_select;
      endmethod
      method VRAMData read();
         if (read_chip_B.ready)
            // Guard against chip selects beyond the blocks that
            // actually exist in this (possibly partial) array.
            if (read_chip_B <= fromInteger(num_chips-1))
               return blocks[read_chip_B].portB.read();
            else
               return 0;
         else
            return 0;
      endmethod
   endinterface
endmodule

// ArrayAddr selects one of up to 4 ByteRAM arrays inside a VRAM.
typedef UInt#(2) ArrayAddr;

// VRAMAddr is a byte address into the VRAM. mkVRAM splits its 17
// bits into, from most to least significant, a 2-bit ArrayAddr, a
// 3-bit ChipAddr and a 12-bit ByteAddr.
typedef UInt#(17) VRAMAddr;

// VRAMRequest is a single memory operation. mkVRAM treats a request
// whose data is Valid as a write of that byte to addr, and a request
// whose data is Invalid as a read of addr.
typedef struct {
   VRAMAddr addr;
   Maybe#(VRAMData) data;
} VRAMRequest deriving (Bits, Eq);

// VRAMResponse carries the byte returned by a read. mkVRAM only
// produces responses for read requests, never for writes.
typedef struct {
   VRAMData data;
} VRAMResponse deriving (Bits, Eq);

// Server and client ends of a VRAM request/response channel.
typedef Server#(VRAMRequest, VRAMResponse) VRAMServer;
typedef Client#(VRAMRequest, VRAMResponse) VRAMClient;

// VRAM is a dual port memory: portA and portB are two separate
// request/response servers onto the same storage.
interface VRAM;
   interface VRAMServer portA;
   interface VRAMServer portB;
endinterface

// mkVRAM creates a dual port VRAM of the specified size, using ECP5
// EBR memory primitives. The memory size must be a multiple of 4KiB,
// with a minimum of 4KiB and a maximum of 128KiB.
//
// Addresses are 17 bits wide. For memories smaller than 128KiB, the
// requested address is reduced modulo the memory size, so accesses
// beyond the end wrap around to the start.
//
// A request with Valid data is a write and produces no response. A
// request with Invalid data is a read and produces exactly one
// response carrying the byte read.
//
// The returned VRAM servers implement flow control. As long as
// responses are processed as soon as they're available, each port can
// process one memory operation per cycle.
//
// The VRAM does not prevent write-write or write-read conflicts
// between the ports. The outcome of a simultaneous write to the same
// address is unspecified, as is the read output in a simultaneous
// read and write of the same address. The caller must use external
// arbitration to avoid such accesses.
module mkVRAM(Integer num_kilobytes, VRAM ifc);
   if (num_kilobytes < 1)
      error("minimum VRAM size is 4KiB");
   if (num_kilobytes > 128)
      error("maximum VRAM size is 128KiB");
   let num_bytes = num_kilobytes*1024;
   if (num_bytes % 4096 != 0)
      error("VRAM must be a multiple of 4096b");
   // The memory is built from 4096-byte ByteRAMs, grouped into
   // arrays of up to 8 ByteRAMs each. The last array may be partial.
   let num_byterams = num_bytes/4096;
   let num_arrays = ceil(fromInteger(num_byterams) / 8);

   // split_addr wraps the address into the populated range and then
   // breaks it up into (array select, chip select within the array,
   // byte address within the chip), from most to least significant
   // bits.
   function Tuple3#(ArrayAddr, ChipAddr, ByteAddr) split_addr(VRAMAddr a);
      if (num_bytes < 128*1024)
         a = a % fromInteger(num_bytes);
      match {.top, .byteaddr} = split(pack(a));
      Tuple2#(Bit#(SizeOf#(ArrayAddr)), Bit#(SizeOf#(ChipAddr))) route = split(top);
      return tuple3(unpack(tpl_1(route)), unpack(tpl_2(route)), unpack(byteaddr));
   endfunction

   ByteRAM arrays[num_arrays];
   for (Integer i=0; i<num_arrays; i=i+1) begin
      // All arrays are full (8 chips) except possibly the last one.
      let array_size = min(num_byterams - (i*8), 8);
      arrays[i] <- mkByteRAMArray(array_size);
   end

   // For each port, the array that has a read response pending, if
   // any. The response getter uses CReg port 0 and the request putter
   // uses port 1, so within one cycle a pending response can be
   // drained and a new request accepted.
   Reg#(Maybe#(ArrayAddr)) inflight_A[2] <- mkCReg(2, tagged Invalid);
   Reg#(Maybe#(ArrayAddr)) inflight_B[2] <- mkCReg(2, tagged Invalid);

   interface VRAMServer portA;
      interface Put request;
         // Accepts a request only when no read response is pending
         // on this port.
         method Action put(VRAMRequest req) if (inflight_A[1] matches tagged Invalid);
            match {.array, .chip, .byteaddr} = split_addr(req.addr);
            arrays[array].portA.put(chip, isValid(req.data), byteaddr, fromMaybe(0, req.data));
            // Only reads have a response to wait for.
            if (!isValid(req.data))
               inflight_A[1] <= tagged Valid array;
         endmethod
      endinterface
      interface Get response;
         // Returns the output of the array the pending read went to.
         method ActionValue#(VRAMResponse) get() if (inflight_A[0] matches tagged Valid .array);
            inflight_A[0] <= tagged Invalid;
            return VRAMResponse{data: arrays[array].portA.read()};
         endmethod
      endinterface
   endinterface

   interface VRAMServer portB;
      interface Put request;
         // Accepts a request only when no read response is pending
         // on this port.
         method Action put(VRAMRequest req) if (inflight_B[1] matches tagged Invalid);
            match {.array, .chip, .byteaddr} = split_addr(req.addr);
            // Route with the decoded chip select, exactly like portA,
            // so that both ports address the same byte for a given
            // VRAMAddr.
            arrays[array].portB.put(chip, isValid(req.data), byteaddr, fromMaybe(0, req.data));
            // Only reads have a response to wait for.
            if (!isValid(req.data))
               inflight_B[1] <= tagged Valid array;
         endmethod
      endinterface
      interface Get response;
         // Returns the output of the array the pending read went to.
         method ActionValue#(VRAMResponse) get() if (inflight_B[0] matches tagged Valid .array);
            inflight_B[0] <= tagged Invalid;
            return VRAMResponse{data: arrays[array].portB.read()};
         endmethod
      endinterface
   endinterface
endmodule

endpackage