Implements a memory with multiple read and write ports, all of which can be used concurrently, built from Block RAMs and a Live Value Table (LVT).
`default_nettype none
// RAM_Multiported_LVT: module interface.
//
// Every parameter defaults to a placeholder (0 or the empty string), so they
// are presumably meant to be overridden at instantiation.
//
// Multi-port signals are flat vectors with one field per port. On the write
// side, port i occupies write_data [WORD_WIDTH*i +: WORD_WIDTH] and
// write_address [ADDR_WIDTH*i +: ADDR_WIDTH]; the read-side vectors presumably
// follow the same layout (TODO confirm).
module RAM_Multiported_LVT
#(
// Bits per data word, number of read/write ports, bits per port address.
parameter WORD_WIDTH = 0,
parameter READ_PORT_COUNT = 0,
parameter WRITE_PORT_COUNT = 0,
parameter ADDR_WIDTH = 0,
// Forwarded as DEPTH to both the Live Value Table and the storage banks.
parameter DEPTH = 0,
// Initial contents, forwarded to the storage banks.
parameter USE_INIT_FILE = 0,
parameter INIT_FILE = "",
parameter [WORD_WIDTH-1:0] INIT_VALUE = 0,
// LVT_RAMSTYLE is forwarded as RAMSTYLE to the Live Value Table.
// STORAGE_RAMSTYLE is presumably the equivalent for the storage banks
// (confirm against the Storage_Banks instantiation).
parameter LVT_RAMSTYLE = "",
parameter STORAGE_RAMSTYLE = "",
// Pipeline depths: the first is forwarded to the Live Value Table and also
// sets the depth of the LVT_Read_Alignment pipelines; the second sets the
// depth of the Storage_Read pipelines.
parameter LVT_READ_PIPELINE_DEPTH = 0,
parameter STORAGE_READ_PIPELINE_DEPTH = 0,
// Forwarded to the Live Value Table.
parameter ON_WRITE_CONFLICT = "",
// Forwarded to the storage banks.
parameter READ_NEW_DATA = 0,
parameter RW_ADDR_COLLISION = "",
// Do not set at instantiation, except in IPI
// Derived widths of the flat multi-port vectors.
parameter TOTAL_READ_DATA = WORD_WIDTH * READ_PORT_COUNT,
parameter TOTAL_WRITE_DATA = WORD_WIDTH * WRITE_PORT_COUNT,
parameter TOTAL_READ_ADDR = ADDR_WIDTH * READ_PORT_COUNT,
parameter TOTAL_WRITE_ADDR = ADDR_WIDTH * WRITE_PORT_COUNT
)
(
input wire clock,
// Write Data Interface
// One valid/ready bit per write port.
input wire [WRITE_PORT_COUNT-1:0] write_valid,
input wire [TOTAL_WRITE_DATA-1:0] write_data,
input wire [TOTAL_WRITE_ADDR-1:0] write_address,
output wire [WRITE_PORT_COUNT-1:0] write_ready,
// Write Conflict Interface
// write_conflict is driven by the Live Value Table's write_conflict output.
output wire [WRITE_PORT_COUNT-1:0] write_conflict_valid,
output wire [WRITE_PORT_COUNT-1:0] write_conflict,
output wire [TOTAL_WRITE_ADDR-1:0] write_conflict_address,
input wire [WRITE_PORT_COUNT-1:0] write_conflict_ready,
// Read Address Interface
// One valid/ready bit per read port.
input wire [READ_PORT_COUNT-1:0] read_valid,
input wire [TOTAL_READ_ADDR-1:0] read_address,
output wire [READ_PORT_COUNT-1:0] read_ready,
// Read Data Interface
output wire [READ_PORT_COUNT-1:0] read_data_valid,
output wire [TOTAL_READ_DATA-1:0] read_data,
// NOTE(review): read_data_address is an *input* on the read data interface
// and is not referenced anywhere in the visible source -- confirm the
// intended direction and use.
input wire [TOTAL_READ_ADDR-1:0] read_data_address,
input wire [READ_PORT_COUNT-1:0] read_data_ready
);
// All-zero data word. Not referenced elsewhere in the visible source.
localparam WORD_ZERO = {WORD_WIDTH{1'b0}};
We must handle the edge case where we only have 1 write port, which implies an LVT width of zero, which is not representable in Verilog-2001. In that case, we implement a 1-bit-wide LVT whose contents are always zero (the index of the single write port), and so will optimize away and reduce this design to a RAM_1WnR_Replicated module.
`include "clog2_function.vh"
// An LVT entry stores a write port index. With one (or no) write port the
// index would need zero bits, so the width is clamped to a minimum of 1 bit.
localparam LVT_WIDTH = (WRITE_PORT_COUNT <= 1) ? 1 : clog2(WRITE_PORT_COUNT);
// All-zero LVT entry, i.e. the index of write port 0.
localparam LVT_ZERO  = {LVT_WIDTH{1'b0}};
We need a set of constants, numbering all the write ports, of LVT_WIDTH.
// Constant vector of write port indices, LVT_WIDTH bits each: the field at
// [LVT_WIDTH*p +: LVT_WIDTH] holds the value p (truncated to LVT_WIDTH bits).
localparam WRITE_PORT_NUMBERS_WIDTH = LVT_WIDTH * WRITE_PORT_COUNT;
reg [WRITE_PORT_NUMBERS_WIDTH-1:0] write_port_number;

// Loop variable with a dedicated name, so it cannot collide with the loop
// variables of the generate blocks in this module (they share module scope).
integer port_index;

initial begin
    for (port_index=0; port_index < WRITE_PORT_COUNT; port_index=port_index+1) begin: per_write_port_number
        // Indexed part-select is spelled "+:" (base +: width).
        write_port_number [LVT_WIDTH*port_index +: LVT_WIDTH] = port_index [LVT_WIDTH-1:0];
    end
end
One vector to collect the live write port for each read access. Pipelined, then used to drive the final read port multiplexers.
localparam READ_PORT_NUMBERS_WIDTH = LVT_WIDTH * READ_PORT_COUNT;

// One LVT_WIDTH field per read port: the index of the write port which last
// wrote the address being read. Driven by the read_data output port of the
// Live_Value_Table instance, so it must be a net rather than a reg.
wire [READ_PORT_NUMBERS_WIDTH-1:0] write_port_live;

// Per-port enables, shared by the Live Value Table and the storage banks.
// NOTE(review): the visible source never declares these signals. They are
// derived here directly from the valid inputs, which assumes every port can
// always accept a transfer; the ready/back-pressure side of the handshake is
// not implemented in the visible source. Revisit when the handshake logic is
// completed.
wire [WRITE_PORT_COUNT-1:0] write_enable = write_valid;
wire [READ_PORT_COUNT-1:0]  read_enable  = read_valid;

// The Live Value Table: each write port stores its own index at the address
// it writes, and each read port reads back the index stored at the address
// it reads. All entries start at LVT_ZERO (write port 0), and no init file
// is used.
RAM_Multiported_LE
#(
    .WORD_WIDTH             (LVT_WIDTH),
    .READ_PORT_COUNT        (READ_PORT_COUNT),
    .WRITE_PORT_COUNT       (WRITE_PORT_COUNT),
    .ADDR_WIDTH             (ADDR_WIDTH),
    .DEPTH                  (DEPTH),
    .USE_INIT_FILE          (0),
    .INIT_FILE              (""),
    .INIT_VALUE             (LVT_ZERO),
    .RAMSTYLE               (LVT_RAMSTYLE),
    .ON_WRITE_CONFLICT      (ON_WRITE_CONFLICT),
    .READ_PIPELINE_DEPTH    (LVT_READ_PIPELINE_DEPTH)
)
Live_Value_Table
(
    .clock          (clock),
    .clear          (1'b0),

    .write_data     (write_port_number),
    .write_address  (write_address),
    .write_enable   (write_enable),
    .write_conflict (write_conflict),

    .read_data      (write_port_live),
    .read_address   (read_address),
    .read_enable    (read_enable)
);
Flatten out all the read signals, repeated once for each write port.
// Widths of the read-side vectors once replicated for every storage bank
// (there is one bank per write port).
localparam FLAT_READ_DATA        = TOTAL_READ_DATA * WRITE_PORT_COUNT;
localparam FLAT_READ_ADDR        = TOTAL_READ_ADDR * WRITE_PORT_COUNT;
localparam FLAT_READ_ENABLE      = READ_PORT_COUNT * WRITE_PORT_COUNT;
localparam FLAT_READ_ADDR_ZERO   = {FLAT_READ_ADDR{1'b0}};
localparam FLAT_READ_ENABLE_ZERO = {FLAT_READ_ENABLE{1'b0}};

// Bank i occupies [TOTAL_READ_DATA*i +: TOTAL_READ_DATA] of the data vector,
// [TOTAL_READ_ADDR*i +: TOTAL_READ_ADDR] of the address vector, and
// [READ_PORT_COUNT*i +: READ_PORT_COUNT] of the enable vector.
wire [FLAT_READ_DATA-1:0]   read_data_storage;
reg  [FLAT_READ_ADDR-1:0]   read_address_storage = FLAT_READ_ADDR_ZERO;
reg  [FLAT_READ_ENABLE-1:0] read_enable_storage  = FLAT_READ_ENABLE_ZERO;

// Every bank sees the same read addresses and read enables. The enable is
// the same read_enable signal passed to the Live_Value_Table instance, so
// the LVT and the banks are always read together.
always @(*) begin
    read_address_storage = {WRITE_PORT_COUNT{read_address}};
    read_enable_storage  = {WRITE_PORT_COUNT{read_enable}};
end

// One replicated 1-write/n-read memory per write port. Bank i is written
// only by write port i, and can be read by every read port.
generate
    genvar i;
    for (i=0; i < WRITE_PORT_COUNT; i=i+1) begin: per_storage_bank
        RAM_1WnR_Replicated
        #(
            .WORD_WIDTH         (WORD_WIDTH),
            .READ_PORT_COUNT    (READ_PORT_COUNT),
            .ADDR_WIDTH         (ADDR_WIDTH),
            .DEPTH              (DEPTH),
            .USE_INIT_FILE      (USE_INIT_FILE),
            .INIT_FILE          (INIT_FILE),
            .INIT_VALUE         (INIT_VALUE),
            .RAMSTYLE           (STORAGE_RAMSTYLE),
            // See RAM_Simple_Dual_Port for usage of these parameters
            .READ_NEW_DATA      (READ_NEW_DATA),
            .RW_ADDR_COLLISION  (RW_ADDR_COLLISION)
        )
        Storage_Banks
        (
            .clock          (clock),

            .write_data     (write_data    [WORD_WIDTH*i +: WORD_WIDTH]),
            .write_address  (write_address [ADDR_WIDTH*i +: ADDR_WIDTH]),
            .write_enable   (write_enable  [i]),

            // Each bank gets its own slice of the flattened read vectors,
            // not the whole vectors.
            .read_data      (read_data_storage    [TOTAL_READ_DATA*i +: TOTAL_READ_DATA]),
            .read_address   (read_address_storage [TOTAL_READ_ADDR*i +: TOTAL_READ_ADDR]),
            .read_enable    (read_enable_storage  [READ_PORT_COUNT*i +: READ_PORT_COUNT])
        );
    end
endgenerate
Now pipeline the storage bank read data to synchronize with any pipeline for the LVT read data, then add another pipeline to retime into the final output read multiplexer.
Here we have to iterate over the compound vector created above of all the read ports, organized as one replicated set per write port. The structure of the compound vector is reflected by the nested loop indexed on both read and write port counts.
We have to iterate over each read port, per write port, so we select all the first read ports over each write port, then all the second read ports, etc...
// Read datapath. For each read port:
//   1. take that port's word from every storage bank,
//   2. delay it by LVT_READ_PIPELINE_DEPTH to line up with the LVT read data,
//   3. delay it again by STORAGE_READ_PIPELINE_DEPTH to retime into the mux,
//   4. select the word from the bank named by the live write port index.
//
// NOTE(review): the visible source left the Storage_Read and multiplexer
// ports unconnected; the wiring below follows the description above and
// should be confirmed in simulation. The read_data_valid / read_ready
// handshake outputs are not driven by this section.
generate
    // Named so they cannot collide with other loop variables in module scope.
    genvar read_port, write_port;

    for (read_port=0; read_port < READ_PORT_COUNT; read_port=read_port+1) begin: per_read_port

        // Candidate words for this read port, one per storage bank:
        // bank w occupies [WORD_WIDTH*w +: WORD_WIDTH].
        wire [(WORD_WIDTH*WRITE_PORT_COUNT)-1:0] bank_words;

        for (write_port=0; write_port < WRITE_PORT_COUNT; write_port=write_port+1) begin: per_write_port

            wire [WORD_WIDTH-1:0] bank_word_aligned;

            // Each pipeline carries a single word, so it is WORD_WIDTH wide.
            Register_Pipeline_Simple
            #(
                .WORD_WIDTH     (WORD_WIDTH),
                .PIPE_DEPTH     (LVT_READ_PIPELINE_DEPTH)
            )
            LVT_Read_Alignment
            (
                .clock          (clock),
                .clock_enable   (1'b1),
                .clear          (1'b0),
                // Bank write_port's copy of read port read_port.
                .pipe_in        (read_data_storage [((TOTAL_READ_DATA*write_port)+(WORD_WIDTH*read_port)) +: WORD_WIDTH]),
                .pipe_out       (bank_word_aligned)
            );

            Register_Pipeline_Simple
            #(
                .WORD_WIDTH     (WORD_WIDTH),
                .PIPE_DEPTH     (STORAGE_READ_PIPELINE_DEPTH)
            )
            Storage_Read
            (
                .clock          (clock),
                .clock_enable   (1'b1),
                .clear          (1'b0),
                .pipe_in        (bank_word_aligned),
                .pipe_out       (bank_words [WORD_WIDTH*write_port +: WORD_WIDTH])
            );
        end

        // The bank selector must pass through the same retiming stages as
        // the data it selects, otherwise it would arrive
        // STORAGE_READ_PIPELINE_DEPTH cycles early.
        // NOTE(review): this pipeline is not present in the visible source.
        wire [LVT_WIDTH-1:0] live_bank;

        Register_Pipeline_Simple
        #(
            .WORD_WIDTH     (LVT_WIDTH),
            .PIPE_DEPTH     (STORAGE_READ_PIPELINE_DEPTH)
        )
        Live_Bank_Retiming
        (
            .clock          (clock),
            .clock_enable   (1'b1),
            .clear          (1'b0),
            .pipe_in        (write_port_live [LVT_WIDTH*read_port +: LVT_WIDTH]),
            .pipe_out       (live_bank)
        );

        // Selects among the storage banks, of which there is one per write
        // port, hence INPUT_COUNT is WRITE_PORT_COUNT.
        Multiplexer_Binary_Behavioural
        #(
            .WORD_WIDTH     (WORD_WIDTH),
            .ADDR_WIDTH     (LVT_WIDTH),
            .INPUT_COUNT    (WRITE_PORT_COUNT)
        )
        Bank_Read_Selector
        (
            .selector       (live_bank),
            .words_in       (bank_words),
            .word_out       (read_data [WORD_WIDTH*read_port +: WORD_WIDTH])
        );
    end
endgenerate
endmodule