Source

License

Index

Memory tester

Contains the MIG IP DDR3 controller instance to connect to the DDR3 RAM

NOTE: this is a temporary partial implementation to get the memory test function working first.

Parameters, Ports, and Constants

`default_nettype none

module memory_tester
#(
    // SENSOR is for the incoming sensor data stream (see the sensor_data port).

    parameter SENSOR_CHANNEL_COUNT      = 18,
    parameter SENSOR_WORD_WIDTH         = 12,           // CONSTANT, DO NOT CHANGE.

    // Do Not Set At Instantiation, except in Vivado IPI
    // Derived: total width of one sample across all sensor channels.
    parameter SENSOR_WORD_WIDTH_TOTAL   = SENSOR_WORD_WIDTH * SENSOR_CHANNEL_COUNT,

    // MEM is for the User Interface

    parameter MEM_FIFO_DEPTH            = 512,  // The depth doesn't seem to matter much. 

    parameter MEM_DATA_WIDTH            = 128,  // 16 bit words * 8, from 4:1 clock ratio and Double Data Rate
    parameter MEM_ADDR_WIDTH            = 28,   // Only 2**27 exists
    parameter MEM_CMD_WIDTH             = 3,    // Read (001) or Write (000) (see UG 586, p.93) 
    parameter MEM_MASK_WIDTH            = 16,   // Bytemask for writes
    parameter MEM_BURST_LENGTH          = 8,    // How many locations per read/write

    // DDR3 is for the connections to the DDR3 RAM device, not seen/touched by the user.

    parameter DDR3_ADDR_WIDTH           = 14,
    parameter DDR3_BANK_WIDTH           = 3,
    parameter DDR3_DATA_WIDTH           = 16,
    parameter DDR3_MASK_WIDTH           = 2,
    parameter DDR3_STROBE_WIDTH         = 2
)
(
    // Main clock domain (50 MHz)
input  wire                                 clk_main,
input  wire                                 rst_main_n,

    // Operation ready/valid handshake, feeding the internal operation buffer.
    // NOTE(review): OPERATION_WIDTH is not declared in this module's
    // parameter list, and the operation buffer is sized with MEM_OP_WIDTH
    // instead -- confirm which name memory_controller_operations.vh provides.
output wire                                 operation_ready,
input  wire                                 operation_valid,
input  wire [OPERATION_WIDTH-1:0]           operation,      // Set controller operation (read, write, test, etc...)

    // Sensor data ready/valid handshake.
    // NOTE(review): sensor_data_ready is not driven, and sensor_data_valid /
    // sensor_data are not read, anywhere in the visible module body
    // (partial implementation).
output wire                                 sensor_data_ready,
input  wire                                 sensor_data_valid,
input  wire [SENSOR_WORD_WIDTH_TOTAL-1:0]   sensor_data,    // Sensor data being read out and written to DDR3 RAM
    // MIG User Interface domain (100 MHz)

    // Both are outputs of the MIG DDR3 controller (ui_clk, ui_clk_sync_rst).
    output wire                                 mem_clk,        // 4:1 ratio to DDR3 clock, so 100 MHz
    output wire                                 mem_rst,           

    // Memory test control and status
    input  wire                                 mem_test_start,     // Raw switch input: simple pull to GND (internal pull-up on pin)
    output reg                                  mem_test_running,   // Default low, raises while running, drops when done
    
    // Memory test results, one set per compared read-back word
    output wire                                 mem_test_valid,             // High for 1 cycle for each new test output
    output reg  [MEM_DATA_WIDTH-1:0]            mem_test_error,             // High where bits don't match 
    output wire [MEM_ADDR_WIDTH-1:0]            mem_test_addr,              // Address of read/write
    output wire [MEM_DATA_WIDTH-1:0]            mem_test_data_write,        // Data written to RAM
    output wire [MEM_DATA_WIDTH-1:0]            mem_test_data_read,         // Data read back from RAM
    // Read data ready/valid handshake.
    // NOTE(review): mem_data_valid and mem_data are not driven, and
    // mem_data_ready is not read, anywhere in the visible module body
    // (partial implementation).
input  wire                                 mem_data_ready,
output wire                                 mem_data_valid,
output wire [MEM_DATA_WIDTH-1:0]            mem_data,       // Data read out of DDR3
    // DDR3 Interface domain (no user signals here)

    input  wire                                 ddr3_clk,        // 400 MHz (DDR3 operating speed)
    input  wire                                 ddr3_rst,        // Raise for 5ns minimum
    input  wire                                 ddr3_clk_ref,    // IDELAYCTRL reference clock: 200 MHz

    // These connect to the DDR3 RAM device, and nothing else.
    // Each is wired straight to the same-named port of the MIG_DDR3 instance.

    output wire [DDR3_ADDR_WIDTH-1:0]           ddr3_addr,
    output wire [DDR3_BANK_WIDTH-1:0]           ddr3_ba,
    output wire                                 ddr3_cas_n,
    output wire                                 ddr3_ck_n,
    output wire                                 ddr3_ck_p,
    output wire                                 ddr3_cke,
    output wire                                 ddr3_ras_n,
    output wire                                 ddr3_reset_n,
    output wire                                 ddr3_we_n,
    inout  wire [DDR3_DATA_WIDTH-1:0]           ddr3_dq,
    inout  wire [DDR3_STROBE_WIDTH-1:0]         ddr3_dqs_n,
    inout  wire [DDR3_STROBE_WIDTH-1:0]         ddr3_dqs_p,
    output wire                                 ddr3_cs_n,
    output wire [DDR3_MASK_WIDTH-1:0]           ddr3_dm,
    output wire                                 ddr3_odt
);

    // Shared helper headers. clog2() is used below to size the debounce
    // counter; the operations header presumably supplies MEM_OP_WIDTH, which
    // sizes the operation buffer (TODO confirm).
    `include "clog2_function.vh"
    `include "memory_controller_operations.vh"

    // All-zero constants, each sized to match the bus it initializes or loads.
    localparam MEM_ADDR_ZERO    = {MEM_ADDR_WIDTH{1'b0}};
    localparam MEM_DATA_ZERO    = {MEM_DATA_WIDTH{1'b0}};
    localparam MEM_MASK_NONE    = {MEM_MASK_WIDTH{1'b0}};
    localparam DDR3_DATA_ZERO   = {DDR3_DATA_WIDTH{1'b0}};

    // Command encodings presented on app_cmd (write = 0, read = 1).
    localparam [MEM_CMD_WIDTH-1:0] MEM_CMD_WRITE = 'd0;
    localparam [MEM_CMD_WIDTH-1:0] MEM_CMD_READ  = 'd1;

    // Initial values for the "output reg" ports. Both are assigned from
    // always @(*) blocks further down in this module.
    initial begin
        mem_test_running = 1'b0;
        mem_test_error   = MEM_DATA_ZERO;
    end

First, the DDR3 Controller generated by the Xilinx MIG.

One side connects to the DDR3 device, the other presents the UI (User Interface), which is a set of ready/valid interfaces into command and read/write data FIFOs, and a couple req/ack pulse interfaces for low-level control of refresh, calibration, etc... (not used here).

    // MEM Command FIFO
    // app_addr, app_cmd, and app_en are assigned by the control logic at the
    // end of this module; app_rdy is the controller's ready signal.

    reg   [MEM_ADDR_WIDTH-1:0]      app_addr        = MEM_ADDR_ZERO;
    reg   [MEM_CMD_WIDTH-1:0]       app_cmd         = MEM_CMD_WRITE;
    reg                             app_en          = 1'b0;
    wire                            app_rdy;

    // MEM write data FIFO
    // The byte mask is held at all-zero (MEM_MASK_NONE) for every write.

    reg   [MEM_DATA_WIDTH-1:0]      app_wdf_data    = MEM_DATA_ZERO;
    reg                             app_wdf_end     = 1'b0;
    reg                             app_wdf_wren    = 1'b0;
    wire                            app_wdf_rdy;
    localparam [MEM_MASK_WIDTH-1:0] app_wdf_mask    = MEM_MASK_NONE; // CONSTANT

    // MEM read data FIFO
    // There is no ready input here: read data cannot be stalled.

    wire  [MEM_DATA_WIDTH-1:0]      app_rd_data;
    wire                            app_rd_data_end;
    wire                            app_rd_data_valid;

    // MEM special requests/status
    // The request inputs below are tied low and never change in this module.

    wire                            init_calib_complete;

    localparam                      app_sr_req      = 1'b0; // RESERVED, UNUSED
    wire                            app_sr_active;

    reg                             app_zq_req      = 1'b0; // ZQ calibration request, UNUSED
    wire                            app_zq_ack;

    reg                             app_ref_req     = 1'b0; // Refresh command, UNUSED
    wire                            app_ref_ack;

    // Taken from MIG_DDR3.veo in ip/MIG_DDR3

    // There are pins present in the definition not shown in the instantiation
    // template.
    // verilator lint_off PINNOCONNECT

    MIG_DDR3 
    DDR3_Controller
    (
        // Memory interface ports to/from DDR3 device

        .ddr3_addr              (ddr3_addr),            // output [13:0]    ddr3_addr
        .ddr3_ba                (ddr3_ba),              // output [2:0]     ddr3_ba
        .ddr3_cas_n             (ddr3_cas_n),           // output           ddr3_cas_n
        .ddr3_ck_n              (ddr3_ck_n),            // output [0:0]     ddr3_ck_n
        .ddr3_ck_p              (ddr3_ck_p),            // output [0:0]     ddr3_ck_p
        .ddr3_cke               (ddr3_cke),             // output [0:0]     ddr3_cke
        .ddr3_ras_n             (ddr3_ras_n),           // output           ddr3_ras_n
        .ddr3_reset_n           (ddr3_reset_n),         // output           ddr3_reset_n
        .ddr3_we_n              (ddr3_we_n),            // output           ddr3_we_n
        .ddr3_dq                (ddr3_dq),              // inout [15:0]     ddr3_dq
        .ddr3_dqs_n             (ddr3_dqs_n),           // inout [1:0]      ddr3_dqs_n
        .ddr3_dqs_p             (ddr3_dqs_p),           // inout [1:0]      ddr3_dqs_p
        .ddr3_cs_n              (ddr3_cs_n),            // output [0:0]     ddr3_cs_n
        .ddr3_dm                (ddr3_dm),              // output [1:0]     ddr3_dm
        .ddr3_odt               (ddr3_odt),             // output [0:0]     ddr3_odt

        // Application interface ports (outputs own clock)

        .app_addr               (app_addr),             // input [27:0]     app_addr
        .app_cmd                (app_cmd),              // input [2:0]      app_cmd
        .app_en                 (app_en),               // input            app_en
        .app_rdy                (app_rdy),              // output           app_rdy

        .app_wdf_data           (app_wdf_data),         // input [127:0]    app_wdf_data
        .app_wdf_end            (app_wdf_end),          // input            app_wdf_end
        .app_wdf_wren           (app_wdf_wren),         // input            app_wdf_wren
        .app_wdf_rdy            (app_wdf_rdy),          // output           app_wdf_rdy
        .app_wdf_mask           (app_wdf_mask),         // input [15:0]     app_wdf_mask

        .app_rd_data            (app_rd_data),          // output [127:0]   app_rd_data
        .app_rd_data_end        (app_rd_data_end),      // output           app_rd_data_end
        .app_rd_data_valid      (app_rd_data_valid),    // output           app_rd_data_valid

        .app_ref_ack            (app_ref_ack),          // output           app_ref_ack
        .app_ref_req            (app_ref_req),          // input            app_ref_req

        .app_zq_req             (app_zq_req),           // input            app_zq_req
        .app_zq_ack             (app_zq_ack),           // output           app_zq_ack

        .app_sr_req             (app_sr_req),           // input            app_sr_req
        .app_sr_active          (app_sr_active),        // output           app_sr_active

        // The controller is the source of the user-interface clock and
        // reset, which leave this module as mem_clk and mem_rst.
        .ui_clk                 (mem_clk),              // output           ui_clk
        .ui_clk_sync_rst        (mem_rst),              // output           ui_clk_sync_rst

        .init_calib_complete    (init_calib_complete),  // output           init_calib_complete

        // System Clock Ports (operating clock for DDR3)

        .sys_clk_i              (ddr3_clk),
        .sys_rst                (ddr3_rst),              // input            sys_rst

        // Reference Clock Ports (200 MHz (typ) for IDELAYCTRL)

        .clk_ref_i              (ddr3_clk_ref)
    );

    // verilator lint_on PINNOCONNECT

//# Operation Interface and Buffer

// Then, we take in an operation, buffer it internally, and drop operation_ready for the duration of the operation.

 // memory_controller_operations.vh (presumably the source of MEM_OP_WIDTH)
 // is already included once near the top of this module. Including it a
 // second time here would re-declare its contents in the same module scope,
 // so it is not repeated.

 // Downstream side of the operation buffer. Nothing consumes the buffered
 // operation yet, so the output side is held permanently ready and each
 // accepted operation simply drains.

 reg                     operation_current_ready = 1'b1;
 wire                    operation_current_valid;
 wire [MEM_OP_WIDTH-1:0] operation_current;

 // Single-entry buffer on the operation handshake, in the main clock domain.
 // rst_main_n is active-low, so it is inverted to form the active-high clear.

 Pipeline_Half_Buffer
 #(
     .WORD_WIDTH         (MEM_OP_WIDTH),
     .CIRCULAR_BUFFER    (0)  // non-zero to enable
 )
 operation_buffer
 (
     .clock          (clk_main),
     .clear          (~rst_main_n),

     .input_valid    (operation_valid),
     .input_ready    (operation_ready),
     .input_data     (operation),

     .output_valid   (operation_current_valid),
     .output_ready   (operation_current_ready),
     .output_data    (operation_current)
 );

Interfaces

First, let's debounce the switch input and convert it into a pulse

We are assuming an INITIAL_INPUT rest state of 1, given by an internal pull-up on the pin, so the switch only has to pull to ground, so there is no chance of frying anything with an incorrect voltage.

If there is electrical noise on the pin, decouple to GND with a few pF right at the pin. The internal pull-up should charge it to whatever the I/O Bank supply is.

Let's be very conservative and debounce for 50ms, which at 100 MHz, is a count of 5 million, which needs 23 bits.

    // Debounce interval in mem_clk cycles: the full interval for synthesis,
    // and a token handful of cycles so simulations do not stall on it.
    `ifdef SYNTHESIS
        localparam DEBOUNCE_DELAY_CYCLES  = 5000000;
    `else
        localparam DEBOUNCE_DELAY_CYCLES  = 5; // For Simulation
    `endif
    // NOTE(review): assumes clog2() returns ceil(log2(n)), which is wide
    // enough for both values above but would be one bit short for an exact
    // power of two -- confirm in clog2_function.vh before changing the delay.
    localparam DEBOUNCE_COUNTER_WIDTH = clog2(DEBOUNCE_DELAY_CYCLES);

    wire mem_test_start_debounced; // Pulses high

    // Only the falling-edge output is used: the switch rests high and pulls
    // the pin to GND when actuated. The same delay applies to both edges.

    Debouncer_Low_Latency
    #(
        .COUNTER_WIDTH      (DEBOUNCE_COUNTER_WIDTH),   // Wide enough to hold largest delay.
        .INITIAL_INPUT      (1'b1),                     // 1'b0 or 1'b1. The input rest state.
        .EXTRA_CDC_STAGES   (0)                         // Must be 0 or greater.
    )
    switch_debouncer
    (
        .clock                  (mem_clk),
        // No reset or enable, as that could cause artificial input events.

        .delay_cycles_rising    (DEBOUNCE_DELAY_CYCLES [DEBOUNCE_COUNTER_WIDTH-1:0]),
        .delay_cycles_falling   (DEBOUNCE_DELAY_CYCLES [DEBOUNCE_COUNTER_WIDTH-1:0]),

        .input_raw              (mem_test_start),
        .input_falling          (mem_test_start_debounced),
        // verilator lint_off PINCONNECTEMPTY
        .input_rising           (),
        .input_clean            (),

        // For calibration and testing (see notes in source)
        .diag_synchronized_input    (),
        .diag_ignoring_input        ()
        // verilator lint_on  PINCONNECTEMPTY
    );

Datapath

Then, let's create a counter to generate all consecutive valid addresses within the range of 0 to 2**(MEM_ADDR_WIDTH-1), incrementing by MEM_BURST_LENGTH since MEM_DATA_WIDTH is a multiple of the actual RAM word width DDR3_DATA_WIDTH.

The RAM has 8 banks, each with 16M entries of 16 bits. This is not the actual row/column/bank geometry, but it's equivalent. This means a total of 2Gb of storage, organized as (2**24)*8 = 128M words (2**27) of 16 bits, incremented in steps of 2**3 (8) since that's the ratio of MEM_DATA_WIDTH to DDR3_DATA_WIDTH.

    // Address step between consecutive test accesses.
    localparam [MEM_ADDR_WIDTH-1:0] MEM_ADDR_INCR = 8;

    // Last address visited by the test: the top of the range for synthesis,
    // and a much shorter range to keep simulation runs brief.
    `ifdef SYNTHESIS
        localparam [MEM_ADDR_WIDTH-1:0] MEM_ADDR_LAST = (2**27) - MEM_ADDR_INCR;
    `else
        localparam [MEM_ADDR_WIDTH-1:0] MEM_ADDR_LAST = (2**10) - MEM_ADDR_INCR; // For Simulation (1024 addresses)
    `endif

    // run and load are assigned by the control logic at the end of this module.
    reg                         address_counter_run     = 1'b0;
    reg                         address_counter_load    = 1'b0;
    wire [MEM_ADDR_WIDTH-1:0]   address_counter_value;

    // Up-counter over the test addresses. A load always returns it to zero.

    Counter_Binary
    #(
        .WORD_WIDTH     (MEM_ADDR_WIDTH),
        .INCREMENT      (MEM_ADDR_INCR),
        .INITIAL_COUNT  (MEM_ADDR_ZERO)
    )
    address_counter
    (
        .clock          (mem_clk),
        .clear          (mem_rst),

        .up_down        (1'b0), // 0/1 --> up/down
        .run            (address_counter_run),

        .load           (address_counter_load),
        .load_count     (MEM_ADDR_ZERO),

        .carry_in       (1'b0),
        // verilator lint_off PINCONNECTEMPTY
        .carry_out  (),
        .carries    (),
        .overflow   (),
        // verilator lint_on PINCONNECTEMPTY

        .count          (address_counter_value)
    );

Now, let's generate 8 counters, each DDR3_DATA_WIDTH wide, then concatenated together into a word MEM_DATA_WIDTH wide. Each counter starts at one more than the previous counter (so, 0 to 7), and they increment by 8 at each step. This generates all possible consecutive counts, stored in consecutive 16 bit words, wrapping around at overflow.

    // Each data counter advances by the number of counters, so that together
    // they cover every consecutive count.
    localparam [DDR3_DATA_WIDTH-1:0] DDR3_DATA_INCR = MEM_BURST_LENGTH;

    // run and load are shared by all the counters, and are assigned by the
    // control logic at the end of this module.
    reg                         data_counters_run   = 1'b0;
    reg                         data_counters_load  = 1'b0;
    wire [MEM_DATA_WIDTH-1:0]   data_counters_value;

    generate
    genvar i;

        // One counter per DDR3-width word of the memory data word. Counter i
        // starts (and reloads) at i, and drives slice i of data_counters_value.
        for (i=0; i < MEM_BURST_LENGTH; i=i+1) begin : per_data_word
            Counter_Binary
            #(
                .WORD_WIDTH     (DDR3_DATA_WIDTH),
                .INCREMENT      (DDR3_DATA_INCR),
                .INITIAL_COUNT  (DDR3_DATA_ZERO + i [DDR3_DATA_WIDTH-1:0])
            )
            data_counter
            (
                .clock          (mem_clk),
                .clear          (mem_rst),

                .up_down        (1'b0), // 0/1 --> up/down
                .run            (data_counters_run),

                .load           (data_counters_load),
                .load_count     (DDR3_DATA_ZERO + i [DDR3_DATA_WIDTH-1:0]),

                .carry_in       (1'b0),
                // verilator lint_off PINCONNECTEMPTY
                .carry_out  (),
                .carries    (),
                .overflow   (),
                // verilator lint_on PINCONNECTEMPTY

                .count          (data_counters_value [DDR3_DATA_WIDTH*i +: DDR3_DATA_WIDTH])
            );
        end

    endgenerate

Synchronize address and data pipelines

We synchronize the data and address counters so we can feed both write address and data together to the DDR3 Controller, and so we can store both read address and the expected data together in FIFOs for comparison to read-back data.

Normally, we would have to Join, then Fork the two pipelines to create this synchronization and duplication to feed both RAM and FIFO, but counters are always ready by design, so we can just use a bit of control logic later to run the counters when the pipeline can accept data. (Loads are treated separately.)

    // Handshake for the combined {data, address} word taken from the counters.
    // The valid side is assigned by the control logic at the end of this module.
    reg     test_addr_data_valid = 1'b0;
    wire    test_addr_data_ready;

    // Separate handshakes for the data and address copies after the fork.
    wire    test_data_valid;
    wire    test_addr_valid;
    wire    test_data_ready;
    wire    test_addr_ready;

    wire [MEM_ADDR_WIDTH-1:0] test_addr;
    wire [MEM_DATA_WIDTH-1:0] test_data;

    // Each fork output carries the whole {data, address} word, but each
    // branch keeps only one half of it. The halves that are not kept land
    // in these dummy wires.
    // verilator lint_off UNUSED
    wire [MEM_ADDR_WIDTH-1:0] test_addr_dummy;
    wire [MEM_DATA_WIDTH-1:0] test_data_dummy;
    // verilator lint_on  UNUSED

    // Fork the combined word into a data branch (upper output) and an
    // address branch (lower output).

    Pipeline_Fork_Lazy
    #(
        .WORD_WIDTH     (MEM_DATA_WIDTH + MEM_ADDR_WIDTH),
        .OUTPUT_COUNT   (2)
    )
    test_addr_data_synchronizer
    (
        .input_valid    (test_addr_data_valid),
        .input_ready    (test_addr_data_ready),
        .input_data     ({data_counters_value, address_counter_value}),

        .output_valid   ({test_data_valid,               test_addr_valid}),
        .output_ready   ({test_data_ready,               test_addr_ready}),
        .output_data    ({{test_data, test_addr_dummy}, {test_data_dummy, test_addr}})
    );

Address Pipelines

For writes, we send the address only to the RAM

For reads, we send the address both to the RAM and to a FIFO so we can associate it with the eventual read data. This works because read data is returned in the same order of the read addresses sent to RAM.

First, fork the test address into write_read and read copies. The write_read copy is used first for writes, then for reads at the same time as the read copy (one goes to RAM, the other to a FIFO).

    // Copy that goes to the RAM command port. Its ready is assigned from
    // app_rdy by the control logic at the end of this module.
    wire                        write_read_addr_valid;
    reg                         write_read_addr_ready    = 1'b0;
    wire [MEM_ADDR_WIDTH-1:0]   write_read_addr;

    // Copy that goes on towards the read address FIFO.
    wire                        read_addr_valid;
    wire                        read_addr_ready;
    wire [MEM_ADDR_WIDTH-1:0]   read_addr;

    // Duplicate the test address into the two copies above.

    Pipeline_Fork_Lazy
    #(
        .WORD_WIDTH     (MEM_ADDR_WIDTH),
        .OUTPUT_COUNT   (2)
    )
    test_addr_fork
    (
        .input_valid    (test_addr_valid),
        .input_ready    (test_addr_ready),
        .input_data     (test_addr),

        .output_valid   ({write_read_addr_valid, read_addr_valid}),
        .output_ready   ({write_read_addr_ready, read_addr_ready}),
        .output_data    ({write_read_addr,       read_addr})
    );

Then, if we are writing, sink the read address copy so that it never reaches the FIFO.

    // Assigned by the control logic at the end of this module: high while
    // the test is not in its read-back phase.
    reg sink_read_addr = 1'b0;

    // Read address copy after the sink, on its way to the address FIFO.
    wire                        read_addr_sunk_valid;
    wire                        read_addr_sunk_ready;
    wire [MEM_ADDR_WIDTH-1:0]   read_addr_sunk;

    // Sits between the address fork and the read address FIFO, so that
    // addresses can be kept out of the FIFO while sink_read_addr is high.

    Pipeline_Sink
    #(
        .WORD_WIDTH     (MEM_ADDR_WIDTH),
        .IMPLEMENTATION ("AND")
    )
    read_addr_sink
    (
        .sink           (sink_read_addr),

        .input_valid    (read_addr_valid),
        .input_ready    (read_addr_ready),
        .input_data     (read_addr),

        .output_valid   (read_addr_sunk_valid),
        .output_ready   (read_addr_sunk_ready),
        .output_data    (read_addr_sunk)
    );

Finally, feed the sunk read address to a FIFO. (i.e.: no address is fed to the FIFO while writing)

    // FIFO output side, consumed by the output join further down.
    wire                        read_addr_output_valid;
    wire                        read_addr_output_ready;
    wire [MEM_ADDR_WIDTH-1:0]   read_addr_output;

    // Holds each read address until the matching read data comes back from
    // the RAM.

    Pipeline_FIFO_Buffer
    #(
        .WORD_WIDTH         (MEM_ADDR_WIDTH),
        .DEPTH              (MEM_FIFO_DEPTH),
        .RAMSTYLE           ("block"),
        .CIRCULAR_BUFFER    (0)  // non-zero to enable
    )
    read_addr_fifo
    (
        .clock          (mem_clk),
        .clear          (mem_rst),

        .input_valid    (read_addr_sunk_valid),
        .input_ready    (read_addr_sunk_ready),
        .input_data     (read_addr_sunk),

        .output_valid   (read_addr_output_valid),
        .output_ready   (read_addr_output_ready),
        .output_data    (read_addr_output)
    );

Write Data Pipelines

If we are writing, steer the test data to the RAM write data port only, else if we are reading, steer it to a FIFO only, which also leaves the write data port idle. We later read out the data FIFO to compare the written data to the data read back from RAM. (They should match.)

    // Branch output towards the RAM write data port. Its ready is assigned
    // from app_wdf_rdy by the control logic at the end of this module.
    wire                        write_data_ram_valid;
    reg                         write_data_ram_ready    = 1'b0;
    wire [MEM_DATA_WIDTH-1:0]   write_data_ram;

    // Branch output towards the expected-data FIFO.
    wire                        write_data_fifo_valid;
    wire                        write_data_fifo_ready;
    wire [MEM_DATA_WIDTH-1:0]   write_data_fifo;

    // One-hot select: bit 1 picks the RAM output, bit 0 picks the FIFO
    // output (matching the concatenation order on the Branch outputs below).
    reg [1:0] test_data_destination = 2'b0; // Needs a binary to one-hot conversion, see control logic.

    Pipeline_Branch_One_Hot
    #(
        .WORD_WIDTH     (MEM_DATA_WIDTH),
        .OUTPUT_COUNT   (2),
        .IMPLEMENTATION ("AND")
    )
    write_data_steering
    (
        .selector       (test_data_destination),

        .input_valid    (test_data_valid),
        .input_ready    (test_data_ready),
        .input_data     (test_data),

        .output_valid   ({write_data_ram_valid, write_data_fifo_valid}),
        .output_ready   ({write_data_ram_ready, write_data_fifo_ready}),
        .output_data    ({write_data_ram,       write_data_fifo})
    );

    // FIFO output side, consumed by the output join further down.
    wire                        write_data_output_valid;
    wire                        write_data_output_ready;
    wire [MEM_DATA_WIDTH-1:0]   write_data_output;

    // Holds the expected data for each read until the matching read data
    // comes back from the RAM.

    Pipeline_FIFO_Buffer
    #(
        .WORD_WIDTH         (MEM_DATA_WIDTH),
        .DEPTH              (MEM_FIFO_DEPTH),
        .RAMSTYLE           ("block"),
        .CIRCULAR_BUFFER    (0)  // non-zero to enable
    )
    written_data_fifo
    (
        .clock          (mem_clk),
        .clear          (mem_rst),

        .input_valid    (write_data_fifo_valid),
        .input_ready    (write_data_fifo_ready),
        .input_data     (write_data_fifo),

        .output_valid   (write_data_output_valid),
        .output_ready   (write_data_output_ready),
        .output_data    (write_data_output)
    );

Test Output Pipelines

Finally, we join the read address and write data FIFO outputs to the read data output from the RAM, so we can compare them all.

NOTE: there is no back-pressure ready signal on the RAM read data pipeline, so we are depending here that the RAM read latency (from send address to returned data) is longer than that of the FIFOs, which is 2 cycles, so the saved read address and write data are ready before the corresponding read data arrives.

NOTE: We MUST use a buffered Join since the RAM data arrives slightly later within the clock cycle, and so causes a glitch in the mem_test_error output, even if the RAM read-back value is correct.

    // All Join inputs share one width (MEM_DATA_WIDTH), so the narrower
    // address is zero-padded on the way in, and the padding is dropped on
    // the way out.
    localparam ADDR_PAD_WIDTH = MEM_DATA_WIDTH - MEM_ADDR_WIDTH;
    localparam ADDR_PAD_ZERO  = {ADDR_PAD_WIDTH{1'b0}};

    // mem_test_pad receives the address padding. app_rd_data_ready_dummy
    // receives the ready for the RAM read data, which has nowhere to go
    // because the read data interface has no ready input.
    // verilator lint_off UNUSED
    wire [ADDR_PAD_WIDTH-1:0] mem_test_pad;
    wire                      app_rd_data_ready_dummy;
    // verilator lint_on  UNUSED

    // Join {read address, expected data, RAM read data} into a single test
    // result. The output is always accepted (output_ready tied high).

    Pipeline_Join
    #(
        .WORD_WIDTH     (MEM_DATA_WIDTH),
        .INPUT_COUNT    (3)
    )
    output_addr_data_join
    (
        .clock          (mem_clk),
        .clear          (mem_rst),

        .input_valid    ({read_addr_output_valid,           write_data_output_valid, app_rd_data_valid}),
        .input_ready    ({read_addr_output_ready,           write_data_output_ready, app_rd_data_ready_dummy}),
        .input_data     ({{ADDR_PAD_ZERO,read_addr_output}, write_data_output,       app_rd_data}),

        .output_valid   (mem_test_valid),
        .output_ready   (1'b1),
        .output_data    ({{mem_test_pad,mem_test_addr}, mem_test_data_write, mem_test_data_read})
    );

Read-back data check, masked by the valid bit. Each bit position is set where there is a mismatch.

    // Bitwise difference between expected and read-back data, forced to
    // all-zero whenever the test output is not valid.
    always @(*) begin
        mem_test_error = (mem_test_data_read ^ mem_test_data_write) & {MEM_DATA_WIDTH{mem_test_valid}};
    end

Controlpath

Latch the internal state of the test once started. This state is cleared once the test is done internally, that is, when the counters are done generating the addresses and data, regardless of what is left in the FIFOs and what has not yet returned from RAM.

    // mem_test_started gates the counters and the pipeline valid in the
    // control logic; mem_test_done is assigned there too.
    wire mem_test_started;
    reg  mem_test_done = 1'b0;

    // Set by the debounced start pulse; cleared when the counters finish
    // the read-back pass, or on a memory controller reset.

    Pulse_Latch
    #(
        .RESET_VALUE    (1'b0)
    )
    test_running_state
    (
        .clock      (mem_clk),
        .clear      (mem_test_done || mem_rst),
        .pulse_in   (mem_test_start_debounced),
        .level_out  (mem_test_started)
    );

Separately latch the visible running state of the test. This state is cleared once all the pending FIFO and RAM data has been compared.

    // mem_test_running_internal feeds the mem_test_running output;
    // mem_test_complete is assigned by the control logic at the end of
    // this module.
    wire mem_test_running_internal;
    reg  mem_test_complete = 1'b0;

    // Set by the same debounced start pulse as the internal state; cleared
    // when the test is declared complete, or on a memory controller reset.

    Pulse_Latch
    #(
        .RESET_VALUE    (1'b0)
    )
    test_running_output
    (
        .clock      (mem_clk),
        .clear      (mem_test_complete || mem_rst),
        .pulse_in   (mem_test_start_debounced),
        .level_out  (mem_test_running_internal)
    );

We can start the test early, but it will only show as running once the RAM initial calibration is complete. None of the interfaces to the DDR3 controller are active until this time, so the test is started, but stalled until the DDR3 controller is ready.

This is not a case that will happen often in real life, as calibration completes in microseconds. (about 100us in simulation)

    // The test only reports as running once it has been started AND the
    // DDR3 controller has finished its initial calibration.
    always @(*) begin
        mem_test_running = mem_test_running_internal & init_calib_complete;
    end

Latch phase of running test (WRITE, READ-BACK)

First we write to the whole RAM, then read-back all that we wrote while comparing, then the test is done.

    // test_phase_write_done is assigned by the control logic at the end of
    // this module. test_phase_readback is low during the write pass and
    // high during the read-back pass.
    reg  test_phase_write_done = 1'b0;
    wire test_phase_readback;

    // Set at the end of the write pass; cleared at the end of the read-back
    // pass. Also cleared by mem_rst, like the other two test state latches:
    // otherwise a memory controller reset during read-back would clear the
    // counters and the started state but leave the phase stuck in read-back,
    // and the next test would skip its write pass.

    Pulse_Latch
    #(
        .RESET_VALUE    (1'b0)
    )
    test_phase_state
    (
        .clock      (mem_clk),
        .clear      (mem_test_done || mem_rst),
        .pulse_in   (test_phase_write_done),
        .level_out  (test_phase_readback)
    );

Compute the control signals

    // All control signals are combinational functions of the latched test
    // state, the address counter value, and the pipeline handshakes.

    always @(*) begin

        // Internally, the test is paced by the value of the address counter.
        // Reaching the last address ends the current pass: the write pass
        // hands over to read-back, and the read-back pass ends the test.
        // NOTE(review): these flags, and the counter loads below, are not
        // qualified by test_addr_data_ready. If Counter_Binary gives load
        // priority over run, a stall on the very last address would skip it
        // -- confirm against Counter_Binary before relying on this.

        mem_test_done           = (mem_test_started == 1'b1) && (test_phase_readback == 1'b1) && (address_counter_value == MEM_ADDR_LAST);
        test_phase_write_done   = (mem_test_started == 1'b1) && (test_phase_readback == 1'b0) && (address_counter_value == MEM_ADDR_LAST);

        // For each write or read, send in the necessary command, with
        // backpressure.

        app_addr                = write_read_addr;
        app_cmd                 = (test_phase_readback == 1'b0) ? MEM_CMD_WRITE : MEM_CMD_READ;
        app_en                  = write_read_addr_valid;
        write_read_addr_ready   = app_rdy;

        // When in write phase, send each data to the RAM. The data and
        // address update in sync and are presented at the same time to the
        // DDR3 controller.

        app_wdf_data            = write_data_ram;
        app_wdf_wren            = write_data_ram_valid;
        app_wdf_end             = write_data_ram_valid; // A 4:1 interface writes a burst of 8 in one cycle
        write_data_ram_ready    = app_wdf_rdy;

        // While the test is started, and the interfaces can accept their
        // output, run the address and data counters. Both sets of counters
        // use exactly the same run and load conditions so they can never
        // fall out of step with each other.

        address_counter_run     = (test_addr_data_ready == 1'b1) && (mem_test_started == 1'b1);
        address_counter_load    = (mem_test_started == 1'b1) && (address_counter_value == MEM_ADDR_LAST);

        data_counters_run       = (test_addr_data_ready == 1'b1) && (mem_test_started == 1'b1);
        data_counters_load      = (mem_test_started == 1'b1) && (address_counter_value == MEM_ADDR_LAST);

        // Declare the address and data valid only when the test is started.

        test_addr_data_valid    = mem_test_started;

        // If we are not yet reading back the written data, sink the address
        // that would be written to the address FIFO, so it only gets filled
        // during read-back.

        sink_read_addr          = (test_phase_readback == 1'b0);

        // Convert the 1-bit read-back phase flag into a one-hot selector for
        // the Branch which steers the data to the RAM or to the data FIFO.

        test_data_destination   = (test_phase_readback == 1'b0) ? 2'b10 : 2'b01;

        // Finally, once the running test outputs the last memory address
        // (during read-back), declare the test complete. This is where it all
        // stops. The address is only meaningful while mem_test_valid is
        // high, so qualify the comparison with it: otherwise an address left
        // over from a previous run could end a new test immediately.

        mem_test_complete       = (mem_test_running == 1'b1) && (mem_test_valid == 1'b1) && (mem_test_addr == MEM_ADDR_LAST);

    end


endmodule

Back to FPGA Design Elements

fpgacpu.ca