Source

License

Index

Differential Data Deserializer, with N to 2N Ratio

This module extends the 1-to-N Differential Deserializer by packing two differential N-bit deserialized words into one differential 2N-bit word and extending the bitslip word-alignment logic to support this double-width word. The output words emerge at half rate but in the same clock domain. The output word is also sent through a variable latency pipeline so multiple deserialized data channels can be aligned.

All training is controlled externally, but the final trained state (tap delay (bit alignment), bitslip amount and halfword reversal (word alignment), and channel alignment delay) is persistent and stored here in the datapath.

Parameters, Ports, and Constants

`default_nettype none

// Deserializes one differential serial data line into 2N-bit parallel words
// (N = INPUT_DATA_WIDTH), separately for the positive and negative polarity.
// The body below instantiates a Deserializer_Differential_1toN, packs pairs
// of its N-bit halfwords into 2N-bit words, optionally swaps the halfwords,
// and passes the result through a variable-depth channel alignment pipeline.
// All control inputs and parallel outputs are in the clk_parallel domain.

module Deserializer_Differential_N_to_2N
#(
    // For the input buffer of each data line

    parameter IBUF_DIFF_TERM                = "",   // Differential Termination, "TRUE"/"FALSE"
    parameter IBUF_LOW_PWR                  = "",   // Low power="TRUE", Highest performance="FALSE"
    parameter IBUF_IOSTANDARD               = "",   // Specify the input I/O standard (e.g.: "LVDS_25")

    // For the input delay taps of each data line
    // NOTE(review): IODELAY_REFCLK_FREQUENCY is not referenced anywhere in
    // this module body. Confirm whether it should be forwarded to the
    // Deserializer_Differential_1toN instance.

    parameter IODELAY_REFCLK_FREQUENCY      = "",   // External IDELAYCTRL reference clock input frequency in MHz (190.0-210.0, 290.0-310.0)
    parameter IODELAY_GROUP                 = "",   // Must match IODELAY_GROUP applied to IDELAYCTRL module in same I/O Bank.
    parameter IODELAY_HIGH_PERFORMANCE_MODE = "",   // Reduced jitter ("TRUE"), Reduced power ("FALSE"). The clock source should have the same setting.

    // Max depth of the channel alignment variable pipeline
    parameter CHANNEL_PIPELINE_DEPTH        = 0,    // Only 16 or 32 is allowed. See Register_Pipeline_Variable

    // For the 1-to-N SERDES hardware
    // NOTE(review): DATA_RATE is not referenced anywhere in this module body,
    // and INPUT_DATA_WIDTH is used only for local wire/packer sizing. Confirm
    // whether the 1-to-N deserializer is expected to receive them.

    parameter DATA_RATE                     = "",   // "DDR" or "SDR".
    parameter INPUT_DATA_WIDTH              = 0,    // How many bits in each N-bit word? Must be natively supported by SERDES hardware. (UPDATE PARALLEL DATA WIRING AT SERDES in Deserializer_Differential_1toN !)

    // Do not set at instantiation, except in Vivado IPI
    // Repeat calculation in enclosing module if necessary
    parameter TAP_COUNTER_WIDTH             = 5,                // Hardcoded to match the IDELAY2 hardware. See UG471.
    parameter INPUT_DATA_COUNT              = 2,                // Hardcoded. Changing this needs more complex word permutations to extend bitslip across words.
    parameter OUTPUT_DATA_WIDTH             = INPUT_DATA_WIDTH * INPUT_DATA_COUNT,
    // clog2() is presumably provided by "clog2_function.vh", which is only
    // included further down, inside the module body.
    parameter CHANNEL_DELAY_WIDTH           = clog2(CHANNEL_PIPELINE_DEPTH)
)
(
    input  wire                                 clk_serial,         // High-speed I/O clock for incoming serial data (IO and SERDES)
    input  wire                                 clk_parallel,       // "Low-speed" clock for outgoing parallel data and all control inputs (SERDES)
    input  wire                                 reset_parallel,

    input  wire                                 datain_n,           // High-speed serial I/O data in
    input  wire                                 datain_p,


    // Bit and word alignment controls, positive polarity
    input  wire                                 incdec_p_enable,    // Enable increment/decrement of delay tap
    input  wire                                 incdec_p,           // Increment (1) or decrement (0) delay tap
    output wire     [TAP_COUNTER_WIDTH-1:0]     tap_p_current,      // Current value of delay tap
    input  wire     [TAP_COUNTER_WIDTH-1:0]     tap_p_load_value,   // New value of delay tap
    input  wire                                 tap_p_load,         // Load new delay tap value
    input  wire                                 datain_p_bitslip,   // Pulse to shift output word
    input  wire                                 datain_p_wordflip,  // Pulse to swap halfwords of the output word

    // Bit and word alignment controls, negative polarity
    input  wire                                 incdec_n_enable,    // Enable increment/decrement of delay tap
    input  wire                                 incdec_n,           // Increment (1) or decrement (0) delay tap
    output wire     [TAP_COUNTER_WIDTH-1:0]     tap_n_current,      // Current value of delay tap
    input  wire     [TAP_COUNTER_WIDTH-1:0]     tap_n_load_value,   // New value of delay tap
    input  wire                                 tap_n_load,         // Load new delay tap value
    input  wire                                 datain_n_bitslip,   // Pulse to shift output word
    input  wire                                 datain_n_wordflip,  // Pulse to swap halfwords of the output word

    // Channel alignment delay, shared by both polarities and the valid signal
    input  wire                                 channel_delay_valid,// Load a new channel alignment tap delay
    output reg                                  channel_delay_ready,
    input  wire     [CHANNEL_DELAY_WIDTH-1:0]   channel_delay,

    // Parallel output. datain_parallel_ready is not read anywhere in this module.
    output wire                                 datain_parallel_valid,  // There is a handshake, but no slack for stalling!
    input  wire                                 datain_parallel_ready,  // UNUSED! Must always be ready before datain_parallel_valid!
    output wire     [OUTPUT_DATA_WIDTH-1:0]     datain_p_parallel,      // Deserialized data in clk_parallel domain
    output wire     [OUTPUT_DATA_WIDTH-1:0]     datain_n_parallel       // Deserialized data in clk_parallel domain
);

    `include "clog2_function.vh"

    // NOTE(review): OUTPUT_DATA_ZERO is not referenced anywhere in this module.
    localparam OUTPUT_DATA_ZERO = {OUTPUT_DATA_WIDTH{1'b0}};

    // channel_delay_ready is given its value here and is never assigned
    // anywhere else in this module, so a channel_delay load is accepted
    // whenever channel_delay_valid is raised.
    initial begin
        channel_delay_ready     = 1'b1; // Always ready (see channel delay logic at bottom)
    end

1-to-N Deserializer

This deserializer captures N serial bits into 1 N-bit word, both in positive and negative polarity. These words are half-words of the final 2N-bit word and may not be in the right order (see below).

    // N-bit halfwords produced by the 1-to-N deserializer, one per polarity.
    // These feed the halfword packers further down.
    wire [INPUT_DATA_WIDTH-1:0] datain_p_parallel_halfword;
    wire [INPUT_DATA_WIDTH-1:0] datain_n_parallel_halfword;

    // The clocks, reset, serial inputs, and all tap/bitslip controls of the
    // enclosing module are passed straight through to this instance.
    //
    // NOTE(review): the enclosing module's IODELAY_REFCLK_FREQUENCY,
    // DATA_RATE, and INPUT_DATA_WIDTH parameters are NOT forwarded here, so
    // this instance uses its own defaults for whatever it calls them.
    // Confirm against Deserializer_Differential_1toN whether matching
    // parameters exist and should be overridden; in particular, the halfword
    // wires above are INPUT_DATA_WIDTH wide and must match the width of the
    // instance's datain_p_parallel / datain_n_parallel ports.
    Deserializer_Differential_1toN
    #(
        // For the input buffer of each data line
        .IBUF_DIFF_TERM                 (IBUF_DIFF_TERM),
        .IBUF_LOW_PWR                   (IBUF_LOW_PWR),
        .IBUF_IOSTANDARD                (IBUF_IOSTANDARD),

        .IODELAY_GROUP                  (IODELAY_GROUP),                    // Must match IODELAY_GROUP applied to IDELAYCTRL module in same I/O Bank.
        .IODELAY_HIGH_PERFORMANCE_MODE  (IODELAY_HIGH_PERFORMANCE_MODE)     // Should match that of the clock in clocking.v
    )
    Deserializer_Differential_1toN
    (
        .clk_serial         (clk_serial),                   // High-speed I/O clock for incoming serial data
        .clk_parallel       (clk_parallel),                 // "Low-speed" clock for outgoing parallel data and all control inputs
        .reset_parallel     (reset_parallel),

        .datain_n           (datain_n),                     // High-speed serial I/O data in
        .datain_p           (datain_p),

        // Positive polarity controls and halfword output
        .incdec_p_enable    (incdec_p_enable),              // Enable increment/decrement of delay tap
        .incdec_p           (incdec_p),                     // Increment (1) or decrement (0) delay tap
        .tap_p_current      (tap_p_current),                // Current value of delay tap
        .tap_p_load_value   (tap_p_load_value),             // New value of delay tap
        .tap_p_load         (tap_p_load),                   // Load new delay tap value
        .datain_p_bitslip   (datain_p_bitslip),             // Pulse to shift output word
        .datain_p_parallel  (datain_p_parallel_halfword),   // Deserialized data, framed by clk_parallel

        // Negative polarity controls and halfword output
        .incdec_n_enable    (incdec_n_enable),              // Enable increment/decrement of delay tap
        .incdec_n           (incdec_n),                     // Increment (1) or decrement (0) delay tap
        .tap_n_current      (tap_n_current),                // Current value of delay tap
        .tap_n_load_value   (tap_n_load_value),             // New value of delay tap
        .tap_n_load         (tap_n_load),                   // Load new delay tap value
        .datain_n_bitslip   (datain_n_bitslip),             // Pulse to shift output word
        .datain_n_parallel  (datain_n_parallel_halfword)    // Deserialized data, framed by clk_parallel
    );

Half-Word Packing

Pack two consecutive N-bit halfwords into a 2N-bit word. The LSB N-bit halfword enters first and so ends up shifted into the MSB N-bit position, so we may have to reverse the halfword order afterwards, depending on the word alignment.

    // Positive polarity packer: collects INPUT_DATA_COUNT halfwords from the
    // SERDES into one OUTPUT_DATA_WIDTH-bit word, and flags when that word
    // is complete.

    wire [OUTPUT_DATA_WIDTH-1:0]    datain_p_parallel_packed;
    wire                            datain_p_parallel_valid;

    Pipeline_Serial_Parallel
    #(
        .WORD_COUNT_IN      (INPUT_DATA_COUNT),
        .WORD_WIDTH_IN      (INPUT_DATA_WIDTH)
    )
    pack_serdes_words_p
    (
        .clock              (clk_parallel),
        .clear              (reset_parallel),
        .clock_enable       (1'b1),

        // Input side: the SERDES presents a valid N-bit halfword every cycle.
        // The ready output is left unconnected: there is nothing upstream
        // that could be stalled.
        .serial_in          (datain_p_parallel_halfword),
        .serial_in_valid    (1'b1),
        // verilator lint_off PINCONNECTEMPTY
        .serial_in_ready    (),
        // verilator lint_on  PINCONNECTEMPTY

        // Output side: always ready, since stalling here would lose data.
        .parallel_out       (datain_p_parallel_packed),
        .parallel_out_valid (datain_p_parallel_valid),
        .parallel_out_ready (1'b1)
    );

    // Negative polarity packer: collects INPUT_DATA_COUNT halfwords from the
    // SERDES into one OUTPUT_DATA_WIDTH-bit word, and flags when that word
    // is complete.

    wire [OUTPUT_DATA_WIDTH-1:0]    datain_n_parallel_packed;
    wire                            datain_n_parallel_valid;

    Pipeline_Serial_Parallel
    #(
        .WORD_COUNT_IN      (INPUT_DATA_COUNT),
        .WORD_WIDTH_IN      (INPUT_DATA_WIDTH)
    )
    pack_serdes_words_n
    (
        .clock              (clk_parallel),
        .clear              (reset_parallel),
        .clock_enable       (1'b1),

        // Input side: the SERDES presents a valid N-bit halfword every cycle.
        // The ready output is left unconnected: there is nothing upstream
        // that could be stalled.
        .serial_in          (datain_n_parallel_halfword),
        .serial_in_valid    (1'b1),
        // verilator lint_off PINCONNECTEMPTY
        .serial_in_ready    (),
        // verilator lint_on  PINCONNECTEMPTY

        // Output side: always ready, since stalling here would lose data.
        .parallel_out       (datain_n_parallel_packed),
        .parallel_out_valid (datain_n_parallel_valid),
        .parallel_out_ready (1'b1)
    );

Extending N-bit bitslip to 2N bits

Create a halfword-reversed copy of the 2N-bit word. We will use this reversed form if the bitslips cannot find an alignment: the SERDES hardware can only bitslip by up to N bits, but we have 2N bits of data, so a misalignment greater than N bits is possible and cannot be fixed by bitslip alone.

    // Halfword-swapped copies of the packed words, one per polarity. Both are
    // purely combinational views of the packer outputs; the selection between
    // the plain and the swapped form happens further down.

    wire [OUTPUT_DATA_WIDTH-1:0] datain_p_parallel_packed_reversed,
                                 datain_n_parallel_packed_reversed;

    // Positive polarity
    Word_Reverser
    #(
        .WORD_COUNT (INPUT_DATA_COUNT),
        .WORD_WIDTH (INPUT_DATA_WIDTH)
    )
    reverse_halfwords_p
    (
        .words_out  (datain_p_parallel_packed_reversed),
        .words_in   (datain_p_parallel_packed)
    );

    // Negative polarity
    Word_Reverser
    #(
        .WORD_COUNT (INPUT_DATA_COUNT),
        .WORD_WIDTH (INPUT_DATA_WIDTH)
    )
    reverse_halfwords_n
    (
        .words_out  (datain_n_parallel_packed_reversed),
        .words_in   (datain_n_parallel_packed)
    );

Each time we receive the wordflip pulse, switch the halfword ordering to let the bitslips try again for a different alignment. This should only need to happen 0 or 1 times. If it happens more than once, something is wrong and the bitslips are not finding alignment.

Duplicating the toggles for each polarity looks redundant, but the cost is minimal and may be optimized away by the CAD tool. Better to keep the code consistent with separate positive and negative datapaths, for any future changes.

    // One persistent wordflip bit per polarity. Each bit resets to 0 and is
    // fed back into its own register, which is pulsed by the matching
    // wordflip input.

    wire wordflip_selector_p, wordflip_selector_n;

    // Positive polarity
    Register_Toggle
    #(
        .RESET_VALUE    (1'b0),
        .WORD_WIDTH     (1)
    )
    wordflip_state_p
    (
        .clock          (clk_parallel),
        .clear          (reset_parallel),
        .clock_enable   (1'b1),

        .toggle         (datain_p_wordflip),
        .data_out       (wordflip_selector_p),
        .data_in        (wordflip_selector_p)
    );

    // Negative polarity
    Register_Toggle
    #(
        .RESET_VALUE    (1'b0),
        .WORD_WIDTH     (1)
    )
    wordflip_state_n
    (
        .clock          (clk_parallel),
        .clear          (reset_parallel),
        .clock_enable   (1'b1),

        .toggle         (datain_n_wordflip),
        .data_out       (wordflip_selector_n),
        .data_in        (wordflip_selector_n)
    );

Then, based on the wordflip state, pass on either the regular packed word, or the half-word-flipped version.

    // Two-input word multiplexers, one per polarity. In each concatenation
    // the plain packed word occupies the low-order position and the
    // halfword-reversed word the high-order position.

    wire [OUTPUT_DATA_WIDTH-1:0] datain_p_parallel_packed_selected,
                                 datain_n_parallel_packed_selected;

    // Positive polarity
    Multiplexer_Binary_Behavioural
    #(
        .INPUT_COUNT    (2),
        .ADDR_WIDTH     (1),
        .WORD_WIDTH     (OUTPUT_DATA_WIDTH)
    )
    datain_p_parallel_packed_selector
    (
        .words_in   ({datain_p_parallel_packed_reversed, datain_p_parallel_packed}),
        .selector   (wordflip_selector_p),
        .word_out   (datain_p_parallel_packed_selected)
    );

    // Negative polarity
    Multiplexer_Binary_Behavioural
    #(
        .INPUT_COUNT    (2),
        .ADDR_WIDTH     (1),
        .WORD_WIDTH     (OUTPUT_DATA_WIDTH)
    )
    datain_n_parallel_packed_selector
    (
        .words_in   ({datain_n_parallel_packed_reversed, datain_n_parallel_packed}),
        .selector   (wordflip_selector_n),
        .word_out   (datain_n_parallel_packed_selected)
    );

Channel Alignment

Finally, for channel alignment, we run the bit- and word-aligned parallel data through a variable delay pipeline. The channel alignment training will adjust the channel tap delay, based on the latency (in whole words) from the input source, so multiple source inputs can be aligned with each other.

    // Variable-depth delay for the selected 2N-bit words, one pipeline per
    // polarity. Both pipelines load the same tap number (channel_delay, on
    // channel_delay_valid), and each one shifts only when its own packer
    // reports a complete word.

    // Positive polarity
    Register_Pipeline_Variable
    #(
        .PIPE_DEPTH (CHANNEL_PIPELINE_DEPTH),
        .WORD_WIDTH (OUTPUT_DATA_WIDTH)
    )
    datain_p_channel_delay
    (
        .clock              (clk_parallel),
        .clear              (reset_parallel),

        .tap_number         (channel_delay),
        .tap_number_load    (channel_delay_valid),

        .input_data         (datain_p_parallel_packed_selected),
        .shift_data         (datain_p_parallel_valid),
        .output_data        (datain_p_parallel)
    );

    // Negative polarity
    Register_Pipeline_Variable
    #(
        .PIPE_DEPTH (CHANNEL_PIPELINE_DEPTH),
        .WORD_WIDTH (OUTPUT_DATA_WIDTH)
    )
    datain_n_channel_delay
    (
        .clock              (clk_parallel),
        .clear              (reset_parallel),

        .tap_number         (channel_delay),
        .tap_number_load    (channel_delay_valid),

        .input_data         (datain_n_parallel_packed_selected),
        .shift_data         (datain_n_parallel_valid),
        .output_data        (datain_n_parallel)
    );

Realign the data valid signal with the channel-aligned data. This is a hack, as proper handshaking should be used instead, but no backpressure is possible here. Thus, the destination must be sufficiently buffered to never stall.

    // Delay for the valid flag. It loads the same tap number as the data
    // pipelines, takes the positive polarity packer's valid as its input,
    // and shifts on every clk_parallel cycle (shift_data is tied high).
    // Its output is the single datain_parallel_valid for both polarities.

    Register_Pipeline_Variable
    #(
        .PIPE_DEPTH (CHANNEL_PIPELINE_DEPTH),
        .WORD_WIDTH (1)
    )
    datain_valid_channel_delay
    (
        .clock              (clk_parallel),
        .clear              (reset_parallel),

        .tap_number         (channel_delay),
        .tap_number_load    (channel_delay_valid),

        .input_data         (datain_p_parallel_valid),
        .shift_data         (1'b1),
        .output_data        (datain_parallel_valid)
    );

endmodule

Back to FPGA Design Elements

fpgacpu.ca