Given a bit- and word-aligned SERDES setup, pulse the sync_train signal to
the sensor for at least one whole word duration (12 bits) and measure the
delay (in whole words) until the training word appears. Then delay
this channel to align it with the other channels, so all channels start
sending data at the same time.
Since we do not know the alignment of the other channels, we go for the
worst case delay: if we have a maximum of N cycles of possible delay, and
we start at the minimum delay, and we measure M cycles of delay between
sync_train and the arrival of the training word, we then set the delay to
N-M, so all channels appear to have a latency of N after alignment. As long
as M < N, this will work.
Although we count the delay as whole parallel words, as marked by
datain_parallel_valid, we stay in the clk_rxio_frame data clock domain
so we don't have to mess around with CDC to/from other clock domains.
`default_nettype none
// Channel alignment control for one deserialized SERDES channel.
// clk_main side:       start_alignment in, sync_train and done_alignment out.
// clk_rxio_frame side: parallel words in (valid/ready), channel delay out
//                      (valid/ready/data).
// The channel delay sent out is CHANNEL_PIPELINE_DEPTH (truncated to
// CHANNEL_DELAY_WIDTH bits) minus the number of valid words counted between
// the start of training and the arrival of TRAINING_WORD.
module iserdes_channel_alignment
#(
parameter WORD_WIDTH = 12, // Width in bits of one deserialized parallel word
parameter [WORD_WIDTH-1:0] TRAINING_WORD = 'b011110001101, // Pattern compared against datain_p_parallel to detect the end of training
parameter CHANNEL_PIPELINE_DEPTH = 16, // Must match iserdes_1_to_12_data_diff.v parameter
parameter START_DELAY = 16, // How many cycles to wait before alignment starts
// Do not set at instantiation, except in Vivado IPI
// Enclosing modules must repeat this calculation
// Width of the channel delay value and of the word counter; forced to 1
// when CHANNEL_PIPELINE_DEPTH is less than 2.
// clog2() is presumably provided by clog2_function.vh, included in the
// module body -- confirm.
parameter CHANNEL_DELAY_WIDTH = CHANNEL_PIPELINE_DEPTH >= 2 ? clog2(CHANNEL_PIPELINE_DEPTH) : 1
)
(
// clk_rxio_frame domain, for SERDES data and control
input wire clk_rxio_frame,
input wire rst_rxio_frame_n, // Active-low; used inverted as "clear" in this module
input wire datain_parallel_valid, // There is a handshake, but no slack for stalling!
output reg datain_parallel_ready, // Must always be ready before valid! (set to 1 once, in the initial block)
input wire [WORD_WIDTH-1:0] datain_p_parallel, // Deserialized positive data, framed by datain_parallel_valid
// We don't need the negative polarity data here.
output wire channel_delay_valid, // Load a new channel alignment tap delay
input wire channel_delay_ready,
output wire [CHANNEL_DELAY_WIDTH-1:0] channel_delay,
// System control signals in clk_main domain
input wire clk_main, // General logic clock and reset
input wire rst_main_n, // Active-low; used inverted as "clear" in this module
output reg sync_train, // Signal sensor to output training word
input wire start_alignment, // Preferably a one-cycle pulse
output wire done_alignment // Pulse: the training word was detected and the channel delay computed
);
`include "clog2_function.vh"
// CHANNEL_DELAY_WIDTH-bit constants zero and one, used as the word counter's
// initial value, increment, and load value.
// NOTE(review): when CHANNEL_DELAY_WIDTH is 1, the inner replication count
// below is zero -- confirm the target tools accept a zero replication here.
localparam CHANNEL_DELAY_ZERO = {CHANNEL_DELAY_WIDTH{1'b0}};
localparam CHANNEL_DELAY_ONE = {{CHANNEL_DELAY_WIDTH-1{1'b0}},1'b1};
// Initial values of the "output reg" ports. datain_parallel_ready is not
// assigned anywhere else in this module, so it stays at 1. sync_train is
// later driven by a combinational always block.
initial begin
datain_parallel_ready = 1'b1; // Always ready (no backpressure possible)
sync_train = 1'b0;
end
We delay the start of the alignment to let the previous training words exit the SERDES pipeline, so we don't mistakenly train on the output of the previous alignment stage.
// Delayed copy of start_alignment, still in the clk_main domain. It feeds
// both the pulse synchronizer into clk_rxio_frame and the sync_train
// pulse generator.
wire start_alignment_delayed;
// 1-bit wide pipeline, PIPE_DEPTH = START_DELAY, clocked by clk_main.
Register_Pipeline_Simple
#(
.WORD_WIDTH (1),
.PIPE_DEPTH (START_DELAY)
)
delay_training_start
(
// If PIPE_DEPTH is zero, these are unused
// verilator lint_off UNUSED
.clock (clk_main),
.clock_enable (1'b1), // Always enabled
.clear (~rst_main_n), // rst_main_n is active-low, so invert it for "clear"
// verilator lint_on UNUSED
.pipe_in (start_alignment),
.pipe_out (start_alignment_delayed)
);
Transfer the control signals to/from the SERDES clock domain, and have the channel-alignment logic work in the SERDES clock domain since we cannot have the extra latency of passing the SERDES data into the main clock domain without much complication of the state machine.
FIXME: It's unclear what the control interface should be here, but the CDC of control signals, not data, is certain.
Transfer the pulse signalling start of training into the SERDES clock domain. A pulse sent while training is in progress is lost and has no effect.
// Start-of-training pulse as seen in the clk_rxio_frame domain. It sets the
// counter run latch and loads the word counter.
wire start_alignment_rxio;
// Pulse synchronizer from clk_main (sending) to clk_rxio_frame (receiving).
CDC_Pulse_Synchronizer_2phase
#(
.CDC_EXTRA_DEPTH (0) // Presumably: no extra synchronizer stages -- confirm against the module
)
start_alignment_transfer
(
.sending_clock (clk_main),
.sending_pulse_in (start_alignment_delayed),
// sending_ready is left unconnected: nothing in this module checks whether
// the synchronizer can accept a new pulse.
// verilator lint_off PINCONNECTEMPTY
.sending_ready (),
// verilator lint_on PINCONNECTEMPTY
.receiving_clock (clk_rxio_frame),
.receiving_pulse_out (start_alignment_rxio)
);
Transfer the pulse signalling the end of training from the SERDES clock domain into the main system clock domain.
// End-of-training pulse in the clk_rxio_frame domain. It is produced by the
// training_done_generator instance and also clears the counter run latch and
// validates the computed channel delay.
wire done_alignment_rxio;
// Pulse synchronizer from clk_rxio_frame (sending) to clk_main (receiving);
// its output is the done_alignment port.
CDC_Pulse_Synchronizer_2phase
#(
.CDC_EXTRA_DEPTH (0) // Presumably: no extra synchronizer stages -- confirm against the module
)
done_alignment_transfer
(
.sending_clock (clk_rxio_frame),
.sending_pulse_in (done_alignment_rxio),
// sending_ready is left unconnected: nothing in this module checks whether
// the synchronizer can accept a new pulse.
// verilator lint_off PINCONNECTEMPTY
.sending_ready (),
// verilator lint_on PINCONNECTEMPTY
.receiving_clock (clk_main),
.receiving_pulse_out (done_alignment)
);
When we signal to start alignment, turn on and hold sync_train so the
sensor sends out a continuous stream of the training word. Once alignment
is done, drop sync_train.
However, we will stop training immediately after the first training word is
correctly received, so sync_train should be active for only a few word
periods (depending on buffers in the path). And since we only need to do
the alignment once, any remaining incoming training words will pass by and
be lost after.
// Rising-edge pulse derived from start_alignment_delayed (clk_main domain).
wire sync_train_pulse;
Pulse_Generator
sync_train_pulse_generator
(
.clock (clk_main),
.level_in (start_alignment_delayed),
.pulse_posedge_out (sync_train_pulse),
// Only the rising-edge output is used.
// verilator lint_off PINCONNECTEMPTY
.pulse_negedge_out (),
.pulse_anyedge_out ()
// verilator lint_on PINCONNECTEMPTY
);
// Level that is set by sync_train_pulse and cleared by done_alignment.
// NOTE(review): no reset is connected to this latch here; only
// done_alignment clears it. If training never completes, sync_train
// presumably stays asserted -- confirm this is intended.
wire sync_train_latched;
Pulse_Latch
#(
.RESET_VALUE (1'b0)
)
sync_train_pulse_latch
(
.clock (clk_main),
.clear (done_alignment),
.pulse_in (sync_train_pulse),
.level_out (sync_train_latched)
);
// sync_train is the OR of the start pulse and the latched level, so it is
// already high during the pulse itself (presumably the latch output only
// rises afterwards -- confirm against Pulse_Latch).
always @(*) begin
sync_train = sync_train_pulse || sync_train_latched;
end
Once the training starts, count the number of valid data words until we see the training word.
// High while a latency measurement is in progress (clk_rxio_frame domain):
// set by start_alignment_rxio, cleared by done_alignment_rxio.
wire cycle_counter_run;
Pulse_Latch
#(
.RESET_VALUE (1'b0)
)
cycle_counter_control
(
.clock (clk_rxio_frame),
.clear (done_alignment_rxio),
.pulse_in (start_alignment_rxio),
.level_out (cycle_counter_run)
);
// Measured latency, in valid parallel words, CHANNEL_DELAY_WIDTH bits wide.
wire [CHANNEL_DELAY_WIDTH-1:0] channel_latency_words;
// Up-counter with an increment of one. It is loaded with one (not zero) by
// start_alignment_rxio and advances only on cycles where the measurement is
// running AND datain_parallel_valid is high.
// NOTE(review): the reason for starting at one is not stated in this file --
// confirm against the SERDES pipeline latency.
Counter_Binary
#(
.WORD_WIDTH (CHANNEL_DELAY_WIDTH),
.INCREMENT (CHANNEL_DELAY_ONE),
.INITIAL_COUNT (CHANNEL_DELAY_ZERO)
)
cycle_counter
(
.clock (clk_rxio_frame),
.clear (~rst_rxio_frame_n), // rst_rxio_frame_n is active-low, so invert it for "clear"
.up_down (1'b0), // 0/1 --> up/down
.run (cycle_counter_run && datain_parallel_valid),
.load (start_alignment_rxio),
.load_count (CHANNEL_DELAY_ONE),
.carry_in (1'b0),
// Carry and overflow outputs are unconnected, so a count that exceeds
// CHANNEL_DELAY_WIDTH bits is not detected here.
// verilator lint_off PINCONNECTEMPTY
.carry_out (),
.carries (),
.overflow (),
// verilator lint_on PINCONNECTEMPTY
.count (channel_latency_words)
);
Once the training is done, calculate the required delay, then send it to the SERDES. We use a buffer to decouple the end of training from the sending of the delay value.
NOTE: We MUST gate done_alignment_rxio_internal with cycle_counter_run
so we gate off the datain_p_parallel until we start our alignment.
Otherwise, we end up with a problem in simulation: the initial
datain_p_parallel is all X until the underlying SERDES hardware has run
for a while, before we even issue the initial reset. And since the logic
here would route that X to the CDC Pulse Synchronizer back to the
clk_main domain, the synchronizer ends up internally in an X loop in
simulation, and never recovers.
// Combinational signals (declared as reg with initial values, assigned in
// the always @(*) block):
//   done_alignment_rxio_internal : level, high while a valid TRAINING_WORD is
//                                  present and the measurement is running.
//   channel_delay_internal       : delay value to send to the SERDES.
reg done_alignment_rxio_internal = 1'b0;
reg [CHANNEL_DELAY_WIDTH-1:0] channel_delay_internal = CHANNEL_DELAY_ZERO;
always @(*) begin
// All three conditions must hold; the cycle_counter_run term keeps
// datain_p_parallel from having any effect outside of a measurement.
done_alignment_rxio_internal = (datain_parallel_valid == 1'b1) && (datain_p_parallel == TRAINING_WORD) && (cycle_counter_run == 1'b1);
// CHANNEL_PIPELINE_DEPTH is truncated to CHANNEL_DELAY_WIDTH bits before the
// subtraction, and the result is also CHANNEL_DELAY_WIDTH bits wide, so the
// arithmetic is modulo 2^CHANNEL_DELAY_WIDTH. Example, assuming clog2(16) is
// 4: the default depth of 16 truncates to 0, and 0 - M wraps to 16 - M for
// M in 1..15.
channel_delay_internal = CHANNEL_PIPELINE_DEPTH [CHANNEL_DELAY_WIDTH-1:0] - channel_latency_words;
end
// Turns the rising edge of the match level into the done_alignment_rxio
// pulse.
Pulse_Generator
training_done_generator
(
.clock (clk_rxio_frame),
.level_in (done_alignment_rxio_internal),
.pulse_posedge_out (done_alignment_rxio),
// Only the rising-edge output is used.
// verilator lint_off PINCONNECTEMPTY
.pulse_negedge_out (),
.pulse_anyedge_out ()
// verilator lint_on PINCONNECTEMPTY
);
// Accepts channel_delay_internal when done_alignment_rxio pulses and presents
// it on the channel_delay valid/ready/data output ports.
Pipeline_Half_Buffer
#(
.WORD_WIDTH (CHANNEL_DELAY_WIDTH),
.CIRCULAR_BUFFER (0) // non-zero to enable
)
channel_delay_sender
(
.clock (clk_rxio_frame),
.clear (~rst_rxio_frame_n), // rst_rxio_frame_n is active-low, so invert it for "clear"
.input_valid (done_alignment_rxio),
// verilator lint_off PINCONNECTEMPTY
.input_ready (), // Ignored, always ready is assumed
// verilator lint_on PINCONNECTEMPTY
.input_data (channel_delay_internal),
.output_valid (channel_delay_valid),
.output_ready (channel_delay_ready),
.output_data (channel_delay)
);
endmodule