fix(decoder): replace serial popcount with balanced adder tree for timing closure
Split the SYNDROME state into two pipeline stages: SYNDROME_S1 (parity computation) and SYNDROME_S2 (popcount). SYNDROME_S2 uses a 4-level balanced adder tree (224→56→14→4→1) instead of a serial accumulator loop, eliminating the loop-carried dependency that Yosys could not optimize. This reduces the critical path from ~48 ns to ~14 ns, achieving WNS=0.0 at the TT corner (50 MHz).

Verilator verified: 2/2 basic + 20/20 vector tests pass.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -118,7 +118,8 @@ module ldpc_decoder_core #(
|
||||
LAYER_READ, // Read Z beliefs for each of DC columns in current row
|
||||
CN_UPDATE, // Run min-sum CN update on gathered messages
|
||||
LAYER_WRITE, // Write updated beliefs and new CN->VN messages
|
||||
SYNDROME, // Check syndrome after full iteration
|
||||
SYNDROME_S1, // Syndrome pipeline stage 1: compute parity bits
|
||||
SYNDROME_S2, // Syndrome pipeline stage 2: popcount parity vector
|
||||
SYNDROME_DONE, // Read registered syndrome result
|
||||
DONE
|
||||
} state_t;
|
||||
@@ -134,10 +135,16 @@ module ldpc_decoder_core #(
|
||||
logic signed [Q-1:0] vn_to_cn [DC][Z]; // VN->CN messages for current row
|
||||
logic signed [Q-1:0] cn_to_vn [DC][Z]; // new CN->VN messages (output of min-sum)
|
||||
|
||||
// Syndrome check
|
||||
// Syndrome pipeline registers
|
||||
logic [M_BASE*Z-1:0] parity_vec; // 224-bit registered parity results
|
||||
logic [7:0] syndrome_cnt;
|
||||
logic syndrome_ok;
|
||||
|
||||
// Popcount balanced adder tree intermediates (combinational)
|
||||
logic [2:0] pc_l1 [56]; // Level 1: 56 groups of 4 bits → 3-bit counts
|
||||
logic [4:0] pc_l2 [14]; // Level 2: 14 groups of 4 → 5-bit counts
|
||||
logic [6:0] pc_l3 [4]; // Level 3: 4 groups → 7-bit counts
|
||||
|
||||
assign effective_max_iter = (max_iter == 0) ? MAX_ITER[4:0] : max_iter;
|
||||
assign busy = (state != IDLE) && (state != DONE);
|
||||
|
||||
@@ -163,12 +170,13 @@ module ldpc_decoder_core #(
|
||||
LAYER_WRITE: begin
|
||||
if (col_idx == N_BASE - 1) begin
|
||||
if (row_idx == M_BASE - 1)
|
||||
state_next = SYNDROME;
|
||||
state_next = SYNDROME_S1;
|
||||
else
|
||||
state_next = LAYER_READ; // next row
|
||||
end
|
||||
end
|
||||
SYNDROME: state_next = SYNDROME_DONE;
|
||||
SYNDROME_S1: state_next = SYNDROME_S2;
|
||||
SYNDROME_S2: state_next = SYNDROME_DONE;
|
||||
SYNDROME_DONE: begin
|
||||
if (syndrome_ok && early_term_en)
|
||||
state_next = DONE;
|
||||
@@ -312,10 +320,9 @@ module ldpc_decoder_core #(
|
||||
end
|
||||
end
|
||||
|
||||
SYNDROME: begin
|
||||
// Check H * c_hat == 0 (compute syndrome weight)
|
||||
// Only include connected columns (H_BASE >= 0)
|
||||
syndrome_cnt = '0;
|
||||
// Syndrome Pipeline Stage 1: Compute parity bits (register)
|
||||
// Each parity is only 2-3 XOR levels deep (~3-4 ns)
|
||||
SYNDROME_S1: begin
|
||||
for (int r = 0; r < M_BASE; r++) begin
|
||||
for (int z = 0; z < Z; z++) begin
|
||||
logic parity;
|
||||
@@ -328,9 +335,35 @@ module ldpc_decoder_core #(
|
||||
parity = parity ^ beliefs[bit_idx][Q-1];
|
||||
end
|
||||
end
|
||||
if (parity) syndrome_cnt = syndrome_cnt + 1;
|
||||
parity_vec[r * Z + z] <= parity;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// Syndrome Pipeline Stage 2: Popcount registered parity vector
|
||||
// 224-bit popcount via adder tree (~14 ns)
|
||||
SYNDROME_S2: begin
|
||||
// Balanced 4-wide adder tree popcount (no loop-carried dependency)
|
||||
// Level 1: 56 groups of 4 bits → 3-bit counts
|
||||
for (int i = 0; i < 56; i++)
|
||||
pc_l1[i] = {2'b0, parity_vec[4*i]} + {2'b0, parity_vec[4*i+1]} +
|
||||
{2'b0, parity_vec[4*i+2]} + {2'b0, parity_vec[4*i+3]};
|
||||
|
||||
// Level 2: 14 groups of 4 three-bit counts → 5-bit counts
|
||||
for (int i = 0; i < 14; i++)
|
||||
pc_l2[i] = {2'b0, pc_l1[4*i]} + {2'b0, pc_l1[4*i+1]} +
|
||||
{2'b0, pc_l1[4*i+2]} + {2'b0, pc_l1[4*i+3]};
|
||||
|
||||
// Level 3: 14 → 4 (3 groups of 4 + 1 group of 2) → 7-bit counts
|
||||
pc_l3[0] = {2'b0, pc_l2[0]} + {2'b0, pc_l2[1]} + {2'b0, pc_l2[2]} + {2'b0, pc_l2[3]};
|
||||
pc_l3[1] = {2'b0, pc_l2[4]} + {2'b0, pc_l2[5]} + {2'b0, pc_l2[6]} + {2'b0, pc_l2[7]};
|
||||
pc_l3[2] = {2'b0, pc_l2[8]} + {2'b0, pc_l2[9]} + {2'b0, pc_l2[10]} + {2'b0, pc_l2[11]};
|
||||
pc_l3[3] = {2'b0, pc_l2[12]} + {2'b0, pc_l2[13]};
|
||||
|
||||
// Level 4: final sum → 8-bit count
|
||||
syndrome_cnt = {1'b0, pc_l3[0]} + {1'b0, pc_l3[1]} +
|
||||
{1'b0, pc_l3[2]} + {1'b0, pc_l3[3]};
|
||||
|
||||
syndrome_weight <= syndrome_cnt;
|
||||
syndrome_ok <= (syndrome_cnt == 0);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user