fix(decoder): replace serial popcount with balanced adder tree for timing closure
Split the SYNDROME state into two pipeline stages: SYNDROME_S1 (parity computation) and SYNDROME_S2 (popcount).

SYNDROME_S2 uses a 4-level balanced adder tree (224→56→14→4→1) instead of a serial accumulator loop, eliminating the loop-carried dependency that Yosys could not optimize. This reduces the critical path from ~48 ns to ~14 ns, achieving WNS=0.0 at the TT corner (50 MHz).

Verilator verified: 2/2 basic + 20/20 vector tests pass.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -118,7 +118,8 @@ module ldpc_decoder_core #(
|
|||||||
LAYER_READ, // Read Z beliefs for each of DC columns in current row
|
LAYER_READ, // Read Z beliefs for each of DC columns in current row
|
||||||
CN_UPDATE, // Run min-sum CN update on gathered messages
|
CN_UPDATE, // Run min-sum CN update on gathered messages
|
||||||
LAYER_WRITE, // Write updated beliefs and new CN->VN messages
|
LAYER_WRITE, // Write updated beliefs and new CN->VN messages
|
||||||
SYNDROME, // Check syndrome after full iteration
|
SYNDROME_S1, // Syndrome pipeline stage 1: compute parity bits
|
||||||
|
SYNDROME_S2, // Syndrome pipeline stage 2: popcount parity vector
|
||||||
SYNDROME_DONE, // Read registered syndrome result
|
SYNDROME_DONE, // Read registered syndrome result
|
||||||
DONE
|
DONE
|
||||||
} state_t;
|
} state_t;
|
||||||
@@ -134,10 +135,16 @@ module ldpc_decoder_core #(
|
|||||||
logic signed [Q-1:0] vn_to_cn [DC][Z]; // VN->CN messages for current row
|
logic signed [Q-1:0] vn_to_cn [DC][Z]; // VN->CN messages for current row
|
||||||
logic signed [Q-1:0] cn_to_vn [DC][Z]; // new CN->VN messages (output of min-sum)
|
logic signed [Q-1:0] cn_to_vn [DC][Z]; // new CN->VN messages (output of min-sum)
|
||||||
|
|
||||||
// Syndrome check
|
// Syndrome pipeline registers
|
||||||
|
logic [M_BASE*Z-1:0] parity_vec; // 224-bit registered parity results
|
||||||
logic [7:0] syndrome_cnt;
|
logic [7:0] syndrome_cnt;
|
||||||
logic syndrome_ok;
|
logic syndrome_ok;
|
||||||
|
|
||||||
|
// Popcount balanced adder tree intermediates (combinational)
|
||||||
|
logic [2:0] pc_l1 [56]; // Level 1: 56 groups of 4 bits → 3-bit counts
|
||||||
|
logic [4:0] pc_l2 [14]; // Level 2: 14 groups of 4 → 5-bit counts
|
||||||
|
logic [6:0] pc_l3 [4]; // Level 3: 4 groups → 7-bit counts
|
||||||
|
|
||||||
assign effective_max_iter = (max_iter == 0) ? MAX_ITER[4:0] : max_iter;
|
assign effective_max_iter = (max_iter == 0) ? MAX_ITER[4:0] : max_iter;
|
||||||
assign busy = (state != IDLE) && (state != DONE);
|
assign busy = (state != IDLE) && (state != DONE);
|
||||||
|
|
||||||
@@ -163,12 +170,13 @@ module ldpc_decoder_core #(
|
|||||||
LAYER_WRITE: begin
|
LAYER_WRITE: begin
|
||||||
if (col_idx == N_BASE - 1) begin
|
if (col_idx == N_BASE - 1) begin
|
||||||
if (row_idx == M_BASE - 1)
|
if (row_idx == M_BASE - 1)
|
||||||
state_next = SYNDROME;
|
state_next = SYNDROME_S1;
|
||||||
else
|
else
|
||||||
state_next = LAYER_READ; // next row
|
state_next = LAYER_READ; // next row
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
SYNDROME: state_next = SYNDROME_DONE;
|
SYNDROME_S1: state_next = SYNDROME_S2;
|
||||||
|
SYNDROME_S2: state_next = SYNDROME_DONE;
|
||||||
SYNDROME_DONE: begin
|
SYNDROME_DONE: begin
|
||||||
if (syndrome_ok && early_term_en)
|
if (syndrome_ok && early_term_en)
|
||||||
state_next = DONE;
|
state_next = DONE;
|
||||||
@@ -312,10 +320,9 @@ module ldpc_decoder_core #(
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
SYNDROME: begin
|
// Syndrome Pipeline Stage 1: Compute parity bits (register)
|
||||||
// Check H * c_hat == 0 (compute syndrome weight)
|
// Each parity is only 2-3 XOR levels deep (~3-4 ns)
|
||||||
// Only include connected columns (H_BASE >= 0)
|
SYNDROME_S1: begin
|
||||||
syndrome_cnt = '0;
|
|
||||||
for (int r = 0; r < M_BASE; r++) begin
|
for (int r = 0; r < M_BASE; r++) begin
|
||||||
for (int z = 0; z < Z; z++) begin
|
for (int z = 0; z < Z; z++) begin
|
||||||
logic parity;
|
logic parity;
|
||||||
@@ -328,9 +335,35 @@ module ldpc_decoder_core #(
|
|||||||
parity = parity ^ beliefs[bit_idx][Q-1];
|
parity = parity ^ beliefs[bit_idx][Q-1];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
if (parity) syndrome_cnt = syndrome_cnt + 1;
|
parity_vec[r * Z + z] <= parity;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
// Syndrome Pipeline Stage 2: Popcount registered parity vector
|
||||||
|
// 224-bit popcount via adder tree (~14 ns)
|
||||||
|
SYNDROME_S2: begin
|
||||||
|
// Balanced 4-wide adder tree popcount (no loop-carried dependency)
|
||||||
|
// Level 1: 56 groups of 4 bits → 3-bit counts
|
||||||
|
for (int i = 0; i < 56; i++)
|
||||||
|
pc_l1[i] = {2'b0, parity_vec[4*i]} + {2'b0, parity_vec[4*i+1]} +
|
||||||
|
{2'b0, parity_vec[4*i+2]} + {2'b0, parity_vec[4*i+3]};
|
||||||
|
|
||||||
|
// Level 2: 14 groups of 4 three-bit counts → 5-bit counts
|
||||||
|
for (int i = 0; i < 14; i++)
|
||||||
|
pc_l2[i] = {2'b0, pc_l1[4*i]} + {2'b0, pc_l1[4*i+1]} +
|
||||||
|
{2'b0, pc_l1[4*i+2]} + {2'b0, pc_l1[4*i+3]};
|
||||||
|
|
||||||
|
// Level 3: 14 → 4 (3 groups of 4 + 1 group of 2) → 7-bit counts
|
||||||
|
pc_l3[0] = {2'b0, pc_l2[0]} + {2'b0, pc_l2[1]} + {2'b0, pc_l2[2]} + {2'b0, pc_l2[3]};
|
||||||
|
pc_l3[1] = {2'b0, pc_l2[4]} + {2'b0, pc_l2[5]} + {2'b0, pc_l2[6]} + {2'b0, pc_l2[7]};
|
||||||
|
pc_l3[2] = {2'b0, pc_l2[8]} + {2'b0, pc_l2[9]} + {2'b0, pc_l2[10]} + {2'b0, pc_l2[11]};
|
||||||
|
pc_l3[3] = {2'b0, pc_l2[12]} + {2'b0, pc_l2[13]};
|
||||||
|
|
||||||
|
// Level 4: final sum → 8-bit count
|
||||||
|
syndrome_cnt = {1'b0, pc_l3[0]} + {1'b0, pc_l3[1]} +
|
||||||
|
{1'b0, pc_l3[2]} + {1'b0, pc_l3[3]};
|
||||||
|
|
||||||
syndrome_weight <= syndrome_cnt;
|
syndrome_weight <= syndrome_cnt;
|
||||||
syndrome_ok <= (syndrome_cnt == 0);
|
syndrome_ok <= (syndrome_cnt == 0);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user