Initial LDPC optical decoder project scaffold

Rate-1/8 QC-LDPC decoder for photon-starved optical communication.
Target: Efabless chipIgnite (SkyWater 130nm, Caravel harness).

- RTL: decoder top, core (layered min-sum), Wishbone interface
- Python behavioral model with Poisson channel simulation
- 7x8 base matrix, Z=32, n=256, k=32

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
cah
2026-02-23 21:47:40 -07:00
commit b93a6f5769
5 changed files with 1261 additions and 0 deletions

403
rtl/ldpc_decoder_core.sv Normal file
View File

@@ -0,0 +1,403 @@
// LDPC Decoder Core - Layered Min-Sum with QC structure
//
// Layered scheduling processes one base-matrix row at a time.
// For each row, we:
// 1. Read VN beliefs for all Z columns connected to this row
// 2. Subtract old CN->VN messages to get VN->CN messages
// 3. Run CN min-sum update
// 4. Add new CN->VN messages back to VN beliefs
// 5. Write updated beliefs back
//
// This converges ~2x faster than flooding and needs only one message memory
// (CN->VN messages, one bank per base-matrix entry; a bank is overwritten the next time its layer is processed).
// Layered offset-min-sum decoder core for a QC-LDPC code.
//
// Operation: a `start` pulse while idle copies `llr_in` into the belief
// memory, then each iteration walks the M_BASE layers. Per layer the core
// spends N_BASE cycles reading (belief - old message), one cycle on the
// check-node update and N_BASE cycles writing back. After the last layer the
// syndrome is evaluated; decoding stops on a zero syndrome (when
// `early_term_en` is set) or once the iteration limit has been reached.
//
// Outputs `converged`, `iter_used`, `syndrome_weight` and `decoded_bits`
// hold their values after the decode finishes until the next decode starts.
module ldpc_decoder_core #(
    parameter N_BASE   = 8,
    parameter M_BASE   = 7,
    parameter Z        = 32,
    parameter N        = N_BASE * Z,
    parameter M        = M_BASE * Z,
    parameter Q        = 6,
    parameter MAX_ITER = 30,   // used when max_iter == 0; must fit in 5 bits
    parameter DC       = 8,    // check node degree
    parameter DV_MAX   = 7     // max variable node degree
)(
    input  logic                clk,
    input  logic                rst_n,
    // Control
    input  logic                start,
    input  logic                early_term_en,
    input  logic [4:0]          max_iter,
    // Channel LLRs (loaded before start)
    input  logic signed [Q-1:0] llr_in [N],
    // Status
    output logic                busy,
    output logic                converged,
    output logic [4:0]          iter_used,
    // Results
    output logic [Z-1:0]        decoded_bits,    // first Z bits = info bits
    output logic [7:0]          syndrome_weight
);

    // =========================================================================
    // Constants
    // =========================================================================
    // Two's-complement limits of a Q-bit message and the largest magnitude
    // representable in the (Q-1)-bit sign/magnitude form used by min-sum.
    localparam logic signed [Q-1:0] LLR_MAX = {1'b0, {(Q-1){1'b1}}};
    localparam logic signed [Q-1:0] LLR_MIN = {1'b1, {(Q-1){1'b0}}};
    localparam logic [Q-2:0]        MAG_MAX = {(Q-1){1'b1}};

    // =========================================================================
    // Base matrix H stored as cyclic shift values
    // H_BASE[row][col] = cyclic shift amount of the ZxZ circulant
    // =========================================================================
    // Placeholder base matrix for the rate-1/8 code: regular (7,8), every
    // entry connected. It is a localparam (not an initial-block variable) so
    // that it is a true constant for synthesis. The pattern below is 7x8, so
    // overriding M_BASE/N_BASE requires supplying a matching table.
    //
    // NOTE: null entries (-1) are NOT handled by the datapath below; an
    // irregular matrix needs additional gating.
    //
    // TODO: Replace with optimized base matrix from model/design_h_matrix.py
    localparam logic signed [5:0] H_BASE [M_BASE][N_BASE] = '{
        '{ 0,  5, 11, 17, 23, 29,  3,  9},
        '{15,  0, 21,  7, 13, 19, 25, 31},
        '{10, 20,  0, 30,  8, 16, 24,  2},
        '{27, 14,  1,  0, 18,  6, 12, 22},
        '{ 4, 28, 16, 12,  0, 26,  8, 20},
        '{19,  9, 31, 25, 15,  0, 21, 11},
        '{22, 26,  6, 14, 30, 10,  0, 18}
    };

    // =========================================================================
    // Memories
    // =========================================================================
    // beliefs[j] = channel_llr[j] + sum of all CN->VN messages to bit j
    logic signed [Q-1:0] beliefs [N];

    // msg_cn2vn[row][col][z] = message from check (row*Z+z) to the variable
    // in column block `col` selected by the circulant shift.
    logic signed [Q-1:0] msg_cn2vn [M_BASE][N_BASE][Z];

    // =========================================================================
    // Decoder FSM declarations
    // =========================================================================
    typedef enum logic [2:0] {
        IDLE,
        INIT,        // Initialize beliefs from channel LLRs, zero messages
        LAYER_READ,  // Read Z beliefs for each of DC columns in current row
        CN_UPDATE,   // Run min-sum CN update on gathered messages
        LAYER_WRITE, // Write updated beliefs and new CN->VN messages
        SYNDROME,    // Check syndrome after full iteration
        DONE
    } state_t;

    state_t state, state_next;

    logic [4:0] iter_cnt;            // completed iterations
    logic [2:0] row_idx;             // current base matrix row (0..M_BASE-1)
    logic [2:0] col_idx;             // current column being read/written
    logic [4:0] effective_max_iter;
    logic       last_iter;           // the iteration being checked is the final one

    // Working registers for current layer CN update
    logic signed [Q-1:0] vn_to_cn [DC][Z]; // VN->CN messages for current row
    logic signed [Q-1:0] cn_to_vn [DC][Z]; // new CN->VN messages

    // Syndrome check (combinational, derived from the current beliefs)
    logic [7:0] syndrome_cnt;
    logic       syndrome_ok;

    assign effective_max_iter = (max_iter == 0) ? MAX_ITER[4:0] : max_iter;
    assign busy               = (state != IDLE) && (state != DONE);

    // iter_cnt holds the number of iterations finished BEFORE the one being
    // checked in SYNDROME, so the limit test must use iter_cnt + 1. Widened
    // by one bit so the addition cannot wrap.
    assign last_iter = ({1'b0, iter_cnt} + 6'd1) >= {1'b0, effective_max_iter};

    // =========================================================================
    // Syndrome: H * c_hat, evaluated combinationally on the belief signs.
    // Kept outside the clocked process so the FSM decision in SYNDROME uses
    // the syndrome of the beliefs that were just written, not a stale value.
    // =========================================================================
    always_comb begin
        syndrome_cnt = '0;
        syndrome_ok  = 1'b1;
        for (int r = 0; r < M_BASE; r++) begin
            for (int z = 0; z < Z; z++) begin
                logic parity;
                parity = 1'b0;
                for (int c = 0; c < N_BASE; c++) begin
                    int shifted_z;
                    int bit_idx;
                    shifted_z = (z + H_BASE[r][c]) % Z;
                    bit_idx   = c * Z + shifted_z;
                    parity    = parity ^ beliefs[bit_idx][Q-1]; // sign bit = hard decision
                end
                if (parity) begin
                    syndrome_ok = 1'b0;
                    // Saturate so a larger code cannot wrap the 8-bit weight
                    if (syndrome_cnt != 8'hFF)
                        syndrome_cnt = syndrome_cnt + 8'd1;
                end
            end
        end
    end

    // =========================================================================
    // State machine
    // =========================================================================
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            state <= IDLE;
        else
            state <= state_next;
    end

    always_comb begin
        state_next = state;
        case (state)
            IDLE:       if (start) state_next = INIT;
            INIT:       state_next = LAYER_READ;
            LAYER_READ: if (col_idx == N_BASE - 1) state_next = CN_UPDATE;
            CN_UPDATE:  state_next = LAYER_WRITE;
            LAYER_WRITE: begin
                if (col_idx == N_BASE - 1) begin
                    if (row_idx == M_BASE - 1)
                        state_next = SYNDROME;
                    else
                        state_next = LAYER_READ; // next row
                end
            end
            SYNDROME: begin
                if (syndrome_ok && early_term_en)
                    state_next = DONE;
                else if (last_iter)
                    state_next = DONE;
                else
                    state_next = LAYER_READ;     // next iteration
            end
            DONE:       if (!start) state_next = IDLE;
            default:    state_next = IDLE;
        endcase
    end

    // =========================================================================
    // Datapath
    // =========================================================================
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            iter_cnt        <= '0;
            row_idx         <= '0;
            col_idx         <= '0;
            converged       <= 1'b0;
            iter_used       <= '0;
            syndrome_weight <= '0;
            decoded_bits    <= '0;
        end else begin
            case (state)
                IDLE: begin
                    // Status outputs (converged, iter_used, syndrome_weight,
                    // decoded_bits) are deliberately left untouched here so
                    // the host can read them after the decode has finished.
                    iter_cnt <= '0;
                    row_idx  <= '0;
                    col_idx  <= '0;
                end

                INIT: begin
                    // Initialize beliefs from channel LLRs
                    for (int j = 0; j < N; j++)
                        beliefs[j] <= llr_in[j];
                    // Zero all CN->VN messages
                    for (int r = 0; r < M_BASE; r++)
                        for (int c = 0; c < N_BASE; c++)
                            for (int z = 0; z < Z; z++)
                                msg_cn2vn[r][c][z] <= '0;
                    row_idx   <= '0;
                    col_idx   <= '0;
                    iter_cnt  <= '0;
                    // A new decode starts: drop the previous result flag
                    converged <= 1'b0;
                end

                LAYER_READ: begin
                    // For column col_idx in current row_idx:
                    //   VN->CN = belief - old CN->VN message
                    // (the belief contains the sum of ALL CN->VN messages, so
                    // removing this row's contribution yields the extrinsic)
                    for (int z = 0; z < Z; z++) begin
                        int bit_idx;
                        int shifted_z;
                        logic signed [Q-1:0] old_msg;
                        logic signed [Q-1:0] belief_val;
                        shifted_z  = (z + H_BASE[row_idx][col_idx]) % Z;
                        bit_idx    = int'(col_idx) * Z + shifted_z;
                        old_msg    = msg_cn2vn[row_idx][col_idx][z];
                        belief_val = beliefs[bit_idx];
                        vn_to_cn[col_idx][z] <= sat_sub(belief_val, old_msg);
                    end
                    if (col_idx == N_BASE - 1)
                        col_idx <= '0;
                    else
                        col_idx <= col_idx + 3'd1;
                end

                CN_UPDATE: begin
                    // Min-sum update for all Z check nodes in current row;
                    // each check node has DC incoming messages.
                    for (int z = 0; z < Z; z++) begin
                        logic signed [Q-1:0] msgs_in  [DC];
                        logic signed [Q-1:0] msgs_out [DC];
                        for (int d = 0; d < DC; d++)
                            msgs_in[d] = vn_to_cn[d][z];
                        cn_min_sum(msgs_in, msgs_out);
                        for (int d = 0; d < DC; d++)
                            cn_to_vn[d][z] <= msgs_out[d];
                    end
                    col_idx <= '0; // prepare for LAYER_WRITE
                end

                LAYER_WRITE: begin
                    // Write back: update beliefs and store new CN->VN messages
                    for (int z = 0; z < Z; z++) begin
                        int bit_idx;
                        int shifted_z;
                        logic signed [Q-1:0] new_msg;
                        logic signed [Q-1:0] old_extrinsic;
                        shifted_z     = (z + H_BASE[row_idx][col_idx]) % Z;
                        bit_idx       = int'(col_idx) * Z + shifted_z;
                        new_msg       = cn_to_vn[col_idx][z];
                        old_extrinsic = vn_to_cn[col_idx][z];
                        // belief = extrinsic (VN->CN) + new CN->VN message
                        beliefs[bit_idx] <= sat_add(old_extrinsic, new_msg);
                        // Store new message for the next visit of this layer
                        msg_cn2vn[row_idx][col_idx][z] <= new_msg;
                    end
                    if (col_idx == N_BASE - 1) begin
                        col_idx <= '0;
                        if (row_idx == M_BASE - 1)
                            row_idx <= '0;
                        else
                            row_idx <= row_idx + 3'd1;
                    end else begin
                        col_idx <= col_idx + 3'd1;
                    end
                end

                SYNDROME: begin
                    // Register the result of the combinational syndrome check.
                    // `converged` tracks the most recent check so that it
                    // always agrees with `syndrome_weight`.
                    syndrome_weight <= syndrome_cnt;
                    converged       <= syndrome_ok;
                    iter_cnt        <= iter_cnt + 5'd1;
                    iter_used       <= iter_cnt + 5'd1;
                end

                DONE: begin
                    // Output decoded info bits (first Z bits, column block 0)
                    for (int z = 0; z < Z; z++)
                        decoded_bits[z] <= beliefs[z][Q-1]; // sign bit = hard decision
                end

                default: ;
            endcase
        end
    end

    // =========================================================================
    // Offset min-sum check-node update for DC inputs
    // =========================================================================
    // For each output j: sign = XOR of all other input signs,
    // magnitude = max(min of all other input magnitudes - 1, 0).
    function automatic void cn_min_sum(
        input  logic signed [Q-1:0] in  [DC],
        output logic signed [Q-1:0] out [DC]
    );
        logic [DC-1:0] signs;
        logic [Q-2:0]  mags [DC];
        logic          sign_xor;
        logic [Q-2:0]  min1, min2;
        int            min1_idx;

        // Extract signs and magnitudes
        sign_xor = 1'b0;
        for (int i = 0; i < DC; i++) begin
            signs[i] = in[i][Q-1];
            if (!in[i][Q-1])
                mags[i] = in[i][Q-2:0];
            else if (in[i][Q-2:0] == '0)
                // Most-negative value: |x| = 2^(Q-1) does not fit in Q-1
                // bits and would wrap to 0; clamp to the largest magnitude.
                mags[i] = MAG_MAX;
            else
                mags[i] = (~in[i][Q-2:0]) + 1'b1;
            sign_xor = sign_xor ^ signs[i];
        end

        // Find the two smallest magnitudes and where the smallest sits
        min1     = MAG_MAX;
        min2     = MAG_MAX;
        min1_idx = 0;
        for (int i = 0; i < DC; i++) begin
            if (mags[i] < min1) begin
                min2     = min1;
                min1     = mags[i];
                min1_idx = i;
            end else if (mags[i] < min2) begin
                min2 = mags[i];
            end
        end

        // Extrinsic outputs with offset correction
        for (int j = 0; j < DC; j++) begin
            logic [Q-2:0] mag_out;
            logic         sign_out;
            // Exclude input j from its own minimum
            mag_out  = (j == min1_idx) ? min2 : min1;
            // Offset correction (subtract 1 in integer representation)
            mag_out  = (mag_out > 1) ? (mag_out - 1'b1) : {(Q-1){1'b0}};
            sign_out = sign_xor ^ signs[j];
            out[j]   = sign_out ? (~{1'b0, mag_out} + 1'b1) : {1'b0, mag_out};
        end
    endfunction

    // =========================================================================
    // Saturating arithmetic helpers
    // =========================================================================
    // Clamp a (Q+1)-bit signed value into the Q-bit range [LLR_MIN, LLR_MAX].
    function automatic logic signed [Q-1:0] sat_clip(
        logic signed [Q:0] wide
    );
        if (wide > LLR_MAX)
            return LLR_MAX;
        else if (wide < LLR_MIN)
            return LLR_MIN;
        else
            return wide[Q-1:0];
    endfunction

    // a + b with saturation.
    function automatic logic signed [Q-1:0] sat_add(
        logic signed [Q-1:0] a, logic signed [Q-1:0] b
    );
        logic signed [Q:0] wide;
        wide = a + b; // operands are sign-extended to Q+1 bits by the assignment context
        return sat_clip(wide);
    endfunction

    // a - b with saturation. The subtraction is done at Q+1 bits; negating b
    // at Q bits first would wrap for b == LLR_MIN.
    function automatic logic signed [Q-1:0] sat_sub(
        logic signed [Q-1:0] a, logic signed [Q-1:0] b
    );
        logic signed [Q:0] wide;
        wide = a - b;
        return sat_clip(wide);
    endfunction

endmodule

110
rtl/ldpc_decoder_top.sv Normal file
View File

@@ -0,0 +1,110 @@
// LDPC Decoder Top - QC-LDPC Rate 1/8 for Photon-Starved Optical Communication
// Target: Efabless chipIgnite (SkyWater 130nm, Caravel harness)
//
// Code parameters:
// Rate 1/8, n=256 coded bits, k=32 info bits
// QC-LDPC with 7x8 base matrix, lifting factor Z=32
// Offset min-sum decoding, layered scheduling
//
// Input: 6-bit signed LLRs (log-likelihood ratios from photon detector)
// Output: 32 decoded information bits + convergence status
// Top-level wrapper: connects the Wishbone register block (u_wb) to the
// decoder core (u_core). Contains no logic of its own.
module ldpc_decoder_top #(
    parameter N_BASE   = 8,            // base matrix columns
    parameter M_BASE   = 7,            // base matrix rows
    parameter Z        = 32,           // lifting factor
    parameter N        = N_BASE * Z,   // codeword length
    parameter K        = Z,            // info bits
    parameter M        = M_BASE * Z,   // parity checks
    parameter Q        = 6,            // LLR quantization bits (signed)
    parameter MAX_ITER = 30,           // maximum decoding iterations
    parameter DC       = 8,            // check node degree
    parameter DV_MAX   = 7             // max variable node degree
)(
    input  logic        clk,
    input  logic        rst_n,

    // Wishbone slave bus
    input  logic        wb_cyc_i,
    input  logic        wb_stb_i,
    input  logic        wb_we_i,
    input  logic [7:0]  wb_adr_i,      // byte address
    input  logic [31:0] wb_dat_i,
    output logic [31:0] wb_dat_o,
    output logic        wb_ack_o,

    // Interrupt request, driven by the register block
    output logic        irq_o
);

    // -------------------------------------------------------------------------
    // Nets between the register block and the core
    // -------------------------------------------------------------------------
    // Register block -> core
    logic                start_pulse;     // begin decoding
    logic                early_term_en;   // stop as soon as the syndrome is zero
    logic [4:0]          iter_limit;      // 0 selects the core's MAX_ITER
    logic signed [Q-1:0] llr_buf [N];     // channel LLRs written by the host

    // Core -> register block
    logic                core_busy;
    logic                core_converged;
    logic [4:0]          core_iter_used;
    logic [K-1:0]        info_bits;
    logic [7:0]          synd_weight;

    // -------------------------------------------------------------------------
    // Decoder core
    // -------------------------------------------------------------------------
    ldpc_decoder_core #(
        .N_BASE   (N_BASE),
        .M_BASE   (M_BASE),
        .Z        (Z),
        .Q        (Q),
        .MAX_ITER (MAX_ITER),
        .DC       (DC),
        .DV_MAX   (DV_MAX)
    ) u_core (
        .clk             (clk),
        .rst_n           (rst_n),
        // control + data in
        .start           (start_pulse),
        .early_term_en   (early_term_en),
        .max_iter        (iter_limit),
        .llr_in          (llr_buf),
        // status + results out
        .busy            (core_busy),
        .converged       (core_converged),
        .iter_used       (core_iter_used),
        .decoded_bits    (info_bits),
        .syndrome_weight (synd_weight)
    );

    // -------------------------------------------------------------------------
    // Wishbone register block
    // -------------------------------------------------------------------------
    wishbone_interface #(
        .N (N),
        .K (K),
        .Q (Q)
    ) u_wb (
        .clk             (clk),
        .rst_n           (rst_n),
        // bus side
        .wb_cyc_i        (wb_cyc_i),
        .wb_stb_i        (wb_stb_i),
        .wb_we_i         (wb_we_i),
        .wb_adr_i        (wb_adr_i),
        .wb_dat_i        (wb_dat_i),
        .wb_dat_o        (wb_dat_o),
        .wb_ack_o        (wb_ack_o),
        // towards the core
        .ctrl_start      (start_pulse),
        .ctrl_early_term (early_term_en),
        .ctrl_max_iter   (iter_limit),
        .llr_input       (llr_buf),
        // from the core
        .stat_busy       (core_busy),
        .stat_converged  (core_converged),
        .stat_iter_used  (core_iter_used),
        .decoded_bits    (info_bits),
        .syndrome_weight (synd_weight),
        // interrupt
        .irq_o           (irq_o)
    );

endmodule

139
rtl/wishbone_interface.sv Normal file
View File

@@ -0,0 +1,139 @@
// Wishbone B4 slave interface for LDPC decoder
// Compatible with Caravel SoC Wishbone interconnect
//
// Register map (byte-addressed):
// 0x00 CTRL R/W [0]=start (auto-clear), [1]=early_term_en, [12:8]=max_iter
// 0x04 STATUS R [0]=busy, [1]=converged, [12:8]=iterations_used, [23:16]=syndrome_wt
// 0x10-0xDF LLR W Channel LLRs packed 5x6-bit per 32-bit word (52 words for 256 LLRs).
//           Write-only window; reads at 0x50/0x54 inside it return the read-only registers below.
// 0x50 DECODED R 32 decoded info bits
// 0x54 VERSION R Version/ID register
// Wishbone slave register block for the LDPC decoder.
//
// Provides the CTRL/STATUS registers, a write-only window through which the
// host loads packed channel LLRs, read-only DECODED and VERSION registers,
// and a one-cycle interrupt pulse when the core's busy flag falls.
//
// Parameters:
//   N  number of LLRs in the buffer
//   K  number of decoded bits returned in DECODED (zero-extended to 32 bits)
//   Q  width of one signed LLR; floor(32/Q) LLRs are packed per bus word
module wishbone_interface #(
    parameter N = 256,
    parameter K = 32,
    parameter Q = 6
)(
    input  logic                clk,
    input  logic                rst_n,
    // Wishbone slave
    input  logic                wb_cyc_i,
    input  logic                wb_stb_i,
    input  logic                wb_we_i,
    input  logic [7:0]          wb_adr_i,
    input  logic [31:0]         wb_dat_i,
    output logic [31:0]         wb_dat_o,
    output logic                wb_ack_o,
    // To/from decoder core
    output logic                ctrl_start,
    output logic                ctrl_early_term,
    output logic [4:0]          ctrl_max_iter,
    input  logic                stat_busy,
    input  logic                stat_converged,
    input  logic [4:0]          stat_iter_used,
    output logic signed [Q-1:0] llr_input [N],
    input  logic [K-1:0]        decoded_bits,
    input  logic [7:0]          syndrome_weight,
    // Interrupt
    output logic                irq_o
);

    // Version/ID register value. The original literal used the letter 'L',
    // which is not a hexadecimal digit; "1D" stands in for "LD" (LDPC),
    // followed by v0.1 build 1.
    localparam logic [31:0] VERSION_ID = 32'h1D01_0001;

    // Register byte addresses
    localparam logic [7:0] ADDR_CTRL    = 8'h00;
    localparam logic [7:0] ADDR_STATUS  = 8'h04;
    localparam logic [7:0] ADDR_DECODED = 8'h50;
    localparam logic [7:0] ADDR_VERSION = 8'h54;

    // LLR write window, derived from N and Q.
    // Defaults (N=256, Q=6): 5 LLRs/word, 52 words, window 0x10..0xDF.
    localparam int LLRS_PER_WORD = 32 / Q;
    localparam int LLR_WORDS     = (N + LLRS_PER_WORD - 1) / LLRS_PER_WORD;
    localparam int LLR_BASE      = 'h10;
    localparam int LLR_END       = LLR_BASE + 4 * LLR_WORDS; // first byte past the window

    // -------------------------------------------------------------------------
    // Wishbone handshake: registered ack, one cycle per access
    // -------------------------------------------------------------------------
    logic wb_valid;
    logic wb_write;   // write strobe, true only in the first cycle of an access

    assign wb_valid = wb_cyc_i && wb_stb_i;
    assign wb_write = wb_valid && wb_we_i && !wb_ack_o;

    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            wb_ack_o <= 1'b0;
        else
            wb_ack_o <= wb_valid && !wb_ack_o; // single-cycle ack
    end

    // =========================================================================
    // Control register
    // =========================================================================
    logic       start_pending;
    logic       early_term_reg;
    logic [4:0] max_iter_reg;

    // start_pending is set by a CTRL write with bit 0 set and cleared once
    // ctrl_start has been presented to the core for one cycle.
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            start_pending  <= 1'b0;
            early_term_reg <= 1'b1;  // early termination on by default
            max_iter_reg   <= 5'd0;  // 0 = use MAX_ITER default
        end else begin
            if (ctrl_start)
                start_pending <= 1'b0;
            // A simultaneous CTRL write takes priority over the auto-clear
            if (wb_write && wb_adr_i == ADDR_CTRL) begin
                start_pending  <= wb_dat_i[0];
                early_term_reg <= wb_dat_i[1];
                max_iter_reg   <= wb_dat_i[12:8];
            end
        end
    end

    // A start requested while the core is busy is held back until busy drops
    assign ctrl_start      = start_pending && !stat_busy;
    assign ctrl_early_term = early_term_reg;
    assign ctrl_max_iter   = max_iter_reg;

    // =========================================================================
    // LLR input: LLRS_PER_WORD LLRs packed per 32-bit word
    //   Word at LLR_BASE + 4*i holds LLR[i*LLRS_PER_WORD + p] in bits
    //   [p*Q +: Q]; indices >= N in the last word are ignored (padding).
    // =========================================================================
    always_ff @(posedge clk) begin
        if (wb_write && int'(wb_adr_i) >= LLR_BASE && int'(wb_adr_i) < LLR_END) begin
            int word_idx;
            word_idx = (int'(wb_adr_i) - LLR_BASE) >> 2;
            for (int p = 0; p < LLRS_PER_WORD; p++) begin
                int llr_idx;
                llr_idx = word_idx * LLRS_PER_WORD + p;
                if (llr_idx < N)
                    llr_input[llr_idx] <= wb_dat_i[p*Q +: Q];
            end
        end
    end

    // =========================================================================
    // Read mux (combinational on the address; unmapped addresses read 0)
    // =========================================================================
    always_comb begin
        case (wb_adr_i)
            // [12:8]=max_iter, [1]=early_term_en, [0]=start pending
            ADDR_CTRL:    wb_dat_o = {19'b0, max_iter_reg, 6'b0, early_term_reg, start_pending};
            // [23:16]=syndrome weight, [12:8]=iterations used, [1]=converged, [0]=busy
            ADDR_STATUS:  wb_dat_o = {8'b0, syndrome_weight, 3'b0, stat_iter_used,
                                      6'b0, stat_converged, stat_busy};
            ADDR_DECODED: wb_dat_o = 32'(decoded_bits);
            ADDR_VERSION: wb_dat_o = VERSION_ID;
            default:      wb_dat_o = 32'h0;
        endcase
    end

    // =========================================================================
    // Interrupt: one-cycle pulse on the falling edge of busy
    // =========================================================================
    logic busy_d1;

    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            busy_d1 <= 1'b0;
            irq_o   <= 1'b0;
        end else begin
            busy_d1 <= stat_busy;
            irq_o   <= busy_d1 && !stat_busy;
        end
    end

endmodule