From 5d615876aee082093a694f2f7d60a20038dd763e Mon Sep 17 00:00:00 2001 From: cah Date: Wed, 25 Feb 2026 18:22:45 -0700 Subject: [PATCH] feat: integrate LDPC decoder into Caravel wrapper - Copy ldpc_decoder_core.sv and wishbone_interface.sv from standalone RTL - Create Caravel-adapted ldpc_decoder_top.sv with USE_POWER_PINS, 32-bit address (lower 8 bits passed through), and wb_sel_i port - Replace user_proj_example in user_project_wrapper.v with LDPC decoder instantiation, active-high to active-low reset inversion, and tie-offs for unused outputs (la_data_out, io_out, io_oeb, user_irq[2:1]) - Update includes.rtl.caravel_user_project with LDPC RTL file list - Fix invalid hex literal in VERSION_ID (0xLD -> 0x1D) Co-Authored-By: Claude Opus 4.6 --- .../includes.rtl.caravel_user_project | 9 +- verilog/rtl/ldpc_decoder_core.sv | 406 ++++++++++++++++++ verilog/rtl/ldpc_decoder_top.sv | 73 ++++ verilog/rtl/user_project_wrapper.v | 52 +-- verilog/rtl/wishbone_interface.sv | 139 ++++++ 5 files changed, 643 insertions(+), 36 deletions(-) create mode 100644 verilog/rtl/ldpc_decoder_core.sv create mode 100644 verilog/rtl/ldpc_decoder_top.sv create mode 100644 verilog/rtl/wishbone_interface.sv diff --git a/verilog/includes/includes.rtl.caravel_user_project b/verilog/includes/includes.rtl.caravel_user_project index a8c6e02..e332e2c 100644 --- a/verilog/includes/includes.rtl.caravel_user_project +++ b/verilog/includes/includes.rtl.caravel_user_project @@ -14,7 +14,8 @@ # SPDX-License-Identifier: Apache-2.0 # Caravel user project includes --v $(USER_PROJECT_VERILOG)/rtl/user_project_wrapper.v --v $(USER_PROJECT_VERILOG)/rtl/user_proj_example.v - - \ No newline at end of file +-v $(USER_PROJECT_VERILOG)/rtl/defines.v +-v $(USER_PROJECT_VERILOG)/rtl/user_project_wrapper.v +-v $(USER_PROJECT_VERILOG)/rtl/ldpc_decoder_top.sv +-v $(USER_PROJECT_VERILOG)/rtl/ldpc_decoder_core.sv +-v $(USER_PROJECT_VERILOG)/rtl/wishbone_interface.sv diff --git a/verilog/rtl/ldpc_decoder_core.sv 
b/verilog/rtl/ldpc_decoder_core.sv
new file mode 100644
index 0000000..1ceaf81
--- /dev/null
+++ b/verilog/rtl/ldpc_decoder_core.sv
@@ -0,0 +1,406 @@
+// LDPC Decoder Core - Layered Min-Sum with QC structure
+//
+// Layered scheduling processes one base-matrix row at a time.
+// For each row, we:
+//   1. Read VN beliefs for all Z columns connected to this row
+//   2. Subtract old CN->VN messages to get VN->CN messages
+//   3. Run CN min-sum update
+//   4. Add new CN->VN messages back to VN beliefs
+//   5. Write updated beliefs back
+//
+// This converges ~2x faster than flooding. CN->VN messages are kept per layer
+// (one bank per base-matrix entry) and overwritten on the next iteration.
+//
+// Base-matrix entries equal to -1 (zero sub-matrix) carry no edge: they are
+// fed to the check node as +max (neutral for min-sum) and never written back.
+
+module ldpc_decoder_core #(
+    parameter N_BASE   = 8,
+    parameter M_BASE   = 7,
+    parameter Z        = 32,
+    parameter N        = N_BASE * Z,
+    parameter M        = M_BASE * Z,
+    parameter Q        = 6,
+    parameter MAX_ITER = 30,
+    parameter DC       = 8,    // CN message slots per layer (one per base column)
+    parameter DV_MAX   = 7     // max variable node degree
+)(
+    input  logic                clk,
+    input  logic                rst_n,
+
+    // Control
+    input  logic                start,
+    input  logic                early_term_en,
+    input  logic [4:0]          max_iter,        // 0 = use MAX_ITER
+
+    // Channel LLRs (loaded before start)
+    input  logic signed [Q-1:0] llr_in [N],
+
+    // Status
+    output logic                busy,
+    output logic                converged,
+    output logic [4:0]          iter_used,
+
+    // Results
+    output logic [Z-1:0]        decoded_bits,    // first Z bits = info bits
+    output logic [7:0]          syndrome_weight
+);
+
+    // =========================================================================
+    // Base matrix H stored as shift values (-1 = no connection)
+    // H_BASE[row][col] = cyclic shift amount, or -1 if zero sub-matrix
+    // =========================================================================
+
+    // IRA staircase base matrix for rate-1/8 QC-LDPC
+    // Column 0 = info (dv=7), Columns 1-7 = parity with lower-triangular staircase
+    // This matches model/ldpc_sim.py exactly.
+    //
+    // Row 0: info(0)  + p1(5)
+    // Row 1: info(11) + p1(3)  + p2(0)
+    // Row 2: info(17) + p2(7)  + p3(0)
+    // Row 3: info(23) + p3(13) + p4(0)
+    // Row 4: info(29) + p4(19) + p5(0)
+    // Row 5: info(3)  + p5(25) + p6(0)
+    // Row 6: info(9)  + p6(31) + p7(0)
+    //
+    // Declared as a constant (not an initial block) so the table survives ASIC
+    // synthesis. The literal is 7x8, so M_BASE/N_BASE must keep their defaults.
+
+    localparam int H_BASE [M_BASE][N_BASE] = '{
+        '{ 0,  5, -1, -1, -1, -1, -1, -1},
+        '{11,  3,  0, -1, -1, -1, -1, -1},
+        '{17, -1,  7,  0, -1, -1, -1, -1},
+        '{23, -1, -1, 13,  0, -1, -1, -1},
+        '{29, -1, -1, -1, 19,  0, -1, -1},
+        '{ 3, -1, -1, -1, -1, 25,  0, -1},
+        '{ 9, -1, -1, -1, -1, -1, 31,  0}
+    };
+
+    // Saturation limits of a Q-bit two's-complement LLR
+    localparam logic signed [Q-1:0] LLR_MAX = {1'b0, {(Q-1){1'b1}}};
+    localparam logic signed [Q-1:0] LLR_MIN = {1'b1, {(Q-1){1'b0}}};
+
+    // =========================================================================
+    // Memory: VN beliefs (total posterior LLR per bit)
+    // beliefs[j] = channel_llr[j] + sum of all CN->VN messages to j
+    // =========================================================================
+
+    logic signed [Q-1:0] beliefs [N];
+
+    // =========================================================================
+    // Memory: CN->VN messages for layered update
+    // msg_cn2vn[row][col][z] = message from check (row*Z+z) to variable (col*Z+shift(z))
+    // Stored as [M_BASE][N_BASE] banks of Z entries each
+    // =========================================================================
+
+    logic signed [Q-1:0] msg_cn2vn [M_BASE][N_BASE][Z];
+
+    // =========================================================================
+    // Decoder FSM
+    // =========================================================================
+
+    typedef enum logic [2:0] {
+        IDLE,
+        INIT,         // Initialize beliefs from channel LLRs, zero messages
+        LAYER_READ,   // Read Z beliefs for each of DC columns in current row
+        CN_UPDATE,    // Run min-sum CN update on gathered messages
+        LAYER_WRITE,  // Write updated beliefs and new CN->VN messages
+        SYNDROME,     // Check syndrome after full iteration
+        DONE
+    } state_t;
+
+    state_t state, state_next;
+
+    logic [4:0] iter_cnt;
+    logic [2:0] row_idx;            // current base matrix row (0..M_BASE-1)
+    logic [2:0] col_idx;            // current column being read/written (0..N_BASE-1)
+    logic [4:0] effective_max_iter;
+
+    // Working registers for current layer CN update
+    logic signed [Q-1:0] vn_to_cn [DC][Z];  // VN->CN messages for current row
+    logic signed [Q-1:0] cn_to_vn [DC][Z];  // new CN->VN messages (output of min-sum)
+
+    // Syndrome check (combinational, derived from the current beliefs)
+    logic [7:0] syndrome_cnt;
+    logic       syndrome_ok;
+
+    assign effective_max_iter = (max_iter == 0) ? 5'(MAX_ITER) : max_iter;
+    assign busy = (state != IDLE) && (state != DONE);
+
+    // =========================================================================
+    // Syndrome: weight of H * c_hat over the hard decisions (belief sign bits)
+    // Evaluated combinationally so the FSM sees the value for the iteration
+    // that just finished, not the one before it.
+    // =========================================================================
+
+    always_comb begin
+        syndrome_cnt = '0;
+        for (int r = 0; r < M_BASE; r++) begin
+            for (int z = 0; z < Z; z++) begin
+                logic parity;
+                parity = 1'b0;
+                for (int c = 0; c < N_BASE; c++) begin
+                    if (H_BASE[r][c] >= 0)
+                        parity = parity ^ beliefs[c * Z + (z + H_BASE[r][c]) % Z][Q-1];
+                end
+                if (parity) syndrome_cnt = syndrome_cnt + 8'd1;
+            end
+        end
+        syndrome_ok = (syndrome_cnt == 8'd0);
+    end
+
+    // =========================================================================
+    // State machine
+    // =========================================================================
+
+    always_ff @(posedge clk or negedge rst_n) begin
+        if (!rst_n) begin
+            state <= IDLE;
+        end else begin
+            state <= state_next;
+        end
+    end
+
+    always_comb begin
+        state_next = state;
+        case (state)
+            IDLE:       if (start) state_next = INIT;
+            INIT:       state_next = LAYER_READ;
+            LAYER_READ: if (col_idx == N_BASE - 1) state_next = CN_UPDATE;
+            CN_UPDATE:  state_next = LAYER_WRITE;
+            LAYER_WRITE: begin
+                if (col_idx == N_BASE - 1) begin
+                    if (row_idx == M_BASE - 1)
+                        state_next = SYNDROME;
+                    else
+                        state_next = LAYER_READ;  // next row
+                end
+            end
+            SYNDROME: begin
+                // iter_cnt counts iterations completed before this one
+                if (syndrome_ok && early_term_en)
+                    state_next = DONE;
+                else if ({1'b0, iter_cnt} + 6'd1 >= {1'b0, effective_max_iter})
+                    state_next = DONE;
+                else
+                    state_next = LAYER_READ;  // next iteration
+            end
+            DONE:       if (!start) state_next = IDLE;
+            default:    state_next = IDLE;
+        endcase
+    end
+
+    // =========================================================================
+    // Datapath
+    // =========================================================================
+
+    always_ff @(posedge clk or negedge rst_n) begin
+        if (!rst_n) begin
+            iter_cnt        <= '0;
+            row_idx         <= '0;
+            col_idx         <= '0;
+            converged       <= 1'b0;
+            iter_used       <= '0;
+            syndrome_weight <= '0;
+        end else begin
+            case (state)
+                IDLE: begin
+                    // Status outputs are held here so the host can read the
+                    // result of the last decode; INIT clears them on restart.
+                    iter_cnt <= '0;
+                    row_idx  <= '0;
+                    col_idx  <= '0;
+                end
+
+                INIT: begin
+                    // Initialize beliefs from channel LLRs
+                    for (int j = 0; j < N; j++) begin
+                        beliefs[j] <= llr_in[j];
+                    end
+                    // Zero all CN->VN messages
+                    for (int r = 0; r < M_BASE; r++)
+                        for (int c = 0; c < N_BASE; c++)
+                            for (int z = 0; z < Z; z++)
+                                msg_cn2vn[r][c][z] <= '0;
+                    row_idx   <= '0;
+                    col_idx   <= '0;
+                    iter_cnt  <= '0;
+                    converged <= 1'b0;
+                end
+
+                LAYER_READ: begin
+                    // For column col_idx in current row_idx:
+                    //   VN->CN = belief - old CN->VN message
+                    // (belief already contains the sum of ALL CN->VN messages,
+                    //  so subtracting the current row's message gives the extrinsic)
+                    for (int z = 0; z < Z; z++) begin
+                        if (H_BASE[row_idx][col_idx] >= 0) begin
+                            vn_to_cn[col_idx][z] <= sat_sub(
+                                beliefs[int'(col_idx) * Z + (z + H_BASE[row_idx][col_idx]) % Z],
+                                msg_cn2vn[row_idx][col_idx][z]);
+                        end else begin
+                            // No edge: +max leaves the min and the sign product unchanged
+                            vn_to_cn[col_idx][z] <= LLR_MAX;
+                        end
+                    end
+
+                    if (col_idx == N_BASE - 1)
+                        col_idx <= '0;
+                    else
+                        col_idx <= col_idx + 1;
+                end
+
+                CN_UPDATE: begin
+                    // Min-sum update for all Z check nodes in current row
+                    // Each CN sees DC=8 message slots (one per base column)
+                    for (int z = 0; z < Z; z++) begin
+                        logic signed [Q-1:0] msgs [DC];
+                        logic signed [Q-1:0] upd  [DC];
+
+                        for (int d = 0; d < DC; d++)
+                            msgs[d] = vn_to_cn[d][z];
+
+                        cn_min_sum(msgs, upd[0], upd[1], upd[2], upd[3],
+                                   upd[4], upd[5], upd[6], upd[7]);
+
+                        for (int d = 0; d < DC; d++)
+                            cn_to_vn[d][z] <= upd[d];
+                    end
+                    col_idx <= '0;  // prepare for LAYER_WRITE
+                end
+
+                LAYER_WRITE: begin
+                    // Write back: update beliefs and store new CN->VN messages
+                    // (only for columns that are connected to this row)
+                    for (int z = 0; z < Z; z++) begin
+                        if (H_BASE[row_idx][col_idx] >= 0) begin
+                            // belief = extrinsic (VN->CN) + new CN->VN message
+                            beliefs[int'(col_idx) * Z + (z + H_BASE[row_idx][col_idx]) % Z]
+                                <= sat_add(vn_to_cn[col_idx][z], cn_to_vn[col_idx][z]);
+
+                            // Store new message for next iteration
+                            msg_cn2vn[row_idx][col_idx][z] <= cn_to_vn[col_idx][z];
+                        end
+                    end
+
+                    if (col_idx == N_BASE - 1) begin
+                        col_idx <= '0;
+                        if (row_idx == M_BASE - 1)
+                            row_idx <= '0;
+                        else
+                            row_idx <= row_idx + 1;
+                    end else begin
+                        col_idx <= col_idx + 1;
+                    end
+                end
+
+                SYNDROME: begin
+                    // Latch the syndrome computed from the current beliefs
+                    syndrome_weight <= syndrome_cnt;
+                    iter_cnt        <= iter_cnt + 1;
+                    iter_used       <= iter_cnt + 1;
+                    if (syndrome_ok) converged <= 1'b1;
+                end
+
+                DONE: begin
+                    // Output decoded info bits (first Z=32 bits, column 0)
+                    for (int z = 0; z < Z; z++)
+                        decoded_bits[z] <= beliefs[z][Q-1];  // sign bit = hard decision
+                end
+
+                default: ;
+            endcase
+        end
+    end
+
+    // =========================================================================
+    // Min-sum CN update function
+    // =========================================================================
+
+    // Offset min-sum for DC=8 inputs
+    // For each output j: sign = XOR of all other signs, magnitude = min of all other magnitudes - offset
+    task automatic cn_min_sum(
+        input  logic signed [Q-1:0] in [DC],
+        output logic signed [Q-1:0] out0, out1, out2, out3,
+                                    out4, out5, out6, out7
+    );
+        logic [DC-1:0] signs;
+        logic [Q-2:0]  mags [DC];
+        logic [Q-1:0]  abs_full;
+        logic          sign_xor;
+        logic [Q-2:0]  min1, min2;
+        int            min1_idx;
+        logic signed [Q-1:0] outs [DC];
+
+        // Extract signs and magnitudes
+        sign_xor = 1'b0;
+        for (int i = 0; i < DC; i++) begin
+            signs[i] = in[i][Q-1];
+            // Q-bit magnitude; -2^(Q-1) has no (Q-1)-bit magnitude, so clamp it
+            // to the maximum instead of letting it wrap to zero.
+            abs_full = in[i][Q-1] ? (~in[i] + 1'b1) : in[i];
+            mags[i]  = abs_full[Q-1] ? {(Q-1){1'b1}} : abs_full[Q-2:0];
+            sign_xor = sign_xor ^ signs[i];
+        end
+
+        // Find two smallest magnitudes
+        min1 = {(Q-1){1'b1}};
+        min2 = {(Q-1){1'b1}};
+        min1_idx = 0;
+        for (int i = 0; i < DC; i++) begin
+            if (mags[i] < min1) begin
+                min2 = min1;
+                min1 = mags[i];
+                min1_idx = i;
+            end else if (mags[i] < min2) begin
+                min2 = mags[i];
+            end
+        end
+
+        // Compute extrinsic outputs with offset correction
+        for (int j = 0; j < DC; j++) begin
+            logic [Q-2:0] mag_out;
+            logic         sign_out;
+
+            mag_out = (j == min1_idx) ? min2 : min1;
+            // Offset correction (subtract 1 in integer representation)
+            mag_out = (mag_out > 1) ? (mag_out - 1) : {(Q-1){1'b0}};
+            sign_out = sign_xor ^ signs[j];
+
+            outs[j] = sign_out ? (~{1'b0, mag_out} + 1) : {1'b0, mag_out};
+        end
+
+        out0 = outs[0]; out1 = outs[1]; out2 = outs[2]; out3 = outs[3];
+        out4 = outs[4]; out5 = outs[5]; out6 = outs[6]; out7 = outs[7];
+    endtask
+
+    // =========================================================================
+    // Saturating arithmetic helpers
+    // =========================================================================
+
+    // Clamp a (Q+1)-bit signed value into the Q-bit range [LLR_MIN, LLR_MAX]
+    function automatic logic signed [Q-1:0] sat_clip(
+        logic signed [Q:0] v
+    );
+        if (v > LLR_MAX)
+            return LLR_MAX;
+        else if (v < LLR_MIN)
+            return LLR_MIN;
+        else
+            return v[Q-1:0];
+    endfunction
+
+    function automatic logic signed [Q-1:0] sat_add(
+        logic signed [Q-1:0] a, logic signed [Q-1:0] b
+    );
+        return sat_clip({a[Q-1], a} + {b[Q-1], b});  // sign-extend and add
+    endfunction
+
+    function automatic logic signed [Q-1:0] sat_sub(
+        logic signed [Q-1:0] a, logic signed [Q-1:0] b
+    );
+        // Subtract at Q+1 bits: negating b at Q bits overflows for b = LLR_MIN
+        return sat_clip({a[Q-1], a} - {b[Q-1], b});
+    endfunction
+
+endmodule
diff --git a/verilog/rtl/ldpc_decoder_top.sv b/verilog/rtl/ldpc_decoder_top.sv
new file mode 100644
index 0000000..06939d7
--- /dev/null
+++ b/verilog/rtl/ldpc_decoder_top.sv
@@ -0,0 +1,73 @@
+// LDPC Decoder Top - Caravel-adapted wrapper
+// QC-LDPC Rate 1/8 for Photon-Starved Optical Communication
+// Target: Efabless
chipIgnite (SkyWater 130nm, Caravel harness)
+//
+// Adaptations from standalone version:
+//   - USE_POWER_PINS ifdef for Caravel power pass-through
+//   - 32-bit Wishbone address (only the lower 8 bits reach wishbone_interface)
+//   - wb_sel_i byte selects accepted but unused (not connected to any logic here)
+
+module ldpc_decoder_top #(
+    parameter N_BASE   = 8,
+    parameter M_BASE   = 7,
+    parameter Z        = 32,
+    parameter N        = N_BASE * Z,   // number of LLR inputs; passed to u_wb, not to u_core
+    parameter K        = Z,            // width of decoded_bits
+    parameter M        = M_BASE * Z,   // not referenced inside this module
+    parameter Q        = 6,            // LLR width in bits
+    parameter MAX_ITER = 30,
+    parameter DC       = 8,
+    parameter DV_MAX   = 7
+)(
+`ifdef USE_POWER_PINS
+    inout  vccd1,
+    inout  vssd1,
+`endif
+    input  logic        clk,
+    input  logic        rst_n,
+    input  logic        wb_cyc_i,
+    input  logic        wb_stb_i,
+    input  logic        wb_we_i,
+    input  logic [3:0]  wb_sel_i,   // byte selects (unused, Caravel compat)
+    input  logic [31:0] wb_adr_i,   // full 32-bit address from Caravel
+    input  logic [31:0] wb_dat_i,
+    output logic [31:0] wb_dat_o,
+    output logic        wb_ack_o,
+    output logic        irq_o
+);
+    // Internal signals between the Wishbone register block (u_wb) and the core (u_core)
+    logic        ctrl_start;
+    logic        ctrl_early_term;
+    logic [4:0]  ctrl_max_iter;
+    logic        stat_busy;
+    logic        stat_converged;
+    logic [4:0]  stat_iter_used;
+    logic signed [Q-1:0] llr_input [N];
+    logic [K-1:0] decoded_bits;
+    logic [7:0]  syndrome_weight;
+
+    wishbone_interface #(.N(N), .K(K), .Q(Q)) u_wb (
+        .clk(clk), .rst_n(rst_n),
+        .wb_cyc_i(wb_cyc_i), .wb_stb_i(wb_stb_i), .wb_we_i(wb_we_i),
+        .wb_adr_i(wb_adr_i[7:0]),  // lower 8 bits only; wb_adr_i[31:8] is ignored
+        .wb_dat_i(wb_dat_i), .wb_dat_o(wb_dat_o), .wb_ack_o(wb_ack_o),
+        .ctrl_start(ctrl_start), .ctrl_early_term(ctrl_early_term),
+        .ctrl_max_iter(ctrl_max_iter),
+        .stat_busy(stat_busy), .stat_converged(stat_converged),
+        .stat_iter_used(stat_iter_used),
+        .llr_input(llr_input), .decoded_bits(decoded_bits),
+        .syndrome_weight(syndrome_weight), .irq_o(irq_o)
+    );
+
+    ldpc_decoder_core #(
+        .N_BASE(N_BASE), .M_BASE(M_BASE), .Z(Z), .Q(Q),
+        .MAX_ITER(MAX_ITER), .DC(DC), .DV_MAX(DV_MAX)
+    ) u_core (
+        .clk(clk), .rst_n(rst_n),
+        .start(ctrl_start), .early_term_en(ctrl_early_term),
+        .max_iter(ctrl_max_iter), .llr_in(llr_input),
+        .busy(stat_busy), .converged(stat_converged),
+        .iter_used(stat_iter_used), .decoded_bits(decoded_bits),
+        .syndrome_weight(syndrome_weight)
+    );
+endmodule
diff --git a/verilog/rtl/user_project_wrapper.v b/verilog/rtl/user_project_wrapper.v
index 14e4dee..90af42b 100644
--- a/verilog/rtl/user_project_wrapper.v
+++ b/verilog/rtl/user_project_wrapper.v
@@ -82,42 +82,30 @@ module user_project_wrapper #(
 /* User project is instantiated here */
 /*--------------------------------------*/
 
-user_proj_example mprj (
+ldpc_decoder_top mprj (
 `ifdef USE_POWER_PINS
-    .vccd1(vccd1), // User area 1 1.8V power
-    .vssd1(vssd1), // User area 1 digital ground
+    .vccd1(vccd1),
+    .vssd1(vssd1),
 `endif
-
-    .wb_clk_i(wb_clk_i),
-    .wb_rst_i(wb_rst_i),
-
-    // MGMT SoC Wishbone Slave
-
-    .wbs_cyc_i(wbs_cyc_i),
-    .wbs_stb_i(wbs_stb_i),
-    .wbs_we_i(wbs_we_i),
-    .wbs_sel_i(wbs_sel_i),
-    .wbs_adr_i(wbs_adr_i),
-    .wbs_dat_i(wbs_dat_i),
-    .wbs_ack_o(wbs_ack_o),
-    .wbs_dat_o(wbs_dat_o),
-
-    // Logic Analyzer
-
-    .la_data_in(la_data_in),
-    .la_data_out(la_data_out),
-    .la_oenb (la_oenb),
-
-    // IO Pads
-
-    .io_in ({io_in[37:30],io_in[7:0]}),
-    .io_out({io_out[37:30],io_out[7:0]}),
-    .io_oeb({io_oeb[37:30],io_oeb[7:0]}),
-
-    // IRQ
-    .irq(user_irq)
+    .clk      (wb_clk_i),
+    .rst_n    (~wb_rst_i),  // decoder reset is active-low; invert the wrapper reset
+    .wb_cyc_i (wbs_cyc_i),
+    .wb_stb_i (wbs_stb_i),
+    .wb_we_i  (wbs_we_i),
+    .wb_sel_i (wbs_sel_i),
+    .wb_adr_i (wbs_adr_i),
+    .wb_dat_i (wbs_dat_i),
+    .wb_dat_o (wbs_dat_o),
+    .wb_ack_o (wbs_ack_o),
+    .irq_o    (user_irq[0])
 );
 
+// Tie off outputs the decoder does not drive
+assign la_data_out = 128'b0;
+assign io_out = {`MPRJ_IO_PADS{1'b0}};
+assign io_oeb = {`MPRJ_IO_PADS{1'b1}};  // all inputs
+assign user_irq[2:1] = 2'b0;
+
 endmodule // user_project_wrapper
 
 `default_nettype wire
diff --git a/verilog/rtl/wishbone_interface.sv b/verilog/rtl/wishbone_interface.sv
new file mode 100644
index 0000000..a5ea6f2
--- /dev/null
+++
b/verilog/rtl/wishbone_interface.sv
@@ -0,0 +1,145 @@
+// Wishbone B4 slave interface for LDPC decoder
+// Compatible with Caravel SoC Wishbone interconnect
+//
+// Register map (byte-addressed):
+//   0x00       CTRL     R/W  [0]=start (clears when issued to the core), [1]=early_term_en, [12:8]=max_iter
+//   0x04       STATUS   R    [0]=busy, [1]=converged, [12:8]=iterations_used, [23:16]=syndrome_wt
+//   0x10-0xDF  LLR      W    Channel LLRs packed 5x6-bit per 32-bit word (52 words for 256 LLRs)
+//   0x50       DECODED  R    32 decoded info bits
+//   0x54       VERSION  R    Version/ID register
+//
+// The write-only LLR window overlaps the read-only DECODED/VERSION addresses:
+// a write to 0x50/0x54 loads LLR words 16/17, a read returns DECODED/VERSION.
+
+module wishbone_interface #(
+    parameter N = 256,
+    parameter K = 32,
+    parameter Q = 6
+)(
+    input  logic        clk,
+    input  logic        rst_n,
+
+    // Wishbone slave
+    input  logic        wb_cyc_i,
+    input  logic        wb_stb_i,
+    input  logic        wb_we_i,
+    input  logic [7:0]  wb_adr_i,
+    input  logic [31:0] wb_dat_i,
+    output logic [31:0] wb_dat_o,
+    output logic        wb_ack_o,
+
+    // To/from decoder core
+    output logic        ctrl_start,
+    output logic        ctrl_early_term,
+    output logic [4:0]  ctrl_max_iter,
+    input  logic        stat_busy,
+    input  logic        stat_converged,
+    input  logic [4:0]  stat_iter_used,
+    output logic signed [Q-1:0] llr_input [N],
+    input  logic [K-1:0] decoded_bits,
+    input  logic [7:0]  syndrome_weight,
+
+    // Interrupt
+    output logic        irq_o
+);
+
+    localparam VERSION_ID = 32'h1D01_0001;  // LDPC v0.1 build 1
+
+    // Wishbone handshake: ack on valid cycle
+    logic wb_valid;
+    assign wb_valid = wb_cyc_i && wb_stb_i;
+
+    always_ff @(posedge clk or negedge rst_n) begin
+        if (!rst_n)
+            wb_ack_o <= 1'b0;
+        else
+            wb_ack_o <= wb_valid && !wb_ack_o;  // single-cycle ack
+    end
+
+    // =========================================================================
+    // Control register
+    // =========================================================================
+
+    logic       start_pending;
+    logic       early_term_reg;
+    logic [4:0] max_iter_reg;
+    logic       busy_d1;         // stat_busy delayed by one clock
+
+    // Start request: set on write, cleared once it has been issued to the core
+    always_ff @(posedge clk or negedge rst_n) begin
+        if (!rst_n) begin
+            start_pending  <= 1'b0;
+            early_term_reg <= 1'b1;   // early termination on by default
+            max_iter_reg   <= 5'd0;   // 0 = use MAX_ITER default
+        end else begin
+            if (ctrl_start)
+                start_pending <= 1'b0;
+
+            if (wb_valid && wb_we_i && !wb_ack_o && wb_adr_i == 8'h00) begin
+                start_pending  <= wb_dat_i[0];
+                early_term_reg <= wb_dat_i[1];
+                max_iter_reg   <= wb_dat_i[12:8];
+            end
+        end
+    end
+
+    // Issue a queued start only after busy has been low for a full cycle. In the
+    // cycle where busy first drops the core is still in its DONE state, which
+    // does not accept start, so a pulse issued there would be lost.
+    assign ctrl_start      = start_pending && !stat_busy && !busy_d1;
+    assign ctrl_early_term = early_term_reg;
+    assign ctrl_max_iter   = max_iter_reg;
+
+    // =========================================================================
+    // LLR input: pack 5 LLRs per 32-bit word
+    // Word at offset 0x10 + 4*i contains LLRs [5*i] through [5*i+4]
+    // Bits [5:0] = LLR[5*i], [11:6] = LLR[5*i+1], ... [29:24] = LLR[5*i+4]
+    // 52 words cover 260 LLRs (256 used, 4 padding)
+    // =========================================================================
+
+    always_ff @(posedge clk) begin
+        if (wb_valid && wb_we_i && !wb_ack_o) begin
+            if (wb_adr_i >= 8'h10 && wb_adr_i < 8'hE0) begin
+                int word_idx;
+                word_idx = (wb_adr_i - 8'h10) >> 2;
+                for (int p = 0; p < 5; p++) begin
+                    int llr_idx;
+                    llr_idx = word_idx * 5 + p;
+                    if (llr_idx < N)
+                        llr_input[llr_idx] <= wb_dat_i[p*Q +: Q];
+                end
+            end
+        end
+    end
+
+    // =========================================================================
+    // Read mux
+    // =========================================================================
+
+    always_comb begin
+        wb_dat_o = 32'h0;
+        case (wb_adr_i)
+            8'h00: wb_dat_o = {19'b0, max_iter_reg, 6'b0, early_term_reg, start_pending};
+            8'h04: wb_dat_o = {8'b0, syndrome_weight, 3'b0, stat_iter_used, 6'b0, stat_converged, stat_busy};
+            8'h50: wb_dat_o = decoded_bits;
+            8'h54: wb_dat_o = VERSION_ID;
+            default: wb_dat_o = 32'h0;
+        endcase
+    end
+
+    // =========================================================================
+    // Interrupt: assert when decode completes (busy falls)
+    // =========================================================================
+
+    always_ff @(posedge clk or negedge rst_n) begin
+        if (!rst_n) begin
+            busy_d1 <= 1'b0;
+            irq_o   <= 1'b0;
+        end else begin
+            busy_d1 <= stat_busy;
+            // Pulse IRQ on falling edge of busy
+            irq_o   <= busy_d1 && !stat_busy;
+        end
+    end
+
+endmodule