From b93a6f5769e1f343146b092eed55259f5d0e12e0 Mon Sep 17 00:00:00 2001 From: cah Date: Mon, 23 Feb 2026 21:47:40 -0700 Subject: [PATCH] Initial LDPC optical decoder project scaffold Rate-1/8 QC-LDPC decoder for photon-starved optical communication. Target: Efabless chipIgnite (SkyWater 130nm, Caravel harness). - RTL: decoder top, core (layered min-sum), Wishbone interface - Python behavioral model with Poisson channel simulation - 7x8 base matrix, Z=32, n=256, k=32 Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 135 +++++++++++ model/ldpc_sim.py | 474 ++++++++++++++++++++++++++++++++++++++ rtl/ldpc_decoder_core.sv | 403 ++++++++++++++++++++++++++++++++ rtl/ldpc_decoder_top.sv | 110 +++++++++ rtl/wishbone_interface.sv | 139 +++++++++++ 5 files changed, 1261 insertions(+) create mode 100644 CLAUDE.md create mode 100644 model/ldpc_sim.py create mode 100644 rtl/ldpc_decoder_core.sv create mode 100644 rtl/ldpc_decoder_top.sv create mode 100644 rtl/wishbone_interface.sv diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..323c0e1 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,135 @@ +# ldpc_optical - LDPC Decoder for Photon-Starved Optical Communication + +## Overview + +Low-Density Parity Check (LDPC) decoder targeting the Efabless chipIgnite program (SkyWater 130nm, Caravel harness). Designed for photon-starved optical communication links where received signals are soft probabilities (partial bits), not hard 0/1 decisions. 
+ +## Target Application + +- **Channel**: Photon-counting optical (Poisson channel, single-photon detectors) +- **Use case**: Deep space optical, underwater optical, or any photon-starved link +- **Input format**: Soft LLR (log-likelihood ratios) representing probability of 0 vs 1 +- **Code rate**: 1/8 (32 info bits -> 256 coded bits) for maximum coding gain +- **Decoding**: Offset min-sum (hardware-friendly approximation of belief propagation) + +## Architecture + +``` +Caravel SoC (Sky130, ~10 mm^2 user area) ++==============================================+ +| PicoRV32 (Caravel) | +| | | +| | Wishbone B4 | +| | | +| +---v--------------------------------------+| +| | ldpc_decoder_top || +| | | || +| | +-- wishbone_interface || +| | +-- llr_ram (256 x 6-bit) || +| | +-- msg_ram (1792 x 6-bit) || +| | +-- vn_update_array [Z=32] || +| | +-- cn_update_array [Z=32] || +| | +-- barrel_shifter_z32 || +| | +-- iteration_controller || +| | +-- syndrome_checker || +| | +-- hard_decision_out || +| +------------------------------------------+| ++==============================================+ +``` + +## Code Parameters + +| Parameter | Value | Notes | +|-----------|-------|-------| +| Code type | QC-LDPC | Quasi-cyclic for hardware efficiency | +| Rate | 1/8 (R = 0.125) | k=32 info bits, n=256 coded bits | +| Base matrix | 7 x 8 | M_BASE=7 rows, N_BASE=8 cols | +| Lifting factor Z | 32 | n = N_BASE * Z = 256 | +| Quantization | 6-bit signed | 1 sign + 5 magnitude | +| Max iterations | 30 | With early termination on syndrome check | +| Decoding algorithm | Offset min-sum | Offset ~0.5, ~0.2 dB from sum-product | +| Scheduling | Layered (row-serial) | ~2x faster convergence than flooding | + +## Fabrication Target + +| Parameter | Value | +|-----------|-------| +| Process | SkyWater 130nm (Sky130) | +| Platform | Efabless Caravel harness | +| User area | ~10.3 mm^2 (2.92 x 3.52 mm) | +| Target clock | 150 MHz (aggressive for Sky130) | +| Estimated area | ~1.5 mm^2 (decoder 
only) | +| Interface | Wishbone B4 slave | + +## Directory Structure + +- `rtl/` - SystemVerilog RTL sources +- `tb/` - Verilator testbenches +- `model/` - Python behavioral model (bit-exact reference) +- `data/` - H-matrix definitions, test vectors +- `openlane/` - OpenLane ASIC flow configuration (future) +- `docs/` - Design documentation + +## Channel Model (Photon-Counting Optical) + +The receiver uses single-photon detectors. Each time slot produces a photon count (or binary click/no-click). The decoder takes the sign bit of a belief as the hard decision (negative = bit 1), so the channel LLR fed to it is: + +``` +LLR(y) = log(P(y | bit=0) / P(y | bit=1)) +``` + +For binary detection (click/no-click): +- P(click | bit=1) = 1 - exp(-(lambda_s + lambda_b)) +- P(click | bit=0) = 1 - exp(-lambda_b) + +where lambda_s = signal photons/slot, lambda_b = background photons/slot. + +LLR computation is done in software (PicoRV32). The decoder only sees quantized 6-bit LLRs. + +## Simulation + +```bash +# Verilator lint check +verilator --lint-only -Wall rtl/*.sv + +# Run testbench +verilator --binary --timing -o sim_ldpc tb/tb_ldpc_decoder.sv rtl/*.sv +./obj_dir/sim_ldpc + +# Python behavioral model +cd model && python3 ldpc_sim.py +``` + +## Key Design Decisions + +1. **Soft input (LLR), not hard bits**: The whole point of LDPC for photon-starved channels. Hard-decision decoding would lose ~2-3 dB of coding gain. +2. **Rate 1/8**: Extreme redundancy for very low SNR. The AWGN Shannon limit at R=1/8 is Eb/N0 ~ -1.2 dB (the ultimate R->0 limit is -1.59 dB); practical LDPC can approach 0 to +1 dB. +3. **Min-sum over sum-product**: No multipliers or LUT-based tanh needed. Just comparators and adders. Critical for area on Sky130. +4. **Layered scheduling**: Process one row of base matrix at a time, updating messages immediately. Converges in ~half the iterations of flooding schedule. +5. **Z=32 parallelism**: 32 VN/CN processors working in parallel. Matches lifting factor for natural throughput. 
+ +## Performance Estimates + +- Codeword decode: ~630 cycles (30 iterations x 21 cycles/iter) +- At 150 MHz: ~238K codewords/sec +- Decoded throughput: 238K x 32 bits = 7.6 Mbps +- Latency: ~4.2 us per codeword +- Area: ~1.5 mm^2 at Sky130 (leaves ~8.5 mm^2 for additional blocks) + +## Register Map (Wishbone, byte-addressed offsets, 32-bit registers) + +| Offset | Name | R/W | Description | +|--------|------|-----|-------------| +| 0x00 | CTRL | R/W | [0]=start, [1]=early_term_en, [12:8]=max_iter | +| 0x04 | STATUS | R | [0]=busy, [1]=converged, [12:8]=iterations_used | +| 0x08 | CONFIG | R/W | [2:0]=code_sel (future: multiple H matrices) | +| 0x10-0x4F | LLR_IN | W | Channel LLRs, packed 5x6-bit per word | +| 0x50-0x57 | DECODED | R | 32 decoded bits (1 word) | +| 0x5C | SYNDROME_WT | R | Syndrome weight (0 = valid codeword) | + +## Notes + +- No multipliers in the entire design (add/compare/select only) +- 150 MHz is aggressive for Sky130 — may need to relax to 100 MHz depending on synthesis results +- Error floor at rate 1/8 expected around BER 10^-7 to 10^-9 — may need outer RS code for optical comm BER requirements +- Base matrix H must be carefully designed (PEG algorithm or density evolution optimization) diff --git a/model/ldpc_sim.py b/model/ldpc_sim.py new file mode 100644 index 0000000..1165f1e --- /dev/null +++ b/model/ldpc_sim.py @@ -0,0 +1,474 @@ +#!/usr/bin/env python3 +""" +LDPC Decoder Behavioral Model - Bit-Exact Reference for RTL Verification + +Implements offset min-sum decoding with layered scheduling for a +rate-1/8 QC-LDPC code (n=256, k=32, Z=32, base matrix 7x8). 
# LDPC decoder behavioral model - reference for RTL verification.
#
# Offset min-sum decoding with layered scheduling for a QC-LDPC code
# (n=256, k=32, Z=32, base matrix 7x8) over a Poisson photon-counting channel.
#
# Sign convention used everywhere in this module (and by the RTL, which takes
# the sign bit of a belief as the hard decision):
#     LLR = log(P(y | bit=0) / P(y | bit=1))   ->   negative LLR means bit = 1
#
# Usage:
#     python3 ldpc_sim.py                 # Quick demo
#     python3 ldpc_sim.py --gen-vectors   # Generate RTL test vectors
#     python3 ldpc_sim.py --sweep-snr     # SNR sweep for BER curve

import numpy as np
import argparse
import json
import os

# =============================================================================
# Code parameters
# =============================================================================
N_BASE = 8                    # base matrix columns
M_BASE = 7                    # base matrix rows
Z = 32                        # lifting factor
N = N_BASE * Z                # 256 codeword bits
K = Z                         # 32 info bits (rate 1/8)
M = M_BASE * Z                # 224 parity checks
Q_BITS = 6                    # quantization bits (signed)
Q_MAX = 2**(Q_BITS-1) - 1     # +31
Q_MIN = -(2**(Q_BITS-1))      # -32
OFFSET = 1                    # min-sum offset (integer)

# Base matrix: H_BASE[row][col] = cyclic shift, -1 = no connection
# This must match the RTL exactly!
H_BASE = np.array([
    [ 0,  5, 11, 17, 23, 29,  3,  9],
    [15,  0, 21,  7, 13, 19, 25, 31],
    [10, 20,  0, 30,  8, 16, 24,  2],
    [27, 14,  1,  0, 18,  6, 12, 22],
    [ 4, 28, 16, 12,  0, 26,  8, 20],
    [19,  9, 31, 25, 15,  0, 21, 11],
    [22, 26,  6, 14, 30, 10,  0, 18],
], dtype=np.int8)


def build_full_h_matrix():
    """Expand the QC base matrix into the full binary parity-check matrix H (M x N).

    Each non-negative entry of H_BASE becomes a Z x Z cyclic permutation block
    with a 1 at (z, (z + shift) % Z); negative entries become all-zero blocks.
    """
    H = np.zeros((M, N), dtype=np.int8)
    for r in range(M_BASE):
        for c in range(N_BASE):
            shift = H_BASE[r, c]
            if shift < 0:
                continue  # null sub-matrix
            for z in range(Z):
                H[r * Z + z, c * Z + (z + shift) % Z] = 1
    return H


def ldpc_encode(info_bits, H):
    """
    Systematic encoding: info bits are the first k = len(info_bits) codeword bits.

    With H = [H_i | H_p] (H_i = first k columns) and c = [info | parity],
    solve H_p * parity^T = H_i * info^T (mod 2) by dense GF(2) Gauss-Jordan
    elimination. Fine for small codes.

    H_p is allowed to be rank deficient: pivot columns are tracked explicitly
    and the free (non-pivot) parity bits are set to 0. Reading the solution
    by row index instead is only correct for a full-rank H_p.

    Args:
        info_bits: 1-D array of k bits (0/1).
        H: binary parity-check matrix of shape (m, n) with n > k.

    Returns:
        codeword: 1-D array of n bits, info bits first.

    Raises:
        ValueError: if no parity assignment satisfies H for these info bits
            (the first k columns are not an information set of the code).
    """
    info_bits = np.asarray(info_bits)
    n_checks, n_bits = H.shape
    k = info_bits.shape[0]
    n_parity = n_bits - k

    # Right-hand side: contribution of the info bits to every check.
    rhs = (H[:, :k].astype(np.int64) @ info_bits.astype(np.int64)) % 2

    # Augmented matrix [H_p | rhs]; uint8 so row addition mod 2 is a plain XOR.
    aug = np.hstack([H[:, k:], rhs.reshape(-1, 1)]).astype(np.uint8)

    pivot_cols = []
    pivot_row = 0
    for col in range(n_parity):
        if pivot_row == n_checks:
            break
        below = np.flatnonzero(aug[pivot_row:, col])
        if below.size == 0:
            continue  # no pivot in this column: free variable, stays 0
        src = pivot_row + int(below[0])
        if src != pivot_row:
            aug[[pivot_row, src]] = aug[[src, pivot_row]]
        # Clear this column in every other row (full Gauss-Jordan reduction).
        others = np.flatnonzero(aug[:, col])
        others = others[others != pivot_row]
        aug[others] ^= aug[pivot_row]
        pivot_cols.append(col)
        pivot_row += 1

    # Rows below the last pivot have all-zero coefficients; a 1 on their
    # right-hand side means the system has no solution.
    if np.any(aug[pivot_row:, -1]):
        raise ValueError(
            "Encoding failed: parity equations are inconsistent for these info bits"
        )

    parity = np.zeros(n_parity, dtype=np.int8)
    parity[np.asarray(pivot_cols, dtype=np.intp)] = aug[:pivot_row, -1]
    codeword = np.concatenate([info_bits, parity])

    # Verify
    check = H.astype(np.int64) @ codeword % 2
    if np.any(check):
        raise ValueError(f"Encoding failed: syndrome weight = {check.sum()}")

    return codeword


def poisson_channel(codeword, lam_s, lam_b):
    """
    Simulate a photon-counting optical channel.

    For each bit:
        bit=1: transmit pulse -> expected photons = lam_s + lam_b
        bit=0: no pulse       -> expected photons = lam_b (background only)

    The receiver counts photons (Poisson distributed). The returned LLR uses
    the decoder's convention, LLR = log(P(y|0) / P(y|1)), so that a negative
    LLR means "bit 1" and matches the sign-bit hard decision of the decoder:

        P(y|1) = (lam_s+lam_b)^y * exp(-(lam_s+lam_b)) / y!
        P(y|0) = lam_b^y * exp(-lam_b) / y!
        LLR    = lam_s - y * log((lam_s+lam_b)/lam_b)

    Args:
        codeword: array of transmitted bits (0/1).
        lam_s: mean signal photons per slot.
        lam_b: mean background photons per slot (0 allowed).

    Returns:
        (llr, photon_counts): float64 LLR per symbol and the raw counts.
    """
    codeword = np.asarray(codeword)

    # Expected photon counts
    lam = np.where(codeword == 1, lam_s + lam_b, lam_b)

    # Poisson photon counts
    photon_counts = np.random.poisson(lam)

    if lam_b > 0:
        llr = lam_s - photon_counts * np.log((lam_s + lam_b) / lam_b)
    else:
        # No background: any photon means bit 1 for certain (strong negative);
        # no photon favours bit 0 with log-ratio lam_s.
        llr = np.where(photon_counts > 0, -100.0, float(lam_s))

    return np.asarray(llr, dtype=np.float64), photon_counts


def quantize_llr(llr_float, q_bits=Q_BITS):
    """Quantize floating-point LLRs to signed q_bits-wide integers (int8).

    The scale maps an LLR of +/-5 onto the positive full-scale value; anything
    beyond is clipped to [-(2^(q_bits-1)), 2^(q_bits-1) - 1].
    """
    q_max = 2**(q_bits-1) - 1
    q_min = -(2**(q_bits-1))
    # For photon channel, LLRs are typically in [-5, +5] range
    scale = q_max / 5.0
    llr_scaled = np.round(llr_float * scale).astype(np.int32)
    return np.clip(llr_scaled, q_min, q_max).astype(np.int8)


def sat_add_q(a, b):
    """Saturating add in Q-bit signed arithmetic (result clipped to [Q_MIN, Q_MAX])."""
    s = int(a) + int(b)
    return max(Q_MIN, min(Q_MAX, s))


def sat_sub_q(a, b):
    """Saturating subtract in Q-bit signed arithmetic."""
    return sat_add_q(a, -b)


def min_sum_cn_update(msgs_in, offset=OFFSET):
    """
    Offset min-sum check node update.

    For each output j:
        sign      = XOR of all other input signs
        magnitude = min of all other magnitudes - offset (clamped to 0)

    Magnitudes above Q_MAX (only abs(Q_MIN)) never win the minimum search
    because both minima start at Q_MAX, i.e. they behave as Q_MAX.

    Args:
        msgs_in: list of signed integers (Q-bit), one per connected variable.
        offset: offset correction value.

    Returns:
        msgs_out: list of signed integers, same length as msgs_in.
    """
    dc = len(msgs_in)
    signs = [1 if m < 0 else 0 for m in msgs_in]
    mags = [abs(m) for m in msgs_in]
    sign_xor = sum(signs) % 2

    # Find min1, min2, and index of min1
    min1 = Q_MAX
    min2 = Q_MAX
    min1_idx = 0
    for i in range(dc):
        if mags[i] < min1:
            min2 = min1
            min1 = mags[i]
            min1_idx = i
        elif mags[i] < min2:
            min2 = mags[i]

    msgs_out = []
    for j in range(dc):
        mag = min2 if j == min1_idx else min1
        mag = max(0, mag - offset)       # offset correction
        sgn = sign_xor ^ signs[j]        # extrinsic sign
        msgs_out.append(-mag if sgn else mag)

    return msgs_out


def compute_syndrome_weight(hard_bits):
    """Compute syndrome weight = number of unsatisfied parity checks."""
    weight = 0
    for r in range(M_BASE):
        for z in range(Z):
            parity = 0
            for c in range(N_BASE):
                shift = int(H_BASE[r, c])
                if shift < 0:
                    continue
                parity ^= hard_bits[c * Z + (z + shift) % Z]
            if parity:
                weight += 1
    return weight


def decode_layered_min_sum(llr_q, max_iter=30, early_term=True):
    """
    Layered offset min-sum LDPC decoder (reference for RTL).

    Args:
        llr_q: quantized channel LLRs (N-length array of signed Q-bit integers,
            negative = bit 1)
        max_iter: maximum iterations
        early_term: stop when syndrome is zero

    Returns:
        decoded_bits: hard decisions for the first K (info) bits
        converged: True if syndrome == 0
        iterations: number of iterations performed
        syndrome_weight: final syndrome weight
    """
    # Initialize beliefs from channel LLRs
    beliefs = [int(x) for x in llr_q]

    # msg[row][col][z] = message from CN (row*Z+z) to VN at shifted position
    msg = [[[0] * Z for _ in range(N_BASE)] for _ in range(M_BASE)]

    for iteration in range(max_iter):
        # Process each base matrix row (layer)
        for row in range(M_BASE):
            # Only connected columns take part in the check node update; a null
            # block must not inject a zero-magnitude message into the minimum.
            shifts = [int(H_BASE[row, col]) for col in range(N_BASE)]
            cols = [col for col in range(N_BASE) if shifts[col] >= 0]

            # Step 1: VN->CN = belief - old CN->VN
            vn_to_cn = [[0] * Z for _ in range(N_BASE)]
            for col in cols:
                for z in range(Z):
                    bit_idx = col * Z + (z + shifts[col]) % Z
                    vn_to_cn[col][z] = sat_sub_q(beliefs[bit_idx], msg[row][col][z])

            # Step 2: CN min-sum update, one check node per z
            cn_to_vn = [[0] * Z for _ in range(N_BASE)]
            for z in range(Z):
                cn_outputs = min_sum_cn_update([vn_to_cn[col][z] for col in cols])
                for col, out in zip(cols, cn_outputs):
                    cn_to_vn[col][z] = out

            # Step 3: Update beliefs and store new messages
            for col in cols:
                for z in range(Z):
                    bit_idx = col * Z + (z + shifts[col]) % Z
                    new_msg = cn_to_vn[col][z]
                    beliefs[bit_idx] = sat_add_q(vn_to_cn[col][z], new_msg)
                    msg[row][col][z] = new_msg

        # Syndrome check
        hard = [1 if b < 0 else 0 for b in beliefs]
        syndrome_weight = compute_syndrome_weight(hard)

        if early_term and syndrome_weight == 0:
            return np.array(hard[:K]), True, iteration + 1, 0

    hard = [1 if b < 0 else 0 for b in beliefs]
    syndrome_weight = compute_syndrome_weight(hard)
    return np.array(hard[:K]), syndrome_weight == 0, max_iter, syndrome_weight


def run_ber_simulation(lam_s_db_range, lam_b=0.1, n_frames=1000, max_iter=30):
    """
    Run BER simulation over a range of signal photon counts.

    Args:
        lam_s_db_range: signal photons/slot in dB (10*log10(lam_s))
        lam_b: background photon rate
        n_frames: number of codewords per SNR point
        max_iter: decoder iterations

    Returns:
        list of dicts with keys lam_s_db, lam_s, ber, fer, avg_iter.
    """
    H = build_full_h_matrix()
    # Note: matrix_rank works over the reals, not GF(2).
    print(f"H matrix: {H.shape}, rank = {np.linalg.matrix_rank(H.astype(float))}")
    print(f"Code: ({N},{K}) rate {K/N:.3f}, Z={Z}")
    print(f"Background photons: {lam_b}")
    print(f"{'lam_s_dB':>10s} {'lam_s':>8s} {'BER':>10s} {'FER':>10s} {'avg_iter':>10s}")
    print("-" * 55)

    results = []
    for lam_s_db in lam_s_db_range:
        lam_s = 10**(lam_s_db / 10)
        bit_errors = 0
        frame_errors = 0
        total_bits = 0
        total_iter = 0

        for frame in range(n_frames):
            info = np.random.randint(0, 2, K)
            codeword = ldpc_encode(info, H)

            llr_float, _ = poisson_channel(codeword, lam_s, lam_b)
            llr_q = quantize_llr(llr_float)

            decoded, converged, iters, _ = decode_layered_min_sum(llr_q, max_iter)
            total_iter += iters

            errs = np.sum(decoded != info)
            bit_errors += errs
            total_bits += K
            if errs > 0:
                frame_errors += 1

        ber = bit_errors / total_bits if total_bits > 0 else 0
        fer = frame_errors / n_frames
        avg_iter = total_iter / n_frames

        print(f"{lam_s_db:10.1f} {lam_s:8.3f} {ber:10.6f} {fer:10.4f} {avg_iter:10.1f}")
        results.append({
            'lam_s_db': lam_s_db, 'lam_s': lam_s,
            'ber': ber, 'fer': fer, 'avg_iter': avg_iter
        })

    return results


def generate_test_vectors(n_vectors=10, lam_s=2.0, lam_b=0.1, max_iter=30):
    """Generate test vectors (stimulus + expected decoder results) for RTL verification."""
    H = build_full_h_matrix()
    vectors = []

    for i in range(n_vectors):
        info = np.random.randint(0, 2, K)
        codeword = ldpc_encode(info, H)
        llr_float, photons = poisson_channel(codeword, lam_s, lam_b)
        llr_q = quantize_llr(llr_float)
        decoded, converged, iters, syn_wt = decode_layered_min_sum(llr_q, max_iter)

        vec = {
            'index': i,
            'info_bits': info.tolist(),
            'codeword': codeword.tolist(),
            'photon_counts': photons.tolist(),
            'llr_float': llr_float.tolist(),
            'llr_quantized': llr_q.tolist(),
            'decoded_bits': decoded.tolist(),
            'converged': bool(converged),
            'iterations': iters,
            'syndrome_weight': syn_wt,
            'bit_errors': int(np.sum(decoded != info)),
        }
        vectors.append(vec)
        status = "PASS" if np.array_equal(decoded, info) else f"FAIL ({vec['bit_errors']} errs)"
        print(f" Vector {i}: {status} (iter={iters}, converged={converged})")

    return vectors


def main():
    """Command-line entry point: demo, test-vector generation, or BER sweep."""
    parser = argparse.ArgumentParser(description='LDPC Decoder Behavioral Model')
    parser.add_argument('--gen-vectors', action='store_true',
                        help='Generate RTL test vectors')
    parser.add_argument('--sweep-snr', action='store_true',
                        help='Run BER vs SNR sweep')
    parser.add_argument('--n-frames', type=int, default=1000,
                        help='Frames per SNR point (default: 1000)')
    parser.add_argument('--max-iter', type=int, default=30,
                        help='Max decoder iterations (default: 30)')
    parser.add_argument('--lam-s', type=float, default=2.0,
                        help='Signal photons/slot for test vectors (default: 2.0)')
    parser.add_argument('--lam-b', type=float, default=0.1,
                        help='Background photons/slot (default: 0.1)')
    parser.add_argument('--seed', type=int, default=42,
                        help='Random seed (default: 42)')
    args = parser.parse_args()

    np.random.seed(args.seed)

    # Output files go to ../data relative to this script; create it on demand
    # so a fresh checkout does not fail with FileNotFoundError.
    data_dir = os.path.join(os.path.dirname(__file__), '..', 'data')

    if args.gen_vectors:
        print(f"Generating test vectors (lam_s={args.lam_s}, lam_b={args.lam_b})...")
        vectors = generate_test_vectors(
            n_vectors=20, lam_s=args.lam_s, lam_b=args.lam_b,
            max_iter=args.max_iter
        )
        os.makedirs(data_dir, exist_ok=True)
        out_path = os.path.join(data_dir, 'test_vectors.json')
        with open(out_path, 'w') as f:
            json.dump(vectors, f, indent=2)
        print(f"\nWrote {len(vectors)} vectors to {out_path}")

    elif args.sweep_snr:
        print("BER Sweep: Poisson photon-counting channel, rate-1/8 QC-LDPC")
        lam_s_db_range = np.arange(-6, 10, 1.0)  # -6 to +9 dB
        results = run_ber_simulation(
            lam_s_db_range, lam_b=args.lam_b,
            n_frames=args.n_frames, max_iter=args.max_iter
        )
        os.makedirs(data_dir, exist_ok=True)
        out_path = os.path.join(data_dir, 'ber_results.json')
        with open(out_path, 'w') as f:
            json.dump(results, f, indent=2)
        print(f"\nWrote results to {out_path}")

    else:
        # Quick demo
        print("=== LDPC Rate-1/8 Decoder Demo ===")
        print(f"Code: ({N},{K}), rate {K/N:.3f}, Z={Z}")

        H = build_full_h_matrix()
        print(f"H matrix: {H.shape}, density: {H.sum()/(H.shape[0]*H.shape[1]):.4f}")

        info = np.random.randint(0, 2, K)
        print(f"\nInfo bits ({K}): {info}")

        codeword = ldpc_encode(info, H)
        print(f"Codeword ({N} bits), weight: {codeword.sum()}")

        # Simulate at a few photon levels
        for lam_s in [0.5, 1.0, 2.0, 5.0]:
            np.random.seed(args.seed)
            llr_float, photons = poisson_channel(codeword, lam_s, args.lam_b)
            llr_q = quantize_llr(llr_float)
            decoded, converged, iters, syn_wt = decode_layered_min_sum(llr_q)
            errors = np.sum(decoded != info)
            print(f" lam_s={lam_s:.1f}: decoded in {iters} iter, "
                  f"converged={converged}, errors={errors}")


if __name__ == '__main__':
    main()
+1,403 @@ +// LDPC Decoder Core - Layered Min-Sum with QC structure +// +// Layered scheduling processes one base-matrix row at a time. +// For each row, we: +// 1. Read VN beliefs for all Z columns connected to this row +// 2. Subtract old CN->VN messages to get VN->CN messages +// 3. Run CN min-sum update +// 4. Add new CN->VN messages back to VN beliefs +// 5. Write updated beliefs back +// +// This converges ~2x faster than flooding and needs only one message memory +// (CN->VN messages for current layer, overwritten each layer). + +module ldpc_decoder_core #( + parameter N_BASE = 8, + parameter M_BASE = 7, + parameter Z = 32, + parameter N = N_BASE * Z, + parameter M = M_BASE * Z, + parameter Q = 6, + parameter MAX_ITER = 30, + parameter DC = 8, // check node degree + parameter DV_MAX = 7 // max variable node degree +)( + input logic clk, + input logic rst_n, + + // Control + input logic start, + input logic early_term_en, + input logic [4:0] max_iter, + + // Channel LLRs (loaded before start) + input logic signed [Q-1:0] llr_in [N], + + // Status + output logic busy, + output logic converged, + output logic [4:0] iter_used, + + // Results + output logic [Z-1:0] decoded_bits, // first Z bits = info bits + output logic [7:0] syndrome_weight +); + + // ========================================================================= + // Base matrix H stored as shift values (-1 = no connection) + // H_BASE[row][col] = cyclic shift amount, or -1 if zero sub-matrix + // ========================================================================= + + // This is a placeholder base matrix for rate-1/8 QC-LDPC. + // Must be replaced with a properly designed matrix (PEG algorithm or + // density evolution optimized). All entries >= 0 means fully connected + // (regular dv=7, dc=8). For irregular codes, some entries would be -1. 
    //
    // TODO: Replace with optimized base matrix from model/design_h_matrix.py

    // Shift table of the QC base matrix, indexed [base row][base column].
    // Entries are 6-bit signed so that -1 can mark a null sub-matrix; every
    // entry assigned below is in 0..31, i.e. all blocks are connected.
    // The values are the same as H_BASE in model/ldpc_sim.py and must be kept
    // in sync with it.
    // NOTE(review): the table is only ever written by this initial block.
    // Confirm that the ASIC synthesis flow treats it as a constant ROM;
    // a localparam / always_comb constant may be safer.
    logic signed [5:0] H_BASE [M_BASE][N_BASE];

    // Shift values for 7x8 base matrix (Z=32, values 0..31, -1=null)
    // This is a regular (7,8) code - every entry is connected
    initial begin
        // Row 0
        H_BASE[0][0] = 0; H_BASE[0][1] = 5; H_BASE[0][2] = 11;
        H_BASE[0][3] = 17; H_BASE[0][4] = 23; H_BASE[0][5] = 29;
        H_BASE[0][6] = 3; H_BASE[0][7] = 9;
        // Row 1
        H_BASE[1][0] = 15; H_BASE[1][1] = 0; H_BASE[1][2] = 21;
        H_BASE[1][3] = 7; H_BASE[1][4] = 13; H_BASE[1][5] = 19;
        H_BASE[1][6] = 25; H_BASE[1][7] = 31;
        // Row 2
        H_BASE[2][0] = 10; H_BASE[2][1] = 20; H_BASE[2][2] = 0;
        H_BASE[2][3] = 30; H_BASE[2][4] = 8; H_BASE[2][5] = 16;
        H_BASE[2][6] = 24; H_BASE[2][7] = 2;
        // Row 3
        H_BASE[3][0] = 27; H_BASE[3][1] = 14; H_BASE[3][2] = 1;
        H_BASE[3][3] = 0; H_BASE[3][4] = 18; H_BASE[3][5] = 6;
        H_BASE[3][6] = 12; H_BASE[3][7] = 22;
        // Row 4
        H_BASE[4][0] = 4; H_BASE[4][1] = 28; H_BASE[4][2] = 16;
        H_BASE[4][3] = 12; H_BASE[4][4] = 0; H_BASE[4][5] = 26;
        H_BASE[4][6] = 8; H_BASE[4][7] = 20;
        // Row 5
        H_BASE[5][0] = 19; H_BASE[5][1] = 9; H_BASE[5][2] = 31;
        H_BASE[5][3] = 25; H_BASE[5][4] = 15; H_BASE[5][5] = 0;
        H_BASE[5][6] = 21; H_BASE[5][7] = 11;
        // Row 6
        H_BASE[6][0] = 22; H_BASE[6][1] = 26; H_BASE[6][2] = 6;
        H_BASE[6][3] = 14; H_BASE[6][4] = 30; H_BASE[6][5] = 10;
        H_BASE[6][6] = 0; H_BASE[6][7] = 18;
    end
[M_BASE][N_BASE] banks of Z entries each + // ========================================================================= + + logic signed [Q-1:0] msg_cn2vn [M_BASE][N_BASE][Z]; + + // ========================================================================= + // Decoder FSM + // ========================================================================= + + typedef enum logic [2:0] { + IDLE, + INIT, // Initialize beliefs from channel LLRs, zero messages + LAYER_READ, // Read Z beliefs for each of DC columns in current row + CN_UPDATE, // Run min-sum CN update on gathered messages + LAYER_WRITE, // Write updated beliefs and new CN->VN messages + SYNDROME, // Check syndrome after full iteration + DONE + } state_t; + + state_t state, state_next; + + logic [4:0] iter_cnt; + logic [2:0] row_idx; // current base matrix row (0..M_BASE-1) + logic [2:0] col_idx; // current column being read/written (0..N_BASE-1) + logic [4:0] effective_max_iter; + + // Working registers for current layer CN update + logic signed [Q-1:0] vn_to_cn [DC][Z]; // VN->CN messages for current row + logic signed [Q-1:0] cn_to_vn [DC][Z]; // new CN->VN messages (output of min-sum) + + // Syndrome check + logic [7:0] syndrome_cnt; + logic syndrome_ok; + + assign effective_max_iter = (max_iter == 0) ? 
MAX_ITER[4:0] : max_iter; + assign busy = (state != IDLE) && (state != DONE); + + // ========================================================================= + // State machine + // ========================================================================= + + always_ff @(posedge clk or negedge rst_n) begin + if (!rst_n) begin + state <= IDLE; + end else begin + state <= state_next; + end + end + + always_comb begin + state_next = state; + case (state) + IDLE: if (start) state_next = INIT; + INIT: state_next = LAYER_READ; + LAYER_READ: if (col_idx == N_BASE - 1) state_next = CN_UPDATE; + CN_UPDATE: state_next = LAYER_WRITE; + LAYER_WRITE: begin + if (col_idx == N_BASE - 1) begin + if (row_idx == M_BASE - 1) + state_next = SYNDROME; + else + state_next = LAYER_READ; // next row + end + end + SYNDROME: begin + if (syndrome_ok && early_term_en) + state_next = DONE; + else if (iter_cnt >= effective_max_iter) + state_next = DONE; + else + state_next = LAYER_READ; // next iteration + end + DONE: if (!start) state_next = IDLE; + default: state_next = IDLE; + endcase + end + + // ========================================================================= + // Datapath + // ========================================================================= + + always_ff @(posedge clk or negedge rst_n) begin + if (!rst_n) begin + iter_cnt <= '0; + row_idx <= '0; + col_idx <= '0; + converged <= 1'b0; + iter_used <= '0; + syndrome_weight <= '0; + end else begin + case (state) + IDLE: begin + iter_cnt <= '0; + row_idx <= '0; + col_idx <= '0; + converged <= 1'b0; + end + + INIT: begin + // Initialize beliefs from channel LLRs + for (int j = 0; j < N; j++) begin + beliefs[j] <= llr_in[j]; + end + // Zero all CN->VN messages + for (int r = 0; r < M_BASE; r++) + for (int c = 0; c < N_BASE; c++) + for (int z = 0; z < Z; z++) + msg_cn2vn[r][c][z] <= '0; + row_idx <= '0; + col_idx <= '0; + iter_cnt <= '0; + end + + LAYER_READ: begin + // For column col_idx in current row_idx: + // VN->CN = belief 
- old CN->VN message + // (belief already contains the sum of ALL CN->VN messages, + // so subtracting the current row's message gives the extrinsic) + for (int z = 0; z < Z; z++) begin + int bit_idx; + int shifted_z; + logic signed [Q-1:0] old_msg; + logic signed [Q-1:0] belief_val; + + shifted_z = (z + H_BASE[row_idx][col_idx]) % Z; + bit_idx = int'(col_idx) * Z + shifted_z; + old_msg = msg_cn2vn[row_idx][col_idx][z]; + belief_val = beliefs[bit_idx]; + + vn_to_cn[col_idx][z] <= sat_sub(belief_val, old_msg); + end + + if (col_idx == N_BASE - 1) + col_idx <= '0; + else + col_idx <= col_idx + 1; + end + + CN_UPDATE: begin + // Min-sum update for all Z check nodes in current row + // Each CN has DC=8 incoming messages (one per column) + for (int z = 0; z < Z; z++) begin + // Gather DC messages for check node z + logic signed [Q-1:0] msgs [DC]; + for (int d = 0; d < DC; d++) + msgs[d] = vn_to_cn[d][z]; + + // Min-sum: find min1, min2, sign product, min1 index + cn_min_sum(msgs, cn_to_vn[0][z], cn_to_vn[1][z], + cn_to_vn[2][z], cn_to_vn[3][z], + cn_to_vn[4][z], cn_to_vn[5][z], + cn_to_vn[6][z], cn_to_vn[7][z]); + end + col_idx <= '0; // prepare for LAYER_WRITE + end + + LAYER_WRITE: begin + // Write back: update beliefs and store new CN->VN messages + for (int z = 0; z < Z; z++) begin + int bit_idx; + int shifted_z; + logic signed [Q-1:0] new_msg; + logic signed [Q-1:0] old_extrinsic; + + shifted_z = (z + H_BASE[row_idx][col_idx]) % Z; + bit_idx = int'(col_idx) * Z + shifted_z; + new_msg = cn_to_vn[col_idx][z]; + old_extrinsic = vn_to_cn[col_idx][z]; + + // belief = extrinsic (VN->CN) + new CN->VN message + beliefs[bit_idx] <= sat_add(old_extrinsic, new_msg); + + // Store new message for next iteration + msg_cn2vn[row_idx][col_idx][z] <= new_msg; + end + + if (col_idx == N_BASE - 1) begin + col_idx <= '0; + if (row_idx == M_BASE - 1) + row_idx <= '0; + else + row_idx <= row_idx + 1; + end else begin + col_idx <= col_idx + 1; + end + end + + SYNDROME: begin + // Check H 
* c_hat == 0 (compute syndrome weight) + syndrome_cnt = '0; + for (int r = 0; r < M_BASE; r++) begin + for (int z = 0; z < Z; z++) begin + logic parity; + parity = 1'b0; + for (int c = 0; c < N_BASE; c++) begin + int shifted_z, bit_idx; + shifted_z = (z + H_BASE[r][c]) % Z; + bit_idx = c * Z + shifted_z; + parity = parity ^ beliefs[bit_idx][Q-1]; // sign bit = hard decision + end + if (parity) syndrome_cnt = syndrome_cnt + 1; + end + end + syndrome_weight <= syndrome_cnt; + syndrome_ok = (syndrome_cnt == 0); + + iter_cnt <= iter_cnt + 1; + iter_used <= iter_cnt + 1; + if (syndrome_ok) converged <= 1'b1; + end + + DONE: begin + // Output decoded info bits (first Z=32 bits, column 0) + for (int z = 0; z < Z; z++) + decoded_bits[z] <= beliefs[z][Q-1]; // sign bit = hard decision + end + endcase + end + end + + // ========================================================================= + // Min-sum CN update function + // ========================================================================= + + // Offset min-sum for DC=8 inputs + // For each output j: sign = XOR of all other signs, magnitude = min of all other magnitudes - offset + task automatic cn_min_sum( + input logic signed [Q-1:0] in [DC], + output logic signed [Q-1:0] out0, out1, out2, out3, + out4, out5, out6, out7 + ); + logic [DC-1:0] signs; + logic [Q-2:0] mags [DC]; + logic sign_xor; + logic [Q-2:0] min1, min2; + int min1_idx; + logic signed [Q-1:0] outs [DC]; + + // Extract signs and magnitudes + sign_xor = 1'b0; + for (int i = 0; i < DC; i++) begin + signs[i] = in[i][Q-1]; + mags[i] = in[i][Q-1] ? 
(~in[i][Q-2:0] + 1) : in[i][Q-2:0]; + sign_xor = sign_xor ^ signs[i]; + end + + // Find two smallest magnitudes + min1 = {(Q-1){1'b1}}; + min2 = {(Q-1){1'b1}}; + min1_idx = 0; + for (int i = 0; i < DC; i++) begin + if (mags[i] < min1) begin + min2 = min1; + min1 = mags[i]; + min1_idx = i; + end else if (mags[i] < min2) begin + min2 = mags[i]; + end + end + + // Compute extrinsic outputs with offset correction + for (int j = 0; j < DC; j++) begin + logic [Q-2:0] mag_out; + logic sign_out; + + mag_out = (j == min1_idx) ? min2 : min1; + // Offset correction (subtract 1 in integer representation) + mag_out = (mag_out > 1) ? (mag_out - 1) : {(Q-1){1'b0}}; + sign_out = sign_xor ^ signs[j]; + + outs[j] = sign_out ? (~{1'b0, mag_out} + 1) : {1'b0, mag_out}; + end + + out0 = outs[0]; out1 = outs[1]; out2 = outs[2]; out3 = outs[3]; + out4 = outs[4]; out5 = outs[5]; out6 = outs[6]; out7 = outs[7]; + endtask + + // ========================================================================= + // Saturating arithmetic helpers + // ========================================================================= + + function automatic logic signed [Q-1:0] sat_add( + logic signed [Q-1:0] a, logic signed [Q-1:0] b + ); + logic signed [Q:0] sum; + sum = {a[Q-1], a} + {b[Q-1], b}; // sign-extend and add + if (sum > $signed({1'b0, {(Q-1){1'b1}}})) + return {1'b0, {(Q-1){1'b1}}}; // +max + else if (sum < $signed({1'b1, {(Q-1){1'b0}}})) + return {1'b1, {(Q-1){1'b0}}}; // -max + else + return sum[Q-1:0]; + endfunction + + function automatic logic signed [Q-1:0] sat_sub( + logic signed [Q-1:0] a, logic signed [Q-1:0] b + ); + return sat_add(a, -b); + endfunction + +endmodule diff --git a/rtl/ldpc_decoder_top.sv b/rtl/ldpc_decoder_top.sv new file mode 100644 index 0000000..638ae4a --- /dev/null +++ b/rtl/ldpc_decoder_top.sv @@ -0,0 +1,110 @@ +// LDPC Decoder Top - QC-LDPC Rate 1/8 for Photon-Starved Optical Communication +// Target: Efabless chipIgnite (SkyWater 130nm, Caravel harness) +// +// Code 
parameters:
//   Rate 1/8, n=256 coded bits, k=32 info bits
//   QC-LDPC with 7x8 base matrix, lifting factor Z=32
//   Offset min-sum decoding, layered scheduling
//
// Input:  6-bit signed LLRs (log-likelihood ratios from photon detector)
// Output: 32 decoded information bits + convergence status

module ldpc_decoder_top #(
    parameter N_BASE   = 8,            // base matrix columns
    parameter M_BASE   = 7,            // base matrix rows
    parameter Z        = 32,           // lifting factor
    parameter N        = N_BASE * Z,   // codeword length = 256
    parameter K        = Z,            // info bits = 32 (rate 1/8)
    parameter M        = M_BASE * Z,   // parity checks = 224
    parameter Q        = 6,            // LLR quantization bits (signed)
    parameter MAX_ITER = 30,           // maximum decoding iterations
    parameter DC       = 8,            // check node degree (= N_BASE for regular)
    parameter DV_MAX   = 7             // max variable node degree (= M_BASE for regular)
)(
    input  logic        clk,
    input  logic        rst_n,

    // Wishbone B4 pipelined slave interface
    input  logic        wb_cyc_i,
    input  logic        wb_stb_i,
    input  logic        wb_we_i,
    input  logic [7:0]  wb_adr_i,      // byte address (256 bytes address space)
    input  logic [31:0] wb_dat_i,
    output logic [31:0] wb_dat_o,
    output logic        wb_ack_o,

    // Interrupt (active high, directly to Caravel IRQ)
    output logic        irq_o
);

    // This module is purely structural: it wires the Wishbone register block
    // (u_wb) to the decoder core (u_core) and contains no logic of its own.

    // -------------------------------------------------------------------------
    // Host -> core: these are outputs of wishbone_interface
    // -------------------------------------------------------------------------
    logic                core_start;          // start pulse: begin decoding
    logic                core_early_term_en;  // enable early termination
    logic [4:0]          core_max_iter;       // max iterations (0 = use MAX_ITER)
    logic signed [Q-1:0] llr_buf [N];         // channel LLRs written by the host

    // -------------------------------------------------------------------------
    // Core -> host: these are inputs of wishbone_interface
    // -------------------------------------------------------------------------
    logic                core_busy;
    logic                core_converged;
    logic [4:0]          core_iter_used;
    logic [K-1:0]        core_decoded;        // decoded information bits
    logic [7:0]          core_syndrome_wt;

    // -------------------------------------------------------------------------
    // Decoder core
    // -------------------------------------------------------------------------
    ldpc_decoder_core #(
        .N_BASE   (N_BASE),
        .M_BASE   (M_BASE),
        .Z        (Z),
        .Q        (Q),
        .MAX_ITER (MAX_ITER),
        .DC       (DC),
        .DV_MAX   (DV_MAX)
    ) u_core (
        .clk            (clk),
        .rst_n          (rst_n),
        // control and data in
        .start          (core_start),
        .early_term_en  (core_early_term_en),
        .max_iter       (core_max_iter),
        .llr_in         (llr_buf),
        // status and data out
        .busy           (core_busy),
        .converged      (core_converged),
        .iter_used      (core_iter_used),
        .decoded_bits   (core_decoded),
        .syndrome_weight(core_syndrome_wt)
    );

    // -------------------------------------------------------------------------
    // Wishbone register interface (also generates irq_o)
    // -------------------------------------------------------------------------
    wishbone_interface #(
        .N(N), .K(K), .Q(Q)
    ) u_wb (
        .clk            (clk),
        .rst_n          (rst_n),
        // bus side
        .wb_cyc_i       (wb_cyc_i),
        .wb_stb_i       (wb_stb_i),
        .wb_we_i        (wb_we_i),
        .wb_adr_i       (wb_adr_i),
        .wb_dat_i       (wb_dat_i),
        .wb_dat_o       (wb_dat_o),
        .wb_ack_o       (wb_ack_o),
        // towards the core
        .ctrl_start     (core_start),
        .ctrl_early_term(core_early_term_en),
        .ctrl_max_iter  (core_max_iter),
        .llr_input      (llr_buf),
        // from the core
        .stat_busy      (core_busy),
        .stat_converged (core_converged),
        .stat_iter_used (core_iter_used),
        .decoded_bits   (core_decoded),
        .syndrome_weight(core_syndrome_wt),
        // interrupt
        .irq_o          (irq_o)
    );

endmodule
diff --git a/rtl/wishbone_interface.sv b/rtl/wishbone_interface.sv
new file mode 100644
index 0000000..1163253
--- /dev/null
+++ b/rtl/wishbone_interface.sv
@@ -0,0 +1,139 @@
// Wishbone B4 slave interface for LDPC decoder
// Compatible with Caravel SoC Wishbone interconnect
//
// Register map (byte-addressed):
//   0x00       CTRL     R/W  [0]=start (auto-clear), [1]=early_term_en, [12:8]=max_iter
//   0x04       STATUS   R    [0]=busy, [1]=converged, [12:8]=iterations_used, [23:16]=syndrome_wt
//   0x10-0xDF  LLR      W    Channel LLRs packed 5x6-bit per 32-bit word (52 words for 256 LLRs)
//   0x50       DECODED  R    32 decoded info bits
//   0x54       VERSION  R    Version/ID register
//
// The write-only LLR window (52 words = 208 bytes starting at 0x10) spans the
// addresses of the read-only DECODED and VERSION registers: a write in that
// range stores LLRs, a read at 0x50/0x54 returns DECODED/VERSION.

module wishbone_interface #(
    parameter N = 256,
    parameter K = 32,
    parameter Q = 6
)(
    input  logic                clk,
    input  logic                rst_n,

    // Wishbone slave
    input  logic                wb_cyc_i,
    input  logic                wb_stb_i,
    input  logic
wb_we_i,
    input  logic [7:0]          wb_adr_i,
    input  logic [31:0]         wb_dat_i,
    output logic [31:0]         wb_dat_o,
    output logic                wb_ack_o,

    // To/from decoder core
    output logic                ctrl_start,
    output logic                ctrl_early_term,
    output logic [4:0]          ctrl_max_iter,
    input  logic                stat_busy,
    input  logic                stat_converged,
    input  logic [4:0]          stat_iter_used,
    output logic signed [Q-1:0] llr_input [N],
    input  logic [K-1:0]        decoded_bits,
    input  logic [7:0]          syndrome_weight,

    // Interrupt
    output logic                irq_o
);

    // Version/ID word returned by a read of the VERSION register.
    // The previous literal used the character 'L', which is not a hexadecimal
    // digit, so the file did not compile. 0x1D stands in for the "LD" tag.
    // NOTE(review): confirm the exact ID value expected by host software.
    localparam logic [31:0] VERSION_ID = 32'h1D01_0001;  // LDPC v0.1 build 1

    // Register byte addresses
    localparam logic [7:0] ADDR_CTRL     = 8'h00;
    localparam logic [7:0] ADDR_STATUS   = 8'h04;
    localparam logic [7:0] ADDR_LLR_BASE = 8'h10;  // first LLR word
    localparam logic [7:0] ADDR_LLR_END  = 8'hE0;  // one past the last LLR word (52 words)
    localparam logic [7:0] ADDR_DECODED  = 8'h50;
    localparam logic [7:0] ADDR_VERSION  = 8'h54;

    // Number of Q-bit LLRs carried by one 32-bit bus word
    localparam int LLRS_PER_WORD = 5;

    // =========================================================================
    // Wishbone handshake
    // =========================================================================

    // A request is present whenever both cyc and stb are asserted.
    logic wb_valid;
    assign wb_valid = wb_cyc_i && wb_stb_i;

    // Registered acknowledge: raised the cycle after a request is seen and
    // dropped again on the following cycle, so each ack lasts one clock.
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            wb_ack_o <= 1'b0;
        else
            wb_ack_o <= wb_valid && !wb_ack_o;  // single-cycle ack
    end

    // =========================================================================
    // Control register
    // =========================================================================

    logic       start_pending;
    logic       early_term_reg;
    logic [4:0] max_iter_reg;

    // start_pending is set by a CTRL write with bit 0 = 1 and cleared once
    // ctrl_start has fired. A CTRL write in the same cycle takes priority
    // over the clear because it is the later assignment.
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            start_pending  <= 1'b0;
            early_term_reg <= 1'b1;  // early termination on by default
            max_iter_reg   <= 5'd0;  // 0 = use MAX_ITER default
        end else begin
            if (ctrl_start)
                start_pending <= 1'b0;

            // Qualify with !wb_ack_o so a request is committed only once.
            if (wb_valid && wb_we_i && !wb_ack_o && wb_adr_i == ADDR_CTRL) begin
                start_pending  <= wb_dat_i[0];
                early_term_reg <= wb_dat_i[1];
                max_iter_reg   <= wb_dat_i[12:8];
            end
        end
    end

    // A start requested while the core is busy stays pending and is issued
    // as soon as stat_busy is low.
    assign ctrl_start      = start_pending && !stat_busy;
    assign ctrl_early_term = early_term_reg;
    assign ctrl_max_iter   = max_iter_reg;

    // =========================================================================
    // LLR input: pack 5 LLRs per 32-bit word
    // Word at offset 0x10 + 4*i contains LLRs [5*i] through [5*i+4]
    // Bits [5:0] = LLR[5*i], [11:6] = LLR[5*i+1], ... [29:24] = LLR[5*i+4]
    // 52 words cover 260 LLRs (256 used, 4 padding)
    // =========================================================================

    // The LLR buffer has no reset; it holds whatever the host last wrote.
    always_ff @(posedge clk) begin
        if (wb_valid && wb_we_i && !wb_ack_o) begin
            if (wb_adr_i >= ADDR_LLR_BASE && wb_adr_i < ADDR_LLR_END) begin
                int word_idx;
                word_idx = (wb_adr_i - ADDR_LLR_BASE) >> 2;  // byte offset -> word index
                for (int p = 0; p < LLRS_PER_WORD; p++) begin
                    int llr_idx;
                    llr_idx = word_idx * LLRS_PER_WORD + p;
                    // Skip the padding slots of the last word.
                    if (llr_idx < N)
                        llr_input[llr_idx] <= wb_dat_i[p*Q +: Q];
                end
            end
        end
    end

    // =========================================================================
    // Read mux
    // =========================================================================

    // Combinational decode of the address; unmapped addresses read as zero.
    always_comb begin
        wb_dat_o = 32'h0;
        case (wb_adr_i)
            // [12:8]=max_iter, [1]=early_term_en, [0]=start pending
            ADDR_CTRL:    wb_dat_o = {19'b0, max_iter_reg, 6'b0, early_term_reg, start_pending};
            // [23:16]=syndrome weight, [12:8]=iterations used, [1]=converged, [0]=busy
            ADDR_STATUS:  wb_dat_o = {8'b0, syndrome_weight, 3'b0, stat_iter_used, 6'b0, stat_converged, stat_busy};
            ADDR_DECODED: wb_dat_o = decoded_bits;
            ADDR_VERSION: wb_dat_o = VERSION_ID;
            default:      wb_dat_o = 32'h0;
        endcase
    end

    // =========================================================================
    // Interrupt: assert when decode completes (busy falls)
    // =========================================================================

    // busy_d1 is stat_busy delayed by one clock; irq_o is registered from the
    // comparison and is therefore high for a single clock per falling edge.
    logic busy_d1;
    always_ff @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            busy_d1 <= 1'b0;
            irq_o   <= 1'b0;
        end else begin
            busy_d1 <= stat_busy;
            // Pulse IRQ on falling edge of busy
            irq_o   <= busy_d1 && !stat_busy;
        end
    end

endmodule