Ridge Regression Example — Tikhonov Regularization

Demonstrates linearsystems.ridge_regression() on an ill-conditioned multicollinear regression problem. Regularization strength \(\alpha\) is swept over a log scale, the L-curve (solution norm vs residual norm) is plotted to identify the optimal \(\alpha\), and coefficient paths are compared against the unregularized least-squares solution from linearsystems.lsqrr().

Example Code

"""IMSL Ridge Regression example: Tikhonov regularization for ill-conditioned systems.

Generates an ill-conditioned multicollinear regression problem and demonstrates
ridge_regression() with varying regularization strengths α. Shows the L-curve
(solution norm vs residual norm) and coefficient paths, and compares with
standard least-squares (lsqrr, α=0 limit).

Outputs:
- Optimal α and coefficients printed to stdout
- SVG L-curve + coefficient paths saved to
  test_output/example_imsl_ridge_regression.svg
"""

from __future__ import annotations

from pathlib import Path
from typing import Dict, List

import matplotlib.pyplot as plt
import numpy as np

from linearsystems import ridge_regression, lsqrr


def _make_ill_conditioned(
    m: int = 80,
    n: int = 5,
    noise: float = 0.05,
    seed: int = 42,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Generate an ill-conditioned regression system with multicollinear features.

    The last two columns are nearly collinear with the first, which inflates
    the condition number and makes plain least squares unstable.

    Bug fix: ``n`` was previously accepted but ignored — the function always
    produced exactly 5 features regardless of the argument. It now builds
    ``n - 2`` independent base columns plus the two nearly-collinear copies.
    For the default ``n=5`` the random draw order, ``x_true``, and all outputs
    are byte-identical to the original implementation.

    Args:
        m (int): Number of observations. Defaults to 80.
        n (int): Number of features; must be at least 3. Defaults to 5.
        noise (float): Noise level added to observations. Defaults to ``0.05``.
        seed (int): Random seed. Defaults to 42.

    Returns:
        tuple[np.ndarray, np.ndarray, np.ndarray]:
            ``(A, b, x_true)`` where *A* is (m, n), *b* is (m,), and
            *x_true* is (n,) — the ground-truth coefficient vector.

    Raises:
        ValueError: If ``n < 3`` (the construction needs at least one base
            column plus the two collinear copies).
    """
    if n < 3:
        raise ValueError(f"n must be >= 3, got {n}")
    rng = np.random.default_rng(seed)
    base = rng.standard_normal((m, n - 2))
    # Introduce multicollinearity: the last two columns almost equal column 0
    col_hi = base[:, 0] + 0.01 * rng.standard_normal(m)
    col_lo = base[:, 0] - 0.01 * rng.standard_normal(m)
    a = np.column_stack([base, col_hi, col_lo])
    # Normalise columns to unit Euclidean length
    a = a / np.linalg.norm(a, axis=0)
    if n == 5:
        # Preserve the exact ground truth used by the original demo
        x_true = np.array([1.0, -2.0, 0.5, 3.0, -1.5])
    else:
        # Deterministic, sign-alternating coefficients for other sizes
        x_true = np.array([(-1.0) ** i * (1.0 + 0.5 * i) for i in range(n)])
    b = a @ x_true + noise * rng.standard_normal(m)
    return a, b, x_true


def run_demo_imsl_ridge_regression() -> Dict[str, object]:
    """Run the Ridge Regression demo showing the L-curve and coefficient paths.

    Sweeps the regularization strength α over a log scale, solving the ridge
    problem at each value with ``ridge_regression()``, then compares against
    an unregularized least-squares solve via ``lsqrr()``. The optimal α is
    chosen at the L-curve corner — the point of maximum curvature of the
    log-log (residual norm, solution norm) trade-off curve.

    Side effects:
        - Prints a coefficient comparison table and norms to stdout.
        - Saves an SVG figure (L-curve + coefficient paths) to
          ``test_output/example_imsl_ridge_regression.svg``.

    Returns:
        Dict[str, object]: Result dict with keys ``alpha_values`` (list),
            ``optimal_alpha`` (float), ``x_ridge_optimal`` (np.ndarray),
            ``residual_unregularized`` (float), and ``plot_path`` (str).
    """
    a, b, x_true = _make_ill_conditioned()
    m, n = a.shape  # NOTE(review): m is unpacked but only n is used below

    # Alpha sweep (log scale): 50 values spanning 1e-4 .. 1e2
    alphas: List[float] = list(np.logspace(-4, 2, 50))

    sol_norms: List[float] = []  # ‖x(α)‖ at each sweep point
    res_norms: List[float] = []  # ‖A·x(α) − b‖ at each sweep point
    coeff_paths: List[np.ndarray] = []  # full coefficient vector at each α

    for alpha in alphas:
        # NOTE(review): assumes ridge_regression(a, b, alpha=...) returns the
        # coefficient vector minimizing ‖Ax−b‖² + α·‖x‖² — confirm against the
        # linearsystems module documentation.
        x_r = ridge_regression(a, b, alpha=alpha)
        sol_norms.append(float(np.linalg.norm(x_r)))
        res_norms.append(float(np.linalg.norm(a @ x_r - b)))
        coeff_paths.append(x_r)

    # Unregularized LS via lsqrr — returns (x, residuals, rank, sv)
    x_ls, _resids, _rank, _sv = lsqrr(a, b)
    res_unreg = float(np.linalg.norm(a @ x_ls - b))

    # Identify L-curve corner: maximise curvature (Menger curvature proxy).
    # The curve is parameterized by sweep index, so the np.gradient calls
    # below differentiate with respect to that index (unit spacing).
    log_res = np.log10(res_norms)
    log_sol = np.log10(sol_norms)
    # Approximate curvature via finite differences
    d1_r = np.gradient(log_res)
    d2_r = np.gradient(d1_r)
    d1_s = np.gradient(log_sol)
    d2_s = np.gradient(d1_s)
    # κ = |x'·y'' − y'·x''| / (x'² + y'²)^{3/2}; the 1e-30 term guards
    # against division by zero when both derivatives vanish.
    curvature = np.abs(d1_r * d2_s - d1_s * d2_r) / (d1_r**2 + d1_s**2 + 1e-30) ** 1.5
    idx_opt = int(np.argmax(curvature))
    alpha_opt = alphas[idx_opt]
    x_opt = coeff_paths[idx_opt]

    # Coefficient matrix for path plot
    coeff_matrix = np.array(coeff_paths)   # shape (n_alpha, n)

    # --- Console report: condition number, optimal α, coefficient table ---
    print("\nIMSL Ridge Regression: ill-conditioned 80×5 multicollinear system")
    print("-" * 65)
    print(f"Condition number of A: {np.linalg.cond(a):.2e}")
    print(f"Optimal α (L-curve corner): {alpha_opt:.4e}")
    print(f"\n{'Coefficient':<15} {'x_true':>10} {'x_ridge(α_opt)':>16} {'x_LS (unregul.)':>18}")
    print("-" * 65)
    for i in range(n):
        print(f"{'x[' + str(i) + ']':<15} {x_true[i]:>10.4f} {x_opt[i]:>16.4f} {x_ls[i]:>18.4f}")
    print("-" * 65)
    print(f"\n‖Ax−b‖ unregularized (lsqrr):  {res_unreg:.4e}")
    print(f"‖Ax−b‖ ridge optimal:           {res_norms[idx_opt]:.4e}")
    print(f"‖x‖   unregularized:            {np.linalg.norm(x_ls):.4e}")
    print(f"‖x‖   ridge optimal:            {sol_norms[idx_opt]:.4e}")

    # Ensure the output directory exists before saving the figure
    output_dir = Path("test_output")
    output_dir.mkdir(parents=True, exist_ok=True)
    plot_path = output_dir / "example_imsl_ridge_regression.svg"

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Left: L-curve (log-log), with the corner and the unregularized solution
    ax = axes[0]
    ax.loglog(res_norms, sol_norms, "b-", linewidth=1.5, label="Ridge path")
    ax.loglog(res_norms[idx_opt], sol_norms[idx_opt], "ro", markersize=10,
              label=f"Optimal α={alpha_opt:.2e}", zorder=5)
    ax.loglog(res_unreg, np.linalg.norm(x_ls), "k*", markersize=12,
              label="lsqrr (α→0)", zorder=5)
    ax.set_xlabel("Residual norm ‖Ax−b‖")
    ax.set_ylabel("Solution norm ‖x‖")
    ax.set_title("L-Curve: Solution vs Residual Norm")
    ax.legend(fontsize=9)
    ax.grid(True, which="both", alpha=0.3)

    # Right: coefficient paths vs α, with the chosen α marked
    ax2 = axes[1]
    for i in range(n):
        ax2.semilogx(alphas, coeff_matrix[:, i], label=f"x[{i}]")
    ax2.axvline(alpha_opt, color="red", linestyle="--", linewidth=1.5,
                label=f"α_opt={alpha_opt:.2e}")
    ax2.set_xlabel("Regularization α")
    ax2.set_ylabel("Coefficient value")
    ax2.set_title("Coefficient Paths vs α")
    ax2.legend(fontsize=8)
    ax2.grid(True, which="both", alpha=0.3)

    fig.tight_layout()
    fig.savefig(plot_path, format="svg")
    plt.close(fig)  # release the figure so repeated runs don't accumulate
    print(f"\nPlot saved to: {plot_path}")

    return {
        "alpha_values": alphas,
        "optimal_alpha": alpha_opt,
        "x_ridge_optimal": x_opt,
        "residual_unregularized": res_unreg,
        "plot_path": str(plot_path),
    }


if __name__ == "__main__":
    # Script entry point: runs the demo (prints the report, writes the SVG).
    run_demo_imsl_ridge_regression()

Plot Output

L-curve and coefficient paths for ridge regression

Left: L-curve in log–log space. The red dot marks the corner (optimal \(\alpha\)) and the star marks the unregularized lsqrr solution. Right: coefficient paths showing how each \(x_i\) evolves as \(\alpha\) increases from 0.

Console Output

IMSL Ridge Regression: ill-conditioned 80×5 multicollinear system
-----------------------------------------------------------------
Condition number of A: 2.50e+02
Optimal α (L-curve corner): 9.1030e-03

Coefficient         x_true   x_ridge(α_opt)    x_LS (unregul.)
-----------------------------------------------------------------
x[0]                1.0000           0.8846            10.0076
x[1]               -2.0000          -1.9790            -2.0003
x[2]                0.5000           0.4347             0.4218
x[3]                3.0000           0.8688             1.2853
x[4]               -1.5000           0.7577            -8.7721
-----------------------------------------------------------------

‖Ax−b‖ unregularized (lsqrr):  4.3854e-01
‖Ax−b‖ ridge optimal:           4.5131e-01
‖x‖   unregularized:            1.3525e+01
‖x‖   ridge optimal:            2.4934e+00

Plot saved to: test_output\example_imsl_ridge_regression.svg