Ridge Regression Example — Tikhonov Regularization¶
Demonstrates linearsystems.ridge_regression() on an ill-conditioned
multicollinear regression problem. Regularization strength \(\alpha\) is
swept over a log scale, the L-curve (solution norm vs residual norm) is plotted
to identify the optimal \(\alpha\), and coefficient paths are compared
against the unregularized least-squares solution from linearsystems.lsqrr().
Example Code¶
"""IMSL Ridge Regression example: Tikhonov regularization for ill-conditioned systems.
Generates an ill-conditioned multicollinear regression problem and demonstrates
ridge_regression() with varying regularization strengths α. Shows the L-curve
(solution norm vs residual norm) and coefficient paths, and compares with
standard least-squares (lsqrr, α=0 limit).
Outputs:
- Optimal α and coefficients printed to stdout
- SVG L-curve + coefficient paths saved to
test_output/example_imsl_ridge_regression.svg
"""
from __future__ import annotations
from pathlib import Path
from typing import Dict, List
import matplotlib.pyplot as plt
import numpy as np
from linearsystems import ridge_regression, lsqrr
def _make_ill_conditioned(
m: int = 80,
n: int = 5,
noise: float = 0.05,
seed: int = 42,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
"""Generate an ill-conditioned regression system with multicollinear features.
The last two columns are nearly collinear with the first.
Args:
m (int): Number of observations. Defaults to 80.
n (int): Number of features. Defaults to 5.
noise (float): Noise level added to observations. Defaults to ``0.05``.
seed (int): Random seed. Defaults to 42.
Returns:
tuple[np.ndarray, np.ndarray, np.ndarray]:
``(A, b, x_true)`` where *A* is (m, n), *b* is (m,), and
*x_true* is (n,) — the ground-truth coefficient vector.
"""
rng = np.random.default_rng(seed)
base = rng.standard_normal((m, 3))
# Introduce multicollinearity: columns 3 and 4 almost equal column 0
col3 = base[:, 0] + 0.01 * rng.standard_normal(m)
col4 = base[:, 0] - 0.01 * rng.standard_normal(m)
a = np.column_stack([base, col3, col4])
# Normalise columns
a = a / np.linalg.norm(a, axis=0)
x_true = np.array([1.0, -2.0, 0.5, 3.0, -1.5])
b = a @ x_true + noise * rng.standard_normal(m)
return a, b, x_true
def _l_curve_corner(res_norms: List[float], sol_norms: List[float]) -> int:
    """Return the index of the L-curve corner (maximum-curvature point).

    Works in log10–log10 space and approximates the Menger curvature of the
    parametric curve (log residual norm, log solution norm) with central
    finite differences via ``np.gradient``.

    Args:
        res_norms (List[float]): Residual norms ``‖Ax−b‖`` per alpha.
        sol_norms (List[float]): Solution norms ``‖x‖`` per alpha.

    Returns:
        int: Index of the point of maximum curvature along the sweep.
    """
    log_res = np.log10(res_norms)
    log_sol = np.log10(sol_norms)
    d1_r = np.gradient(log_res)
    d2_r = np.gradient(d1_r)
    d1_s = np.gradient(log_sol)
    d2_s = np.gradient(d1_s)
    # |x'y'' − y'x''| / (x'^2 + y'^2)^{3/2}; tiny epsilon avoids divide-by-zero.
    curvature = np.abs(d1_r * d2_s - d1_s * d2_r) / (d1_r**2 + d1_s**2 + 1e-30) ** 1.5
    return int(np.argmax(curvature))


def run_demo_imsl_ridge_regression() -> Dict[str, object]:
    """Run Ridge Regression demo showing the L-curve and coefficient paths.

    Compares ridge_regression() at multiple α values with an unregularized
    lsqrr() solve. Identifies the optimal α at the L-curve corner, prints a
    coefficient comparison table, and saves an SVG with the L-curve and the
    coefficient paths.

    Args:
        None

    Returns:
        Dict[str, object]: Result dict with keys ``alpha_values`` (list),
            ``optimal_alpha`` (float), ``x_ridge_optimal`` (np.ndarray),
            ``residual_unregularized`` (float), and ``plot_path`` (str).
    """
    a, b, x_true = _make_ill_conditioned()
    m, n = a.shape
    # Alpha sweep (log scale) from heavy under- to heavy over-regularization.
    alphas: List[float] = list(np.logspace(-4, 2, 50))
    sol_norms: List[float] = []
    res_norms: List[float] = []
    coeff_paths: List[np.ndarray] = []
    for alpha in alphas:
        x_r = ridge_regression(a, b, alpha=alpha)
        sol_norms.append(float(np.linalg.norm(x_r)))
        res_norms.append(float(np.linalg.norm(a @ x_r - b)))
        coeff_paths.append(x_r)
    # Unregularized LS via lsqrr — returns (x, residuals, rank, sv).
    x_ls, _resids, _rank, _sv = lsqrr(a, b)
    res_unreg = float(np.linalg.norm(a @ x_ls - b))
    # Identify the L-curve corner by maximising curvature (Menger proxy).
    idx_opt = _l_curve_corner(res_norms, sol_norms)
    alpha_opt = alphas[idx_opt]
    x_opt = coeff_paths[idx_opt]
    # Coefficient matrix for path plot
    coeff_matrix = np.array(coeff_paths)  # shape (n_alpha, n)
    # Problem size taken from the data so the header stays accurate if the
    # generator's defaults ever change (80×5 today).
    print(f"\nIMSL Ridge Regression: ill-conditioned {m}×{n} multicollinear system")
    print("-" * 65)
    print(f"Condition number of A: {np.linalg.cond(a):.2e}")
    print(f"Optimal α (L-curve corner): {alpha_opt:.4e}")
    print(f"\n{'Coefficient':<15} {'x_true':>10} {'x_ridge(α_opt)':>16} {'x_LS (unregul.)':>18}")
    print("-" * 65)
    for i in range(n):
        print(f"{'x[' + str(i) + ']':<15} {x_true[i]:>10.4f} {x_opt[i]:>16.4f} {x_ls[i]:>18.4f}")
    print("-" * 65)
    print(f"\n‖Ax−b‖ unregularized (lsqrr): {res_unreg:.4e}")
    print(f"‖Ax−b‖ ridge optimal: {res_norms[idx_opt]:.4e}")
    print(f"‖x‖ unregularized: {np.linalg.norm(x_ls):.4e}")
    print(f"‖x‖ ridge optimal: {sol_norms[idx_opt]:.4e}")
    output_dir = Path("test_output")
    output_dir.mkdir(parents=True, exist_ok=True)
    plot_path = output_dir / "example_imsl_ridge_regression.svg"
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    # Left: L-curve with the corner and the unregularized solution marked.
    ax = axes[0]
    ax.loglog(res_norms, sol_norms, "b-", linewidth=1.5, label="Ridge path")
    ax.loglog(res_norms[idx_opt], sol_norms[idx_opt], "ro", markersize=10,
              label=f"Optimal α={alpha_opt:.2e}", zorder=5)
    ax.loglog(res_unreg, np.linalg.norm(x_ls), "k*", markersize=12,
              label="lsqrr (α→0)", zorder=5)
    ax.set_xlabel("Residual norm ‖Ax−b‖")
    ax.set_ylabel("Solution norm ‖x‖")
    ax.set_title("L-Curve: Solution vs Residual Norm")
    ax.legend(fontsize=9)
    ax.grid(True, which="both", alpha=0.3)
    # Right: coefficient paths as alpha grows.
    ax2 = axes[1]
    for i in range(n):
        ax2.semilogx(alphas, coeff_matrix[:, i], label=f"x[{i}]")
    ax2.axvline(alpha_opt, color="red", linestyle="--", linewidth=1.5,
                label=f"α_opt={alpha_opt:.2e}")
    ax2.set_xlabel("Regularization α")
    ax2.set_ylabel("Coefficient value")
    ax2.set_title("Coefficient Paths vs α")
    ax2.legend(fontsize=8)
    ax2.grid(True, which="both", alpha=0.3)
    fig.tight_layout()
    fig.savefig(plot_path, format="svg")
    plt.close(fig)
    print(f"\nPlot saved to: {plot_path}")
    return {
        "alpha_values": alphas,
        "optimal_alpha": alpha_opt,
        "x_ridge_optimal": x_opt,
        "residual_unregularized": res_unreg,
        "plot_path": str(plot_path),
    }
# Allow the demo to be executed directly as a script.
if __name__ == "__main__":
    run_demo_imsl_ridge_regression()
Plot Output¶
Left: L-curve in log–log space. The red dot marks the corner (optimal \(\alpha\)) and the star marks the unregularized lsqrr solution. Right: coefficient paths showing how each \(x_i\) evolves as \(\alpha\) increases from 0.¶
Console Output¶
IMSL Ridge Regression: ill-conditioned 80×5 multicollinear system
-----------------------------------------------------------------
Condition number of A: 2.50e+02
Optimal α (L-curve corner): 9.1030e-03
Coefficient x_true x_ridge(α_opt) x_LS (unregul.)
-----------------------------------------------------------------
x[0] 1.0000 0.8846 10.0076
x[1] -2.0000 -1.9790 -2.0003
x[2] 0.5000 0.4347 0.4218
x[3] 3.0000 0.8688 1.2853
x[4] -1.5000 0.7577 -8.7721
-----------------------------------------------------------------
‖Ax−b‖ unregularized (lsqrr): 4.3854e-01
‖Ax−b‖ ridge optimal: 4.5131e-01
‖x‖ unregularized: 1.3525e+01
‖x‖ ridge optimal: 2.4934e+00
Plot saved to: test_output\example_imsl_ridge_regression.svg