Statistics Example — random_generate, rng_set_seed, descriptive stats

This example uses utilities.rng_set_seed() to fix the random state and utilities.random_generate() to draw 1 000 normally-distributed samples. Mean, median, standard deviation, variance, skewness, kurtosis, and percentiles (P5–P95) are computed via numpy/scipy and annotated on a histogram with fitted PDF. A violin and box-plot panel shows the distribution shape.

Example Code

"""IMSL statistics example: statistical summary using random_generate and rng_set_seed.

Outputs:
- Summary statistics table printed to stdout
- SVG histogram with annotated statistics saved to
  test_output/example_imsl_statistics.svg
"""
from __future__ import annotations

from pathlib import Path
from typing import Dict

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats

from utilities import random_generate, rng_set_seed


def run_demo_imsl_statistics() -> Dict[str, object]:
    """Generate sample data and compute descriptive statistics.

    Uses :func:`utilities.rng_set_seed` to fix the random state and
    :func:`utilities.random_generate` to produce 1000 normally-distributed
    samples.  Descriptive statistics (mean, median, std, variance,
    percentiles, skewness, kurtosis) are computed via numpy/scipy and
    displayed on a histogram.

    Args:
        None

    Returns:
        Dict[str, object]: Result dict with keys ``data`` (np.ndarray),
            ``mean`` (float), ``median`` (float), ``std`` (float),
            ``variance`` (float), ``skewness`` (float), ``kurtosis`` (float),
            ``percentiles`` (dict[str, float]), and ``plot_path`` (str).
    """
    rng_set_seed(42)
    n = 1000
    data = random_generate(n, "normal", seed=42)

    mean_val = float(np.mean(data))
    median_val = float(np.median(data))
    std_val = float(np.std(data, ddof=1))
    var_val = float(np.var(data, ddof=1))
    skew_val = float(stats.skew(data))
    kurt_val = float(stats.kurtosis(data))

    pct_labels = ["P5", "P10", "P25", "P50", "P75", "P90", "P95"]
    pct_values = [5, 10, 25, 50, 75, 90, 95]
    percentiles: dict[str, float] = {
        lbl: float(np.percentile(data, p))
        for lbl, p in zip(pct_labels, pct_values)
    }

    # ── Print summary table ───────────────────────────────────────────────────
    print("\nIMSL Statistics Example — Normal Distribution (n=1000, seed=42)")
    print("=" * 55)
    print(f"  {'Statistic':<20}  {'Value':>12}")
    print("  " + "-" * 36)
    print(f"  {'Mean':<20}  {mean_val:>12.6f}")
    print(f"  {'Median':<20}  {median_val:>12.6f}")
    print(f"  {'Std Dev':<20}  {std_val:>12.6f}")
    print(f"  {'Variance':<20}  {var_val:>12.6f}")
    print(f"  {'Skewness':<20}  {skew_val:>12.6f}")
    print(f"  {'Kurtosis (excess)':<20}  {kurt_val:>12.6f}")
    print("  " + "-" * 36)
    print(f"  {'Min':<20}  {data.min():>12.6f}")
    print(f"  {'Max':<20}  {data.max():>12.6f}")
    print()
    print("  Percentiles:")
    for lbl, val in percentiles.items():
        print(f"    {lbl:<6}  {val:>12.6f}")

    # ── Plot ─────────────────────────────────────────────────────────────────
    output_dir = Path("test_output")
    output_dir.mkdir(parents=True, exist_ok=True)
    plot_path = output_dir / "example_imsl_statistics.svg"

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13, 5))

    # Left: histogram with mean±std and percentile markers
    ax1.hist(data, bins=35, color="#b45309", alpha=0.80, edgecolor="white",
             linewidth=0.5, density=True, label="Sample (density)")

    x_range = np.linspace(data.min(), data.max(), 300)
    ax1.plot(x_range, stats.norm.pdf(x_range, mean_val, std_val),
             "k-", linewidth=1.8, label="Normal PDF fit")

    ax1.axvline(mean_val, color="#1d4ed8", linewidth=1.8, linestyle="-",
                label=f"Mean = {mean_val:.3f}")
    ax1.axvline(mean_val - std_val, color="#1d4ed8", linewidth=1.2,
                linestyle="--", label=f"±1σ = {std_val:.3f}")
    ax1.axvline(mean_val + std_val, color="#1d4ed8", linewidth=1.2, linestyle="--")

    for lbl in ("P5", "P25", "P75", "P95"):
        ax1.axvline(percentiles[lbl], color="#92400e", linewidth=0.9,
                    linestyle=":", alpha=0.9)
    ax1.axvline(percentiles["P5"], color="#92400e", linewidth=0.9,
                linestyle=":", label="P5/P25/P75/P95")

    ax1.set_xlabel("Value")
    ax1.set_ylabel("Density")
    ax1.set_title("Histogram with PDF fit and percentiles")
    ax1.legend(fontsize=8)
    ax1.grid(True, alpha=0.3)

    # Right: box-plot and violin side by side
    parts = ax2.violinplot(data, positions=[1], showmedians=True,
                           showextrema=True)
    for pc in parts["bodies"]:
        pc.set_facecolor("#d97706")
        pc.set_alpha(0.75)
    ax2.boxplot(data, positions=[2], widths=0.4,
                patch_artist=True,
                boxprops=dict(facecolor="#b45309", alpha=0.6),
                medianprops=dict(color="white", linewidth=2))
    ax2.set_xticks([1, 2])
    ax2.set_xticklabels(["Violin", "Box"])
    ax2.set_ylabel("Value")
    ax2.set_title(f"Distribution shape  skew={skew_val:.3f}  kurt={kurt_val:.3f}")
    ax2.grid(True, alpha=0.3, axis="y")

    fig.suptitle("IMSL Statistics: random_generate + descriptive stats",
                 fontweight="bold")
    fig.tight_layout()
    fig.savefig(plot_path, format="svg")
    plt.close(fig)

    print(f"\nPlot saved to: {plot_path}")
    return {
        "data": data,
        "mean": mean_val,
        "median": median_val,
        "std": std_val,
        "variance": var_val,
        "skewness": skew_val,
        "kurtosis": kurt_val,
        "percentiles": percentiles,
        "plot_path": str(plot_path),
    }


if __name__ == "__main__":
    run_demo_imsl_statistics()

Plot Output

Histogram with PDF fit and percentile markers, plus violin and box-plot

Left: density histogram with Normal PDF overlay, mean ± 1σ lines, and P5/P25/P75/P95 percentile markers. Right: violin and box-plot showing distribution shape with skewness and kurtosis annotated.

Console Output

IMSL Statistics Example ΓÇö Normal Distribution (n=1000, seed=42)
=======================================================
  Statistic                    Value
  ------------------------------------
  Mean                     -0.028892
  Median                    0.006178
  Std Dev                   0.989217
  Variance                  0.978550
  Skewness                 -0.043688
  Kurtosis (excess)         0.085432
  ------------------------------------
  Min                      -3.648413
  Max                       3.178854

  Percentiles:
    P5         -1.685641
    P10        -1.283113
    P25        -0.696313
    P50         0.006178
    P75         0.589887
    P90         1.253997
    P95         1.616243

Plot saved to: test_output\example_imsl_statistics.svg