Statistics Example — random_generate, rng_set_seed, descriptive stats¶
This example uses utilities.rng_set_seed() to fix the random state and
utilities.random_generate() to draw 1 000 normally-distributed samples.
Mean, median, standard deviation, variance, skewness, kurtosis, and
percentiles (P5–P95) are computed via numpy/scipy and annotated on a
histogram with fitted PDF. A violin and box-plot panel shows the
distribution shape.
Example Code¶
"""IMSL statistics example: statistical summary using random_generate and rng_set_seed.
Outputs:
- Summary statistics table printed to stdout
- SVG histogram with annotated statistics saved to
test_output/example_imsl_statistics.svg
"""
from __future__ import annotations
from pathlib import Path
from typing import Dict
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
from utilities import random_generate, rng_set_seed
def run_demo_imsl_statistics() -> Dict[str, object]:
"""Generate sample data and compute descriptive statistics.
Uses :func:`utilities.rng_set_seed` to fix the random state and
:func:`utilities.random_generate` to produce 1000 normally-distributed
samples. Descriptive statistics (mean, median, std, variance,
percentiles, skewness, kurtosis) are computed via numpy/scipy and
displayed on a histogram.
Args:
None
Returns:
Dict[str, object]: Result dict with keys ``data`` (np.ndarray),
``mean`` (float), ``median`` (float), ``std`` (float),
``variance`` (float), ``skewness`` (float), ``kurtosis`` (float),
``percentiles`` (dict[str, float]), and ``plot_path`` (str).
"""
rng_set_seed(42)
n = 1000
data = random_generate(n, "normal", seed=42)
mean_val = float(np.mean(data))
median_val = float(np.median(data))
std_val = float(np.std(data, ddof=1))
var_val = float(np.var(data, ddof=1))
skew_val = float(stats.skew(data))
kurt_val = float(stats.kurtosis(data))
pct_labels = ["P5", "P10", "P25", "P50", "P75", "P90", "P95"]
pct_values = [5, 10, 25, 50, 75, 90, 95]
percentiles: dict[str, float] = {
lbl: float(np.percentile(data, p))
for lbl, p in zip(pct_labels, pct_values)
}
# ── Print summary table ───────────────────────────────────────────────────
print("\nIMSL Statistics Example — Normal Distribution (n=1000, seed=42)")
print("=" * 55)
print(f" {'Statistic':<20} {'Value':>12}")
print(" " + "-" * 36)
print(f" {'Mean':<20} {mean_val:>12.6f}")
print(f" {'Median':<20} {median_val:>12.6f}")
print(f" {'Std Dev':<20} {std_val:>12.6f}")
print(f" {'Variance':<20} {var_val:>12.6f}")
print(f" {'Skewness':<20} {skew_val:>12.6f}")
print(f" {'Kurtosis (excess)':<20} {kurt_val:>12.6f}")
print(" " + "-" * 36)
print(f" {'Min':<20} {data.min():>12.6f}")
print(f" {'Max':<20} {data.max():>12.6f}")
print()
print(" Percentiles:")
for lbl, val in percentiles.items():
print(f" {lbl:<6} {val:>12.6f}")
# ── Plot ─────────────────────────────────────────────────────────────────
output_dir = Path("test_output")
output_dir.mkdir(parents=True, exist_ok=True)
plot_path = output_dir / "example_imsl_statistics.svg"
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(13, 5))
# Left: histogram with mean±std and percentile markers
ax1.hist(data, bins=35, color="#b45309", alpha=0.80, edgecolor="white",
linewidth=0.5, density=True, label="Sample (density)")
x_range = np.linspace(data.min(), data.max(), 300)
ax1.plot(x_range, stats.norm.pdf(x_range, mean_val, std_val),
"k-", linewidth=1.8, label="Normal PDF fit")
ax1.axvline(mean_val, color="#1d4ed8", linewidth=1.8, linestyle="-",
label=f"Mean = {mean_val:.3f}")
ax1.axvline(mean_val - std_val, color="#1d4ed8", linewidth=1.2,
linestyle="--", label=f"±1σ = {std_val:.3f}")
ax1.axvline(mean_val + std_val, color="#1d4ed8", linewidth=1.2, linestyle="--")
for lbl in ("P5", "P25", "P75", "P95"):
ax1.axvline(percentiles[lbl], color="#92400e", linewidth=0.9,
linestyle=":", alpha=0.9)
ax1.axvline(percentiles["P5"], color="#92400e", linewidth=0.9,
linestyle=":", label="P5/P25/P75/P95")
ax1.set_xlabel("Value")
ax1.set_ylabel("Density")
ax1.set_title("Histogram with PDF fit and percentiles")
ax1.legend(fontsize=8)
ax1.grid(True, alpha=0.3)
# Right: box-plot and violin side by side
parts = ax2.violinplot(data, positions=[1], showmedians=True,
showextrema=True)
for pc in parts["bodies"]:
pc.set_facecolor("#d97706")
pc.set_alpha(0.75)
ax2.boxplot(data, positions=[2], widths=0.4,
patch_artist=True,
boxprops=dict(facecolor="#b45309", alpha=0.6),
medianprops=dict(color="white", linewidth=2))
ax2.set_xticks([1, 2])
ax2.set_xticklabels(["Violin", "Box"])
ax2.set_ylabel("Value")
ax2.set_title(f"Distribution shape skew={skew_val:.3f} kurt={kurt_val:.3f}")
ax2.grid(True, alpha=0.3, axis="y")
fig.suptitle("IMSL Statistics: random_generate + descriptive stats",
fontweight="bold")
fig.tight_layout()
fig.savefig(plot_path, format="svg")
plt.close(fig)
print(f"\nPlot saved to: {plot_path}")
return {
"data": data,
"mean": mean_val,
"median": median_val,
"std": std_val,
"variance": var_val,
"skewness": skew_val,
"kurtosis": kurt_val,
"percentiles": percentiles,
"plot_path": str(plot_path),
}
if __name__ == "__main__":
run_demo_imsl_statistics()
Plot Output¶
Left: density histogram with Normal PDF overlay, mean ± 1σ lines, and P5/P25/P75/P95 percentile markers. Right: violin and box-plot showing distribution shape with skewness and kurtosis annotated.¶
Console Output¶
IMSL Statistics Example ΓÇö Normal Distribution (n=1000, seed=42)
=======================================================
Statistic Value
------------------------------------
Mean -0.028892
Median 0.006178
Std Dev 0.989217
Variance 0.978550
Skewness -0.043688
Kurtosis (excess) 0.085432
------------------------------------
Min -3.648413
Max 3.178854
Percentiles:
P5 -1.685641
P10 -1.283113
P25 -0.696313
P50 0.006178
P75 0.589887
P90 1.253997
P95 1.616243
Plot saved to: test_output\example_imsl_statistics.svg