"""
Statistical Inference Module for Negative Binomial GLM
This module implements closed-form standard error calculations and statistical
inference for negative binomial GLM parameters, following the mathematical
derivation in methods/closed_form_standard_errors.md.
Key functions:
- compute_fisher_weights: Calculate Fisher information weights
- compute_standard_errors: Closed-form standard errors for binary predictor
- compute_wald_statistics: Wald test statistics and p-values
- compute_nb_glm_inference: Combined estimates, standard errors, and Wald p-values
- validate_calibration: QQ plots for p-value calibration assessment
"""
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from typing import Dict, Optional
import warnings
def compute_fisher_weights(mu_hat: float,
beta_hat: float,
alpha_hat: float,
x_indicators: np.ndarray,
lib_sizes: np.ndarray) -> np.ndarray:
"""
Compute Fisher information weights for negative binomial GLM.
For each observation i, the Fisher weight is:
W_i = m_i / (1 + φ * m_i)
where:
- m_i = ℓ_i * exp(μ̂ + x_i * β̂) is the fitted mean
- φ = exp(α̂) is the dispersion parameter
- ℓ_i is the library size (exposure)
- x_i ∈ {0,1} is the treatment indicator
Args:
mu_hat: Fitted intercept parameter (log scale)
beta_hat: Fitted slope parameter (log fold change)
alpha_hat: Fitted dispersion parameter (log scale)
x_indicators: Binary treatment indicators (0 = control, 1 = treatment)
lib_sizes: Library sizes (exposures) for each observation
Returns:
Array of Fisher weights W_i for each observation
References:
methods/closed_form_standard_errors.md
"""
# Convert parameters to natural scale
phi = np.exp(alpha_hat) # Dispersion parameter
# Compute fitted means: m_i = ℓ_i * exp(μ̂ + x_i * β̂)
linear_predictor = mu_hat + x_indicators * beta_hat
fitted_means = lib_sizes * np.exp(linear_predictor)
# Compute Fisher weights: W_i = m_i / (1 + φ * m_i)
weights = fitted_means / (1.0 + phi * fitted_means)
return weights
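# Illustrative usage sketch (hypothetical parameter values, chosen only for
# demonstration): as phi = exp(alpha_hat) -> 0 the weights approach the Poisson
# limit W_i -> m_i, while for phi * m_i >> 1 they flatten out near 1/phi.
def _example_fisher_weights() -> None:
    x = np.array([0, 0, 1, 1])
    libs = np.array([1.0e6, 1.2e6, 0.9e6, 1.1e6])
    w = compute_fisher_weights(mu_hat=-11.0, beta_hat=0.5, alpha_hat=-1.0,
                               x_indicators=x, lib_sizes=libs)
    print("Fisher weights:", np.round(w, 3))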
def compute_standard_errors(mu_hat: float,
beta_hat: float,
alpha_hat: float,
x_indicators: np.ndarray,
lib_sizes: np.ndarray) -> Dict[str, float]:
"""
Compute closed-form standard errors for negative binomial GLM with binary predictor.
For a binary predictor x ∈ {0,1}, the standard errors are:
- SE(β̂) = √(1/S₀ + 1/S₁) [slope/treatment effect]
- SE(μ̂) = 1/√S₀ [intercept]
where:
- S₀ = Σ W_i for observations with x_i = 0 (control group)
- S₁ = Σ W_i for observations with x_i = 1 (treatment group)
Args:
mu_hat: Fitted intercept parameter (log scale)
beta_hat: Fitted slope parameter (log fold change)
alpha_hat: Fitted dispersion parameter (log scale)
x_indicators: Binary treatment indicators (0 = control, 1 = treatment)
lib_sizes: Library sizes (exposures) for each observation
Returns:
Dictionary with standard errors:
- 'se_beta': Standard error of treatment effect (slope)
- 'se_mu': Standard error of intercept
- 'S0': Sum of weights for control group
- 'S1': Sum of weights for treatment group
References:
methods/closed_form_standard_errors.md, Section 5
"""
# Input validation
x_indicators = np.asarray(x_indicators)
lib_sizes = np.asarray(lib_sizes)
if len(x_indicators) != len(lib_sizes):
raise ValueError("x_indicators and lib_sizes must have same length")
if not np.all(np.isin(x_indicators, [0, 1])):
raise ValueError("x_indicators must contain only 0s and 1s")
if np.any(lib_sizes <= 0):
raise ValueError("lib_sizes must be positive")
# Compute Fisher weights
weights = compute_fisher_weights(mu_hat, beta_hat, alpha_hat, x_indicators, lib_sizes)
# Compute group-wise weight sums
S0 = np.sum(weights[x_indicators == 0]) # Control group
S1 = np.sum(weights[x_indicators == 1]) # Treatment group
# Handle edge cases
if S0 <= 0 or S1 <= 0:
warnings.warn("One or both groups have zero weight sum. Standard errors may be unreliable.")
se_beta = np.inf
se_mu = np.inf
else:
# Closed-form standard errors
se_beta = np.sqrt(1.0/S0 + 1.0/S1) # Treatment effect standard error
se_mu = 1.0 / np.sqrt(S0) # Intercept standard error
return {
'se_beta': se_beta,
'se_mu': se_mu,
'S0': S0,
'S1': S1
}
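# Numerical cross-check (illustrative, with made-up parameter values): for a binary
# predictor the Fisher information is X^T W X with design rows [1, x_i], i.e. the
# 2x2 matrix [[S0 + S1, S1], [S1, S1]]; inverting it reproduces the closed forms
# Var(mu_hat) = 1/S0 and Var(beta_hat) = 1/S0 + 1/S1 used above.
def _check_closed_form_against_matrix_inverse() -> None:
    x = np.array([0, 0, 0, 1, 1, 1])
    libs = np.full(6, 1.0e6)
    mu, beta, alpha = -11.0, 0.7, -1.5
    w = compute_fisher_weights(mu, beta, alpha, x, libs)
    X = np.column_stack([np.ones_like(x, dtype=float), x.astype(float)])
    cov = np.linalg.inv(X.T @ (w[:, None] * X))  # inverse Fisher information
    se = compute_standard_errors(mu, beta, alpha, x, libs)
    assert np.isclose(np.sqrt(cov[0, 0]), se['se_mu'])
    assert np.isclose(np.sqrt(cov[1, 1]), se['se_beta'])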
def compute_wald_statistics(beta_hat: float, se_beta: float) -> Dict[str, float]:
"""
Compute Wald test statistics and p-values for treatment effect.
The Wald statistic for testing H₀: β = 0 vs H₁: β ≠ 0 is:
z = β̂ / SE(β̂)
Under the null hypothesis, z ~ N(0,1) asymptotically.
Two-sided p-value: p = 2 * (1 - Φ(|z|))
Args:
beta_hat: Fitted treatment effect (log fold change)
se_beta: Standard error of treatment effect
Returns:
Dictionary with test statistics:
- 'z_stat': Wald z-statistic
- 'p_value': Two-sided p-value
- 'chi2_stat': Chi-squared statistic (z²)
References:
methods/closed_form_standard_errors.md, Section 6
"""
# Handle edge cases
if se_beta <= 0 or np.isinf(se_beta):
return {
'z_stat': np.nan,
'p_value': np.nan,
'chi2_stat': np.nan
}
# Compute Wald statistic
z_stat = beta_hat / se_beta
# Two-sided p-value via the normal survival function (avoids underflow for large |z|)
p_value = 2.0 * stats.norm.sf(np.abs(z_stat))
# Chi-squared statistic (equivalent test)
chi2_stat = z_stat ** 2
return {
'z_stat': z_stat,
'p_value': p_value,
'chi2_stat': chi2_stat
}
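# Illustrative usage sketch (hypothetical numbers): a log fold change of 0.7 with a
# standard error of 0.25 gives z = 2.8 and a two-sided p-value of roughly 0.005.
def _example_wald_statistics() -> None:
    res = compute_wald_statistics(beta_hat=0.7, se_beta=0.25)
    print(f"z = {res['z_stat']:.2f}, p = {res['p_value']:.4g}")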
def compute_nb_glm_inference(mu_hat: float,
beta_hat: float,
alpha_hat: float,
x_indicators: np.ndarray,
lib_sizes: np.ndarray) -> Dict[str, float]:
"""
Complete statistical inference for negative binomial GLM with binary predictor.
Combines parameter estimates with closed-form standard errors and test statistics
to provide full statistical inference equivalent to classical GLM software.
Args:
mu_hat: Fitted intercept parameter (log scale)
beta_hat: Fitted slope parameter (log fold change)
alpha_hat: Fitted dispersion parameter (log scale)
x_indicators: Binary treatment indicators (0 = control, 1 = treatment)
lib_sizes: Library sizes (exposures) for each observation
Returns:
Dictionary with complete inference results:
- Parameter estimates: mu_hat, beta_hat, alpha_hat
- Standard errors: se_mu, se_beta
- Test statistics: z_stat, chi2_stat
- P-value: p_value (two-sided test of H₀: β = 0)
- Fisher information: S0, S1 (group weight sums)
"""
# Compute standard errors
se_results = compute_standard_errors(mu_hat, beta_hat, alpha_hat, x_indicators, lib_sizes)
# Compute test statistics
test_results = compute_wald_statistics(beta_hat, se_results['se_beta'])
# Combine all results
inference_results = {
# Parameter estimates
'mu_hat': mu_hat,
'beta_hat': beta_hat,
'alpha_hat': alpha_hat,
# Standard errors
'se_mu': se_results['se_mu'],
'se_beta': se_results['se_beta'],
# Test statistics
'z_stat': test_results['z_stat'],
'chi2_stat': test_results['chi2_stat'],
'p_value': test_results['p_value'],
# Fisher information
'S0': se_results['S0'],
'S1': se_results['S1']
}
return inference_results
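# Illustrative end-to-end sketch (all parameter values are hypothetical; in practice
# they come from the fitted negative binomial GLM): standard errors, Wald statistic,
# and p-value for a toy two-group design.
def _example_full_inference() -> None:
    x = np.array([0, 0, 0, 1, 1, 1])
    libs = np.array([1.0e6, 1.1e6, 0.9e6, 1.2e6, 1.0e6, 0.95e6])
    results = compute_nb_glm_inference(mu_hat=-10.5, beta_hat=0.6, alpha_hat=-2.0,
                                       x_indicators=x, lib_sizes=libs)
    print(f"LFC = {results['beta_hat']:.3f} +/- {results['se_beta']:.3f}, "
          f"p = {results['p_value']:.3g}")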
def validate_calibration(p_values: np.ndarray,
title: str = "P-value Calibration",
output_path: Optional[str] = None,
alpha: float = 0.05) -> Dict[str, float]:
"""
Validate statistical calibration using QQ plots and uniformity tests.
Under correct calibration, p-values from null data should follow Uniform(0,1).
This function creates QQ plots and performs statistical tests to assess calibration.
Args:
p_values: Array of p-values to test for uniformity
title: Title for the QQ plot
output_path: Optional path to save the plot
alpha: Significance level for statistical tests
Returns:
Dictionary with calibration metrics:
- 'ks_statistic': Kolmogorov-Smirnov test statistic
- 'ks_pvalue': KS test p-value
- 'ad_statistic': Anderson-Darling test statistic
- 'ad_pvalue': AD test p-value (approximate)
- 'is_calibrated_ks': Boolean, True if KS test is non-significant
- 'is_calibrated_ad': Boolean, True if AD test is non-significant
References:
Statistical calibration assessment for hypothesis testing
"""
# Remove NaN values
p_values = p_values[~np.isnan(p_values)]
if len(p_values) == 0:
raise ValueError("No valid p-values provided")
# Kolmogorov-Smirnov test for uniformity
ks_stat, ks_pval = stats.kstest(p_values, 'uniform')
# Anderson-Darling test for uniformity using manual calculation
# Since scipy doesn't support uniform dist directly, we use the formula
# for uniform distribution on [0,1]
n = len(p_values)
p_sorted = np.sort(p_values)
# Anderson-Darling statistic for uniform distribution
i = np.arange(1, n + 1)
# Clip to the open interval (0, 1) so log() stays finite if any p-value is exactly 0 or 1
p_clipped = np.clip(p_sorted, 1e-12, 1.0 - 1e-12)
ad_stat = -n - np.sum((2*i - 1) * (np.log(p_clipped) + np.log(1.0 - p_clipped[::-1]))) / n
# Critical values for uniform distribution (approximate)
# These are rough approximations based on simulation studies
if n >= 25:
ad_critical_05 = 2.492 # 5% critical value for large n
ad_pval_approx = 0.05 if ad_stat > ad_critical_05 else 0.1
else:
# For small samples, use more conservative threshold
ad_critical_05 = 2.0
ad_pval_approx = 0.05 if ad_stat > ad_critical_05 else 0.1
# Create QQ plot
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
# QQ plot against uniform distribution
expected_quantiles = (np.arange(1, len(p_values) + 1) - 0.5) / len(p_values)
observed_quantiles = np.sort(p_values)
ax1.scatter(expected_quantiles, observed_quantiles, alpha=0.6, s=20)
ax1.plot([0, 1], [0, 1], 'r--', label='Perfect calibration')
ax1.set_xlabel('Expected quantiles (Uniform)')
ax1.set_ylabel('Observed quantiles (P-values)')
ax1.set_title(f'{title}\nQQ Plot vs Uniform(0,1)')
ax1.legend()
ax1.grid(True, alpha=0.3)
# Histogram of p-values
ax2.hist(p_values, bins=20, density=True, alpha=0.7, color='skyblue',
edgecolor='black', label='Observed')
ax2.axhline(y=1.0, color='red', linestyle='--', label='Expected (Uniform)')
ax2.set_xlabel('P-value')
ax2.set_ylabel('Density')
ax2.set_title(f'{title}\nP-value Histogram')
ax2.legend()
ax2.grid(True, alpha=0.3)
plt.tight_layout()
# Add statistical test results as text
textstr = f'KS test: D={ks_stat:.4f}, p={ks_pval:.4f}\nAD test: A²={ad_stat:.4f}'
fig.text(0.02, 0.02, textstr, fontsize=10,
bbox=dict(boxstyle="round,pad=0.3", facecolor="lightgray"))
if output_path:
plt.savefig(output_path, dpi=300, bbox_inches='tight')
print(f"Calibration plot saved to: {output_path}")
else:
plt.show()
# Return calibration metrics
calibration_metrics = {
'ks_statistic': ks_stat,
'ks_pvalue': ks_pval,
'ad_statistic': ad_stat,
'ad_pvalue': ad_pval_approx,
'is_calibrated_ks': ks_pval > alpha,
'is_calibrated_ad': ad_pval_approx > alpha,
'n_tests': len(p_values)
}
return calibration_metrics
def summarize_calibration_results(calibration_metrics: Dict[str, float]) -> str:
"""
Generate a human-readable summary of calibration results.
Args:
calibration_metrics: Output from validate_calibration()
Returns:
Formatted string summary
"""
ks_result = "✓ Well-calibrated" if calibration_metrics['is_calibrated_ks'] else "✗ Poorly calibrated"
ad_result = "✓ Well-calibrated" if calibration_metrics['is_calibrated_ad'] else "✗ Poorly calibrated"
summary = f"""
Calibration Assessment Summary (n = {calibration_metrics['n_tests']:,})
=========================================
Kolmogorov-Smirnov Test:
Statistic: {calibration_metrics['ks_statistic']:.4f}
P-value: {calibration_metrics['ks_pvalue']:.4f}
Result: {ks_result}
Anderson-Darling Test:
Statistic: {calibration_metrics['ad_statistic']:.4f}
P-value: ~{calibration_metrics['ad_pvalue']:.3f}
Result: {ad_result}
Interpretation:
- Well-calibrated methods should show p-values ~ Uniform(0,1) under null hypothesis
- Significant test results (p < 0.05) indicate poor calibration
- QQ plot should follow diagonal line for good calibration
"""
return summary
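# Illustrative calibration sketch (the output filename is hypothetical): under the
# null the Wald z-statistics are approximately N(0, 1), so standard-normal draws
# converted to two-sided p-values should pass the uniformity checks above.
def _example_calibration_check(n_tests: int = 2000, seed: int = 0) -> None:
    rng = np.random.default_rng(seed)
    p_null = 2.0 * stats.norm.sf(np.abs(rng.standard_normal(n_tests)))
    metrics = validate_calibration(p_null, title="Null simulation",
                                   output_path="calibration_null.png")
    print(summarize_calibration_results(metrics))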
def load_pretrained_model(checkpoint_path: Optional[str] = None, device: Optional[str] = None):
"""
Load the pre-trained NB-Transformer model.
Args:
checkpoint_path: Path to checkpoint file. If None, uses bundled v13 model.
device: Device to load model on ('cpu', 'cuda', 'mps'). If None, auto-detects.
Returns:
Loaded DispersionTransformer model ready for inference
Example:
>>> from nb_transformer import load_pretrained_model
>>> model = load_pretrained_model()
>>> params = model.predict_parameters([2.1, 1.8, 2.3], [1.5, 1.2, 1.7])
"""
import torch
import os
from .model import DispersionTransformer
from .train import DispersionLightningModule
# Auto-detect device if not specified
if device is None:
if torch.cuda.is_available():
device = 'cuda'
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
device = 'mps'
else:
device = 'cpu'
# Use bundled checkpoint if none specified
if checkpoint_path is None:
package_dir = os.path.dirname(__file__)
checkpoint_path = os.path.join(package_dir, '..', 'model_checkpoint', 'last-v13.ckpt')
if not os.path.exists(checkpoint_path):
raise FileNotFoundError(
f"Bundled model checkpoint not found at {checkpoint_path}. "
"Please provide checkpoint_path explicitly."
)
# Load checkpoint
try:
lightning_module = DispersionLightningModule.load_from_checkpoint(
checkpoint_path,
map_location=device
)
model = lightning_module.model
model.to(device)
model.eval()
return model
except Exception as e:
raise RuntimeError(f"Failed to load model from {checkpoint_path}: {e}") from e
def quick_inference_example():
"""
Demonstrate quick inference with the pre-trained model.
Returns:
Dictionary with example parameters
"""
# Load model
model = load_pretrained_model()
# Example data: two conditions with different sample sizes
condition_1 = [2.1, 1.8, 2.3, 2.0] # 4 samples from control
condition_2 = [1.5, 1.2, 1.7, 1.4, 1.6] # 5 samples from treatment
# Predict parameters
params = model.predict_parameters(condition_1, condition_2)
print("NB-Transformer Quick Inference Example")
print("=====================================")
print(f"Control samples: {condition_1}")
print(f"Treatment samples: {condition_2}")
print(f"μ̂ (base mean): {params['mu']:.3f}")
print(f"β̂ (log fold change): {params['beta']:.3f}")
print(f"α̂ (log dispersion): {params['alpha']:.3f}")
print(f"Fold change: {np.exp(params['beta']):.2f}x")
return params