Coverage for anfis_toolbox / metrics.py: 100%

405 statements  

« prev     ^ index     » next       coverage.py v7.13.3, created at 2026-02-05 18:47 -0300

1"""Common metrics utilities for ANFIS Toolbox. 

2 

3This module provides lightweight, dependency-free metrics that are useful 

4for training and evaluating ANFIS models. 

5""" 

6 

7from __future__ import annotations 

8 

9from collections.abc import Callable, Iterable, Mapping, Sequence 

10from dataclasses import dataclass 

11from typing import TYPE_CHECKING, Literal, Protocol, TypeAlias, cast, runtime_checkable 

12 

13import numpy as np 

14import numpy.typing as npt 

15 

16if TYPE_CHECKING: # pragma: no cover - typing helper 

17 from .model import TSKANFIS as ANFIS 

18 

19 

# Public type aliases used throughout the module.
ArrayLike: TypeAlias = npt.ArrayLike
MetricValue: TypeAlias = float | np.ndarray  # scalar metric, or e.g. a confusion matrix
MetricFn: TypeAlias = Callable[[np.ndarray, np.ndarray], float]  # signature for custom metrics

# Shared numerical-stability constant (default denominator/log guard).
_EPSILON: float = 1e-12

25 

26 

@runtime_checkable
class _PredictorLike(Protocol):
    """Minimal protocol for objects exposing a ``predict`` method.

    Used by :func:`quick_evaluate` so any fitted model-like object can be
    accepted without importing concrete estimator classes.
    """

    def predict(self, X: np.ndarray) -> np.ndarray:  # pragma: no cover - typing helper
        """Return predictions for the provided samples."""

34 

35def _to_float_array(values: ArrayLike) -> np.ndarray: 

36 return np.asarray(values, dtype=float) 

37 

38 

39def _coerce_regression_targets(y_true: ArrayLike, y_pred: ArrayLike) -> tuple[np.ndarray, np.ndarray]: 

40 yt = _to_float_array(y_true) 

41 yp = _to_float_array(y_pred) 

42 try: 

43 yt_b, yp_b = np.broadcast_arrays(yt, yp) 

44 except ValueError as exc: # pragma: no cover - exercised via callers 

45 raise ValueError("regression targets must be broadcastable to the same shape") from exc 

46 return yt_b.reshape(-1), yp_b.reshape(-1) 

47 

48 

49def _flatten_float(values: ArrayLike) -> np.ndarray: 

50 return _to_float_array(values).reshape(-1) 

51 

52 

53def _coerce_labels(y_true: ArrayLike) -> np.ndarray: 

54 labels = np.asarray(y_true) 

55 if labels.ndim == 0: 

56 return cast(np.ndarray, labels.reshape(1).astype(int)) 

57 if labels.ndim == 2: 

58 return cast(np.ndarray, np.argmax(labels, axis=1).astype(int)) 

59 return cast(np.ndarray, labels.reshape(-1).astype(int)) 

60 

61 

def _ensure_probabilities(y_prob: ArrayLike) -> np.ndarray:
    """Validate and row-normalize a ``(n_samples, n_classes)`` probability matrix.

    Raises ``ValueError`` when the input is not 2-D or a row sums to a
    non-positive value.
    """
    matrix = _to_float_array(y_prob)
    if matrix.ndim != 2:
        raise ValueError("Probabilities must be a 2D array with shape (n_samples, n_classes)")
    totals = np.sum(matrix, axis=1, keepdims=True)
    if np.any(totals <= 0.0):
        raise ValueError("Each probability row must have positive sum")
    # Normalize, clip exact zeros away, then renormalize so rows sum to one.
    matrix = matrix / totals
    matrix = np.clip(matrix, _EPSILON, 1.0)
    matrix = matrix / np.sum(matrix, axis=1, keepdims=True)
    return cast(np.ndarray, matrix)

73 

74 

75def _confusion_matrix(y_true: np.ndarray, y_pred: np.ndarray) -> tuple[np.ndarray, np.ndarray]: 

76 classes = np.unique(np.concatenate([y_true, y_pred])) 

77 index = {label: idx for idx, label in enumerate(classes)} 

78 matrix = np.zeros((classes.size, classes.size), dtype=int) 

79 for yt, yp in zip(y_true, y_pred, strict=False): 

80 matrix[index[yt], index[yp]] += 1 

81 return matrix, classes 

82 

83 

84def _safe_divide(num: float, den: float) -> float: 

85 return num / den if den > 0.0 else 0.0 

86 

87 

def mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Compute the mean squared error (MSE).

    Parameters:
        y_true: Array-like of true target values.
        y_pred: Array-like of predicted values, broadcastable to ``y_true``.

    Returns:
        Mean of squared differences over all elements (float).

    Raises:
        ValueError: If the inputs cannot be broadcast to a common shape.
    """
    yt, yp = _coerce_regression_targets(y_true, y_pred)
    residual = yt - yp
    return float(np.mean(residual * residual))

106 

107 

def mean_absolute_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Compute the mean absolute error (MAE).

    Parameters:
        y_true: Array-like of true target values.
        y_pred: Array-like of predicted values, broadcastable to ``y_true``.

    Returns:
        Mean of absolute differences over all elements (float).

    Raises:
        ValueError: If the inputs cannot be broadcast to a common shape.
    """
    yt, yp = _coerce_regression_targets(y_true, y_pred)
    abs_errors = np.abs(yt - yp)
    return float(np.mean(abs_errors))

125 

126 

def root_mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Compute the root mean squared error (RMSE): ``sqrt(MSE)``."""
    return float(np.sqrt(mean_squared_error(y_true, y_pred)))

134 

135 

def mean_absolute_percentage_error(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    epsilon: float = 1e-12,
    *,
    ignore_zero_targets: bool = False,
) -> float:
    """Compute the mean absolute percentage error (MAPE) in percent.

    ``MAPE = mean(|(y_true - y_pred)| / max(|y_true|, epsilon)) * 100``

    Parameters:
        y_true: Array-like of true target values.
        y_pred: Array-like of predicted values, broadcastable to ``y_true``.
        epsilon: Guard against division by zero when ``y_true == 0``.
        ignore_zero_targets: When True, samples with ``|y_true| <= epsilon``
            are dropped; if every target is (near) zero, ``np.inf`` is
            returned to signal that the percentage is undefined.

    Returns:
        MAPE as a percentage (float).
    """
    yt, yp = _coerce_regression_targets(y_true, y_pred)
    guard = float(epsilon)
    if ignore_zero_targets:
        keep = np.abs(yt) > guard
        if not np.any(keep):
            return float(np.inf)
        yt = yt[keep]
        yp = yp[keep]
    scale = np.maximum(np.abs(yt), guard)
    return float(np.mean(np.abs((yt - yp) / scale)) * 100.0)

166 

167 

def symmetric_mean_absolute_percentage_error(
    y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = _EPSILON
) -> float:
    """Compute the symmetric MAPE (SMAPE) in percent.

    ``SMAPE = mean(200 * |y_true - y_pred| / (|y_true| + |y_pred|))`` with the
    denominator floored at ``epsilon`` to avoid division by zero.

    Parameters:
        y_true: Array-like of true target values.
        y_pred: Array-like of predicted values, broadcastable to ``y_true``.
        epsilon: Lower bound applied to the denominator.

    Returns:
        SMAPE as a percentage (float).
    """
    yt, yp = _coerce_regression_targets(y_true, y_pred)
    guard = float(epsilon)
    denom = np.maximum(np.abs(yt) + np.abs(yp), guard)
    return float(np.mean(200.0 * np.abs(yt - yp) / denom))

187 

188 

def r2_score(y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = _EPSILON) -> float:
    """Compute the coefficient of determination R^2.

    ``R^2 = 1 - SS_res / SS_tot`` with ``SS_res = sum((y - y_hat)^2)`` and
    ``SS_tot = sum((y - mean(y))^2)``. When the target is (near) constant
    (``SS_tot <= epsilon``), returns 1.0 if predictions match it
    (``SS_res <= epsilon``), otherwise 0.0.
    """
    yt, yp = _coerce_regression_targets(y_true, y_pred)
    residual = yt - yp
    ss_res = float(np.sum(residual * residual))
    mean_true = float(np.mean(yt))
    ss_tot = float(np.sum((yt - mean_true) ** 2))
    tol = float(epsilon)
    if ss_tot <= tol:
        return 1.0 if ss_res <= tol else 0.0
    return 1.0 - ss_res / ss_tot

205 

206 

def pearson_correlation(y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = _EPSILON) -> float:
    """Compute the Pearson correlation coefficient r.

    Returns 0.0 when either input has (near) zero variance, where r is
    undefined.
    """
    yt, yp = _coerce_regression_targets(y_true, y_pred)
    centered_true = yt - np.mean(yt)
    centered_pred = yp - np.mean(yp)
    covariance = float(np.sum(centered_true * centered_pred))
    norm = float(np.sqrt(np.sum(centered_true * centered_true) * np.sum(centered_pred * centered_pred)))
    if norm <= float(epsilon):
        return 0.0
    return covariance / norm

220 

221 

def mean_squared_logarithmic_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Compute the mean squared logarithmic error (MSLE).

    ``MSLE = mean((log1p(y_true) - log1p(y_pred))^2)`` — ``log1p`` keeps the
    computation stable near zero. Both inputs must be non-negative.

    Raises:
        ValueError: If either input contains a negative value.
    """
    yt, yp = _coerce_regression_targets(y_true, y_pred)
    if np.any(yt < 0) or np.any(yp < 0):
        raise ValueError("mean_squared_logarithmic_error requires non-negative y_true and y_pred")
    log_diff = np.log1p(yt) - np.log1p(yp)
    return float(np.mean(log_diff * log_diff))

233 

234 

def explained_variance_score(y_true: np.ndarray, y_pred: np.ndarray, epsilon: float = _EPSILON) -> float:
    """Compute the explained variance score: ``1 - Var(residual) / Var(y_true)``.

    When the target variance is (near) zero, returns 1.0 if the residual
    variance is also (near) zero, otherwise 0.0.
    """
    yt, yp = _coerce_regression_targets(y_true, y_pred)
    var_residual = float(np.var(yt - yp))
    var_true = float(np.var(yt))
    tol = float(epsilon)
    if var_true <= tol:
        return 1.0 if var_residual <= tol else 0.0
    return 1.0 - var_residual / var_true

244 

245 

def median_absolute_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Return the median of the absolute deviations between predictions and targets."""
    yt, yp = _coerce_regression_targets(y_true, y_pred)
    abs_errors = np.abs(yt - yp)
    return float(np.median(abs_errors))

250 

251 

def mean_bias_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Compute the mean signed error; positive when predictions overshoot."""
    yt, yp = _coerce_regression_targets(y_true, y_pred)
    signed_errors = yp - yt
    return float(np.mean(signed_errors))

256 

257 

def balanced_accuracy_score(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Return the macro-averaged recall, balancing performance across classes.

    Raises:
        ValueError: If the inputs have different sample counts.
    """
    labels_true = _coerce_labels(y_true)
    labels_pred = _coerce_labels(y_pred)
    if labels_true.shape[0] != labels_pred.shape[0]:
        raise ValueError("y_true and y_pred must have the same number of samples")
    matrix, _ = _confusion_matrix(labels_true, labels_pred)
    per_class_recall = []
    for row in range(matrix.shape[0]):
        true_positive = float(matrix[row, row])
        # Recall = TP / (row total); classes with no true samples score 0.
        per_class_recall.append(_safe_divide(true_positive, float(np.sum(matrix[row, :]))))
    return float(np.mean(per_class_recall)) if per_class_recall else 0.0

271 

272 

def precision_recall_f1(
    y_true: ArrayLike,
    y_pred: ArrayLike,
    average: Literal["macro", "micro", "binary"] = "macro",
) -> tuple[float, float, float]:
    """Compute precision, recall, and F1 score with the requested averaging.

    Parameters:
        y_true: True labels (integer vector or one-hot matrix).
        y_pred: Predicted labels (integer vector or one-hot matrix).
        average: Averaging strategy — "macro" (unweighted mean of per-class
            scores), "micro" (pooled TP/FP/FN counts), or "binary" (scores of
            the positive class; requires exactly two observed classes).

    Returns:
        Tuple ``(precision, recall, f1)`` of floats.

    Raises:
        ValueError: If sample counts differ, or ``average="binary"`` is used
            when the number of observed classes is not two.
    """
    labels_true = _coerce_labels(y_true)
    labels_pred = _coerce_labels(y_pred)
    if labels_true.shape[0] != labels_pred.shape[0]:
        raise ValueError("y_true and y_pred must have the same number of samples")
    matrix, classes = _confusion_matrix(labels_true, labels_pred)
    if average == "micro":
        # Micro-averaging pools counts over all classes before taking ratios.
        tp = float(np.trace(matrix))
        fp = float(np.sum(np.sum(matrix, axis=0) - np.diag(matrix)))
        fn = float(np.sum(np.sum(matrix, axis=1) - np.diag(matrix)))
        precision = _safe_divide(tp, tp + fp)
        recall = _safe_divide(tp, tp + fn)
        f1 = _safe_divide(2 * precision * recall, precision + recall)
        return precision, recall, f1

    # Per-class scores; rows of the matrix are true labels, columns predictions.
    per_class_precision: list[float] = []
    per_class_recall: list[float] = []
    for idx, _ in enumerate(classes):
        tp = float(matrix[idx, idx])
        fp = float(np.sum(matrix[:, idx]) - tp)
        fn = float(np.sum(matrix[idx, :]) - tp)
        prec = _safe_divide(tp, tp + fp)
        rec = _safe_divide(tp, tp + fn)
        per_class_precision.append(prec)
        per_class_recall.append(rec)

    if average == "binary":
        if len(per_class_precision) != 2:
            raise ValueError("average='binary' is only defined for binary classification")
        # Convention: the larger observed label (classes[1]) is the positive class.
        precision = per_class_precision[1]
        recall = per_class_recall[1]
        f1 = _safe_divide(2 * precision * recall, precision + recall)
        return precision, recall, f1

    # Macro averaging: unweighted mean of the per-class scores.
    precision = float(np.mean(per_class_precision)) if per_class_precision else 0.0
    recall = float(np.mean(per_class_recall)) if per_class_recall else 0.0
    f1 = _safe_divide(2 * precision * recall, precision + recall)
    return precision, recall, f1

316 

317 

318# ----------------------------- 

319# Classification metrics and helpers 

320# ----------------------------- 

321 

322 

def softmax(logits: np.ndarray, axis: int = -1) -> np.ndarray:
    """Compute a numerically stable softmax along ``axis``.

    Subtracts the per-slice maximum before exponentiating so the largest
    exponent is ``exp(0) == 1``; the denominator is floored to avoid a
    division by zero.
    """
    scores = _to_float_array(logits)
    shifted = scores - np.max(scores, axis=axis, keepdims=True)
    exp_scores = np.exp(shifted)
    total = np.clip(np.sum(exp_scores, axis=axis, keepdims=True), _EPSILON, None)
    return cast(np.ndarray, exp_scores / total)

331 

332 

def cross_entropy(y_true: np.ndarray, logits: np.ndarray, epsilon: float = _EPSILON) -> float:
    """Compute mean cross-entropy from integer labels or one-hot vs logits.

    Parameters:
        y_true: Array-like of shape (n_samples,) of integer class labels, or
            one-hot array of shape (n_samples, n_classes).
        logits: Array-like raw scores, shape (n_samples, n_classes).
        epsilon: Small constant for numerical stability. Note: currently
            unused by the body — the stable log-softmax below needs no
            clipping; the parameter is kept for API symmetry.

    Returns:
        Mean cross-entropy (float); 0.0 for an empty batch.

    Raises:
        ValueError: If label length or one-hot shape does not match ``logits``.
    """
    logits = _to_float_array(logits)
    n = logits.shape[0]
    if n == 0:
        return 0.0
    # Stable log-softmax: shift by the row max so the largest exponent is exp(0).
    zmax = np.max(logits, axis=1, keepdims=True)
    logsumexp = zmax + np.log(np.sum(np.exp(logits - zmax), axis=1, keepdims=True))
    log_probs = logits - logsumexp  # (n, k)

    yt = np.asarray(y_true)
    if yt.ndim == 1:
        # integer labels
        yt = yt.reshape(-1)
        if yt.shape[0] != n:
            raise ValueError("y_true length must match logits batch size")
        # pick log prob at true class
        idx = (np.arange(n), yt.astype(int))
        nll = -log_probs[idx]
    else:
        # one-hot
        if yt.shape != logits.shape:
            raise ValueError("For one-hot y_true, shape must match logits")
        nll = -np.sum(yt * log_probs, axis=1)
    return float(np.mean(nll))

369 

370 

def log_loss(y_true: np.ndarray, y_prob: np.ndarray, epsilon: float = _EPSILON) -> float:
    """Compute mean log loss from integer/one-hot labels and probabilities.

    Parameters:
        y_true: Integer class labels of shape (n_samples,), or a one-hot array
            with the same shape as ``y_prob``.
        y_prob: Probabilities, shape (n_samples, n_classes).
        epsilon: Lower clip bound guarding against ``log(0)``.

    Returns:
        Mean negative log-likelihood (float).

    Raises:
        ValueError: If the label array does not match the batch size (integer
            labels) or the shape (one-hot labels) of ``y_prob``.
    """
    P = _to_float_array(y_prob)
    P = np.clip(P, float(epsilon), 1.0)
    yt = np.asarray(y_true)
    n = P.shape[0]
    if yt.ndim == 1:
        # Fix: validate the batch size up front (mirrors cross_entropy) instead
        # of letting fancy indexing fail with an opaque IndexError.
        if yt.shape[0] != n:
            raise ValueError("y_true length must match probabilities batch size")
        idx = (np.arange(n), yt.astype(int))
        nll = -np.log(P[idx])
    else:
        if yt.shape != P.shape:
            raise ValueError("For one-hot y_true, shape must match probabilities")
        nll = -np.sum(yt * np.log(P), axis=1)
    return float(np.mean(nll))

385 

386 

def accuracy(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Compute accuracy from integer/one-hot labels and logits/probabilities.

    ``y_pred`` may be class indices (n,), logits (n, k), or probabilities
    (n, k); ``y_true`` may be class indices (n,) or one-hot (n, k).

    Raises:
        ValueError: If the sample counts differ.
    """
    true_labels = _coerce_labels(y_true)
    pred_arr = np.asarray(y_pred)
    # 2-D predictions are per-class scores: take the argmax per row.
    pred_labels = np.argmax(pred_arr, axis=1) if pred_arr.ndim == 2 else pred_arr.reshape(-1).astype(int)
    if true_labels.shape[0] != pred_labels.shape[0]:
        raise ValueError("y_true and y_pred must have same number of samples")
    return float(np.mean(true_labels == pred_labels))

402 

403 

def partition_coefficient(U: np.ndarray) -> float:
    """Bezdek's Partition Coefficient (PC) in [1/k, 1]; higher means crisper.

    Parameters:
        U: Membership matrix of shape (n_samples, n_clusters).

    Returns:
        PC value as float (0.0 for an empty matrix).

    Raises:
        ValueError: If ``U`` is not two-dimensional.
    """
    memberships = np.asarray(U, dtype=float)
    if memberships.ndim != 2:
        raise ValueError("U must be a 2D membership matrix")
    n_samples = memberships.shape[0]
    if n_samples == 0:
        return 0.0
    return float(np.sum(memberships * memberships) / float(n_samples))

420 

421 

def classification_entropy(U: np.ndarray, epsilon: float = 1e-12) -> float:
    """Classification Entropy (CE); lower values indicate a crisper partition.

    Parameters:
        U: Membership matrix of shape (n_samples, n_clusters).
        epsilon: Lower clip bound to avoid ``log(0)``.

    Returns:
        CE value as float (0.0 for an empty matrix).

    Raises:
        ValueError: If ``U`` is not two-dimensional.
    """
    memberships = np.asarray(U, dtype=float)
    if memberships.ndim != 2:
        raise ValueError("U must be a 2D membership matrix")
    n_samples = memberships.shape[0]
    if n_samples == 0:
        return 0.0
    clipped = np.clip(memberships, float(epsilon), 1.0)
    return float(-np.sum(clipped * np.log(clipped)) / float(n_samples))

440 

441 

def xie_beni_index(
    X: np.ndarray,
    U: np.ndarray,
    C: np.ndarray,
    m: float = 2.0,
    epsilon: float = 1e-12,
) -> float:
    """Xie-Beni cluster-validity index (XB). Lower is better.

    ``XB = sum_i sum_k u_ik^m ||x_i - v_k||^2 / (n * min_{p!=q} ||v_p - v_q||^2)``

    Parameters:
        X: Data array, shape (n_samples, n_features) or (n_samples,).
        U: Membership matrix, shape (n_samples, n_clusters).
        C: Cluster centers, shape (n_clusters, n_features).
        m: Fuzzifier (>1).
        epsilon: Floor for the center-separation denominator.

    Returns:
        XB value as float; ``np.inf`` when fewer than two centers are given.

    Raises:
        ValueError: On dimensionality or shape mismatches between the inputs.
    """
    data = np.asarray(X, dtype=float)
    if data.ndim == 1:
        # Promote a plain vector to a single-feature column matrix.
        data = data.reshape(-1, 1)
    if data.ndim != 2:
        raise ValueError("X must be 1D or 2D array-like")
    memberships = np.asarray(U, dtype=float)
    centers = np.asarray(C, dtype=float)
    if memberships.ndim != 2:
        raise ValueError("U must be a 2D membership matrix")
    if centers.ndim != 2:
        raise ValueError("C must be a 2D centers matrix")
    if data.shape[0] != memberships.shape[0]:
        raise ValueError("X and U must have the same number of samples")
    if centers.shape[1] != data.shape[1]:
        raise ValueError("C and X must have the same number of features")
    if centers.shape[0] < 2:
        # Center separation is undefined for a single cluster.
        return float(np.inf)
    fuzzifier = float(m)

    # Squared sample-to-center distances, shape (n_samples, n_clusters).
    sq_dist = ((data[:, None, :] - centers[None, :, :]) ** 2).sum(axis=2)
    numerator = float(np.sum((memberships**fuzzifier) * sq_dist))

    # Minimum squared separation between two distinct centers.
    center_diff = centers[:, None, :] - centers[None, :, :]
    center_sq = (center_diff * center_diff).sum(axis=2)
    diag = np.arange(centers.shape[0])
    center_sq[diag, diag] = np.inf
    separation = max(float(np.min(center_sq)), float(epsilon))
    return numerator / (float(data.shape[0]) * separation)

495 

496 

def _regression_metrics_dict(y_true: ArrayLike, y_pred: ArrayLike) -> dict[str, MetricValue]:
    """Assemble the full suite of regression metrics as a plain dict.

    Empty inputs yield zeros for the residual-based metrics; MSLE falls back
    to NaN when either input contains negative values.
    """
    yt, yp = _coerce_regression_targets(y_true, y_pred)
    residuals = yt - yp
    # Guard each reduction against empty input (np.mean of an empty array warns).
    mse = float(np.mean(residuals * residuals)) if residuals.size else 0.0
    rmse = float(np.sqrt(mse))
    mae = float(np.mean(np.abs(residuals))) if residuals.size else 0.0
    median_ae = float(np.median(np.abs(residuals))) if residuals.size else 0.0
    mean_bias = float(np.mean(yp - yt)) if residuals.size else 0.0
    max_error = float(np.max(np.abs(residuals))) if residuals.size else 0.0
    std_error = float(np.std(residuals)) if residuals.size else 0.0
    explained_var = explained_variance_score(yt, yp)
    r2 = r2_score(yt, yp)
    # MAPE is undefined at zero targets; drop them (np.inf if all are zero).
    mape = mean_absolute_percentage_error(yt, yp, ignore_zero_targets=True)
    smape = symmetric_mean_absolute_percentage_error(yt, yp)
    try:
        msle = mean_squared_logarithmic_error(yt, yp)
    except ValueError:
        # Negative targets/predictions make MSLE undefined; report NaN.
        msle = float(np.nan)
    pearson = pearson_correlation(yt, yp)
    return {
        "mse": mse,
        "rmse": rmse,
        "mae": mae,
        "median_absolute_error": median_ae,
        "mean_bias_error": mean_bias,
        "max_error": max_error,
        "std_error": std_error,
        "explained_variance": explained_var,
        "r2": r2,
        "mape": mape,
        "smape": smape,
        "msle": msle,
        "pearson": pearson,
    }

531 

532 

def _classification_metrics_dict(
    y_true: ArrayLike,
    y_pred_labels: ArrayLike,
    probabilities: np.ndarray | None,
) -> dict[str, MetricValue]:
    """Assemble the classification metric suite as a plain dict.

    ``probabilities`` is optional; without it ``log_loss`` is reported as NaN.

    Raises:
        ValueError: If the true and predicted label counts differ.
    """
    labels_true = _coerce_labels(y_true)
    labels_pred = _coerce_labels(y_pred_labels)
    if labels_true.shape[0] != labels_pred.shape[0]:
        raise ValueError("y_true and y_pred must have the same number of samples")

    matrix, classes = _confusion_matrix(labels_true, labels_pred)
    accuracy_val = float(np.mean(labels_true == labels_pred)) if labels_true.size else 0.0
    bal_acc = balanced_accuracy_score(labels_true, labels_pred)
    prec_macro, rec_macro, f1_macro = precision_recall_f1(labels_true, labels_pred, average="macro")
    prec_micro, rec_micro, f1_micro = precision_recall_f1(labels_true, labels_pred, average="micro")

    values: dict[str, MetricValue] = {
        "accuracy": accuracy_val,
        "balanced_accuracy": bal_acc,
        "precision_macro": prec_macro,
        "recall_macro": rec_macro,
        "f1_macro": f1_macro,
        "precision_micro": prec_micro,
        "recall_micro": rec_micro,
        "f1_micro": f1_micro,
        "confusion_matrix": matrix,
        "classes": classes,
    }

    if probabilities is not None:
        # NOTE(review): log_loss indexes probability columns by the raw label
        # values, which assumes labels are 0..n_classes-1 — confirm for callers
        # with non-contiguous label sets.
        values["log_loss"] = log_loss(labels_true, probabilities)
    else:
        values["log_loss"] = float("nan")

    return values

568 

569 

@dataclass(frozen=True)
class MetricReport:
    """Immutable container exposing computed metrics by key or attribute."""

    # Which metric family the report holds.
    task: Literal["regression", "classification"]
    # Backing mapping of metric name -> value (float or ndarray).
    _values: Mapping[str, MetricValue]

    def __post_init__(self) -> None:  # pragma: no cover - trivial
        """Copy arrays and unbox NumPy scalars so the stored mapping is safe to share."""
        cleaned: dict[str, MetricValue] = {}
        for name, metric in self._values.items():
            if isinstance(metric, np.ndarray):
                cleaned[name] = metric.copy()
            elif isinstance(metric, (np.floating, np.integer)):
                cleaned[name] = float(metric)
            else:
                cleaned[name] = metric
        # Frozen dataclass: bypass the immutability guard for this one write.
        object.__setattr__(self, "_values", cleaned)

    def to_dict(self) -> dict[str, MetricValue]:
        """Return a shallow copy of the metrics, duplicating any array values."""
        copied: dict[str, MetricValue] = {}
        for name, metric in self._values.items():
            copied[name] = metric.copy() if isinstance(metric, np.ndarray) else metric
        return copied

    def __getitem__(self, key: str) -> MetricValue:
        """Dictionary-style access, e.g. ``report["mse"]``."""
        return self._values[key]

    def __getattr__(self, item: str) -> MetricValue:
        """Attribute-style access, e.g. ``report.mse``."""
        try:
            return self._values[item]
        except KeyError as exc:
            raise AttributeError(item) from exc

    def keys(self) -> Iterable[str]:  # pragma: no cover - simple passthrough
        """Expose the metric-name iterator of the backing mapping."""
        return self._values.keys()

607 

608 

def compute_metrics(
    y_true: ArrayLike,
    *,
    y_pred: ArrayLike | None = None,
    y_proba: ArrayLike | None = None,
    logits: ArrayLike | None = None,
    task: Literal["auto", "regression", "classification"] = "auto",
    metrics: Sequence[str] | None = None,
    custom_metrics: Mapping[str, MetricFn] | None = None,
) -> MetricReport:
    """Compute regression or classification metrics and return a report.

    Parameters:
        y_true: Ground-truth targets (regression values, integer labels, or one-hot).
        y_pred: Predicted values or class labels.
        y_proba: Class probabilities, shape (n_samples, n_classes).
        logits: Raw class scores; converted to probabilities via softmax.
        task: "regression", "classification", or "auto" to infer from the inputs.
        metrics: Optional subset (and ordering) of metric names to keep.
        custom_metrics: Extra ``name -> fn(y_true, y_pred)`` metrics to evaluate.

    Returns:
        A :class:`MetricReport` for the resolved task.

    Raises:
        ValueError: When the required prediction inputs are missing.
        KeyError: When ``metrics`` requests a name that was not computed.
    """
    resolved_task: Literal["regression", "classification"]

    if task == "regression":
        resolved_task = "regression"
    elif task == "classification":
        resolved_task = "classification"
    else:
        # task == "auto": treat as classification when probabilistic inputs are
        # present, predictions are 2D (per-class scores), or predictions are a
        # 1D integer vector; otherwise fall back to regression.
        arr_pred = None if y_pred is None else np.asarray(y_pred)
        if y_proba is not None or logits is not None:
            resolved_task = "classification"
        elif arr_pred is not None and arr_pred.ndim == 2:
            resolved_task = "classification"
        elif arr_pred is not None and arr_pred.ndim == 1 and np.issubdtype(arr_pred.dtype, np.integer):
            resolved_task = "classification"
        else:
            resolved_task = "regression"

    values: dict[str, MetricValue]

    if resolved_task == "regression":
        if y_pred is None:
            raise ValueError("Regression metrics require 'y_pred'.")
        values = _regression_metrics_dict(y_true, y_pred)
        if custom_metrics:
            yt_arr, yp_arr = _coerce_regression_targets(y_true, y_pred)
            for name, fn in custom_metrics.items():
                values[name] = float(fn(yt_arr, yp_arr))
    else:
        # Logits take precedence over explicitly supplied probabilities.
        probabilities: np.ndarray | None = None
        if logits is not None:
            probabilities = softmax(_to_float_array(logits), axis=1)
        elif y_proba is not None:
            probabilities = _ensure_probabilities(y_proba)

        # Prefer explicit labels; otherwise derive them from the probabilities.
        if y_pred is not None:
            pred_labels = y_pred
        elif probabilities is not None:
            pred_labels = np.argmax(probabilities, axis=1)
        else:
            raise ValueError("Classification metrics require 'y_pred', 'y_proba', or 'logits'.")

        values = _classification_metrics_dict(y_true, pred_labels, probabilities)

        if custom_metrics:
            labels_true = _coerce_labels(y_true)
            labels_pred = _coerce_labels(pred_labels)
            for name, fn in custom_metrics.items():
                values[name] = float(fn(labels_true, labels_pred))

    if metrics is not None:
        # Filter (and order) the report by the requested metric names.
        missing = [name for name in metrics if name not in values]
        if missing:
            raise KeyError(f"Requested metric(s) not available: {', '.join(missing)}")
        values = {name: values[name] for name in metrics}

    return MetricReport(task=resolved_task, _values=values)

676 

677 

class ANFISMetrics:
    """Metrics calculator utilities for ANFIS models."""

    @staticmethod
    def regression_metrics(y_true: ArrayLike, y_pred: ArrayLike) -> dict[str, MetricValue]:
        """Return a suite of regression metrics for predictions vs. targets."""
        report = compute_metrics(y_true, y_pred=y_pred, task="regression")
        return report.to_dict()

    @staticmethod
    def classification_metrics(
        y_true: ArrayLike,
        y_pred: ArrayLike | None = None,
        *,
        y_proba: ArrayLike | None = None,
        logits: ArrayLike | None = None,
    ) -> dict[str, MetricValue]:
        """Return common classification metrics for encoded targets and predictions.

        Thin wrapper over :func:`compute_metrics` with ``task="classification"``;
        at least one of ``y_pred``, ``y_proba``, or ``logits`` is required.
        """
        report = compute_metrics(
            y_true,
            y_pred=y_pred,
            y_proba=y_proba,
            logits=logits,
            task="classification",
        )
        return report.to_dict()

    @staticmethod
    def model_complexity_metrics(model: ANFIS) -> dict[str, int]:
        """Compute structural statistics for an ANFIS model instance.

        Premise parameters are counted by summing the parameter vectors of
        every membership function; consequent parameters come from the size of
        the consequent layer's parameter array.
        """
        n_inputs = model.n_inputs
        n_rules = model.n_rules

        n_premise_params = 0
        for mfs in model.membership_layer.membership_functions.values():
            for mf in mfs:
                n_premise_params += len(mf.parameters)

        n_consequent_params = model.consequent_layer.parameters.size

        return {
            "n_inputs": n_inputs,
            "n_rules": n_rules,
            "n_premise_parameters": n_premise_params,
            "n_consequent_parameters": int(n_consequent_params),
            "total_parameters": n_premise_params + int(n_consequent_params),
        }

725 

726 

def _resolve_predictor(model: object) -> _PredictorLike:
    """Return an object exposing ``predict`` for use in :func:`quick_evaluate`.

    Accepts either an object with a callable ``predict`` method directly, or
    an estimator wrapper that stores the fitted model on ``model_``.

    Raises:
        TypeError: When neither the object nor its ``model_`` attribute
            provides a callable ``predict``.
    """
    if callable(getattr(model, "predict", None)):
        return cast(_PredictorLike, model)

    wrapped = getattr(model, "model_", None)
    if wrapped is not None and callable(getattr(wrapped, "predict", None)):
        return cast(_PredictorLike, wrapped)

    raise TypeError(
        "quick_evaluate requires an object with a callable 'predict' method. Pass a fitted ANFIS "
        "model or estimator such as ANFISRegressor."
    )

743 

744 

def quick_evaluate(
    model: object,
    X_test: np.ndarray,
    y_test: np.ndarray,
    print_results: bool = True,
    task: Literal["auto", "regression", "classification"] = "auto",
) -> dict[str, float | list]:
    """Evaluate a trained ANFIS model or estimator on test data.

    Parameters:
        model: Object with a callable ``predict`` method, or an estimator
            wrapper exposing the fitted model on ``model_``.
        X_test: Test inputs; coerced to a float array.
        y_test: Test targets.
        print_results: When True, print a formatted summary to stdout.
        task: Forwarded to :func:`compute_metrics` ("auto" infers the task).

    Returns:
        Dict of metric name to value; NumPy-array metrics (e.g. the confusion
        matrix) are converted to nested lists.
    """
    predictor = _resolve_predictor(model)
    X_arr = np.asarray(X_test, dtype=float)
    y_vec = np.asarray(y_test)
    y_pred_raw = predictor.predict(X_arr)

    # Use class probabilities when the predictor provides them (enables log loss).
    y_proba = None
    predict_proba = getattr(predictor, "predict_proba", None)
    if callable(predict_proba):
        y_proba = predict_proba(X_arr)

    report = compute_metrics(
        y_vec,
        y_pred=y_pred_raw,
        y_proba=y_proba,
        task=task,
    )
    metrics = report.to_dict()

    if print_results:
        print("=" * 50)  # noqa: T201
        print("ANFIS Model Evaluation Results")  # noqa: T201
        print("=" * 50)  # noqa: T201
        if report.task == "regression":
            print(f"Mean Squared Error (MSE): {metrics['mse']:.6f}")  # noqa: T201
            print(f"Root Mean Squared Error: {metrics['rmse']:.6f}")  # noqa: T201
            print(f"Mean Absolute Error (MAE): {metrics['mae']:.6f}")  # noqa: T201
            print(f"Median Absolute Error: {metrics['median_absolute_error']:.6f}")  # noqa: T201
            print(f"R-squared (R²): {metrics['r2']:.4f}")  # noqa: T201
            print(f"Explained Variance: {metrics['explained_variance']:.4f}")  # noqa: T201
            print(f"Symmetric MAPE: {metrics['smape']:.2f}%")  # noqa: T201
            print(f"Max Error: {metrics['max_error']:.6f}")  # noqa: T201
            print(f"Std. of Error: {metrics['std_error']:.6f}")  # noqa: T201
        else:
            print(f"Accuracy: {metrics['accuracy']:.4f}")  # noqa: T201
            print(f"Balanced Accuracy: {metrics['balanced_accuracy']:.4f}")  # noqa: T201
            # Log loss is NaN when no probabilities were available; skip it then.
            if not np.isnan(metrics.get("log_loss", float("nan"))):
                print(f"Log Loss: {metrics['log_loss']:.6f}")  # noqa: T201
            print(f"Precision (macro): {metrics['precision_macro']:.4f}")  # noqa: T201
            print(f"Recall (macro): {metrics['recall_macro']:.4f}")  # noqa: T201
            print(f"F1-score (macro): {metrics['f1_macro']:.4f}")  # noqa: T201
        print("=" * 50)  # noqa: T201

    # For backward compatibility keep returning plain dict but include rich metrics.
    return {key: (value.tolist() if isinstance(value, np.ndarray) else value) for key, value in metrics.items()}