Skip to content

Optimization

anfis_toolbox.optim.base.BaseTrainer

Bases: ABC

Shared training loop for ANFIS trainers.

compute_loss abstractmethod

compute_loss(
    model: ModelLike, X: ndarray, y: ndarray
) -> float

Compute loss for the provided data without mutating the model.

Source code in anfis_toolbox/optim/base.py
@abstractmethod
def compute_loss(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> float:  # pragma: no cover - abstract
    """Compute loss for the provided data without mutating the model.

    ``fit`` calls this on the prepared validation data and converts the result
    with ``float``.

    Parameters:
        model: The model to evaluate.
        X (np.ndarray): Inputs to evaluate on.
        y (np.ndarray): Targets corresponding to ``X``.

    Returns:
        float: The loss value.

    Raises:
        NotImplementedError: Always in this abstract definition; concrete
            trainers must override it.
    """
    # Fail loudly (like the other abstract hooks) instead of returning None
    # when a subclass delegates here by mistake.
    raise NotImplementedError

fit

fit(
    model: ModelLike,
    X: ndarray,
    y: ndarray,
    *,
    validation_data: tuple[ndarray, ndarray] | None = None,
    validation_frequency: int = 1,
) -> TrainingHistory

Train model on (X, y) and optionally evaluate on validation data.

Returns a dictionary containing the per-epoch training losses and, when validation_data is provided, the validation losses (aligned with the training epochs; epochs without validation are recorded as None).

Source code in anfis_toolbox/optim/base.py
def fit(
    self,
    model: ModelLike,
    X: np.ndarray,
    y: np.ndarray,
    *,
    validation_data: tuple[np.ndarray, np.ndarray] | None = None,
    validation_frequency: int = 1,
) -> TrainingHistory:
    """Train ``model`` on ``(X, y)`` and optionally evaluate on validation data.

    Loop settings are read from optional attributes of the trainer:
    ``epochs`` (default 1), ``batch_size`` (default ``None``, meaning one
    full-batch step per epoch), ``shuffle`` (default ``True``) and ``verbose``
    (default ``False``).

    Parameters:
        model: The model to train.
        X (np.ndarray): Training inputs.
        y (np.ndarray): Training targets.
        validation_data: Optional ``(X_val, y_val)`` pair. It is passed through
            ``_prepare_validation_data`` once, before the first epoch.
        validation_frequency (int): Evaluate the validation loss every this
            many epochs.

    Returns:
        TrainingHistory: A dictionary containing the per-epoch training losses
        under ``"train"`` and, when ``validation_data`` is provided, the
        validation losses under ``"val"`` (aligned with the training epochs;
        epochs without validation are recorded as ``None``).

    Raises:
        ValueError: If ``validation_frequency`` is below 1, or if
            ``batch_size`` is set but below 1.
    """
    if validation_frequency < 1:
        raise ValueError("validation_frequency must be >= 1")

    epochs = int(getattr(self, "epochs", 1))
    batch_size = getattr(self, "batch_size", None)
    shuffle = bool(getattr(self, "shuffle", True))
    verbose = bool(getattr(self, "verbose", False))

    # A negative step would make the batch range empty (no training at all,
    # with a bogus 0.0 loss per epoch) and zero would fail inside ``range``
    # with an unrelated message, so reject both up front.
    if batch_size is not None and batch_size < 1:
        raise ValueError("batch_size must be >= 1 or None")

    X_train, y_train = self._prepare_training_data(model, X, y)
    state = self.init_state(model, X_train, y_train)

    prepared_val: tuple[np.ndarray, np.ndarray] | None = None
    if validation_data is not None:
        prepared_val = self._prepare_validation_data(model, *validation_data)

    train_history: list[float] = []
    val_history: list[float | None] = []

    n_samples = X_train.shape[0]
    for epoch_idx in range(epochs):
        epoch_losses: list[float] = []
        if batch_size is None:
            # Full-batch mode: a single step over all samples.
            loss, state = self.train_step(model, X_train, y_train, state)
            epoch_losses.append(float(loss))
        else:
            indices = np.arange(n_samples)
            if shuffle:
                np.random.shuffle(indices)
            for start in range(0, n_samples, batch_size):
                batch_idx = indices[start : start + batch_size]
                loss, state = self.train_step(
                    model,
                    X_train[batch_idx],
                    y_train[batch_idx],
                    state,
                )
                epoch_losses.append(float(loss))

        # The epoch loss is the mean of its batch losses; 0.0 when no batch ran
        # (mini-batch mode with zero samples).
        epoch_loss = float(np.mean(epoch_losses)) if epoch_losses else 0.0
        train_history.append(epoch_loss)

        val_loss: float | None = None
        if prepared_val is not None:
            if (epoch_idx + 1) % validation_frequency == 0:
                X_val, y_val = prepared_val
                val_loss = float(self.compute_loss(model, X_val, y_val))
            # Append even when skipped so "val" stays aligned with "train".
            val_history.append(val_loss)

        self._log_epoch(epoch_idx, epoch_loss, val_loss, verbose)

    result: TrainingHistory = {"train": train_history}
    if prepared_val is not None:
        result["val"] = val_history
    return result

init_state abstractmethod

init_state(model: ModelLike, X: ndarray, y: ndarray) -> Any

Initialize and return any optimizer-specific state.

Called once before training begins. Trainers that don't require state may return None.

Parameters:

Name Type Description Default
model ModelLike

The model to be trained.

required
X ndarray

The full training inputs.

required
y ndarray

The full training targets.

required

Returns:

Name Type Description
Any Any

Optimizer state (or None) to be threaded through train_step.

Source code in anfis_toolbox/optim/base.py
@abstractmethod
def init_state(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> Any:  # pragma: no cover - abstract
    """Build the optimizer-specific state used during training.

    Invoked a single time before the first training step. A trainer that keeps
    no state can simply return None.

    Parameters:
        model: Model that is about to be trained.
        X (np.ndarray): Complete set of training inputs.
        y (np.ndarray): Complete set of training targets.

    Returns:
        Any: The state object (possibly None) that is handed on to ``train_step``.
    """
    raise NotImplementedError()

train_step abstractmethod

train_step(
    model: ModelLike, Xb: ndarray, yb: ndarray, state: Any
) -> tuple[float, Any]

Perform a single training step on a batch and return (loss, new_state).

Parameters:

Name Type Description Default
model ModelLike

The model to be trained.

required
Xb ndarray

A batch of inputs.

required
yb ndarray

A batch of targets.

required
state Any

Optimizer state produced by init_state.

required

Returns:

Type Description
tuple[float, Any]

tuple[float, Any]: The batch loss and the updated optimizer state.

Source code in anfis_toolbox/optim/base.py
@abstractmethod
def train_step(
    self,
    model: ModelLike,
    Xb: np.ndarray,
    yb: np.ndarray,
    state: Any,
) -> tuple[float, Any]:  # pragma: no cover - abstract
    """Run one optimization step on a single batch.

    Parameters:
        model: Model being trained.
        Xb (np.ndarray): Inputs of the current batch.
        yb (np.ndarray): Targets of the current batch.
        state: Optimizer state as returned by ``init_state``.

    Returns:
        tuple[float, Any]: ``(loss, new_state)`` - the loss on this batch and
        the optimizer state to use for the next step.
    """
    raise NotImplementedError()

anfis_toolbox.optim.hybrid.HybridTrainer dataclass

HybridTrainer(
    learning_rate: float = 0.01,
    epochs: int = 100,
    verbose: bool = False,
    _loss_fn: MSELoss = MSELoss(),
)

Bases: BaseTrainer

Original Jang (1993) hybrid training: LSM for consequents + GD for antecedents.

Notes

This trainer assumes a single-output regression head. It is not compatible with `anfis_toolbox.model.TSKANFISClassifier` or the high-level `anfis_toolbox.classifier.ANFISClassifier` facade.

compute_loss

compute_loss(
    model: ModelLike, X: ndarray, y: ndarray
) -> float

Compute the hybrid MSE loss on prepared data without side effects.

Source code in anfis_toolbox/optim/hybrid.py
def compute_loss(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> float:
    """Return the trainer's loss for ``model`` on ``(X, y)``; no parameters are updated.

    The data is first passed through ``_prepare_validation_data``; predictions
    come from the antecedent forward pass followed by the consequent layer.
    """
    regressor = self._require_regression_model(model)
    inputs, targets = self._prepare_validation_data(regressor, X, y)
    rule_weights = regressor.forward_antecedents(inputs)
    predictions = regressor.consequent_layer.forward(inputs, rule_weights)
    loss_value = self._loss_fn.loss(targets, predictions)
    return float(loss_value)

init_state

init_state(
    model: ModelLike, X: ndarray, y: ndarray
) -> None

Hybrid trainer doesn't maintain optimizer state; returns None.

Source code in anfis_toolbox/optim/hybrid.py
def init_state(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> None:
    """Run the regression-model check on ``model``; there is no optimizer state.

    Returns:
        None: The hybrid trainer keeps nothing between steps.
    """
    # Only the check matters here; falling off the end returns None.
    self._require_regression_model(model)

train_step

train_step(
    model: ModelLike, Xb: ndarray, yb: ndarray, state: None
) -> tuple[float, None]

Perform one hybrid step on a batch and return (loss, state).

Equivalent to one iteration of the hybrid algorithm on the given batch.

Source code in anfis_toolbox/optim/hybrid.py
def train_step(self, model: ModelLike, Xb: np.ndarray, yb: np.ndarray, state: None) -> tuple[float, None]:
    """Run a single hybrid iteration on one batch and return ``(loss, state)``.

    The consequent parameters are refit by a regularised least-squares solve,
    then the loss gradient is propagated back through the antecedent layers and
    the membership parameters are updated with ``self.learning_rate``.
    ``state`` is returned unchanged.
    """
    regressor = self._require_regression_model(model)
    Xb, yb = self._prepare_training_data(regressor, Xb, yb)
    weights = regressor.forward_antecedents(Xb)

    # Design matrix: one [x, 1] block per rule, scaled by that rule's weight.
    bias = np.ones((Xb.shape[0], 1), dtype=float)
    augmented = np.concatenate([Xb, bias], axis=1)
    design = np.concatenate(
        [weights[:, rule : rule + 1] * augmented for rule in range(regressor.n_rules)],
        axis=1,
    )
    try:
        # Small ridge term added to the normal equations before solving.
        gram = design.T @ design + 1e-6 * np.eye(design.shape[1])
        coeffs = np.linalg.solve(gram, design.T @ yb.flatten())
    except np.linalg.LinAlgError:
        logging.getLogger(__name__).warning("Matrix singular in LSM, using pseudo-inverse")
        coeffs = np.linalg.pinv(design) @ yb.flatten()
    regressor.consequent_layer.parameters = coeffs.reshape(regressor.n_rules, regressor.n_inputs + 1)

    # Backward pass; the consequent layer's own parameter gradient is discarded.
    predictions = regressor.consequent_layer.forward(Xb, weights)
    batch_loss = self._loss_fn.loss(yb, predictions)
    grad_pred = self._loss_fn.gradient(yb, predictions)
    grad_norm_weights, _ = regressor.consequent_layer.backward(grad_pred)
    grad_weights = regressor.normalization_layer.backward(grad_norm_weights)
    grad_memberships = regressor.rule_layer.backward(grad_weights)
    regressor.membership_layer.backward(grad_memberships)
    regressor.update_membership_parameters(self.learning_rate)
    return float(batch_loss), state

anfis_toolbox.optim.hybrid_adam.HybridAdamTrainer dataclass

HybridAdamTrainer(
    learning_rate: float = 0.001,
    beta1: float = 0.9,
    beta2: float = 0.999,
    epsilon: float = 1e-08,
    epochs: int = 100,
    verbose: bool = False,
    _loss_fn: MSELoss = MSELoss(),
)

Bases: BaseTrainer

Hybrid training: LSM for consequents + Adam for antecedents.

Notes

This variant also targets the regression ANFIS. It is not compatible with the classification head (`anfis_toolbox.model.TSKANFISClassifier`) or `anfis_toolbox.classifier.ANFISClassifier`.

compute_loss

compute_loss(
    model: ModelLike, X: ndarray, y: ndarray
) -> float

Evaluate mean squared error on provided data without updates.

Source code in anfis_toolbox/optim/hybrid_adam.py
def compute_loss(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> float:
    """Compute the trainer's loss on ``(X, y)`` without performing any update.

    Returns:
        float: Loss between the prepared targets and the model's predictions.
    """
    checked = self._require_regression_model(model)
    prepared = self._prepare_validation_data(checked, X, y)
    X_eval, y_eval = prepared
    firing = checked.forward_antecedents(X_eval)
    outputs = checked.consequent_layer.forward(X_eval, firing)
    return float(self._loss_fn.loss(y_eval, outputs))

init_state

init_state(
    model: ModelLike, X: ndarray, y: ndarray
) -> dict[str, Any]

Initialize Adam moment tensors for membership parameters.

Source code in anfis_toolbox/optim/hybrid_adam.py
def init_state(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> dict[str, Any]:
    """Create zeroed Adam moment buffers for the membership parameters.

    Returns:
        dict[str, Any]: ``"m"`` and ``"v"`` are independent deep copies of the
        zeroed ``"membership"`` structure; ``"t"`` is the step counter,
        starting at 0.
    """
    regressor = self._require_regression_model(model)
    membership_zeros = zeros_like_structure(regressor.get_parameters())["membership"]
    # Separate copies so the first and second moments never share storage.
    first_moment = deepcopy(membership_zeros)
    second_moment = deepcopy(membership_zeros)
    return {"m": first_moment, "v": second_moment, "t": 0}

train_step

train_step(
    model: ModelLike,
    Xb: ndarray,
    yb: ndarray,
    state: dict[str, Any],
) -> tuple[float, dict[str, Any]]

Execute one hybrid iteration combining LSM and Adam updates.

Source code in anfis_toolbox/optim/hybrid_adam.py
def train_step(
    self,
    model: ModelLike,
    Xb: np.ndarray,
    yb: np.ndarray,
    state: dict[str, Any],
) -> tuple[float, dict[str, Any]]:
    """Run one hybrid iteration: least-squares consequents, then an Adam update.

    Gradients are reset first. The consequent parameters are refit by a
    regularised least-squares solve on the batch, and the membership gradients
    from the backward pass are handed to ``_apply_adam_update`` together with
    ``state``.

    Returns:
        tuple[float, dict[str, Any]]: The batch loss and the same ``state`` object.
    """
    regressor = self._require_regression_model(model)
    regressor.reset_gradients()
    Xb, yb = self._prepare_training_data(regressor, Xb, yb)
    weights = regressor.forward_antecedents(Xb)

    # Least-squares system: per-rule [x, 1] blocks weighted by the rule weights.
    bias = np.ones((Xb.shape[0], 1), dtype=float)
    augmented = np.concatenate([Xb, bias], axis=1)
    design = np.concatenate(
        [weights[:, rule : rule + 1] * augmented for rule in range(regressor.n_rules)],
        axis=1,
    )
    try:
        gram = design.T @ design + 1e-6 * np.eye(design.shape[1])
        coeffs = np.linalg.solve(gram, design.T @ yb.flatten())
    except np.linalg.LinAlgError:
        logging.getLogger(__name__).warning("Matrix singular in LSM, using pseudo-inverse")
        coeffs = np.linalg.pinv(design) @ yb.flatten()
    regressor.consequent_layer.parameters = coeffs.reshape(regressor.n_rules, regressor.n_inputs + 1)

    # Backward pass down to the membership layer, whose result feeds Adam.
    predictions = regressor.consequent_layer.forward(Xb, weights)
    batch_loss = self._loss_fn.loss(yb, predictions)
    grad_pred = self._loss_fn.gradient(yb, predictions)
    grad_norm_weights, _ = regressor.consequent_layer.backward(grad_pred)
    grad_weights = regressor.normalization_layer.backward(grad_norm_weights)
    grad_memberships = regressor.rule_layer.backward(grad_weights)
    membership_grads = regressor.membership_layer.backward(grad_memberships)
    self._apply_adam_update(regressor, membership_grads, state)
    return float(batch_loss), state

anfis_toolbox.optim.sgd.SGDTrainer dataclass

SGDTrainer(
    learning_rate: float = 0.01,
    epochs: int = 100,
    batch_size: None | int = None,
    shuffle: bool = True,
    verbose: bool = False,
    loss: LossFunction | str | None = None,
)

Bases: BaseTrainer

Stochastic gradient descent trainer for ANFIS.

Parameters:

Name Type Description Default
learning_rate float

Step size for gradient descent.

0.01
epochs int

Number of passes over the data.

100
batch_size None | int

Mini-batch size; if None uses full batch.

None
shuffle bool

Whether to shuffle data each epoch.

True
verbose bool

Whether to log progress (delegated to model logging settings).

False
Notes

Uses the configurable loss provided via loss (defaults to mean squared error). The selected loss is responsible for adapting target shapes via prepare_targets. When used with ANFISClassifier and loss="cross_entropy" it trains on logits with the appropriate softmax gradient.

compute_loss

compute_loss(
    model: ModelLike, X: ndarray, y: ndarray
) -> float

Return the loss for (X, y) without mutating model.

Source code in anfis_toolbox/optim/sgd.py
def compute_loss(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> float:
    """Evaluate the configured loss for ``(X, y)``; ``model`` is only run forward."""
    return float(self._get_loss_fn().loss(y, model.forward(X)))

init_state

init_state(
    model: ModelLike, X: ndarray, y: ndarray
) -> None

SGD has no persistent optimizer state; returns None.

Source code in anfis_toolbox/optim/sgd.py
def init_state(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> None:
    """SGD has no persistent optimizer state; returns None."""
    return None

train_step

train_step(
    model: ModelLike, Xb: ndarray, yb: ndarray, state: Any
) -> tuple[float, Any]

Perform one SGD step on a batch and return (loss, state).

Source code in anfis_toolbox/optim/sgd.py
def train_step(self, model: ModelLike, Xb: np.ndarray, yb: np.ndarray, state: Any) -> tuple[float, Any]:
    """Apply one SGD update on the batch.

    Returns:
        tuple[float, Any]: The loss reported by
        ``_compute_loss_backward_and_update`` and ``state`` passed through
        untouched.
    """
    return self._compute_loss_backward_and_update(model, Xb, yb), state

anfis_toolbox.optim.adam.AdamTrainer dataclass

AdamTrainer(
    learning_rate: float = 0.001,
    beta1: float = 0.9,
    beta2: float = 0.999,
    epsilon: float = 1e-08,
    epochs: int = 100,
    batch_size: None | int = None,
    shuffle: bool = True,
    verbose: bool = False,
    loss: LossFunction | str | None = None,
)

Bases: BaseTrainer

Adam optimizer-based trainer for ANFIS.

Parameters:

Name Type Description Default
learning_rate float

Base step size (alpha).

0.001
beta1 float

Exponential decay rate for the first moment estimates.

0.9
beta2 float

Exponential decay rate for the second moment estimates.

0.999
epsilon float

Small constant for numerical stability.

1e-08
epochs int

Number of passes over the dataset.

100
batch_size None | int

If None, use full-batch; otherwise mini-batches of this size.

None
shuffle bool

Whether to shuffle the data at each epoch when using mini-batches.

True
verbose bool

Unused here; kept for API parity.

False
Notes

Supports configurable losses via the loss parameter. Defaults to mean squared error for regression, but can minimize other differentiable objectives such as categorical cross-entropy when used with ANFISClassifier.

compute_loss

compute_loss(
    model: ModelLike, X: ndarray, y: ndarray
) -> float

Evaluate the configured loss on (X, y) without updating parameters.

Source code in anfis_toolbox/optim/adam.py
def compute_loss(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> float:
    """Compute the configured loss of ``model`` on ``(X, y)``; nothing is updated.

    Returns:
        float: Loss between ``y`` and ``model.forward(X)``.
    """
    criterion = self._get_loss_fn()
    outputs = model.forward(X)
    loss_value = criterion.loss(y, outputs)
    return float(loss_value)

init_state

init_state(
    model: ModelLike, X: ndarray, y: ndarray
) -> dict[str, Any]

Initialize Adam's first and second moments and time step.

Returns a dict with keys: params, m, v, t.

Source code in anfis_toolbox/optim/adam.py
def init_state(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> dict[str, Any]:
    """Set up the Adam state: parameters, zeroed moments and the step counter.

    Returns:
        dict[str, Any]: Keys ``params`` (the model's current parameters), ``m``
        and ``v`` (zero structures shaped like the parameters) and ``t`` (0).
    """
    current = model.get_parameters()
    adam_state: dict[str, Any] = {"params": current}
    adam_state["m"] = zeros_like_structure(current)
    adam_state["v"] = zeros_like_structure(current)
    adam_state["t"] = 0
    return adam_state

train_step

train_step(
    model: ModelLike,
    Xb: ndarray,
    yb: ndarray,
    state: dict[str, Any],
) -> tuple[float, dict[str, Any]]

One Adam step on a batch; returns (loss, updated_state).

Source code in anfis_toolbox/optim/adam.py
def train_step(
    self,
    model: ModelLike,
    Xb: np.ndarray,
    yb: np.ndarray,
    state: dict[str, Any],
) -> tuple[float, dict[str, Any]]:
    """Take a single Adam step on the batch.

    ``state["t"]`` is replaced by the step count returned from
    ``_apply_adam_step``; the same ``state`` dict is returned.

    Returns:
        tuple[float, dict[str, Any]]: ``(loss, updated_state)``.
    """
    batch_loss, gradients = self._compute_loss_and_grads(model, Xb, yb)
    step = cast(int, state["t"])
    state["t"] = self._apply_adam_step(
        model,
        state["params"],
        gradients,
        state["m"],
        state["v"],
        step,
    )
    return batch_loss, state

anfis_toolbox.optim.rmsprop.RMSPropTrainer dataclass

RMSPropTrainer(
    learning_rate: float = 0.001,
    rho: float = 0.9,
    epsilon: float = 1e-08,
    epochs: int = 100,
    batch_size: None | int = None,
    shuffle: bool = True,
    verbose: bool = False,
    loss: LossFunction | str | None = None,
)

Bases: BaseTrainer

RMSProp optimizer-based trainer for ANFIS.

Parameters:

Name Type Description Default
learning_rate float

Base step size (alpha).

0.001
rho float

Exponential decay rate for the squared gradient moving average.

0.9
epsilon float

Small constant for numerical stability.

1e-08
epochs int

Number of passes over the dataset.

100
batch_size None | int

If None, use full-batch; otherwise mini-batches of this size.

None
shuffle bool

Whether to shuffle the data at each epoch when using mini-batches.

True
verbose bool

Unused here; kept for API parity.

False
Notes

Supports configurable losses via the loss parameter. Defaults to mean squared error for regression tasks but can be switched to other differentiable objectives such as categorical cross-entropy when training ANFISClassifier models.

compute_loss

compute_loss(
    model: ModelLike, X: ndarray, y: ndarray
) -> float

Return the current loss value for (X, y) without modifying state.

Source code in anfis_toolbox/optim/rmsprop.py
def compute_loss(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> float:
    """Report the configured loss on ``(X, y)``, leaving optimizer state as it is."""
    objective = self._get_loss_fn()
    return float(objective.loss(y, model.forward(X)))

init_state

init_state(
    model: ModelLike, X: ndarray, y: ndarray
) -> dict[str, Any]

Initialize RMSProp caches for consequents and membership scalars.

Source code in anfis_toolbox/optim/rmsprop.py
def init_state(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> dict[str, Any]:
    """Create the RMSProp state for ``model``.

    Returns:
        dict[str, Any]: ``"params"`` holds the model's current parameters and
        ``"cache"`` a zero structure shaped like them.
    """
    current = model.get_parameters()
    zero_cache = zeros_like_structure(current)
    return {"params": current, "cache": zero_cache}

train_step

train_step(
    model: ModelLike,
    Xb: ndarray,
    yb: ndarray,
    state: dict[str, Any],
) -> tuple[float, dict[str, Any]]

One RMSProp step on a batch; returns (loss, updated_state).

Source code in anfis_toolbox/optim/rmsprop.py
def train_step(
    self,
    model: ModelLike,
    Xb: np.ndarray,
    yb: np.ndarray,
    state: dict[str, Any],
) -> tuple[float, dict[str, Any]]:
    """Take a single RMSProp step on the batch.

    Returns:
        tuple[float, dict[str, Any]]: The batch loss and the same ``state``
        dict, whose ``"params"`` and ``"cache"`` entries were handed to
        ``_apply_rmsprop_step``.
    """
    batch_loss, gradients = self._compute_loss_and_grads(model, Xb, yb)
    params, cache = state["params"], state["cache"]
    self._apply_rmsprop_step(model, params, cache, gradients)
    return batch_loss, state

anfis_toolbox.optim.pso.PSOTrainer dataclass

PSOTrainer(
    swarm_size: int = 20,
    inertia: float = 0.7,
    cognitive: float = 1.5,
    social: float = 1.5,
    epochs: int = 100,
    init_sigma: float = 0.1,
    clamp_velocity: None | tuple[float, float] = None,
    clamp_position: None | tuple[float, float] = None,
    random_state: None | int = None,
    verbose: bool = False,
    loss: LossFunction | str | None = None,
)

Bases: BaseTrainer

Particle Swarm Optimization (PSO) trainer for ANFIS.

Parameters:

Name Type Description Default
swarm_size int

Number of particles.

20
inertia float

Inertia weight (w).

0.7
cognitive float

Cognitive coefficient (c1).

1.5
social float

Social coefficient (c2).

1.5
epochs int

Number of iterations of the swarm update.

100
init_sigma float

Std-dev for initializing particle positions around current params.

0.1
clamp_velocity None | tuple[float, float]

Optional (min, max) to clip velocities element-wise.

None
clamp_position None | tuple[float, float]

Optional (min, max) to clip positions element-wise.

None
random_state None | int

Seed for RNG to ensure determinism.

None
verbose bool

Unused here; kept for API parity.

False
Notes

Optimizes the loss specified by loss (defaulting to mean squared error) by searching directly in parameter space without gradients. With ANFISClassifier you can set loss="cross_entropy" to optimize categorical cross-entropy on logits.

compute_loss

compute_loss(model: Any, X: ndarray, y: ndarray) -> float

Evaluate the swarm's current parameters on (X, y) without mutation.

Source code in anfis_toolbox/optim/pso.py
def compute_loss(self, model: Any, X: np.ndarray, y: np.ndarray) -> float:
    """Return the loss of ``model`` on ``(X, y)`` as computed by ``_evaluate_loss``."""
    loss_value = self._evaluate_loss(model, X, y)
    return loss_value

init_state

init_state(
    model: ModelLike, X: ndarray, y: ndarray
) -> dict[str, Any]

Initialize PSO swarm state and return as a dict.

Source code in anfis_toolbox/optim/pso.py
def init_state(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> dict[str, Any]:
    """Build the initial swarm around the model's current parameters.

    Particle positions are the flattened current parameters plus Gaussian noise
    scaled by ``init_sigma``; velocities start at zero. Every particle is scored
    on ``(X, y)`` inside the ``_temporary_parameters`` context to seed the
    personal and global bests.

    Returns:
        dict[str, Any]: Swarm state with keys ``meta``, ``template``,
        ``positions``, ``velocities``, ``pbest_pos``, ``pbest_val``,
        ``gbest_pos``, ``gbest_val`` and ``rng``.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    rng = np.random.default_rng(self.random_state)

    template = model.get_parameters()
    flat_start, meta = _flatten_params(template)
    swarm_shape = (self.swarm_size, flat_start.size)
    positions = flat_start[None, :] + self.init_sigma * rng.normal(size=swarm_shape)
    velocities = np.zeros(swarm_shape, dtype=float)

    # Score every particle once; each starts as its own personal best.
    pbest_pos = positions.copy()
    pbest_val = np.empty(self.swarm_size, dtype=float)
    for idx in range(self.swarm_size):
        candidate = _unflatten_params(positions[idx], meta, template)
        with self._temporary_parameters(model, candidate):
            pbest_val[idx] = self._evaluate_loss(model, X, y)

    leader = int(np.argmin(pbest_val))
    swarm_state: dict[str, Any] = {
        "meta": meta,
        "template": template,
        "positions": positions,
        "velocities": velocities,
        "pbest_pos": pbest_pos,
        "pbest_val": pbest_val,
        "gbest_pos": pbest_pos[leader].copy(),
        "gbest_val": float(pbest_val[leader]),
        "rng": rng,
    }
    return swarm_state

train_step

train_step(
    model: ModelLike,
    Xb: ndarray,
    yb: ndarray,
    state: dict[str, Any],
) -> tuple[float, dict[str, Any]]

Perform one PSO iteration over the swarm on a batch and return (best_loss, state).

Source code in anfis_toolbox/optim/pso.py
def train_step(
    self,
    model: ModelLike,
    Xb: np.ndarray,
    yb: np.ndarray,
    state: dict[str, Any],
) -> tuple[float, dict[str, Any]]:
    """Advance the swarm by one PSO iteration on the batch.

    Velocities and positions are updated (and clipped when ``clamp_velocity`` /
    ``clamp_position`` are set), every particle is re-scored on ``(Xb, yb)``,
    the personal and global bests are refreshed, and ``model`` is finally set
    to the global best parameters.

    Returns:
        tuple[float, dict[str, Any]]: The global best loss and the updated
        ``state`` dict.
    """
    positions, velocities = state["positions"], state["velocities"]
    pbest_pos, pbest_val = state["pbest_pos"], state["pbest_val"]
    gbest_pos, gbest_val = state["gbest_pos"], state["gbest_val"]
    meta, template, rng = state["meta"], state["template"], state["rng"]

    # Two independent uniform draws per coordinate, r1 first and then r2.
    draw_shape = (self.swarm_size, positions.shape[1])
    r1 = rng.random(size=draw_shape)
    r2 = rng.random(size=draw_shape)
    pull_personal = self.cognitive * r1 * (pbest_pos - positions)
    pull_global = self.social * r2 * (gbest_pos[None, :] - positions)
    velocities = self.inertia * velocities + pull_personal + pull_global
    if self.clamp_velocity is not None:
        v_low, v_high = self.clamp_velocity
        velocities = np.clip(velocities, v_low, v_high)
    positions = positions + velocities
    if self.clamp_position is not None:
        p_low, p_high = self.clamp_position
        positions = np.clip(positions, p_low, p_high)

    # Re-score each particle; the global best can only change when a particle
    # improves on its own personal best.
    for idx in range(self.swarm_size):
        candidate = _unflatten_params(positions[idx], meta, template)
        with self._temporary_parameters(model, candidate):
            candidate_loss = self._evaluate_loss(model, Xb, yb)
        if not (candidate_loss < pbest_val[idx]):
            continue
        pbest_val[idx] = candidate_loss
        pbest_pos[idx] = positions[idx].copy()
        if candidate_loss < gbest_val:
            gbest_val = float(candidate_loss)
            gbest_pos = positions[idx].copy()

    state.update(
        positions=positions,
        velocities=velocities,
        pbest_pos=pbest_pos,
        pbest_val=pbest_val,
        gbest_pos=gbest_pos,
        gbest_val=gbest_val,
    )
    model.set_parameters(_unflatten_params(gbest_pos, meta, template))
    return float(gbest_val), state