Optimization¶

anfis_toolbox.optim.base.BaseTrainer ¶

Bases: ABC

Shared training loop for ANFIS trainers.

compute_loss `abstractmethod` ¶

compute_loss(
    model: ModelLike, X: ndarray, y: ndarray
) -> float

Compute loss for the provided data without mutating the model.

Source code in anfis_toolbox/optim/base.py

@abstractmethod
def compute_loss(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> float:  # pragma: no cover - abstract
    """Compute loss for the provided data without mutating the model.

    ``fit`` calls this hook on the prepared validation split whenever a
    validation pass is due and converts the result with ``float``.

    Parameters:
        model: The model to evaluate.
        X (np.ndarray): Inputs to evaluate on.
        y (np.ndarray): Targets matching ``X``.

    Returns:
        float: The loss value for ``(X, y)``.
    """

fit ¶

fit(
    model: ModelLike,
    X: ndarray,
    y: ndarray,
    *,
    validation_data: tuple[ndarray, ndarray] | None = None,
    validation_frequency: int = 1,
) -> TrainingHistory

Train model on (X, y) and optionally evaluate on validation data.

Returns a dictionary containing the per-epoch training losses and, when validation_data is provided, the validation losses (aligned with the training epochs; epochs without validation are recorded as None).

Source code in anfis_toolbox/optim/base.py

def fit(
    self,
    model: ModelLike,
    X: np.ndarray,
    y: np.ndarray,
    *,
    validation_data: tuple[np.ndarray, np.ndarray] | None = None,
    validation_frequency: int = 1,
) -> TrainingHistory:
    """Train ``model`` on ``(X, y)`` and optionally evaluate on validation data.

    The loop reads ``epochs``, ``batch_size``, ``shuffle`` and ``verbose`` from
    the trainer instance (falling back to ``1``, ``None``, ``True`` and
    ``False`` when an attribute is absent). With ``batch_size=None`` each epoch
    is a single ``train_step`` on the full data; otherwise the data is visited
    in mini-batches and the epoch loss is the mean of the batch losses.

    Parameters:
        model: The model to be trained.
        X (np.ndarray): Training inputs.
        y (np.ndarray): Training targets.
        validation_data: Optional ``(X_val, y_val)`` pair evaluated with
            ``compute_loss``.
        validation_frequency: Evaluate validation loss every this many epochs.

    Returns:
        TrainingHistory: A dictionary with the per-epoch training losses under
        ``"train"`` and, when ``validation_data`` is provided, the validation
        losses under ``"val"`` (aligned with the training epochs; epochs
        without validation are recorded as ``None``).

    Raises:
        ValueError: If ``validation_frequency`` is smaller than 1, or if
            ``batch_size`` is set but smaller than 1.
    """
    if validation_frequency < 1:
        raise ValueError("validation_frequency must be >= 1")

    # Validate before any (possibly expensive) state initialisation. A negative
    # batch size would otherwise yield an empty batch range and silently report
    # a loss of 0.0 for every epoch without training at all.
    batch_size = getattr(self, "batch_size", None)
    if batch_size is not None and batch_size < 1:
        raise ValueError("batch_size must be >= 1 or None")

    X_train, y_train = self._prepare_training_data(model, X, y)
    state = self.init_state(model, X_train, y_train)

    prepared_val: tuple[np.ndarray, np.ndarray] | None = None
    if validation_data is not None:
        prepared_val = self._prepare_validation_data(model, *validation_data)

    epochs = int(getattr(self, "epochs", 1))
    shuffle = bool(getattr(self, "shuffle", True))
    verbose = bool(getattr(self, "verbose", False))

    train_history: list[float] = []
    val_history: list[float | None] = []

    n_samples = X_train.shape[0]
    for epoch_idx in range(epochs):
        epoch_losses: list[float] = []
        if batch_size is None:
            loss, state = self.train_step(model, X_train, y_train, state)
            epoch_losses.append(float(loss))
        else:
            # Start from a fresh ordering every epoch so results for a given
            # global NumPy seed do not depend on earlier permutations.
            indices = np.arange(n_samples)
            if shuffle:
                np.random.shuffle(indices)
            for start in range(0, n_samples, batch_size):
                batch_idx = indices[start : start + batch_size]
                loss, state = self.train_step(
                    model,
                    X_train[batch_idx],
                    y_train[batch_idx],
                    state,
                )
                epoch_losses.append(float(loss))

        # An empty epoch (no samples) is reported as 0.0 rather than NaN.
        epoch_loss = float(np.mean(epoch_losses)) if epoch_losses else 0.0
        train_history.append(epoch_loss)

        val_loss: float | None = None
        if prepared_val is not None:
            if (epoch_idx + 1) % validation_frequency == 0:
                X_val, y_val = prepared_val
                val_loss = float(self.compute_loss(model, X_val, y_val))
            # Skipped epochs still get a ``None`` entry so "val" stays aligned
            # with "train".
            val_history.append(val_loss)

        self._log_epoch(epoch_idx, epoch_loss, val_loss, verbose)

    result: TrainingHistory = {"train": train_history}
    if prepared_val is not None:
        result["val"] = val_history
    return result

init_state `abstractmethod` ¶

init_state(model: ModelLike, X: ndarray, y: ndarray) -> Any

Initialize and return any optimizer-specific state.

Called once before training begins. Trainers that don't require state may return None.

Parameters:

Name	Type	Description	Default
`model`	`ModelLike`	The model to be trained.	required
`X`	`ndarray`	The full training inputs.	required
`y`	`ndarray`	The full training targets.	required

Returns:

Name	Type	Description
`Any`	`Any`	Optimizer state (or None) to be threaded through `train_step`.

Source code in anfis_toolbox/optim/base.py

@abstractmethod
def init_state(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> Any:  # pragma: no cover - abstract
    """Initialize and return any optimizer-specific state.

    Called once by ``fit`` before the first epoch, with the training data
    already passed through ``_prepare_training_data``. The returned object is
    handed to the first ``train_step`` call and then threaded from step to
    step. Trainers that don't require state may return None.

    Parameters:
        model (ModelLike): The model to be trained.
        X (np.ndarray): The full training inputs.
        y (np.ndarray): The full training targets.

    Returns:
        Any: Optimizer state (or None) to be threaded through ``train_step``.
    """
    raise NotImplementedError

train_step `abstractmethod` ¶

train_step(
    model: ModelLike, Xb: ndarray, yb: ndarray, state: Any
) -> tuple[float, Any]

Perform a single training step on a batch and return (loss, new_state).

Parameters:

Name	Type	Description	Default
`model`	`ModelLike`	The model to be trained.	required
`Xb`	`ndarray`	A batch of inputs.	required
`yb`	`ndarray`	A batch of targets.	required
`state`	`Any`	Optimizer state produced by `init_state`.	required

Returns:

Type	Description
`tuple[float, Any]`	The batch loss and the updated optimizer state.

Source code in anfis_toolbox/optim/base.py

@abstractmethod
def train_step(
    self, model: ModelLike, Xb: np.ndarray, yb: np.ndarray, state: Any
) -> tuple[float, Any]:  # pragma: no cover - abstract
    """Perform a single training step on a batch and return (loss, new_state).

    ``fit`` calls this once per epoch with the full training set when no batch
    size is configured, or once per mini-batch otherwise; the returned losses
    are averaged into the epoch loss and the returned state is passed to the
    next call.

    Parameters:
        model (ModelLike): The model to be trained.
        Xb (np.ndarray): A batch of inputs.
        yb (np.ndarray): A batch of targets.
        state (Any): Optimizer state produced by ``init_state``.

    Returns:
        tuple[float, Any]: The batch loss and the updated optimizer state.
    """
    raise NotImplementedError

anfis_toolbox.optim.hybrid.HybridTrainer `dataclass` ¶

HybridTrainer(
    learning_rate: float = 0.01,
    epochs: int = 100,
    verbose: bool = False,
    _loss_fn: MSELoss = MSELoss(),
)

Bases: BaseTrainer

Original Jang (1993) hybrid training: LSM for consequents + GD for antecedents.

Notes

This trainer assumes a single-output regression head. It is not compatible with `anfis_toolbox.model.TSKANFISClassifier` or the high-level `anfis_toolbox.classifier.ANFISClassifier` facade.

compute_loss ¶

compute_loss(
    model: ModelLike, X: ndarray, y: ndarray
) -> float

Compute the hybrid MSE loss on prepared data without side effects.

Source code in anfis_toolbox/optim/hybrid.py

def compute_loss(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> float:
    """Return the trainer's loss for ``(X, y)`` using a plain forward pass.

    The model is first checked with ``_require_regression_model`` and the data
    is run through ``_prepare_validation_data``. Predictions come from the
    antecedent pass followed by the consequent layer; no parameter update is
    performed here.

    Parameters:
        model: The model to evaluate.
        X (np.ndarray): Inputs to evaluate on.
        y (np.ndarray): Targets matching ``X``.

    Returns:
        float: The value reported by the trainer's loss function.
    """
    regression_model = self._require_regression_model(model)
    inputs, targets = self._prepare_validation_data(regression_model, X, y)
    firing_strengths = regression_model.forward_antecedents(inputs)
    predictions = regression_model.consequent_layer.forward(inputs, firing_strengths)
    loss_value = self._loss_fn.loss(targets, predictions)
    return float(loss_value)

init_state ¶

init_state(
    model: ModelLike, X: ndarray, y: ndarray
) -> None

Hybrid trainer doesn't maintain optimizer state; returns None.

Source code in anfis_toolbox/optim/hybrid.py

def init_state(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> None:
    """Validate the model type; the hybrid trainer keeps no optimizer state.

    ``_require_regression_model`` is invoked purely for its check, so an
    unsupported model is rejected before training starts. ``X`` and ``y`` are
    accepted for interface compatibility and are not used.

    Returns:
        None: Always, since there is no state to thread through ``train_step``.
    """
    # Result deliberately discarded: only the validation side effect matters.
    self._require_regression_model(model)

train_step ¶

train_step(
    model: ModelLike, Xb: ndarray, yb: ndarray, state: None
) -> tuple[float, None]

Perform one hybrid step on a batch and return (loss, state).

Equivalent to one iteration of the hybrid algorithm on the given batch.

Source code in anfis_toolbox/optim/hybrid.py

def train_step(self, model: ModelLike, Xb: np.ndarray, yb: np.ndarray, state: None) -> tuple[float, None]:
    """Run one hybrid iteration (least squares + gradient step) on a batch.

    The consequent parameters are refit in closed form from the current
    normalized firing strengths, then the loss gradient is propagated back
    through the consequent, normalization, rule and membership layers and the
    membership parameters are updated with ``self.learning_rate``.

    Parameters:
        model: The model to be trained.
        Xb (np.ndarray): A batch of inputs.
        yb (np.ndarray): A batch of targets; flattened for the least-squares fit.
        state: Unused; passed through unchanged.

    Returns:
        tuple[float, None]: The batch loss (measured after the least-squares
        refit and before the membership update) and the unchanged state.
    """
    model = self._require_regression_model(model)
    Xb, yb = self._prepare_training_data(model, Xb, yb)
    firing = model.forward_antecedents(Xb)

    # Least-squares design matrix: for every rule, the inputs augmented with a
    # bias column and scaled by that rule's normalized firing strength.
    bias = np.ones((Xb.shape[0], 1), dtype=float)
    augmented = np.hstack([Xb, bias])
    design = np.hstack([firing[:, rule : rule + 1] * augmented for rule in range(model.n_rules)])
    targets = yb.flatten()
    try:
        # A small ridge term keeps the normal equations well conditioned.
        ridge = 1e-6 * np.eye(design.shape[1])
        gram = design.T @ design + ridge
        coefficients = np.linalg.solve(gram, design.T @ targets)
    except np.linalg.LinAlgError:
        logging.getLogger(__name__).warning("Matrix singular in LSM, using pseudo-inverse")
        coefficients = np.linalg.pinv(design) @ targets
    model.consequent_layer.parameters = coefficients.reshape(model.n_rules, model.n_inputs + 1)

    # Backward pass: only the antecedent (membership) parameters are stepped;
    # the consequent parameter gradient returned by the layer is discarded.
    # NOTE(review): unlike the HybridAdam variant this step does not call
    # model.reset_gradients() first - confirm gradients do not accumulate.
    predictions = model.consequent_layer.forward(Xb, firing)
    batch_loss = self._loss_fn.loss(yb, predictions)
    grad_output = self._loss_fn.gradient(yb, predictions)
    grad_normalized, _ = model.consequent_layer.backward(grad_output)
    grad_firing = model.normalization_layer.backward(grad_normalized)
    grad_membership = model.rule_layer.backward(grad_firing)
    model.membership_layer.backward(grad_membership)
    model.update_membership_parameters(self.learning_rate)
    return float(batch_loss), state

anfis_toolbox.optim.hybrid_adam.HybridAdamTrainer `dataclass` ¶

HybridAdamTrainer(
    learning_rate: float = 0.001,
    beta1: float = 0.9,
    beta2: float = 0.999,
    epsilon: float = 1e-08,
    epochs: int = 100,
    verbose: bool = False,
    _loss_fn: MSELoss = MSELoss(),
)

Bases: BaseTrainer

Hybrid training: LSM for consequents + Adam for antecedents.

Notes

This variant also targets the regression ANFIS. It is not compatible with the classification head (`anfis_toolbox.model.TSKANFISClassifier`) or `anfis_toolbox.classifier.ANFISClassifier`.

compute_loss ¶

compute_loss(
    model: ModelLike, X: ndarray, y: ndarray
) -> float

Evaluate mean squared error on provided data without updates.

Source code in anfis_toolbox/optim/hybrid_adam.py

def compute_loss(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> float:
    """Evaluate the trainer's loss on ``(X, y)`` without applying any update.

    The model is checked with ``_require_regression_model``, the data goes
    through ``_prepare_validation_data``, and predictions are produced by the
    antecedent pass followed by the consequent layer.

    Parameters:
        model: The model to evaluate.
        X (np.ndarray): Inputs to evaluate on.
        y (np.ndarray): Targets matching ``X``.

    Returns:
        float: The loss reported by the trainer's loss function.
    """
    checked = self._require_regression_model(model)
    features, expected = self._prepare_validation_data(checked, X, y)
    rule_weights = checked.forward_antecedents(features)
    outputs = checked.consequent_layer.forward(features, rule_weights)
    score = self._loss_fn.loss(expected, outputs)
    return float(score)

init_state ¶

init_state(
    model: ModelLike, X: ndarray, y: ndarray
) -> dict[str, Any]

Initialize Adam moment tensors for membership parameters.

Source code in anfis_toolbox/optim/hybrid_adam.py

def init_state(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> dict[str, Any]:
    """Build the initial Adam state for the membership parameters.

    Only the ``"membership"`` part of the structure returned by
    ``zeros_like_structure`` is kept. The two moment entries are independent
    deep copies, so updating one never touches the other. ``X`` and ``y`` are
    not used.

    Returns:
        dict[str, Any]: ``{"m": ..., "v": ..., "t": 0}`` with the first and
        second moment structures and the step counter.
    """
    regression_model = self._require_regression_model(model)
    membership_zeros = zeros_like_structure(regression_model.get_parameters())["membership"]
    first_moment = deepcopy(membership_zeros)
    second_moment = deepcopy(membership_zeros)
    return {"m": first_moment, "v": second_moment, "t": 0}

train_step ¶

train_step(
    model: ModelLike,
    Xb: ndarray,
    yb: ndarray,
    state: dict[str, Any],
) -> tuple[float, dict[str, Any]]

Execute one hybrid iteration combining LSM and Adam updates.

Source code in anfis_toolbox/optim/hybrid_adam.py

def train_step(
    self, model: ModelLike, Xb: np.ndarray, yb: np.ndarray, state: dict[str, Any]
) -> tuple[float, dict[str, Any]]:
    """Run one hybrid iteration: least squares for consequents, Adam for antecedents.

    Gradients are reset first, the consequent parameters are refit in closed
    form from the current normalized firing strengths, and the membership
    gradients obtained from the backward pass are handed to
    ``_apply_adam_update`` together with ``state``.

    Parameters:
        model: The model to be trained.
        Xb (np.ndarray): A batch of inputs.
        yb (np.ndarray): A batch of targets; flattened for the least-squares fit.
        state (dict[str, Any]): Adam state produced by ``init_state``.

    Returns:
        tuple[float, dict[str, Any]]: The batch loss (measured after the
        least-squares refit) and the same ``state`` object.
    """
    model = self._require_regression_model(model)
    model.reset_gradients()
    Xb, yb = self._prepare_training_data(model, Xb, yb)
    rule_weights = model.forward_antecedents(Xb)

    # Consequents: one block of columns per rule, each being the bias-augmented
    # inputs scaled by that rule's normalized firing strength.
    n_rows = Xb.shape[0]
    inputs_with_bias = np.hstack([Xb, np.ones((n_rows, 1), dtype=float)])
    per_rule = [rule_weights[:, k : k + 1] * inputs_with_bias for k in range(model.n_rules)]
    lsm_matrix = np.hstack(per_rule)
    flat_targets = yb.flatten()
    try:
        # A small ridge term keeps the normal equations well conditioned.
        damping = 1e-6 * np.eye(lsm_matrix.shape[1])
        normal_matrix = lsm_matrix.T @ lsm_matrix + damping
        solution = np.linalg.solve(normal_matrix, lsm_matrix.T @ flat_targets)
    except np.linalg.LinAlgError:
        logging.getLogger(__name__).warning("Matrix singular in LSM, using pseudo-inverse")
        solution = np.linalg.pinv(lsm_matrix) @ flat_targets
    model.consequent_layer.parameters = solution.reshape(model.n_rules, model.n_inputs + 1)

    # Antecedents: backpropagate through the layers in reverse order; the
    # consequent parameter gradient returned by the layer is discarded.
    outputs = model.consequent_layer.forward(Xb, rule_weights)
    step_loss = self._loss_fn.loss(yb, outputs)
    upstream = self._loss_fn.gradient(yb, outputs)
    upstream, _ = model.consequent_layer.backward(upstream)
    upstream = model.normalization_layer.backward(upstream)
    upstream = model.rule_layer.backward(upstream)
    membership_grads = model.membership_layer.backward(upstream)
    self._apply_adam_update(model, membership_grads, state)
    return float(step_loss), state

anfis_toolbox.optim.sgd.SGDTrainer `dataclass` ¶

SGDTrainer(
    learning_rate: float = 0.01,
    epochs: int = 100,
    batch_size: None | int = None,
    shuffle: bool = True,
    verbose: bool = False,
    loss: LossFunction | str | None = None,
)

Bases: BaseTrainer

Stochastic gradient descent trainer for ANFIS.

Parameters:

Name	Type	Description	Default
`learning_rate`	`float`	Step size for gradient descent.	`0.01`
`epochs`	`int`	Number of passes over the data.	`100`
`batch_size`	`None \| int`	Mini-batch size; if None uses full batch.	`None`
`shuffle`	`bool`	Whether to shuffle data each epoch.	`True`
`verbose`	`bool`	Whether to log progress (delegated to model logging settings).	`False`

Notes

Uses the configurable loss provided via loss (defaults to mean squared error). The selected loss is responsible for adapting target shapes via prepare_targets. When used with ANFISClassifier and loss="cross_entropy" it trains on logits with the appropriate softmax gradient.

compute_loss ¶

compute_loss(
    model: ModelLike, X: ndarray, y: ndarray
) -> float

Return the loss for (X, y) without mutating model.

Source code in anfis_toolbox/optim/sgd.py

def compute_loss(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> float:
    """Return the configured loss for ``(X, y)`` from a plain forward pass.

    The loss function is obtained from ``_get_loss_fn`` and applied to the
    output of ``model.forward(X)``; ``X`` and ``y`` are used as given.

    Parameters:
        model: The model to evaluate.
        X (np.ndarray): Inputs to evaluate on.
        y (np.ndarray): Targets matching ``X``.

    Returns:
        float: The loss value.
    """
    criterion = self._get_loss_fn()
    predictions = model.forward(X)
    loss_value = criterion.loss(y, predictions)
    return float(loss_value)

init_state ¶

init_state(
    model: ModelLike, X: ndarray, y: ndarray
) -> None

SGD has no persistent optimizer state; returns None.

Source code in anfis_toolbox/optim/sgd.py

def init_state(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> None:
    """Return ``None``: plain SGD carries nothing from one step to the next.

    All arguments are accepted for interface compatibility and are ignored.
    """
    # Falling off the end returns None, which is the entire "state".

train_step ¶

train_step(
    model: ModelLike, Xb: ndarray, yb: ndarray, state: Any
) -> tuple[float, Any]

Perform one SGD step on a batch and return (loss, state).

Source code in anfis_toolbox/optim/sgd.py

def train_step(self, model: ModelLike, Xb: np.ndarray, yb: np.ndarray, state: Any) -> tuple[float, Any]:
    """Apply one SGD update on a batch.

    All work is delegated to ``_compute_loss_backward_and_update``; its return
    value is reported as the batch loss as-is.

    Parameters:
        model: The model to be trained.
        Xb (np.ndarray): A batch of inputs.
        yb (np.ndarray): A batch of targets.
        state: Passed through untouched.

    Returns:
        tuple[float, Any]: The batch loss and the unchanged ``state``.
    """
    return self._compute_loss_backward_and_update(model, Xb, yb), state

anfis_toolbox.optim.adam.AdamTrainer `dataclass` ¶

AdamTrainer(
    learning_rate: float = 0.001,
    beta1: float = 0.9,
    beta2: float = 0.999,
    epsilon: float = 1e-08,
    epochs: int = 100,
    batch_size: None | int = None,
    shuffle: bool = True,
    verbose: bool = False,
    loss: LossFunction | str | None = None,
)

Bases: BaseTrainer

Adam optimizer-based trainer for ANFIS.

Parameters:

Name	Type	Description	Default
`learning_rate`	`float`	Base step size (alpha).	`0.001`
`beta1`	`float`	Exponential decay rate for the first moment estimates.	`0.9`
`beta2`	`float`	Exponential decay rate for the second moment estimates.	`0.999`
`epsilon`	`float`	Small constant for numerical stability.	`1e-08`
`epochs`	`int`	Number of passes over the dataset.	`100`
`batch_size`	`None \| int`	If None, use full-batch; otherwise mini-batches of this size.	`None`
`shuffle`	`bool`	Whether to shuffle the data at each epoch when using mini-batches.	`True`
`verbose`	`bool`	Unused here; kept for API parity.	`False`

Notes

Supports configurable losses via the loss parameter. Defaults to mean squared error for regression, but can minimize other differentiable objectives such as categorical cross-entropy when used with ANFISClassifier.

compute_loss ¶

compute_loss(
    model: ModelLike, X: ndarray, y: ndarray
) -> float

Evaluate the configured loss on (X, y) without updating parameters.

Source code in anfis_toolbox/optim/adam.py

def compute_loss(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> float:
    """Score ``model`` on ``(X, y)`` with the configured loss; no update is applied.

    The loss function comes from ``_get_loss_fn`` and is applied to the output
    of ``model.forward(X)``.

    Parameters:
        model: The model to evaluate.
        X (np.ndarray): Inputs to evaluate on.
        y (np.ndarray): Targets matching ``X``.

    Returns:
        float: The loss value.
    """
    objective = self._get_loss_fn()
    outputs = model.forward(X)
    value = objective.loss(y, outputs)
    return float(value)

init_state ¶

init_state(
    model: ModelLike, X: ndarray, y: ndarray
) -> dict[str, Any]

Initialize Adam's first and second moments and time step.

Returns a dict with keys: params, m, v, t.

Source code in anfis_toolbox/optim/adam.py

def init_state(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> dict[str, Any]:
    """Create the initial Adam state from the model's current parameters.

    The moment structures are produced by two separate
    ``zeros_like_structure`` calls, so they are distinct objects. ``X`` and
    ``y`` are not used.

    Returns:
        dict[str, Any]: A dict with keys ``params`` (the structure returned by
        ``model.get_parameters()``), ``m``, ``v`` and ``t`` (step counter,
        starting at 0).
    """
    current = model.get_parameters()
    first_moment = zeros_like_structure(current)
    second_moment = zeros_like_structure(current)
    return dict(params=current, m=first_moment, v=second_moment, t=0)

train_step ¶

train_step(
    model: ModelLike,
    Xb: ndarray,
    yb: ndarray,
    state: dict[str, Any],
) -> tuple[float, dict[str, Any]]

One Adam step on a batch; returns (loss, updated_state).

Source code in anfis_toolbox/optim/adam.py

def train_step(
    self, model: ModelLike, Xb: np.ndarray, yb: np.ndarray, state: dict[str, Any]
) -> tuple[float, dict[str, Any]]:
    """Take one Adam step on a batch.

    Loss and gradients come from ``_compute_loss_and_grads``. The update itself
    is delegated to ``_apply_adam_step``, whose return value becomes the new
    step counter ``state["t"]``.

    Parameters:
        model: The model to be trained.
        Xb (np.ndarray): A batch of inputs.
        yb (np.ndarray): A batch of targets.
        state (dict[str, Any]): Adam state with keys ``params``, ``m``, ``v``, ``t``.

    Returns:
        tuple[float, dict[str, Any]]: The batch loss and the same ``state``
        dict, mutated in place.
    """
    batch_loss, gradients = self._compute_loss_and_grads(model, Xb, yb)
    step = cast(int, state["t"])
    # NOTE(review): params/m/v are presumably updated in place by the helper;
    # only the step counter is written back here.
    state["t"] = self._apply_adam_step(
        model,
        state["params"],
        gradients,
        state["m"],
        state["v"],
        step,
    )
    return batch_loss, state

anfis_toolbox.optim.rmsprop.RMSPropTrainer `dataclass` ¶

RMSPropTrainer(
    learning_rate: float = 0.001,
    rho: float = 0.9,
    epsilon: float = 1e-08,
    epochs: int = 100,
    batch_size: None | int = None,
    shuffle: bool = True,
    verbose: bool = False,
    loss: LossFunction | str | None = None,
)

Bases: BaseTrainer

RMSProp optimizer-based trainer for ANFIS.

Parameters:

Name	Type	Description	Default
`learning_rate`	`float`	Base step size (alpha).	`0.001`
`rho`	`float`	Exponential decay rate for the squared gradient moving average.	`0.9`
`epsilon`	`float`	Small constant for numerical stability.	`1e-08`
`epochs`	`int`	Number of passes over the dataset.	`100`
`batch_size`	`None \| int`	If None, use full-batch; otherwise mini-batches of this size.	`None`
`shuffle`	`bool`	Whether to shuffle the data at each epoch when using mini-batches.	`True`
`verbose`	`bool`	Unused here; kept for API parity.	`False`

Notes

Supports configurable losses via the loss parameter. Defaults to mean squared error for regression tasks but can be switched to other differentiable objectives such as categorical cross-entropy when training ANFISClassifier models.

compute_loss ¶

compute_loss(
    model: ModelLike, X: ndarray, y: ndarray
) -> float

Return the current loss value for (X, y) without modifying state.

Source code in anfis_toolbox/optim/rmsprop.py

def compute_loss(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> float:
    """Report the configured loss on ``(X, y)``; optimizer state is not touched.

    The loss function comes from ``_get_loss_fn`` and is applied to the output
    of ``model.forward(X)``.

    Parameters:
        model: The model to evaluate.
        X (np.ndarray): Inputs to evaluate on.
        y (np.ndarray): Targets matching ``X``.

    Returns:
        float: The loss value.
    """
    loss_function = self._get_loss_fn()
    model_output = model.forward(X)
    raw_loss = loss_function.loss(y, model_output)
    return float(raw_loss)

init_state ¶

init_state(
    model: ModelLike, X: ndarray, y: ndarray
) -> dict[str, Any]

Initialize RMSProp caches for consequents and membership scalars.

Source code in anfis_toolbox/optim/rmsprop.py

def init_state(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> dict[str, Any]:
    """Create the initial RMSProp state from the model's current parameters.

    ``X`` and ``y`` are not used.

    Returns:
        dict[str, Any]: A dict with keys ``params`` (the structure returned by
        ``model.get_parameters()``) and ``cache`` (the result of
        ``zeros_like_structure`` on those parameters, used as the running
        squared-gradient store).
    """
    current = model.get_parameters()
    squared_grad_cache = zeros_like_structure(current)
    return {"params": current, "cache": squared_grad_cache}

train_step ¶

train_step(
    model: ModelLike,
    Xb: ndarray,
    yb: ndarray,
    state: dict[str, Any],
) -> tuple[float, dict[str, Any]]

One RMSProp step on a batch; returns (loss, updated_state).

Source code in anfis_toolbox/optim/rmsprop.py

def train_step(
    self, model: ModelLike, Xb: np.ndarray, yb: np.ndarray, state: dict[str, Any]
) -> tuple[float, dict[str, Any]]:
    """Take one RMSProp step on a batch.

    Loss and gradients come from ``_compute_loss_and_grads``; the update is
    delegated to ``_apply_rmsprop_step`` using the ``params`` and ``cache``
    entries of ``state``. Nothing is reassigned in ``state`` here.

    Parameters:
        model: The model to be trained.
        Xb (np.ndarray): A batch of inputs.
        yb (np.ndarray): A batch of targets.
        state (dict[str, Any]): RMSProp state with keys ``params`` and ``cache``.

    Returns:
        tuple[float, dict[str, Any]]: The batch loss and the same ``state`` object.
    """
    batch_loss, gradients = self._compute_loss_and_grads(model, Xb, yb)
    parameters, cache = state["params"], state["cache"]
    self._apply_rmsprop_step(model, parameters, cache, gradients)
    return batch_loss, state

anfis_toolbox.optim.pso.PSOTrainer `dataclass` ¶

PSOTrainer(
    swarm_size: int = 20,
    inertia: float = 0.7,
    cognitive: float = 1.5,
    social: float = 1.5,
    epochs: int = 100,
    init_sigma: float = 0.1,
    clamp_velocity: None | tuple[float, float] = None,
    clamp_position: None | tuple[float, float] = None,
    random_state: None | int = None,
    verbose: bool = False,
    loss: LossFunction | str | None = None,
)

Bases: BaseTrainer

Particle Swarm Optimization (PSO) trainer for ANFIS.

Parameters:

Name	Type	Description	Default
`swarm_size`	`int`	Number of particles.	`20`
`inertia`	`float`	Inertia weight (w).	`0.7`
`cognitive`	`float`	Cognitive coefficient (c1).	`1.5`
`social`	`float`	Social coefficient (c2).	`1.5`
`epochs`	`int`	Number of iterations of the swarm update.	`100`
`init_sigma`	`float`	Std-dev for initializing particle positions around current params.	`0.1`
`clamp_velocity`	`None \| tuple[float, float]`	Optional (min, max) to clip velocities element-wise.	`None`
`clamp_position`	`None \| tuple[float, float]`	Optional (min, max) to clip positions element-wise.	`None`
`random_state`	`None \| int`	Seed for RNG to ensure determinism.	`None`
`verbose`	`bool`	Unused here; kept for API parity.	`False`

Notes

Optimizes the loss specified by loss (defaulting to mean squared error) by searching directly in parameter space without gradients. With ANFISClassifier you can set loss="cross_entropy" to optimize categorical cross-entropy on logits.

compute_loss ¶

compute_loss(model: Any, X: ndarray, y: ndarray) -> float

Evaluate the swarm's current parameters on (X, y) without mutation.

Source code in anfis_toolbox/optim/pso.py

def compute_loss(self, model: Any, X: np.ndarray, y: np.ndarray) -> float:
    """Return the loss of ``model`` on ``(X, y)`` as computed by ``_evaluate_loss``.

    This is a thin wrapper: the arguments are forwarded unchanged and the
    helper's result is returned as-is.

    Parameters:
        model: The model to evaluate with its current parameters.
        X (np.ndarray): Inputs to evaluate on.
        y (np.ndarray): Targets matching ``X``.

    Returns:
        float: The value produced by ``_evaluate_loss``.
    """
    current_loss = self._evaluate_loss(model, X, y)
    return current_loss

init_state ¶

init_state(
    model: ModelLike, X: ndarray, y: ndarray
) -> dict[str, Any]

Initialize PSO swarm state and return as a dict.

Source code in anfis_toolbox/optim/pso.py

def init_state(self, model: ModelLike, X: np.ndarray, y: np.ndarray) -> dict[str, Any]:
    """Create the swarm around the model's current parameters and score it.

    Every particle starts at the flattened current parameters plus Gaussian
    noise scaled by ``init_sigma``, with zero velocity. Each particle is then
    evaluated on ``(X, y)`` inside ``_temporary_parameters`` to seed the
    personal and global bests.

    Parameters:
        model: The model whose parameters seed the swarm.
        X (np.ndarray): Inputs used for the initial evaluation (cast to float).
        y (np.ndarray): Targets used for the initial evaluation (cast to float).

    Returns:
        dict[str, Any]: Swarm state with keys ``meta``, ``template``,
        ``positions``, ``velocities``, ``pbest_pos``, ``pbest_val``,
        ``gbest_pos``, ``gbest_val`` and ``rng``.
    """
    inputs = np.asarray(X, dtype=float)
    targets = np.asarray(y, dtype=float)
    rng = np.random.default_rng(self.random_state)

    template = model.get_parameters()
    theta0, meta = _flatten_params(template)
    n_particles, dim = self.swarm_size, theta0.size

    noise = rng.normal(size=(n_particles, dim))
    positions = theta0[None, :] + self.init_sigma * noise
    velocities = np.zeros((n_particles, dim), dtype=float)

    def _score(position: np.ndarray) -> float:
        """Loss of the model with ``position`` installed as its parameters."""
        candidate = _unflatten_params(position, meta, template)
        with self._temporary_parameters(model, candidate):
            return self._evaluate_loss(model, inputs, targets)

    # At start-up every particle's personal best is simply where it stands.
    pbest_pos = positions.copy()
    pbest_val = np.empty(n_particles, dtype=float)
    for particle in range(n_particles):
        pbest_val[particle] = _score(positions[particle])

    leader = int(np.argmin(pbest_val))
    state: dict[str, Any] = {
        "meta": meta,
        "template": template,
        "positions": positions,
        "velocities": velocities,
        "pbest_pos": pbest_pos,
        "pbest_val": pbest_val,
        "gbest_pos": pbest_pos[leader].copy(),
        "gbest_val": float(pbest_val[leader]),
        "rng": rng,
    }
    return state

train_step ¶

train_step(
    model: ModelLike,
    Xb: ndarray,
    yb: ndarray,
    state: dict[str, Any],
) -> tuple[float, dict[str, Any]]

Perform one PSO iteration over the swarm on a batch and return (best_loss, state).

Source code in anfis_toolbox/optim/pso.py

def train_step(
    self, model: ModelLike, Xb: np.ndarray, yb: np.ndarray, state: dict[str, Any]
) -> tuple[float, dict[str, Any]]:
    """Advance the swarm by one PSO iteration and install the best particle.

    Velocities are updated from the inertia, cognitive and social terms (then
    optionally clipped), positions are moved (then optionally clipped), and
    every particle is re-evaluated on ``(Xb, yb)``. Personal and global bests
    are updated on strict improvement only. Finally the model's parameters are
    set to the global best position.

    Parameters:
        model: The model being optimised.
        Xb (np.ndarray): A batch of inputs.
        yb (np.ndarray): A batch of targets.
        state (dict[str, Any]): Swarm state produced by ``init_state``.

    Returns:
        tuple[float, dict[str, Any]]: The global best loss seen so far and the
        same ``state`` dict, updated in place.
    """
    positions, velocities = state["positions"], state["velocities"]
    pbest_pos, pbest_val = state["pbest_pos"], state["pbest_val"]
    gbest_pos, gbest_val = state["gbest_pos"], state["gbest_val"]
    meta, template, rng = state["meta"], state["template"], state["rng"]

    # Draw order matters for reproducibility: cognitive factors first, then
    # social factors.
    swarm_shape = (self.swarm_size, positions.shape[1])
    r_cognitive = rng.random(size=swarm_shape)
    r_social = rng.random(size=swarm_shape)
    pull_personal = self.cognitive * r_cognitive * (pbest_pos - positions)
    pull_global = self.social * r_social * (gbest_pos[None, :] - positions)
    velocities = self.inertia * velocities + pull_personal + pull_global
    if self.clamp_velocity is not None:
        v_low, v_high = self.clamp_velocity
        velocities = np.clip(velocities, v_low, v_high)
    positions = positions + velocities
    if self.clamp_position is not None:
        p_low, p_high = self.clamp_position
        positions = np.clip(positions, p_low, p_high)

    # Score each particle with its parameters temporarily installed.
    for particle in range(self.swarm_size):
        candidate = _unflatten_params(positions[particle], meta, template)
        with self._temporary_parameters(model, candidate):
            score = self._evaluate_loss(model, Xb, yb)
        if not (score < pbest_val[particle]):
            continue
        # pbest_val / pbest_pos are the arrays held in ``state``; they are
        # updated in place.
        pbest_val[particle] = score
        pbest_pos[particle] = positions[particle].copy()
        if score < gbest_val:
            gbest_val = float(score)
            gbest_pos = positions[particle].copy()

    # Persist the new swarm and leave the model at the global best.
    state["positions"] = positions
    state["velocities"] = velocities
    state["pbest_pos"] = pbest_pos
    state["pbest_val"] = pbest_val
    state["gbest_pos"] = gbest_pos
    state["gbest_val"] = gbest_val
    model.set_parameters(_unflatten_params(gbest_pos, meta, template))
    return float(gbest_val), state

Optimization¶

anfis_toolbox.optim.base.BaseTrainer ¶

compute_loss abstractmethod ¶

fit ¶

init_state abstractmethod ¶

train_step abstractmethod ¶

anfis_toolbox.optim.hybrid.HybridTrainer dataclass ¶

compute_loss ¶

init_state ¶

train_step ¶

anfis_toolbox.optim.hybrid_adam.HybridAdamTrainer dataclass ¶

compute_loss ¶

init_state ¶

train_step ¶

anfis_toolbox.optim.sgd.SGDTrainer dataclass ¶

compute_loss ¶

init_state ¶

train_step ¶

anfis_toolbox.optim.adam.AdamTrainer dataclass ¶

compute_loss ¶

init_state ¶

train_step ¶

anfis_toolbox.optim.rmsprop.RMSPropTrainer dataclass ¶

compute_loss ¶

init_state ¶

train_step ¶

anfis_toolbox.optim.pso.PSOTrainer dataclass ¶

compute_loss ¶

init_state ¶

train_step ¶

compute_loss `abstractmethod` ¶

init_state `abstractmethod` ¶

train_step `abstractmethod` ¶

anfis_toolbox.optim.hybrid.HybridTrainer `dataclass` ¶

anfis_toolbox.optim.hybrid_adam.HybridAdamTrainer `dataclass` ¶

anfis_toolbox.optim.sgd.SGDTrainer `dataclass` ¶

anfis_toolbox.optim.adam.AdamTrainer `dataclass` ¶

anfis_toolbox.optim.rmsprop.RMSPropTrainer `dataclass` ¶

anfis_toolbox.optim.pso.PSOTrainer `dataclass` ¶