Skip to content

Clustering

anfis_toolbox.clustering.FuzzyCMeans

FuzzyCMeans(
    n_clusters: int,
    m: float = 2.0,
    max_iter: int = 300,
    tol: float = 0.0001,
    random_state: int | None = None,
)

Fuzzy C-Means clustering.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| n_clusters | int | Number of clusters (>= 2). | required |
| m | float | Fuzzifier (> 1). Default 2.0. | 2.0 |
| max_iter | int | Maximum iterations. | 300 |
| tol | float | Convergence tolerance on centers. | 0.0001 |
| random_state | int \| None | Optional seed for reproducibility. | None |
Source code in anfis_toolbox/clustering.py
def __init__(
    self,
    n_clusters: int,
    m: float = 2.0,
    max_iter: int = 300,
    tol: float = 1e-4,
    random_state: int | None = None,
) -> None:
    """Initialize FuzzyCMeans with hyperparameters."""
    if n_clusters < 2:
        raise ValueError("n_clusters must be >= 2")
    if m <= 1:
        raise ValueError("m (fuzzifier) must be > 1")
    self.n_clusters = int(n_clusters)
    self.m = float(m)
    self.max_iter = int(max_iter)
    self.tol = float(tol)
    self.random_state = random_state
    self.cluster_centers_ = None
    self.membership_ = None

classification_entropy

classification_entropy() -> float

Classification Entropy (CE). Lower is better (crisper).

Source code in anfis_toolbox/clustering.py
def classification_entropy(self) -> float:
    """Classification Entropy (CE) of the fitted partition.

    Lower values indicate a crisper partition. The value is computed by the
    module-level ``_ce`` helper from the stored membership matrix.

    Raises:
        RuntimeError: If the model has not been fitted yet.
    """
    memberships = self.membership_
    if memberships is None:
        raise RuntimeError("Fit the model before calling classification_entropy().")
    return _ce(memberships)

fit

fit(X: ndarray) -> FuzzyCMeans

Fit the FCM model.

Sets cluster_centers_ (k,d) and membership_ (n,k).

Source code in anfis_toolbox/clustering.py
def fit(self, X: np.ndarray) -> FuzzyCMeans:
    """Fit the FCM model.

    Starting from the membership matrix returned by ``_init_membership``,
    alternates the membership update and the center update until the largest
    Euclidean shift of any center drops below ``tol`` or ``max_iter``
    iterations have run.

    Sets cluster_centers_ (k,d) and membership_ (n,k). The stored memberships
    are the ones the final centers were computed from.

    Args:
        X: Training samples, validated through ``_check_X``.

    Returns:
        The fitted estimator itself.

    Raises:
        ValueError: If there are fewer samples than clusters.
    """
    X = self._check_X(X)
    n, _ = X.shape
    if n < self.n_clusters:
        raise ValueError("n_samples must be >= n_clusters")
    m = self.m
    exponent = -1.0 / (m - 1.0)

    def update_centers(Um: np.ndarray) -> np.ndarray:
        # Membership-weighted mean of the samples for every cluster.
        num = Um.T @ X  # (k,d)
        # Floor the weight totals so an empty cluster cannot divide by zero.
        den = np.maximum(Um.sum(axis=0)[:, None], 1e-12)
        return num / den

    def update_membership(centers: np.ndarray) -> np.ndarray:
        # Floor the squared distances so a sample lying on a center stays finite.
        d2 = np.maximum(self._pairwise_sq_dists(X, centers), 1e-12)  # (n,k)
        # Scale each row by its smallest distance before exponentiating.
        # The scale cancels in the normalisation below, but it keeps every
        # power inside [0, 1] with at least one entry equal to 1, so small
        # fuzzifiers or large distances can no longer yield inf/inf or 0/0.
        ratio = d2 / np.min(d2, axis=1, keepdims=True)
        inv = ratio**exponent
        return inv / np.sum(inv, axis=1, keepdims=True)

    U = self._init_membership(n)
    C = update_centers(U**m)
    for _ in range(self.max_iter):
        U = update_membership(C)
        C_new = update_centers(U**m)
        shift = np.max(np.linalg.norm(C_new - C, axis=1))
        C = C_new
        if shift < self.tol:
            break
    self.membership_ = U
    self.cluster_centers_ = C
    return self

fit_predict

fit_predict(X: ndarray) -> np.ndarray

Fit and return hard labels via argmax of membership.

Source code in anfis_toolbox/clustering.py
def fit_predict(self, X: np.ndarray) -> np.ndarray:
    """Fit the model on ``X`` and return the hard cluster label of each sample.

    Runs ``fit(X)`` and then returns whatever ``predict(X)`` yields for the
    same data.
    """
    self.fit(X)
    hard_labels = self.predict(X)
    return hard_labels

partition_coefficient

partition_coefficient() -> float

Bezdek's Partition Coefficient (PC) in [1/k, 1]. Higher is crisper.

Source code in anfis_toolbox/clustering.py
def partition_coefficient(self) -> float:
    """Bezdek's Partition Coefficient (PC) of the fitted partition.

    The value lies in [1/k, 1]; higher means crisper. It is computed by the
    module-level ``_pc`` helper from the stored membership matrix.

    Raises:
        RuntimeError: If the model has not been fitted yet.
    """
    memberships = self.membership_
    if memberships is None:
        raise RuntimeError("Fit the model before calling partition_coefficient().")
    return _pc(memberships)

predict

predict(X: ndarray) -> np.ndarray

Return hard labels via argmax of predict_proba.

Source code in anfis_toolbox/clustering.py
def predict(self, X: np.ndarray) -> np.ndarray:
    """Return the hard cluster label of every sample.

    Each label is the column index of the largest membership degree in the
    matching row of ``predict_proba(X)``.
    """
    return np.argmax(self.predict_proba(X), axis=1)

predict_proba

predict_proba(X: ndarray) -> np.ndarray

Return membership degrees for samples to clusters (rows sum to 1).

Source code in anfis_toolbox/clustering.py
def predict_proba(self, X: np.ndarray) -> np.ndarray:
    """Return membership degrees for samples to clusters (rows sum to 1).

    Memberships are proportional to the squared sample-to-center distance
    raised to ``-1 / (m - 1)`` and normalised per sample.

    Args:
        X: Samples to score, validated through ``_check_X``.

    Returns:
        Array with one row per sample and one column per cluster.

    Raises:
        RuntimeError: If the model has not been fitted yet.
    """
    if self.cluster_centers_ is None:
        raise RuntimeError("Call fit() before predict_proba().")
    X = self._check_X(X)
    # Floor the squared distances so a sample lying on a center stays finite.
    d2 = np.maximum(self._pairwise_sq_dists(X, self.cluster_centers_), 1e-12)
    # Scale each row by its smallest distance before exponentiating. The
    # scale cancels in the normalisation, but it keeps every power inside
    # [0, 1] with at least one entry equal to 1, so a fuzzifier close to 1
    # or very large distances cannot produce inf/inf or 0/0 (NaN) rows.
    ratio = d2 / np.min(d2, axis=1, keepdims=True)
    inv = ratio ** (-1.0 / (self.m - 1.0))
    return inv / np.sum(inv, axis=1, keepdims=True)

transform

transform(X: ndarray) -> np.ndarray

Alias for predict_proba.

Source code in anfis_toolbox/clustering.py
def transform(self, X: np.ndarray) -> np.ndarray:
    """Return the membership degrees of ``X``; same result as ``predict_proba``."""
    memberships = self.predict_proba(X)
    return memberships

xie_beni_index

xie_beni_index(X: ndarray) -> float

Xie-Beni index (XB). Lower is better.

$$\mathrm{XB} = \frac{\sum_{i}\sum_{k} u_{ik}^{m}\,\lVert x_i - v_k \rVert^{2}}{n \cdot \min_{p \neq q} \lVert v_p - v_q \rVert^{2}}$$

Source code in anfis_toolbox/clustering.py
def xie_beni_index(self, X: np.ndarray) -> float:
    """Xie-Beni index (XB) of the fitted partition. Lower is better.

    XB = sum_i sum_k u_ik^m ||x_i - v_k||^2 / (n * min_{p!=q} ||v_p - v_q||^2)

    The value is computed by the module-level ``_xb`` helper from ``X``
    (validated through ``_check_X``), the stored memberships, the stored
    centers and the fuzzifier ``m``.

    Raises:
        RuntimeError: If the model has not been fitted yet.
    """
    is_fitted = self.membership_ is not None and self.cluster_centers_ is not None
    if not is_fitted:
        raise RuntimeError("Fit the model before calling xie_beni_index().")
    samples = self._check_X(X)
    return _xb(samples, self.membership_, self.cluster_centers_, m=self.m)