# Praktikum 2

Import library yang dibutuhkan:

```python
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import numpy as np
```

### Pengantar k-Means

```python
from sklearn.datasets import make_blobs

# Synthetic data set: 300 samples scattered around 4 blob centers.
# random_state pins the draw so the figure is reproducible.
blob_params = dict(n_samples=300, centers=4, cluster_std=0.60, random_state=0)
X, y_true = make_blobs(**blob_params)

# Scatter the unlabeled samples; X.T unpacks into the x and y coordinate rows.
plt.scatter(*X.T, s=50);
```

Nanti akan muncul tampilan seperti di bawah ini:

<figure><img src="https://3041032130-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2F5CvtE8Xh9b75jKUaRr5Y%2Fuploads%2FN45pRC0HROKeAaVqf4in%2FHasil%20Prak721.png?alt=media&#x26;token=af3bf119-92c7-49d4-90f1-91fd2b6df6f8" alt=""><figcaption></figcaption></figure>

```python
from sklearn.cluster import KMeans

# random_state and n_init are spelled out on purpose: the centroid
# initialisation is random and the default n_init differs between
# scikit-learn releases, so leaving them implicit lets the cluster numbering
# (and therefore the plot colors) change from run to run.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=0)
kmeans.fit(X)

# Cluster index assigned to every sample of X.
y_kmeans = kmeans.predict(X)
```

```python
# Color every sample by the cluster k-means assigned it to.
plt.scatter(*X.T, c=y_kmeans, s=50, cmap='viridis')

# Overlay the fitted cluster centers as large, semi-transparent black markers.
centers = kmeans.cluster_centers_
plt.scatter(*centers.T, c='black', s=200, alpha=0.5)
```

Nanti akan muncul tampilan seperti di bawah ini:

<figure><img src="https://3041032130-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2F5CvtE8Xh9b75jKUaRr5Y%2Fuploads%2FwjrfAt6qfQptb0T8PTzo%2FHasil%20Prak722.png?alt=media&#x26;token=1a13660f-ebf0-4f5e-99b9-497a5eb169c1" alt=""><figcaption></figcaption></figure>

### Algoritma Expectation-Maximization

```python
from sklearn.metrics import pairwise_distances_argmin

def find_clusters(X, n_clusters, rseed=2):
    """Cluster the rows of ``X`` with a basic k-means (expectation-maximization) loop.

    Parameters
    ----------
    X : ndarray
        Two-dimensional array with one sample per row.
    n_clusters : int
        Number of clusters; must be between 1 and the number of rows of ``X``.
    rseed : int, optional
        Seed for the random choice of the initial centers.

    Returns
    -------
    centers : ndarray
        One row per cluster, holding the mean of the samples assigned to it.
    labels : ndarray
        For every row of ``X``, the index of its nearest center.

    Raises
    ------
    ValueError
        If ``n_clusters`` is smaller than 1 or larger than the number of rows.
    """
    n_samples = X.shape[0]
    if not 1 <= n_clusters <= n_samples:
        raise ValueError(
            f"n_clusters must be between 1 and {n_samples}, got {n_clusters}"
        )

    # 1. Randomly choose distinct samples as the initial centers.
    rng = np.random.RandomState(rseed)
    seed_rows = rng.permutation(n_samples)[:n_clusters]
    centers = X[seed_rows]

    while True:
        # 2a. Assign each sample to its closest center.
        labels = pairwise_distances_argmin(X, centers)

        # 2b. Move each center to the mean of its members. A cluster that
        # lost all of its members keeps its previous center: the mean of an
        # empty selection is NaN, and NaN never compares equal to itself, so
        # the convergence test below would otherwise never succeed.
        new_centers = np.array([
            X[labels == k].mean(0) if np.any(labels == k) else centers[k]
            for k in range(n_clusters)
        ])

        # 2c. Stop once the centers no longer move.
        if np.all(centers == new_centers):
            break
        centers = new_centers

    return centers, labels

# Run the hand-written k-means with its default seed and color by label.
centers, labels = find_clusters(X, n_clusters=4)
plt.scatter(*X.T, c=labels, s=50, cmap='viridis');
```

Nanti akan muncul tampilan seperti di bawah ini:

<figure><img src="https://3041032130-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2F5CvtE8Xh9b75jKUaRr5Y%2Fuploads%2FYxxZFWgyPm1ASLCWv9t4%2FHasil%20Prak723.png?alt=media&#x26;token=1ef4c975-69ee-4b8e-b43d-57642d239cad" alt=""><figcaption></figcaption></figure>

#### Perubahan random

```python
# Same algorithm, but seeded with 0 instead of the default seed, so the
# initial centers are a different random choice of samples.
centers, labels = find_clusters(X, n_clusters=4, rseed=0)
plt.scatter(*X.T, c=labels, s=50, cmap='viridis');
```

Nanti akan muncul tampilan seperti di bawah ini:

<figure><img src="https://3041032130-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2F5CvtE8Xh9b75jKUaRr5Y%2Fuploads%2F26EEwp8wIbpufvaA9SoI%2FHasil%20Prak724.png?alt=media&#x26;token=b786d8a2-3ea4-48bb-b750-65bab0213f37" alt=""><figcaption></figcaption></figure>

#### Optimalisasi Jumlah Klaster

```python
# Ask k-means for 6 clusters. n_clusters is passed by keyword for readability
# and n_init is pinned because its default differs between scikit-learn
# releases, which would otherwise change the result shown here.
labels = KMeans(n_clusters=6, n_init=10, random_state=0).fit_predict(X)
plt.scatter(X[:, 0], X[:, 1], c=labels, s=50, cmap='viridis');
```

Nanti akan muncul tampilan seperti di bawah ini:

<figure><img src="https://3041032130-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2F5CvtE8Xh9b75jKUaRr5Y%2Fuploads%2FxglzM2ngMeEdqPR2VqgI%2FHasil%20Prak725.png?alt=media&#x26;token=e0ec0158-9352-42b5-ac48-4af888f088d5" alt=""><figcaption></figcaption></figure>

#### Batas Klaster yang Tidak Selalu Linier

```python
from sklearn.datasets import make_moons

# Replace X with 200 samples from make_moons (noise 0.05, fixed seed).
X, y = make_moons(n_samples=200, noise=0.05, random_state=0)
```

```python
# Cluster the two-moons data into 2 groups with k-means. n_init is pinned
# because its default differs between scikit-learn releases.
labels = KMeans(n_clusters=2, n_init=10, random_state=0).fit_predict(X)
plt.scatter(X[:, 0], X[:, 1], c=labels, s=50, cmap='viridis');
```

Nanti akan muncul tampilan seperti di bawah ini:

<figure><img src="https://3041032130-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2F5CvtE8Xh9b75jKUaRr5Y%2Fuploads%2FAps0SnjO5vf3mnKtmwRD%2FHasil%20Prak726.png?alt=media&#x26;token=68fdb861-dd3a-4b8c-a62b-e9bedc66c13b" alt=""><figcaption></figcaption></figure>

```python
from sklearn.cluster import SpectralClustering

# Spectral clustering on a nearest-neighbors graph, with the final labels
# assigned by k-means. random_state is fixed, like the KMeans calls in this
# document, so the k-means step gives the same labels on every run.
model = SpectralClustering(n_clusters=2, affinity='nearest_neighbors',
                           assign_labels='kmeans', random_state=0)
labels = model.fit_predict(X)
plt.scatter(X[:, 0], X[:, 1], c=labels, s=50, cmap='viridis');
```

Nanti akan muncul tampilan seperti di bawah ini:

<figure><img src="https://3041032130-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2F5CvtE8Xh9b75jKUaRr5Y%2Fuploads%2FMpUXu2fELtp9tkTTgvt9%2FHasil%20Prak727.png?alt=media&#x26;token=1fe6f31a-5b14-4908-bf96-ca1b418ae44b" alt=""><figcaption></figcaption></figure>

### Contoh Kasus 1: Karakter Angka

```python
from sklearn.datasets import load_digits
# Load the digits data set that ships with scikit-learn.
digits = load_digits()
# Shape of the feature matrix; the recorded output below is (1797, 64).
digits.data.shape
```

```
(1797, 64)
```

```python
# Apply k-means with 10 clusters to the digits features. n_init=10 is spelled
# out because scikit-learn 1.4 changes the default to 'auto'; pinning it keeps
# the result stable across releases and silences the FutureWarning.
kmeans = KMeans(n_clusters=10, n_init=10, random_state=0)
clusters = kmeans.fit_predict(digits.data)
# One center per cluster, each with as many values as there are features.
kmeans.cluster_centers_.shape
```

```
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  warnings.warn(
(10, 64)
```

```python
# View each 64-value cluster center as an 8x8 image, one per subplot.
fig, ax = plt.subplots(2, 5, figsize=(8, 3))
centers = kmeans.cluster_centers_.reshape(10, 8, 8)
for panel, center_image in zip(ax.flat, centers):
    panel.set_xticks([])
    panel.set_yticks([])
    panel.imshow(center_image, interpolation='nearest', cmap=plt.cm.binary)
```

Nanti akan muncul tampilan seperti di bawah ini:

<figure><img src="https://3041032130-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2F5CvtE8Xh9b75jKUaRr5Y%2Fuploads%2Fo2n43vKvY4z5dI4koy1Y%2FHasil%20Prak728.png?alt=media&#x26;token=d6673aae-8a7a-45fc-910d-388224bb126d" alt=""><figcaption></figcaption></figure>

```python
from scipy.stats import mode

# Cluster ids are arbitrary, so give every cluster the most common true digit
# found among its members.
labels = np.zeros_like(clusters)
for cluster_id in range(10):
    members = clusters == cluster_id
    labels[members] = mode(digits.target[members]).mode
```

```python
from sklearn.metrics import accuracy_score
# Compare the true digits with the labels mapped from the clusters.
accuracy_score(digits.target, labels)
```

```
0.7935447968836951
```

```python
from sklearn.metrics import confusion_matrix

mat = confusion_matrix(digits.target, labels)

# The matrix is transposed before plotting, so the x axis carries the true
# labels and the y axis the predicted ones (matching the axis titles below).
digit_names = digits.target_names
sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False,
            xticklabels=digit_names, yticklabels=digit_names)
plt.xlabel('true label')
plt.ylabel('predicted label');
```

Nanti akan muncul tampilan seperti di bawah ini:

<figure><img src="https://3041032130-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2F5CvtE8Xh9b75jKUaRr5Y%2Fuploads%2FxVJQ6lD3kQ39Ror225xz%2FHasil%20Prak729.png?alt=media&#x26;token=03c6ad96-0cec-4c68-b120-7e42d54872bb" alt=""><figcaption></figcaption></figure>

```python
from sklearn.manifold import TSNE

# Project the digits features down to 2 dimensions before clustering.
tsne = TSNE(n_components=2, init='random', random_state=0)
digits_proj = tsne.fit_transform(digits.data)

# Compute the clusters on the projection. n_init=10 is spelled out because
# scikit-learn 1.4 changes the default to 'auto'; pinning it keeps the result
# stable across releases and silences the FutureWarning.
kmeans = KMeans(n_clusters=10, n_init=10, random_state=0)
clusters = kmeans.fit_predict(digits_proj)

# Permute the labels: give every cluster the most common true digit found
# among its members.
labels = np.zeros_like(clusters)
for i in range(10):
    mask = (clusters == i)
    labels[mask] = mode(digits.target[mask])[0]

# Compute the accuracy.
accuracy_score(digits.target, labels)
```

```
/usr/local/lib/python3.10/dist-packages/sklearn/cluster/_kmeans.py:870: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  warnings.warn(
0.9415692821368948
```

### Contoh Kasus 2: Kompresi Citra

```python
from sklearn.datasets import load_sample_image
# Load the sample image "flower.jpg" provided by scikit-learn.
flower = load_sample_image("flower.jpg")
# Show it on axes with the tick marks removed.
ax = plt.axes(xticks=[], yticks=[])
ax.imshow(flower);
```

Nanti akan muncul tampilan seperti di bawah ini:

<figure><img src="https://3041032130-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2F5CvtE8Xh9b75jKUaRr5Y%2Fuploads%2FEQPUw8G0A7izyTkzTkRS%2FHasil%20Prak72A.png?alt=media&#x26;token=f5b06501-76ab-484b-a062-5c4bbe2ca0b3" alt=""><figcaption></figcaption></figure>

```python
# Array shape of the image; the recorded output below is (427, 640, 3).
flower.shape
```

```
(427, 640, 3)
```

```python
# Scale the channel values by 1/255 (presumably 8-bit values, giving [0, 1])
# and flatten the image into one row per pixel with 3 color columns.
# -1 lets NumPy infer the pixel count instead of hard-coding 427 * 640, so the
# cell keeps working if a differently sized image is loaded.
data = flower / 255.0
data = data.reshape(-1, 3)
data.shape
```

```
(273280, 3)
```

```python
def plot_pixels(data, title, colors=None, N=10000):
    """Scatter a random sample of pixels in the Red/Green and Red/Blue planes.

    Parameters
    ----------
    data : ndarray
        One pixel per row with three columns, read as red, green and blue.
        Both axes of each plot are limited to the range 0..1.
    title : str
        Title placed above the two plots.
    colors : ndarray, optional
        Per-pixel colors used to paint the points, indexed like ``data``.
        Defaults to ``data`` itself, i.e. every point in its own color.
    N : int, optional
        Maximum number of pixels to draw.
    """
    point_colors = data if colors is None else colors

    # Draw a fixed-seed random subset so the same pixels are plotted each call.
    sample = np.random.RandomState(0).permutation(data.shape[0])[:N]
    point_colors = point_colors[sample]
    red, green, blue = data[sample].T

    fig, axes = plt.subplots(1, 2, figsize=(16, 6))
    vertical_channels = (('Green', green), ('Blue', blue))
    for panel, (channel_name, channel) in zip(axes, vertical_channels):
        panel.scatter(red, channel, color=point_colors, marker='.')
        panel.set(xlabel='Red', ylabel=channel_name, xlim=(0, 1), ylim=(0, 1))

    fig.suptitle(title, size=20)
```

```python
# Plot a sample of the original pixels; with no colors argument every point
# is painted in its own pixel color.
plot_pixels(data, title='Input color space: 16 million possible colors')
```

Nanti akan muncul tampilan seperti di bawah ini:

<figure><img src="https://3041032130-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2F5CvtE8Xh9b75jKUaRr5Y%2Fuploads%2F9OJ9pRTgKTavCFKUBu83%2FHasil%20Prak72B.png?alt=media&#x26;token=7bb5c00b-3f1e-4e84-ae92-ef1471350e1a" alt=""><figcaption></figcaption></figure>

```python
import warnings
# Original note: "Fix NumPy issues." Be aware that this filter silences every
# warning for the rest of the session, not only those raised by the fit below.
warnings.simplefilter('ignore')

from sklearn.cluster import MiniBatchKMeans

# Reduce the pixel colors to 16 cluster centers. n_init and random_state are
# pinned so the resulting palette is the same on every run and does not depend
# on the scikit-learn release's defaults.
kmeans = MiniBatchKMeans(n_clusters=16, n_init=3, random_state=0)
kmeans.fit(data)

# Replace every pixel by the center of the cluster it belongs to.
new_colors = kmeans.cluster_centers_[kmeans.predict(data)]

plot_pixels(data, colors=new_colors, title="Reduced color space: 16 colors")
```

Nanti akan muncul tampilan seperti di bawah ini:

<figure><img src="https://3041032130-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2F5CvtE8Xh9b75jKUaRr5Y%2Fuploads%2FXU2IQBvuZiTIuSpxwnGA%2FHasil%20Prak72C.png?alt=media&#x26;token=a0afe4e2-06fe-4e10-aba6-9c9ddb2536e2" alt=""><figcaption></figcaption></figure>

```python
# Put the recolored pixels back into the original image layout.
flower_recolored = new_colors.reshape(flower.shape)

# Show the original and the recolored image side by side, without ticks.
fig, ax = plt.subplots(1, 2, figsize=(16, 6),
                       subplot_kw=dict(xticks=[], yticks=[]))
fig.subplots_adjust(wspace=0.05)
panels = zip(ax,
             (flower, flower_recolored),
             ('Original Image', '16-color Image'))
for panel, image, caption in panels:
    panel.imshow(image)
    panel.set_title(caption, size=16)
```

Nanti akan muncul tampilan seperti di bawah ini:

<figure><img src="https://3041032130-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2F5CvtE8Xh9b75jKUaRr5Y%2Fuploads%2FA1WiZw0ukEB7kO968raY%2FHasil%20Prak72D.png?alt=media&#x26;token=11579740-9e0a-48a9-8e95-d414cc97638b" alt=""><figcaption></figcaption></figure>
