Load Fashion MNIST

In [23]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, datasets, callbacks

(x_train, y_train), (x_test, y_test) = datasets.fashion_mnist.load_data()

# Scale pixels to [0, 1], zero-pad the 28x28 images to 32x32, and add a channel axis
x_train = x_train.astype("float32") / 255
x_train = np.pad(x_train, ((0, 0), (2, 2), (2, 2)), constant_values=0.0)
x_train = np.expand_dims(x_train, -1)

x_test = x_test.astype("float32") / 255
x_test = np.pad(x_test, ((0, 0), (2, 2), (2, 2)), constant_values=0.0)
x_test = np.expand_dims(x_test, -1)
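
As a quick sanity check (an added check, not part of the original run), we can confirm that the padding turned the 28x28 images into 32x32 single-channel arrays scaled to [0, 1]:

# Quick sanity check on the preprocessed arrays
print(x_train.shape, x_test.shape)   # expected: (60000, 32, 32, 1) (10000, 32, 32, 1)
print(x_train.min(), x_train.max())  # expected: 0.0 1.0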

Display the first 10 images in the training dataset.

In [24]:
def display(
    images, n=10, size=(20, 3), cmap="gray_r", as_type="float32", save_to=None
):
    """
    Displays n random images from each one of the supplied arrays.
    """
    if images.max() > 1.0:
        images = images / 255.0
    elif images.min() < 0.0:
        images = (images + 1.0) / 2.0

    plt.figure(figsize=size)
    for i in range(n):
        _ = plt.subplot(1, n, i + 1)
        plt.imshow(images[i].astype(as_type), cmap=cmap)
        plt.axis("off")

    if save_to:
        plt.savefig(save_to)
        print(f"\nSaved to {save_to}")

    plt.show()

display(x_train)
[Figure: the first 10 training images]
In [36]:
IMAGE_SIZE = 32
BATCH_SIZE = 100
VALIDATION_SPLIT = 0.2
EMBEDDING_DIM = 2

The encoder model consists of three Conv2D layers, each with stride 2, followed by a Dense layer that maps the flattened features to the latent space. The latent dimension (EMBEDDING_DIM) is set to 2.

In [35]:
encoder_input = layers.Input(
    shape=(IMAGE_SIZE, IMAGE_SIZE, 1), name="encoder_input"
)
x = layers.Conv2D(32, (3, 3), strides=2, activation="relu", padding="same")(
    encoder_input
)
x = layers.Conv2D(64, (3, 3), strides=2, activation="relu", padding="same")(x)
x = layers.Conv2D(128, (3, 3), strides=2, activation="relu", padding="same")(x)

x = layers.Flatten()(x)
encoder_output = layers.Dense(EMBEDDING_DIM, name="encoder_output")(x)

encoder = models.Model(encoder_input, encoder_output)

encoder.summary()
Model: "functional_9"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ encoder_input (InputLayer)      │ (None, 32, 32, 1)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_21 (Conv2D)              │ (None, 16, 16, 32)     │           320 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_22 (Conv2D)              │ (None, 8, 8, 64)       │        18,496 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_23 (Conv2D)              │ (None, 4, 4, 128)      │        73,856 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ flatten_6 (Flatten)             │ (None, 2048)           │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ encoder_output (Dense)          │ (None, 2)              │         4,098 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 96,770 (378.01 KB)
 Trainable params: 96,770 (378.01 KB)
 Non-trainable params: 0 (0.00 B)
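
Each stride-2 convolution with "same" padding halves the spatial size (output = ceil(input / stride)), so the feature maps shrink 32 → 16 → 8 → 4 before flattening, matching the summary above. A minimal check of this, assuming the encoder model from the previous cell is in scope:

# Print the output shape of every encoder layer
for layer in encoder.layers:
    print(layer.name, layer.output.shape)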

The decoder maps a latent vector back onto an image. It is a stack of three Conv2DTranspose layers that terminates in a single Conv2D layer with a sigmoid activation.

In [37]:
shape_before_flattening = (4, 4, 128)  # spatial shape of the last encoder feature map
dense_d = np.prod(shape_before_flattening)  # 4 * 4 * 128 = 2048

decoder_input = layers.Input(shape=(EMBEDDING_DIM,), name="decoder_input")
x = layers.Dense(dense_d)(decoder_input)
x = layers.Reshape(shape_before_flattening)(x)
x = layers.Conv2DTranspose(
    128, (3,3), strides = 2, activation="relu", padding="same")(x)
x = layers.Conv2DTranspose(
    64, (3,3), strides = 2, activation="relu", padding="same"
)(x)
x = layers.Conv2DTranspose(
    32, (3,3), strides = 2, activation="relu", padding="same"
)(x)
decoder_output = layers.Conv2D(
    1, (3,3), strides = 1, activation="sigmoid", padding="same", name="decoder_output"
)(x)
decoder = models.Model(decoder_input,decoder_output)
decoder.summary()
Model: "functional_10"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ decoder_input (InputLayer)      │ (None, 2)              │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_4 (Dense)                 │ (None, 2048)           │         6,144 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ reshape_4 (Reshape)             │ (None, 4, 4, 128)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_transpose_8              │ (None, 8, 8, 128)      │       147,584 │
│ (Conv2DTranspose)               │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_transpose_9              │ (None, 16, 16, 64)     │        73,792 │
│ (Conv2DTranspose)               │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_transpose_10             │ (None, 32, 32, 32)     │        18,464 │
│ (Conv2DTranspose)               │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ decoder_output (Conv2D)         │ (None, 32, 32, 1)      │           289 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 246,273 (962.00 KB)
 Trainable params: 246,273 (962.00 KB)
 Non-trainable params: 0 (0.00 B)
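
As a quick usage check (an addition, not an original cell), we can decode a random latent vector and confirm the decoder produces a 32x32x1 image with values in [0, 1], which the final sigmoid guarantees:

# Decode one random latent vector (uses `decoder` and EMBEDDING_DIM from above)
z = np.random.normal(size=(1, EMBEDDING_DIM)).astype("float32")
img = decoder.predict(z, verbose=0)
print(img.shape, img.min(), img.max())  # (1, 32, 32, 1), values within [0, 1]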

Autoencoder = encoder + decoder

In [38]:
autoencoder = models.Model(
    encoder_input, decoder(encoder_output)
)
autoencoder.summary()
Model: "functional_11"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ encoder_input (InputLayer)      │ (None, 32, 32, 1)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_21 (Conv2D)              │ (None, 16, 16, 32)     │           320 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_22 (Conv2D)              │ (None, 8, 8, 64)       │        18,496 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_23 (Conv2D)              │ (None, 4, 4, 128)      │        73,856 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ flatten_6 (Flatten)             │ (None, 2048)           │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ encoder_output (Dense)          │ (None, 2)              │         4,098 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ functional_10 (Functional)      │ (None, 32, 32, 1)      │       246,273 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 343,043 (1.31 MB)
 Trainable params: 343,043 (1.31 MB)
 Non-trainable params: 0 (0.00 B)

We compile the autoencoder with the Adam optimizer and mean squared error (MSE) as the loss function.

In [39]:
autoencoder.compile(
    optimizer = "adam",
    loss = "mse"
)
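
For reference, the MSE loss is simply the mean of the squared per-pixel differences between the input and its reconstruction. A minimal NumPy sketch of the same quantity on a single (still untrained) example, added here for illustration:

# Manual MSE between one image and its current reconstruction
sample = x_train[:1]
recon = autoencoder.predict(sample, verbose=0)
print(f"per-pixel MSE: {np.mean((sample - recon) ** 2):.4f}")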
 

Training the autoencoder uses x_train as both the input and the target. We train for a maximum of 80 epochs and use an early-stopping callback that quits training if the validation loss has not improved for 5 consecutive epochs. We ended up training for about 60 epochs (which took a while), but a reasonable latent space already forms after about 10 epochs.

In [40]:
callback_ckpt = keras.callbacks.ModelCheckpoint(
    filepath="autoencoder.keras",
    save_best_only=True,
    verbose=1,
    monitor="val_loss",
)
callback_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
)

num_epochs = 80
history = autoencoder.fit(
    x_train, x_train,
    epochs=num_epochs,
    batch_size=BATCH_SIZE,
    shuffle=True,
    validation_split=VALIDATION_SPLIT,
    # validation_data=(x_test, x_test),
    callbacks=[callback_ckpt, callback_stopping],
)
Epoch 1/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 58ms/step - loss: 0.0650
Epoch 1: val_loss improved from inf to 0.02644, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 32s 63ms/step - loss: 0.0649 - val_loss: 0.0264
Epoch 2/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 59ms/step - loss: 0.0253
Epoch 2: val_loss improved from 0.02644 to 0.02406, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 64ms/step - loss: 0.0253 - val_loss: 0.0241
Epoch 3/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 64ms/step - loss: 0.0236
Epoch 3: val_loss improved from 0.02406 to 0.02303, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 33s 69ms/step - loss: 0.0236 - val_loss: 0.0230
Epoch 4/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0228
Epoch 4: val_loss improved from 0.02303 to 0.02240, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0228 - val_loss: 0.0224
Epoch 5/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0221
Epoch 5: val_loss improved from 0.02240 to 0.02216, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0221 - val_loss: 0.0222
Epoch 6/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 62ms/step - loss: 0.0218
Epoch 6: val_loss improved from 0.02216 to 0.02203, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 33s 68ms/step - loss: 0.0218 - val_loss: 0.0220
Epoch 7/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 63ms/step - loss: 0.0216
Epoch 7: val_loss improved from 0.02203 to 0.02161, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 33s 68ms/step - loss: 0.0216 - val_loss: 0.0216
Epoch 8/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0214
Epoch 8: val_loss improved from 0.02161 to 0.02161, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0214 - val_loss: 0.0216
Epoch 9/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 63ms/step - loss: 0.0212
Epoch 9: val_loss improved from 0.02161 to 0.02142, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 33s 68ms/step - loss: 0.0212 - val_loss: 0.0214
Epoch 10/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 62ms/step - loss: 0.0211
Epoch 10: val_loss improved from 0.02142 to 0.02133, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 32s 67ms/step - loss: 0.0211 - val_loss: 0.0213
Epoch 11/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 61ms/step - loss: 0.0209
Epoch 11: val_loss did not improve from 0.02133
480/480 ━━━━━━━━━━━━━━━━━━━━ 32s 66ms/step - loss: 0.0209 - val_loss: 0.0213
Epoch 12/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 62ms/step - loss: 0.0207
Epoch 12: val_loss improved from 0.02133 to 0.02101, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 32s 67ms/step - loss: 0.0207 - val_loss: 0.0210
Epoch 13/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0206
Epoch 13: val_loss did not improve from 0.02101
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0206 - val_loss: 0.0211
Epoch 14/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0204
Epoch 14: val_loss improved from 0.02101 to 0.02089, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0204 - val_loss: 0.0209
Epoch 15/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 61ms/step - loss: 0.0204
Epoch 15: val_loss did not improve from 0.02089
480/480 ━━━━━━━━━━━━━━━━━━━━ 32s 66ms/step - loss: 0.0204 - val_loss: 0.0210
Epoch 16/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 59ms/step - loss: 0.0203
Epoch 16: val_loss improved from 0.02089 to 0.02077, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 64ms/step - loss: 0.0203 - val_loss: 0.0208
Epoch 17/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 59ms/step - loss: 0.0202
Epoch 17: val_loss improved from 0.02077 to 0.02073, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 64ms/step - loss: 0.0202 - val_loss: 0.0207
Epoch 18/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 59ms/step - loss: 0.0201
Epoch 18: val_loss improved from 0.02073 to 0.02066, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 64ms/step - loss: 0.0201 - val_loss: 0.0207
Epoch 19/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 59ms/step - loss: 0.0200
Epoch 19: val_loss improved from 0.02066 to 0.02053, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 64ms/step - loss: 0.0200 - val_loss: 0.0205
Epoch 20/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 65ms/step - loss: 0.0199
Epoch 20: val_loss improved from 0.02053 to 0.02051, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 34s 70ms/step - loss: 0.0199 - val_loss: 0.0205
Epoch 21/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 63ms/step - loss: 0.0198
Epoch 21: val_loss improved from 0.02051 to 0.02042, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 33s 68ms/step - loss: 0.0198 - val_loss: 0.0204
Epoch 22/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 64ms/step - loss: 0.0198
Epoch 22: val_loss did not improve from 0.02042
480/480 ━━━━━━━━━━━━━━━━━━━━ 33s 69ms/step - loss: 0.0198 - val_loss: 0.0204
Epoch 23/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 62ms/step - loss: 0.0197
Epoch 23: val_loss improved from 0.02042 to 0.02025, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 32s 67ms/step - loss: 0.0197 - val_loss: 0.0203
Epoch 24/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 61ms/step - loss: 0.0196
Epoch 24: val_loss did not improve from 0.02025
480/480 ━━━━━━━━━━━━━━━━━━━━ 32s 66ms/step - loss: 0.0196 - val_loss: 0.0203
Epoch 25/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0195
Epoch 25: val_loss did not improve from 0.02025
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0195 - val_loss: 0.0204
Epoch 26/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 62ms/step - loss: 0.0195
Epoch 26: val_loss improved from 0.02025 to 0.02020, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 32s 67ms/step - loss: 0.0195 - val_loss: 0.0202
Epoch 27/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 61ms/step - loss: 0.0194
Epoch 27: val_loss did not improve from 0.02020
480/480 ━━━━━━━━━━━━━━━━━━━━ 32s 66ms/step - loss: 0.0194 - val_loss: 0.0203
Epoch 28/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 62ms/step - loss: 0.0194
Epoch 28: val_loss did not improve from 0.02020
480/480 ━━━━━━━━━━━━━━━━━━━━ 32s 67ms/step - loss: 0.0194 - val_loss: 0.0205
Epoch 29/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 66ms/step - loss: 0.0193
Epoch 29: val_loss improved from 0.02020 to 0.02019, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 34s 71ms/step - loss: 0.0193 - val_loss: 0.0202
Epoch 30/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 64ms/step - loss: 0.0194
Epoch 30: val_loss improved from 0.02019 to 0.02013, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 33s 70ms/step - loss: 0.0193 - val_loss: 0.0201
Epoch 31/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 65ms/step - loss: 0.0193
Epoch 31: val_loss did not improve from 0.02013
480/480 ━━━━━━━━━━━━━━━━━━━━ 34s 70ms/step - loss: 0.0193 - val_loss: 0.0202
Epoch 32/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 74ms/step - loss: 0.0192
Epoch 32: val_loss did not improve from 0.02013
480/480 ━━━━━━━━━━━━━━━━━━━━ 38s 79ms/step - loss: 0.0192 - val_loss: 0.0202
Epoch 33/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0191
Epoch 33: val_loss did not improve from 0.02013
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0191 - val_loss: 0.0201
Epoch 34/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 61ms/step - loss: 0.0192
Epoch 34: val_loss improved from 0.02013 to 0.01996, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 32s 67ms/step - loss: 0.0192 - val_loss: 0.0200
Epoch 35/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0192
Epoch 35: val_loss did not improve from 0.01996
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0192 - val_loss: 0.0201
Epoch 36/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0190
Epoch 36: val_loss did not improve from 0.01996
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0190 - val_loss: 0.0201
Epoch 37/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 62ms/step - loss: 0.0189
Epoch 37: val_loss did not improve from 0.01996
480/480 ━━━━━━━━━━━━━━━━━━━━ 32s 67ms/step - loss: 0.0189 - val_loss: 0.0200
Epoch 38/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0190
Epoch 38: val_loss did not improve from 0.01996
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0190 - val_loss: 0.0200
Epoch 39/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0189
Epoch 39: val_loss improved from 0.01996 to 0.01995, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0189 - val_loss: 0.0199
Epoch 40/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0189
Epoch 40: val_loss improved from 0.01995 to 0.01994, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0189 - val_loss: 0.0199
Epoch 41/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0189
Epoch 41: val_loss did not improve from 0.01994
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 64ms/step - loss: 0.0189 - val_loss: 0.0201
Epoch 42/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 59ms/step - loss: 0.0187
Epoch 42: val_loss did not improve from 0.01994
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 64ms/step - loss: 0.0187 - val_loss: 0.0200
Epoch 43/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0188
Epoch 43: val_loss did not improve from 0.01994
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0188 - val_loss: 0.0200
Epoch 44/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0188
Epoch 44: val_loss improved from 0.01994 to 0.01991, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0188 - val_loss: 0.0199
Epoch 45/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0188
Epoch 45: val_loss did not improve from 0.01991
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0188 - val_loss: 0.0200
Epoch 46/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0187
Epoch 46: val_loss improved from 0.01991 to 0.01991, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0187 - val_loss: 0.0199
Epoch 47/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0187
Epoch 47: val_loss improved from 0.01991 to 0.01990, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0187 - val_loss: 0.0199
Epoch 48/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0187
Epoch 48: val_loss improved from 0.01990 to 0.01983, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0187 - val_loss: 0.0198
Epoch 49/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0186
Epoch 49: val_loss improved from 0.01983 to 0.01976, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0186 - val_loss: 0.0198
Epoch 50/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0186
Epoch 50: val_loss did not improve from 0.01976
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0186 - val_loss: 0.0198
Epoch 51/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0186
Epoch 51: val_loss did not improve from 0.01976
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0186 - val_loss: 0.0198
Epoch 52/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 61ms/step - loss: 0.0185
Epoch 52: val_loss did not improve from 0.01976
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0185 - val_loss: 0.0199
Epoch 53/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0185
Epoch 53: val_loss improved from 0.01976 to 0.01975, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0185 - val_loss: 0.0197
Epoch 54/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0186
Epoch 54: val_loss did not improve from 0.01975
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0186 - val_loss: 0.0198
Epoch 55/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0184
Epoch 55: val_loss did not improve from 0.01975
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0184 - val_loss: 0.0198
Epoch 56/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0186
Epoch 56: val_loss did not improve from 0.01975
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0186 - val_loss: 0.0199
Epoch 57/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0185
Epoch 57: val_loss improved from 0.01975 to 0.01973, saving model to autoencoder.keras
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0185 - val_loss: 0.0197
Epoch 58/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0184
Epoch 58: val_loss did not improve from 0.01973
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0184 - val_loss: 0.0197
Epoch 59/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0184
Epoch 59: val_loss did not improve from 0.01973
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0184 - val_loss: 0.0197
Epoch 60/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 65ms/step - loss: 0.0183
Epoch 60: val_loss did not improve from 0.01973
480/480 ━━━━━━━━━━━━━━━━━━━━ 34s 70ms/step - loss: 0.0183 - val_loss: 0.0198
Epoch 61/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 62ms/step - loss: 0.0185
Epoch 61: val_loss did not improve from 0.01973
480/480 ━━━━━━━━━━━━━━━━━━━━ 32s 67ms/step - loss: 0.0185 - val_loss: 0.0198
Epoch 62/80
480/480 ━━━━━━━━━━━━━━━━━━━━ 0s 60ms/step - loss: 0.0184
Epoch 62: val_loss did not improve from 0.01973
480/480 ━━━━━━━━━━━━━━━━━━━━ 31s 65ms/step - loss: 0.0184 - val_loss: 0.0198
In [44]:
best_model = keras.models.load_model("autoencoder.keras")
test_loss = best_model.evaluate(x_test, x_test)
print(f"test loss = {test_loss:.3f}")


def training_curves(history):
    train_loss = history.history["loss"]
    val_loss = history.history["val_loss"]
    epochs = np.arange(1, len(train_loss) + 1)

    plt.plot(epochs, train_loss, "b--", label="training loss")
    plt.plot(epochs, val_loss, "b", label="validation loss")
    plt.title("Training and Validation Losses")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()


training_curves(history)
313/313 ━━━━━━━━━━━━━━━━━━━━ 4s 11ms/step - loss: 0.0196
test loss = 0.020
[Figure: training and validation loss curves]

We take the first 5000 test samples and use them to generate reconstructions, then display the first 10 reconstructions alongside their true images.

In [45]:
n_to_predict = 5000
example_images = x_test[:n_to_predict]
predictions = autoencoder.predict(example_images)

print("Example real clothing items")
display(example_images)
print("Reconstructions")
display(predictions)
157/157 ━━━━━━━━━━━━━━━━━━━━ 2s 12ms/step
Example real clothing items
[Figure: example real clothing items]
Reconstructions
[Figure: reconstructions of the same items]

The latent space provides insight into the learned features. Because our latent space is two-dimensional, we can readily visualize how the clothing classes cluster within it.

In [47]:
embeddings = encoder.predict(example_images)
 

# Colour the embeddings by their label (clothing type - see table)
example_labels = y_test[:n_to_predict]

figsize = 8
plt.figure(figsize=(figsize, figsize))
plt.scatter(
    embeddings[:, 0],
    embeddings[:, 1],
    cmap="rainbow",
    c=example_labels,
    alpha=0.8,
    s=3,
)
plt.colorbar()
plt.show()
157/157 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
[Figure: 2-D latent-space embeddings coloured by clothing class]
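
One simple way to quantify the clustering (an extra check, not in the original notebook) is to compute the mean embedding of each clothing class; classes whose centroids sit far apart in the 2-D latent space are the ones the encoder separates most cleanly:

# Per-class centroids in the latent space (uses `embeddings` and `example_labels` from above)
for label in np.unique(example_labels):
    centroid = embeddings[example_labels == label].mean(axis=0)
    print(f"class {label}: centroid = ({centroid[0]:.2f}, {centroid[1]:.2f})")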

We can also draw new samples from within the range of the test set's point cloud in the latent space. This allows us to see how randomly chosen points in the space decode into different types of clothing.

In [48]:
# Get the range of the existing embeddings
mins, maxs = np.min(embeddings, axis=0), np.max(embeddings, axis=0)

# Sample some points in the latent space
grid_width, grid_height = (6, 3)
sample = np.random.uniform(
    mins, maxs, size=(grid_width * grid_height, EMBEDDING_DIM)
)
In [49]:
# Decode the sampled points
reconstructions = decoder.predict(sample)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 65ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 65ms/step
In [50]:
# Draw a plot of...
figsize = 8
plt.figure(figsize=(figsize, figsize))

# ... the original embeddings ...
plt.scatter(embeddings[:, 0], embeddings[:, 1], c="black", alpha=0.5, s=2)

# ... and the newly generated points in the latent space
plt.scatter(sample[:, 0], sample[:, 1], c="#00B0F0", alpha=1, s=40)
plt.show()

# Add underneath a grid of the decoded images
fig = plt.figure(figsize=(figsize, grid_height * 2))
fig.subplots_adjust(hspace=0.4, wspace=0.4)

for i in range(grid_width * grid_height):
    ax = fig.add_subplot(grid_height, grid_width, i + 1)
    ax.axis("off")
    ax.text(
        0.5,
        -0.35,
        str(np.round(sample[i, :], 1)),
        fontsize=10,
        ha="center",
        transform=ax.transAxes,
    )
    ax.imshow(reconstructions[i, :, :], cmap="Greys")
[Figure: latent-space embeddings (black) with newly sampled points (blue)]
[Figure: decoded images for the sampled latent points, labelled with their coordinates]

But we can also sample the latent space on a regular grid to see how the reconstructions are distributed across it. In particular, this lets us "morph" one type of clothing into another simply by moving along a direction in the latent space.

In [52]:
grid_size = 15  # grid resolution (assumed value; grid_size is not defined anywhere in this excerpt)
x = np.linspace(min(embeddings[:, 0]), max(embeddings[:, 0]), grid_size)
y = np.linspace(max(embeddings[:, 1]), min(embeddings[:, 1]), grid_size)
xv, yv = np.meshgrid(x, y)
xv = xv.flatten()
yv = yv.flatten()
grid = np.array(list(zip(xv, yv)))

reconstructions = decoder.predict(grid)
# plt.scatter(grid[:, 0], grid[:, 1], c="black", alpha=1, s=10)
plt.show()

fig = plt.figure(figsize=(figsize, figsize))
fig.subplots_adjust(hspace=0.4, wspace=0.4)
for i in range(grid_size**2):
    ax = fig.add_subplot(grid_size, grid_size, i + 1)
    ax.axis("off")
    ax.imshow(reconstructions[i, :, :], cmap="Greys")
8/8 ━━━━━━━━━━━━━━━━━━━━ 0s 9ms/step
[Figure: grid of decoded images spanning the latent space]
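
To make the "morphing" idea concrete, here is a short sketch (not part of the original notebook) that linearly interpolates between the embeddings of two test images and decodes each intermediate point, reusing `encoder`, `decoder`, and `x_test` from the cells above:

# Morph between two test items by interpolating in the latent space
z_pair = encoder.predict(x_test[:2], verbose=0)      # embeddings of two test images
alphas = np.linspace(0.0, 1.0, 10)
z_path = np.array([(1 - a) * z_pair[0] + a * z_pair[1] for a in alphas])
morphs = decoder.predict(z_path, verbose=0)

fig = plt.figure(figsize=(20, 2))
for i in range(len(alphas)):
    ax = fig.add_subplot(1, len(alphas), i + 1)
    ax.axis("off")
    ax.imshow(morphs[i, :, :, 0], cmap="Greys")
plt.show()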