import numpy as np
import tensorflow as tf


# Load MNIST; load_data returns ((train images, train labels), (test images, test labels)).
# The import sits in this cell (not at the top of the file) because this is a notebook export.
from tensorflow.keras import datasets as dt
((x_train, y_train), (x_test, y_test)) = dt.mnist.load_data(path='mnist.npz')
# Notebook echo: shapes of the test images and test labels (output shown below).
(x_test.shape, y_test.shape)

((10000, 28, 28), (10000,))


# Conv2D expects a channel dimension: append a trailing axis of size 1
# to the (N, 28, 28) image arrays loaded above.
x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]
# Notebook echo of the new test-set shape (output shown below).
x_test.shape

(10000, 28, 28, 1)


#build a simple model based on a 2D convolution

def get_CNN_model(input_shape):
    """Build the convolutional tower used as classifier base and Siamese branch.

    The network is three 3x3 convolutions (32, 64, 128 filters) interleaved
    with 2x2 max-pooling, followed by a flatten and a 512-unit dense layer.
    For a ``(28, 28, 1)`` input this reproduces the layer output shapes and
    parameter counts of the reference summary printed in this notebook
    (683,008 parameters in total).

    NOTE(review): the reference summary does not show activations; ReLU is
    assumed for every layer here -- confirm against the intended solution.

    Args:
        input_shape: Shape of one input sample without the batch dimension,
            e.g. ``(28, 28, 1)``.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    model = tf.keras.Sequential([
        # The input shape has to be declared on the first layer so that the
        # model is built immediately and model.summary() can be called.
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(512, activation='relu'),
    ])
    return model


# Build the CNN for 28x28 single-channel images and print its layer summary
# (the pasted output follows below).
model = get_CNN_model((28,28,1))
model.summary()

WARNING:tensorflow:From C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\resource_variable_ops.py:435: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 3, 3, 128)         73856     
_________________________________________________________________
flatten (Flatten)            (None, 1152)              0         
_________________________________________________________________
dense (Dense)                (None, 512)               590336    
=================================================================
Total params: 683,008
Trainable params: 683,008
Non-trainable params: 0
_________________________________________________________________


# compile your model, then train and evaluate it
# report results with some conclusions


#construct a Siamese network (two towers) from the previous CNN model.
#Use some distance/similarity metric (L1, L2, cosine similarity) and possibly an activation on top of it (or a simple tf.reduce_sum)
def get_Siammese_model(model, input_shape):
    """Wrap a base network into a two-tower Siamese similarity model.

    Both inputs are passed through the *same* ``model`` instance, so the two
    towers share all weights. The element-wise absolute difference (L1
    distance per feature) of the two embeddings is fed into a single sigmoid
    unit that scores how likely the two inputs belong to the same class.

    Args:
        model: Base Keras model mapping one sample to an embedding vector.
        input_shape: Shape of one input sample without the batch dimension,
            e.g. ``(28, 28, 1)``.

    Returns:
        An uncompiled ``tf.keras.Model`` taking ``[left, right]`` inputs and
        producing one similarity score per pair.
    """
    layers = tf.keras.layers

    # One Input per tower.
    left_input = layers.Input(shape=input_shape)
    right_input = layers.Input(shape=input_shape)

    # Parameter sharing: calling the same model object twice reuses its weights.
    left_embedding = model(left_input)
    right_embedding = model(right_input)

    # Element-wise L1 distance between the two embeddings; the output keeps
    # the embedding dimensionality.
    l1_distance = layers.Lambda(
        lambda embeddings: tf.abs(embeddings[0] - embeddings[1])
    )([left_embedding, right_embedding])

    # Sigmoid output so the score can be trained with binary cross-entropy.
    similarity = layers.Dense(1, activation='sigmoid')(l1_distance)

    # The functional API is required because the graph has two inputs.
    model_siam = tf.keras.Model(inputs=[left_input, right_input],
                                outputs=similarity)
    return model_siam


# Build the Siamese model on top of the CNN created above and print its
# layer summary (the pasted output follows below).
model_siam = get_Siammese_model(model, (28,28,1))
model_siam.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_1 (InputLayer)            (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
sequential (Sequential)         (None, 512)          683008      input_1[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 512)          0           sequential[1][0]                 
                                                                 sequential[2][0]                 
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 1)            513         lambda[0][0]                     
==================================================================================================
Total params: 683,521
Trainable params: 683,521
Non-trainable params: 0
__________________________________________________________________________________________________


#compile the model with e.g. Adam optimizer and e.g. binary cross entropy loss
#or define your own loss function, see e.g. here: 
#https://stackoverflow.com/questions/38260113/implementing-contrastive-loss-and-triplet-loss-in-tensorflow


# source data are in an incompatible format - we need to construct pairs
# a batch generator would be better, but simple static samples will do as well
def create_pairs(x, y, size):
    """Draw random sample pairs and label each pair by class equality.

    Args:
        x: NumPy array of samples, indexed along its first axis.
        y: Class labels aligned with ``x`` (one label per sample).
        size: Number of pairs to draw.

    Returns:
        A tuple ``(x1, x2, labels)`` where ``x1[i]`` and ``x2[i]`` form the
        i-th pair and ``labels[i]`` is 1 when both samples share a class and
        0 otherwise. ``labels`` is an integer array of length ``size``.
    """
    y = np.asarray(y)

    # Both index vectors are drawn independently (np.random.choice samples
    # with replacement by default), so a sample may be paired with itself.
    a = np.random.choice(len(y), size)
    b = np.random.choice(len(y), size)

    # Vectorised comparison instead of a per-element Python loop; astype also
    # keeps the labels integer-typed when size == 0.
    same_class = (y[a] == y[b]).astype(int)

    return (x[a], x[b], same_class)


#run the model training (depending on your PC performance, you may set lower/higher train set sizes)
#you may try to re-use the previous model and only fine-tune it

#do not increase epochs too much (overfitting)

# Static training, validation and test pair sets (sizes: 300000 / 10000 / 10000).
# NOTE: this rebinds the global name `y` to the pair labels.
x1, x2, y = create_pairs(x_train, y_train, 300000)
# NOTE(review): validation pairs are also drawn from x_train, so they can
# contain samples that were seen during training.
x1_val, x2_val, y_val = create_pairs(x_train, y_train, 10000)
# Test pairs are built from the held-out test split; they are not used in this cell.
x1_test, x2_test, y_pair_test = create_pairs(x_test, y_test, 10000)
# NOTE(review): no model_siam.compile(...) call is visible in this file;
# the model has to be compiled (see the compile hint above) before fit runs.
model_siam.fit([x1, x2], y,
          batch_size=128,
          epochs=1,
          validation_data=([x1_val, x2_val], y_val))

Train on 300000 samples, validate on 10000 samples
WARNING:tensorflow:From C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
300000/300000 [==============================] - 298s 994us/sample - loss: 0.0658 - mean_absolute_error: 0.0426 - acc: 0.9765 - val_loss: 0.0252 - val_mean_absolute_error: 0.0150 - val_acc: 0.9926

<tensorflow.python.keras.callbacks.History at 0x1e25861e240>


def get_samples(x, y, k=10):
    """Select the first ``k`` samples of every class as a support set.

    Args:
        x: NumPy array of samples, indexed along its first axis.
        y: Class labels aligned with ``x`` (one label per sample).
        k: Maximum number of samples taken per class (default 10).

    Returns:
        A tuple ``(x_selected, y_selected)`` grouped by class in ascending
        label order. A class with fewer than ``k`` samples contributes all
        of its samples.
    """
    y = np.asarray(y)

    # Positions of the first k occurrences of each distinct label.
    per_class = [np.flatnonzero(y == label)[:k] for label in np.unique(y)]

    # Concatenate instead of np.array(...).flatten(): the per-class index
    # arrays have different lengths when a class has fewer than k samples,
    # which would produce a ragged (object) array.
    if per_class:
        indices = np.concatenate(per_class)
    else:
        # No labels at all: select nothing, but keep an integer index dtype.
        indices = np.empty(0, dtype=np.intp)

    return (x[indices], y[indices])


# a simple validation for a single sample. Extend this to get some overall results for the whole test set. 
# Return, e.g., accuracy
# Index of the test image to classify.
i=0
# Support set: the first 10 training samples of every class, with their labels.
x0_sample, y0_sample = get_samples(x_train, y_train)
example = x_test[i]
# Repeat the test image once per support sample so each row forms a
# (test image, support sample) pair.
x0_test = np.repeat(example[np.newaxis, :, :, :], len(y0_sample), axis=0)
# Position of the support sample with the highest predicted score
# (np.argmax works on the flattened prediction array).
pred = np.argmax(model_siam.predict([x0_test, x0_sample]))
# Notebook echo: (true label, label of the best-matching support sample).
(y_test[i], y0_sample[pred])

(7, 7)

Basic task: classification network

Train and evaluate the network

"Large" homework: Siamese networks + one-shot classification

Task 1: construct Siamese network

Task 2: k-shot classification on existing labels

Task 3: one-shot (k-shot) classification for newly introduced classes