6. Speech Recognition

In this exercise, we will work with the Google Speech Command Dataset, which can be downloaded from here (note: you do not need to download the full dataset, but having it lets you play around with the raw audio files). The dataset contains 105,829 one-second-long audio files with utterances of 35 common words.

We will use a subset of this dataset as indicated in the table below.

Word   How many?   Class #
Yes    4,044       3
No     3,941       1
Stop   3,872       2
Go     3,880       0

The data is given in the files XSound.npy and YSound.npy, both of which can be imported using numpy.load. XSound.npy contains spectrograms (i.e., matrices with a time axis and a frequency axis, of size 62 (time) × 65 (frequency)). YSound.npy contains the class numbers indicated in the table above.

In [485]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import optimizers

from numpy.random import seed

from sklearn.model_selection import train_test_split

seed(69)
tf.random.set_seed(69)

X = np.load('XSound.npy')
Y = np.load('YSound.npy')
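
Before going further, it is worth sanity-checking the loaded arrays. A minimal sketch; the expected shape is an assumption based on the Conv2D input used later, which needs a trailing channel dimension:

In [ ]:
# quick sanity check of the loaded arrays (shape expectation is an assumption)
print(X.shape)  # expected: roughly (15737, 62, 65, 1) -- time x frequency x channel
print(Y.shape)
print(np.unique(Y, return_counts=True))  # per-class counts, cf. the table above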
In [390]:
## Helper functions ##
words = ['go', 'no', 'stop', 'yes']

def flatten(l):
    '''
    Flatten a spectrogram array (with a trailing channel dimension)
    into a flat list of values.
    '''
    return [item[0] for sublist in l for item in sublist]

def doublePlotSpecgram(dataIndexes):
    '''
    Draw spectrograms side by side for a list of training-data indexes,
    e.g. [1, 3, 7, 13].
    '''
    # create list of data sets containing [[ X_data, Y_data ], ...]
    dataSets = [[X_train[i], Y_train[i]] for i in dataIndexes]
    fig, axes = plt.subplots(ncols=len(dataSets), figsize=(15, 2))
    for i in range(len(dataSets)):
        x, y = dataSets[i]
        axes[i].title.set_text(f'Spectrogram of index {dataIndexes[i]}  \'{words[y]}\'')
        axes[i].specgram(flatten(x), Fs=1, NFFT=64, noverlap=0, cmap="rainbow")
        axes[i].set_xlabel('Time')
        axes[i].set_ylabel('Frequency')

We define some helper functions for visualizing the data: flatten collapses a spectrogram matrix into a single flat list of values, and doublePlotSpecgram displays multiple spectrograms side by side.

(a) Explore and prepare the data, including splitting the data in training, validation and testing data, handling outliers, perhaps taking logarithms, etc. Data preparation is - as always - quite important. Document what you do.

In [486]:
# plot a known entry that contains zeros
a = X[13]
plt.title('Spectrogram')
flat = flatten(a)
plt.specgram(flat, Fs=1, NFFT=64, noverlap=0, cmap="rainbow")
plt.xlabel('Time')
plt.ylabel('Frequency')

# locate all zeros and remove them
n = 0
x_len = len(X)
hasZeros = []
for a in range(x_len):
    if 0 in X[a].reshape(-1):
        n += 1
        hasZeros.append(a)

X = np.delete(X, hasZeros, axis=0)
Y = np.delete(Y, hasZeros, axis=0)

display(f"{n}/{x_len} sounds files contain zeros")
display(f"new x has {len(X)} elements and Y has {len(Y)}")
display(f"has zeros {len(hasZeros)}")
'1893/15737 sound files contain zeros'
'new x has 13844 elements and Y has 13844'
'has zeros 1893'

As the output above shows, 1893 of the 15737 entries contained zero values (i.e., missing data) and were removed.
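
The assignment also suggests taking logarithms of the spectrogram values to compress their dynamic range. A minimal sketch of that step (shown with a new variable X_log so the recorded results below are unaffected; the epsilon is an assumption to guard against any remaining zero bins):

In [ ]:
# optional: log-scale the spectrograms to compress dynamic range
# (epsilon is an arbitrary small constant, an assumption of this sketch)
X_log = np.log(X + 1e-10)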

In [487]:
# No separate validation set is split off here, because the validation_split parameter of model.fit() handles that during training
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, stratify = Y, random_state=69)

(b) Visualize a few examples of yes's, no's, stop's and go's, so that you have a reasonable intuitive understanding of the difference between the words.

In [488]:
# use helper function to plot a few of each word
for i in range(4):
    indexes = np.where(Y_train == i)[0][0:4]
    doublePlotSpecgram(indexes)

As can be seen above, some of the words have semi-distinct features: 'go' mostly shows a single column of frequencies with a very sharp left edge where the word begins, and the same goes for 'no'.

Both 'stop' and 'yes' seem to have multiple columns and a lot more noise around the columns. This is likely from the 's' sound.
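
To back up this intuition, we can draw the per-word mean spectrogram. A hedged sketch (it assumes X_train has shape (N, 62, 65, 1) with time on the first spectrogram axis, as described in the introduction):

In [ ]:
# hedged sketch: the average spectrogram per word highlights class-level structure
fig, axes = plt.subplots(ncols=4, figsize=(15, 2))
for c, ax in enumerate(axes):
    mean_spec = X_train[Y_train == c].mean(axis=0).reshape(62, 65)
    ax.imshow(mean_spec.T, origin='lower', aspect='auto', cmap='rainbow')
    ax.set_title(f"mean '{words[c]}'")
    ax.set_xlabel('Time')
    ax.set_ylabel('Frequency')
plt.show()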

(c) Train a neural network and at least one other algorithm on the data. Find a good set of hyperparameters for each model. Do you think a neural network is suitable for this kind of problem? Why/why not?

In [442]:
Y_train_categorical = to_categorical(Y_train, 4)

model = Sequential()

model.add(Conv2D(62, (3,3), activation='tanh', input_shape=X_train[0].shape)) # 62 feature maps, 3x3 local receptive fields
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(units=4, activation='softmax')) #fully connected output layer

sgd = optimizers.SGD(learning_rate=0.1)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

history = model.fit(X_train, Y_train_categorical, epochs=10, batch_size=50, verbose=1, validation_split=0.3)
Epoch 1/10
146/146 [==============================] - 9s 61ms/step - loss: 0.8419 - accuracy: 0.6741 - val_loss: 1.2557 - val_accuracy: 0.6302
Epoch 2/10
146/146 [==============================] - 9s 62ms/step - loss: 0.5774 - accuracy: 0.7829 - val_loss: 0.7785 - val_accuracy: 0.7185
Epoch 3/10
146/146 [==============================] - 9s 63ms/step - loss: 0.4827 - accuracy: 0.8151 - val_loss: 0.6188 - val_accuracy: 0.7695
Epoch 4/10
146/146 [==============================] - 9s 63ms/step - loss: 0.3962 - accuracy: 0.8495 - val_loss: 0.5880 - val_accuracy: 0.7843
Epoch 5/10
146/146 [==============================] - 9s 64ms/step - loss: 0.3315 - accuracy: 0.8762 - val_loss: 0.6618 - val_accuracy: 0.7653
Epoch 6/10
146/146 [==============================] - 9s 64ms/step - loss: 0.2879 - accuracy: 0.8928 - val_loss: 0.8531 - val_accuracy: 0.7477
Epoch 7/10
146/146 [==============================] - 9s 63ms/step - loss: 0.2508 - accuracy: 0.9119 - val_loss: 0.5331 - val_accuracy: 0.8180
Epoch 8/10
146/146 [==============================] - 9s 63ms/step - loss: 0.2112 - accuracy: 0.9312 - val_loss: 0.5054 - val_accuracy: 0.8292
Epoch 9/10
146/146 [==============================] - 9s 63ms/step - loss: 0.1902 - accuracy: 0.9373 - val_loss: 0.4919 - val_accuracy: 0.8414
Epoch 10/10
146/146 [==============================] - 9s 63ms/step - loss: 0.1687 - accuracy: 0.9468 - val_loss: 0.5032 - val_accuracy: 0.8334

I believe a neural network is well suited to data like this: there are many input features, and a convolutional network in particular can exploit the local time-frequency structure that makes many of the entries so distinct.
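
The hyperparameters above (filter count, learning rate, batch size) were picked by hand. A minimal sketch of a more systematic comparison, here over a few candidate learning rates (the candidate values and the 5-epoch budget are assumptions, not tuned results):

In [ ]:
# hedged sketch: compare a few learning rates on a short training budget
for lr in (0.01, 0.05, 0.1):
    m = Sequential()
    m.add(Conv2D(62, (3, 3), activation='tanh', input_shape=X_train[0].shape))
    m.add(MaxPooling2D(pool_size=(2, 2)))
    m.add(Flatten())
    m.add(Dense(units=4, activation='softmax'))
    m.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(learning_rate=lr),
              metrics=['accuracy'])
    h = m.fit(X_train, Y_train_categorical, epochs=5, batch_size=50,
              verbose=0, validation_split=0.3)
    print(f"lr={lr}: val_accuracy={h.history['val_accuracy'][-1]:.3f}")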

In [498]:
# a second algorithm: k-means clustering on the flattened, scaled spectrograms
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.cluster import adjusted_rand_score

k = 16
X_flat = [i.reshape(-1) for i in X]

scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(X_flat)

kmeansCluster = KMeans(n_clusters=k, random_state=69, n_init=15, init='k-means++')
fit_pred = kmeansCluster.fit_predict(scaled_data)  # fit and assign clusters in one step
adjusted_rand_score(Y, fit_pred)
Out[498]:
0.009768063313093727
In [503]:
rows = int(np.ceil(k / 5))  # enough rows to display all k cluster centers
fig, axes = plt.subplots(rows, 5, figsize=(20, 4 * rows))
for ax, cc, i in zip(axes.ravel(), kmeansCluster.cluster_centers_, np.arange(k)):
    ax.set_title("idx: {}, size: {}".format(i, len(np.where(kmeansCluster.labels_ == i)[0])))
    ax.imshow(cc.reshape(62, 65), cmap=plt.cm.gray_r)
for ax in axes.ravel()[k:]:
    ax.axis('off')  # hide unused axes
plt.show()

Looking at the k-means result above, we can immediately see a problem: several cluster centers repeat with slightly shifted positions (e.g. indexes 0, 1, 3, 4, 6 and 12), so a very large number of clusters would be needed before the clusters line up with the words. The adjusted Rand score of roughly 0.01 confirms this: the clustering is barely better than random with respect to the word labels, so the neural network is a better fit here.
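
One way to quantify this is to map each cluster to its majority word and measure how often that mapping is right. A hedged sketch (it assumes Y holds integer class labels 0-3):

In [ ]:
# hedged sketch: majority-vote accuracy of the k-means clustering
cluster_to_word = np.zeros(k, dtype=int)
for c in range(k):
    members = Y[kmeansCluster.labels_ == c]
    if len(members) > 0:
        cluster_to_word[c] = np.bincount(members).argmax()  # majority label
pred = cluster_to_word[kmeansCluster.labels_]
print("majority-vote accuracy:", np.mean(pred == Y))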

(d) Classify instances of the validation set using your models. Comment on the results in terms of metrics you have learned in the course.

In [443]:
Y_test_categorical = to_categorical(Y_test, 4)
# model.evaluate returns [loss, accuracy]
print("Accuracy on training data: {}".format(model.evaluate(X_train, Y_train_categorical)))
print("Accuracy on test data: {}".format(model.evaluate(X_test, Y_test_categorical)))
325/325 [==============================] - 3s 11ms/step - loss: 0.2519 - accuracy: 0.9193
Accuracy on training data: [0.2519279718399048, 0.9192911386489868]
109/109 [==============================] - 1s 11ms/step - loss: 0.4911 - accuracy: 0.8266
Accuracy on test data: [0.491102010011673, 0.826639711856842]
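
Accuracy alone is a fairly coarse metric. A small sketch using sklearn's classification_report would add per-class precision, recall and F1 (the import is an addition to the cells above):

In [ ]:
# hedged sketch: per-class precision/recall/F1 on the test set
from sklearn.metrics import classification_report

test_pred = np.argmax(model.predict(X_test), axis=1)
print(classification_report(Y_test, test_pred, target_names=words))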
In [444]:
steps = len(history.history['accuracy'])
plt.plot(np.arange(steps), history.history['accuracy'], label = 'Train')
plt.plot(np.arange(steps), history.history['val_accuracy'], label = 'Valid')
plt.legend()
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
Out[444]:
Text(0, 0.5, 'Accuracy')

Looking at the scores and the diagram above, the convolutional model does well, reaching about 83% accuracy on the test data. The diagram also suggests the score could improve if the model were trained for more epochs and allowed to level off; early stopping could then be used so the validation score does not drop again.
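
A hedged sketch of that early-stopping idea using Keras's EarlyStopping callback (the patience value and the 50-epoch budget are assumptions; a new variable name is used so the recorded history above is unaffected):

In [ ]:
# hedged sketch: stop training once validation accuracy stops improving
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_accuracy', patience=3,
                           restore_best_weights=True)
history_es = model.fit(X_train, Y_train_categorical, epochs=50, batch_size=50,
                       verbose=1, validation_split=0.3, callbacks=[early_stop])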

(e) Identify (a few) misclassified words, including what they are misclassified as. Visualize them as before, and compare with your intuitive understanding of how the words look. Do you find the misclassified examples surprising?

In [538]:
def doublePlotSpecgramTestData(dataIndexes):
    '''
    Same as doublePlotSpecgram, but drawing from the test data.
    '''
    dataSets = [[X_test[i], Y_test[i]] for i in dataIndexes]
    fig, axes = plt.subplots(ncols=len(dataSets), figsize=(15, 2))
    for i in range(len(dataSets)):
        x, y = dataSets[i]
        axes[i].title.set_text(f'Spectrogram of index {dataIndexes[i]}  \'{words[y]}\'')
        axes[i].specgram(flatten(x), Fs=1, NFFT=64, noverlap=0, cmap="rainbow")
        axes[i].set_xlabel('Time')
        axes[i].set_ylabel('Frequency')
In [539]:
a = model.predict(X_test[0:100])
for i in range(len(a)):
    pred = int(np.argmax(a[i]))  # index of the highest predicted probability
    if pred != Y_test[i]:
        print(f"index {i} predicts\t'{words[pred]}',\treal '{words[Y_test[i]]}'")

doublePlotSpecgramTestData([1,4,17,27])
doublePlotSpecgramTestData([33,35,36,40])
4/4 [==============================] - 0s 9ms/step
index 1 predicts	'stop',	real 'go'
index 4 predicts	'no',	real 'stop'
index 17 predicts	'yes',	real 'no'
index 27 predicts	'no',	real 'stop'
index 33 predicts	'no',	real 'go'
index 35 predicts	'yes',	real 'no'
index 36 predicts	'go',	real 'no'
index 40 predicts	'stop',	real 'no'
index 43 predicts	'stop',	real 'go'
index 55 predicts	'no',	real 'yes'
index 86 predicts	'yes',	real 'go'
index 90 predicts	'go',	real 'stop'
index 92 predicts	'no',	real 'go'
index 94 predicts	'no',	real 'go'
index 98 predicts	'go',	real 'no'

Above are the first eight misclassified spectrograms. They show that it is not only no/go that get confused: every word is sometimes mistaken for another.

However, a common theme among the spectrograms above is the amount of noise: many of them contain either a lot of noise (e.g. index 40) or very little actual signal (e.g. index 4).
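
A confusion matrix over the whole test set makes these confusions explicit. A minimal sketch (again assuming sklearn is available alongside the imports above):

In [ ]:
# hedged sketch: confusion matrix over the full test set
from sklearn.metrics import confusion_matrix

all_pred = np.argmax(model.predict(X_test), axis=1)
print(confusion_matrix(Y_test, all_pred))  # rows: true word, columns: predicted word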

In [540]:
# plot four of the no/go confusions, using the test-data helper defined above
doublePlotSpecgramTestData([33,92,36,98])

It would make sense for no/go to be confused fairly often, because the two words sound very similar. Looking at the spectrograms above, indexes 33 and 92 contain some noise, while 36 and 98 contain very little, which suggests the latter two were misclassified purely because of how similar the words look.
