
I was following a tutorial about Feed-Forward Networks and wrote this code for a simple FFN:

import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
from sklearn.datasets import make_blobs
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split

class FirstFFNetwork:

  #initialize the parameters
  def __init__(self):
    self.w1 = np.random.randn()
    self.w2 = np.random.randn()
    self.w3 = np.random.randn()
    self.w4 = np.random.randn()
    self.w5 = np.random.randn()
    self.w6 = np.random.randn()
    self.b1 = 0
    self.b2 = 0
    self.b3 = 0

  def sigmoid(self, x):
    return 1.0/(1.0 + np.exp(-x))

  def forward_pass(self, x):
    #forward pass - preactivation and activation
    self.x1, self.x2 = x
    self.a1 = self.w1*self.x1 + self.w2*self.x2 + self.b1
    self.h1 = self.sigmoid(self.a1)
    self.a2 = self.w3*self.x1 + self.w4*self.x2 + self.b2
    self.h2 = self.sigmoid(self.a2)
    self.a3 = self.w5*self.h1 + self.w6*self.h2 + self.b3
    self.h3 = self.sigmoid(self.a3)
    return self.h3

  def grad(self, x, y):
    #back propagation
    self.forward_pass(x)

    self.dw5 = (self.h3-y) * self.h3*(1-self.h3) * self.h1
    self.dw6 = (self.h3-y) * self.h3*(1-self.h3) * self.h2
    self.db3 = (self.h3-y) * self.h3*(1-self.h3)

    self.dw1 = (self.h3-y) * self.h3*(1-self.h3) * self.w5 * self.h1*(1-self.h1) * self.x1
    self.dw2 = (self.h3-y) * self.h3*(1-self.h3) * self.w5 * self.h1*(1-self.h1) * self.x2
    self.db1 = (self.h3-y) * self.h3*(1-self.h3) * self.w5 * self.h1*(1-self.h1)

    self.dw3 = (self.h3-y) * self.h3*(1-self.h3) * self.w6 * self.h2*(1-self.h2) * self.x1
    self.dw4 = (self.h3-y) * self.h3*(1-self.h3) * self.w6 * self.h2*(1-self.h2) * self.x2
    self.db2 = (self.h3-y) * self.h3*(1-self.h3) * self.w6 * self.h2*(1-self.h2)

  def fit(self, X, Y, epochs=1, learning_rate=1, initialise=True, display_loss=False):

    # initialise w, b
    if initialise:
      self.w1 = np.random.randn()
      self.w2 = np.random.randn()
      self.w3 = np.random.randn()
      self.w4 = np.random.randn()
      self.w5 = np.random.randn()
      self.w6 = np.random.randn()
      self.b1 = 0
      self.b2 = 0
      self.b3 = 0

    if display_loss:
      loss = {}

    for i in tqdm_notebook(range(epochs), total=epochs, unit="epoch"):
      dw1, dw2, dw3, dw4, dw5, dw6, db1, db2, db3 = [0]*9
      for x, y in zip(X, Y):
        self.grad(x, y)
        dw1 += self.dw1
        dw2 += self.dw2
        dw3 += self.dw3
        dw4 += self.dw4
        dw5 += self.dw5
        dw6 += self.dw6
        db1 += self.db1
        db2 += self.db2
        db3 += self.db3

      m = X.shape[1]
      self.w1 -= learning_rate * dw1 / m
      self.w2 -= learning_rate * dw2 / m
      self.w3 -= learning_rate * dw3 / m
      self.w4 -= learning_rate * dw4 / m
      self.w5 -= learning_rate * dw5 / m
      self.w6 -= learning_rate * dw6 / m
      self.b1 -= learning_rate * db1 / m
      self.b2 -= learning_rate * db2 / m
      self.b3 -= learning_rate * db3 / m

      if display_loss:
        Y_pred = self.predict(X)
        loss[i] = mean_squared_error(Y_pred, Y)

    if display_loss:
      plt.plot(loss.values())
      plt.xlabel('Epochs')
      plt.ylabel('Mean Squared Error')
      plt.show()

  def predict(self, X):
    #predicting the results on unseen data
    Y_pred = []
    for x in X:
      y_pred = self.forward_pass(x)
      Y_pred.append(y_pred)
    return np.array(Y_pred)

The data was generated as follows:

data, labels = make_blobs(n_samples=1000, centers=4, n_features=2, random_state=0)
labels_orig = labels
labels = np.mod(labels_orig, 2)
X_train, X_val, Y_train, Y_val = train_test_split(data, labels, stratify=labels, random_state=0)

When I ran the program yesterday, I got a training accuracy of about 98% and a test accuracy of 94%. But when I ran it today, the accuracy suddenly dropped to 60-70%. When I scatter-plotted the results, the model behaved as if it were a single sigmoid neuron instead of the Feed-Forward Network.

ffn = FirstFFNetwork()
#train the model on the data
ffn.fit(X_train, Y_train, epochs=2000, learning_rate=.01, display_loss=False)
#predictions
Y_pred_train = ffn.predict(X_train)
Y_pred_binarised_train = (Y_pred_train >= 0.5).astype("int").ravel()
Y_pred_val = ffn.predict(X_val)
Y_pred_binarised_val = (Y_pred_val >= 0.5).astype("int").ravel()
accuracy_train_1 = accuracy_score(Y_pred_binarised_train, Y_train)
accuracy_val_1 = accuracy_score(Y_pred_binarised_val, Y_val)
#model performance
print("Training accuracy", round(accuracy_train_1, 2))
print("Validation accuracy", round(accuracy_val_1, 2)

I do not understand how this happened and cannot figure it out.


1 Answer

  • It is common during the training of neural networks for accuracy to improve for a while and then get worse -- in general, this is caused by over-fitting. It is also fairly common for the network to get unlucky and get knocked into a bad region of parameter space corresponding to a sudden decrease in accuracy -- sometimes it can recover from this quickly, but sometimes not.


  • In general, lowering your learning rate is a good approach to this kind of problem. Also, setting a learning-rate schedule like FactorScheduler can help you achieve more stable convergence by lowering the learning rate every few epochs; this can sometimes even cover up mistakes in picking an initial learning rate that is too high. (There is a small decay sketch after this list.)

  • You can also try using mini-batches, i.e. updating the parameters after every small group of samples rather than once per full pass over the data (see the mini-batch sketch below).

  • For a binary target like this one, a cross-entropy (log) loss is usually a better fit than mean squared error, and the log terms need to be computed carefully so they never receive exactly 0 or 1 (see the loss sketch below).
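
Here is a minimal sketch of a factor-style schedule (the idea behind FactorScheduler), with a step size and decay factor that are purely hypothetical and would need tuning; it is not part of the original fit() code:

def decayed_lr(base_lr, epoch, step=500, factor=0.5):
  # multiply the base rate by `factor` every `step` epochs (hypothetical values)
  return base_lr * (factor ** (epoch // step))

# Inside fit(), you would compute the rate once per epoch and use it in the updates:
#   lr = decayed_lr(learning_rate, i)
#   self.w1 -= lr * dw1 / m
#   ... (same for the other weights and biases)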
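
A rough mini-batch sketch, assuming a batch_size you would tune yourself; it shuffles the sample indices and yields small chunks, and the trailing comment shows where it would slot into the existing epoch loop:

import numpy as np

def minibatches(X, Y, batch_size=32, shuffle=True):
  # yield (X_batch, Y_batch) pairs that together cover the training set once
  idx = np.arange(len(X))
  if shuffle:
    np.random.shuffle(idx)
  for start in range(0, len(X), batch_size):
    batch = idx[start:start + batch_size]
    yield X[batch], Y[batch]

# In fit(), the inner loop over zip(X, Y) would become a loop over
# minibatches(X, Y): accumulate self.grad(x, y) over each batch, then apply
# the parameter update once per batch, dividing by the batch size instead of m.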
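
And a sketch of a numerically safe binary cross-entropy, the usual pairing for a sigmoid output; the eps clipping constant is just a common choice, not something from the original code:

import numpy as np

def binary_cross_entropy(y_true, y_pred, eps=1e-12):
  # clip predictions away from 0 and 1 so np.log never sees 0
  y_pred = np.clip(y_pred, eps, 1 - eps)
  return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

With this loss and a sigmoid output, the error term at the output layer simplifies to (self.h3 - y), so the self.h3*(1-self.h3) factor would drop out of the gradient expressions in grad().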

