Merge branch 'develop'
amsukdu committed Sep 17, 2017
2 parents 8cbbfb0 + 95fb4fc commit faefc15
Showing 7 changed files with 68 additions and 28 deletions.
README.md: 10 changes (3 additions & 7 deletions)
@@ -2,7 +2,7 @@
This is a Convolutional Neural Network written only in Python & NumPy. It is simple and slow, but it will get the job done :+1:

## Specification
**Weight Initialization :** Xavier
**Weight Initialization :** HE Normal

**Weight Update Policy :** ADAM, NAG, Momentum, Vanilla

@@ -49,10 +49,10 @@ for i in range(600000):

```

The CIFAR-10 example gets ~70% test accuracy in 20 epochs.
The CIFAR-10 example gets ~72% test accuracy in 20 epochs.


## API Referecne
## API Reference
```python
classes.NeuralNetwork(input_shape, layer_list, lr, l2_reg=0, dropout_p=1, loss='softmax'):
```
@@ -91,10 +91,6 @@ classes.ConvLayer(input_size, k, f=3, s=1, p=1, u_type='adam', a_type='relu')
| ReLU | 'relu' |
| Sigmoid | 'sigmoid' |

## ToDos
- [ ] More update policies.
- [ ] More activation functions.
- [ ] Speed tuning.

## License
MIT
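For orientation, here is a minimal construction sketch against the `classes.NeuralNetwork` API documented in this README. The layer-list format is copied from the CIFAR-10 example later in this commit; the import path, the data layout, and the toy values are assumptions, not repo facts.

```python
import numpy as np
from classes.neural_net import NeuralNetwork  # assumed module path

# Toy batch, assumed to be in (N, C, H, W) layout as im2col-based conv expects.
images = np.random.randn(8, 3, 32, 32).astype(np.float32)

cnn = NeuralNetwork(images.shape[1:],
                    [
                        {'type': 'conv', 'k': 16, 'u_type': 'nag', 'f': 5, 's': 1, 'p': 2},
                        {'type': 'pool'},
                        {'type': 'output', 'k': 10, 'u_type': 'nag'}
                    ],
                    1e-4, l2_reg=3e-6, dropout_p=1)
```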
9 changes: 6 additions & 3 deletions classes/conv_layer.py
@@ -26,12 +26,13 @@ def predict(self, batch):
self.image_size = batch.shape[0]
cols = u.im2col_indices(batch, self.f, self.f, self.p, self.s)
sum_weights = []
bias = []
for n in self.neurons:
n.last_input = cols
bias.append(n.b)
sum_weights.append(n.weights)

sum_weights = np.array(sum_weights)
strength = sum_weights.dot(cols).reshape(self.k, self.h2, self.w2, -1).transpose(3, 0, 1, 2)
strength = (sum_weights.dot(cols) + np.array(bias).reshape(sum_weights.shape[0], 1)).reshape(self.k, self.h2, self.w2, -1).transpose(3, 0, 1, 2)

if self.activation:
if self.a_type == 'sigmoid':
@@ -46,13 +47,15 @@ def forward(self, batch):
cols = u.im2col_indices(batch, self.f, self.f, self.p, self.s)
l2 = 0
sum_weights = []
bias = []
for n in self.neurons:
n.last_input = cols
sum_weights.append(n.weights)
bias.append(n.b)
l2 += n.regularization()

sum_weights = np.array(sum_weights)
strength = sum_weights.dot(cols).reshape(self.k, self.h2, self.w2, -1).transpose(3, 0, 1, 2)
strength = (sum_weights.dot(cols) + np.array(bias).reshape(sum_weights.shape[0], 1)).reshape(self.k, self.h2, self.w2, -1).transpose(3, 0, 1, 2)

if self.activation:
if self.a_type == 'sigmoid':
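The change above threads a per-filter bias through the im2col matrix multiply in both `predict` and `forward`. A self-contained sketch of the broadcasting involved; the shapes are illustrative, not taken from the repo:

```python
import numpy as np

k, ffc, positions = 20, 5 * 5 * 3, 16 * 16 * 4   # filters, f*f*C, out_h*out_w*N
sum_weights = np.random.randn(k, ffc).astype(np.float32)
cols = np.random.randn(ffc, positions).astype(np.float32)
bias = np.random.randn(k).astype(np.float32)

# Reshaping the bias to (k, 1) broadcasts one scalar per filter across every
# output position and every image in the batch, which is what the added
# `+ np.array(bias).reshape(...)` term achieves.
strength = sum_weights.dot(cols) + bias.reshape(k, 1)
assert strength.shape == (k, positions)
```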
classes/neural_layer.py: 13 changes (12 additions & 1 deletion)
@@ -18,7 +18,16 @@ def __init__(self, input_size, k, u_type='adam', a_type='relu'):
input_size = np.prod(input_size)

for n in range(k):
self.neurons.append(Neuron(input_size))
n = Neuron(input_size)
if u_type == 'adam':
n.m_bias, n.v_bias, n.m, n.v = 0, 0, 0, 0
elif u_type == 'm':
n.v_bias, n.v = 0, 0
elif u_type == 'nag':
n.v, n.v_bias, n.v_prev, n.v_prev_bias = 0, 0, 0, 0
elif u_type == 'rmsprop':
n.cache, n.cache_bias = 0, 0
self.neurons.append(n)

def predict(self, batch):

@@ -91,6 +100,8 @@ def output_size(self):
def update(self, lr, l2_reg, t=0):
if self.u_type == 'adam':
u.adam_update(self.neurons, lr, t=t, l2_reg=l2_reg)
elif self.u_type == 'rmsprop':
u.rmsprop(self.neurons, lr, l2_reg=l2_reg)
elif self.u_type == 'm':
u.momentum_update(self.neurons, lr, l2_reg=l2_reg)
elif self.u_type == 'nag':
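As a reading aid for the constructor change above: each update policy pre-creates the state it will need on every `Neuron` before the first `update` call. The mapping below is an editor's restatement, not code from the repo; `'vanila'` keeps no extra state, which is why it has no branch.

```python
# State attributes each u_type expects on a Neuron (editor's summary):
OPTIMIZER_STATE = {
    'adam':    ('m', 'v', 'm_bias', 'v_bias'),            # first/second moments
    'm':       ('v', 'v_bias'),                           # momentum velocities
    'nag':     ('v', 'v_bias', 'v_prev', 'v_prev_bias'),  # velocity + lookahead copy
    'rmsprop': ('cache', 'cache_bias'),                   # running squared gradients
}
```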
classes/neural_net.py: 1 change (0 additions & 1 deletion)
@@ -11,7 +11,6 @@ def __init__(self, input_shape, layer_list, lr, l2_reg=0, dropout_p=1, loss='sof
self.l2_reg = np.float32(l2_reg)
self.loss = loss

# dropout
self.dropout_p = dropout_p
self.dropout_masks = []
self.t = 0
classes/neuron.py: 5 changes (2 additions & 3 deletions)
@@ -3,12 +3,11 @@

class Neuron(object):
def __init__(self, input_size, bias=0.0):
self.weights = (np.random.randn(input_size) * np.sqrt(2.0/input_size)).astype(np.float32)
limit = np.sqrt(2.0 / input_size)
self.weights = (np.random.randn(input_size) * limit).astype(np.float32)
self.b = np.float32(bias)
self.last_input = None
self.delta = 0
self.m = 0
self.v = 0

def strength(self, values):
return np.dot(self.weights, values) + self.b
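The refactor keeps the same He-normal initialization the README now advertises: weights are drawn from N(0, 2/n_in), the recommended scale for ReLU layers. A quick standalone check of that scale, with illustrative numbers:

```python
import numpy as np

input_size = 1024
limit = np.sqrt(2.0 / input_size)   # He-normal std for ReLU layers
weights = (np.random.randn(input_size) * limit).astype(np.float32)

# The empirical std should land near sqrt(2 / n_in) ~= 0.0442 for n_in = 1024.
print(weights.std(), limit)
```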
classes/utils.py: 50 changes (41 additions & 9 deletions)
@@ -65,9 +65,23 @@ def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1, stri
return x_padded[:, :, padding:-padding, padding:-padding]


def adam_update(neurons, lr, t, l2_reg=0, beta1=np.float32(0.9), beta2=np.float32(0.999)):
def rmsprop(neurons, lr, decay_rate=0.9, l2_reg=0, eps=1e-8):
for n in neurons:
l2 = l2_reg * n.weights
dx = (n.last_input.dot(n.delta)).T
d_bias = np.average(n.delta)

n.cache = decay_rate * n.cache + (1 - decay_rate) * (dx ** 2)
if l2_reg > 0:
n.weights += - lr * dx / (np.sqrt(n.cache) + eps) - l2_reg * n.weights
else:
n.weights += - lr * dx / (np.sqrt(n.cache) + eps)

n.cache_bias = decay_rate * n.cache_bias + (1 - decay_rate) * (d_bias ** 2)
n.b -= lr * d_bias / (np.sqrt(n.cache_bias) + eps)

def adam_update(neurons, lr, t, l2_reg=0, beta1=np.float32(0.9), beta2=np.float32(0.999), eps=1e-8):
for n in neurons:

dx = (n.last_input.dot(n.delta)).T
d_bias = np.average(n.delta)

@@ -77,21 +91,35 @@ def adam_update(neurons, lr, t, l2_reg=0, beta1=np.float32(0.9), beta2=np.float3
m = n.m / np.float32(1-beta1**t)
v = n.v / np.float32(1-beta2**t)

n.weights -= lr * m / (np.sqrt(v) + 1e-8) + l2
n.b -= lr * d_bias
if l2_reg > 0:
n.weights -= lr * m / (np.sqrt(v) + eps) + l2_reg * n.weights
else:
n.weights -= lr * m / (np.sqrt(v) + eps)

n.m_bias = beta1 * n.m_bias + (1 - beta1) * d_bias
n.v_bias = beta2 * n.v_bias + (1 - beta2) * (d_bias ** 2)

m = n.m_bias / np.float32(1 - beta1 ** t)
v = n.v_bias / np.float32(1 - beta2 ** t)

n.b -= lr * m / (np.sqrt(v) + eps)


def nag_update(neurons, lr, l2_reg=0, mu=np.float32(0.9)):
for n in neurons:
l2 = l2_reg * n.weights
dx = (n.last_input.dot(n.delta)).T
d_bias = np.average(n.delta)

n.v_prev = n.v
n.v = mu * n.v - lr * dx
if l2_reg > 0:
n.weights += -mu * n.v_prev + (1 + mu) * n.v - l2_reg * n.weights
else:
n.weights += -mu * n.v_prev + (1 + mu) * n.v

n.weights += -mu * n.v_prev + (1 + mu) * n.v - l2
n.b -= lr * d_bias
n.v_prev_bias = n.v_bias
n.v_bias = mu * n.v_bias - lr * d_bias
n.b += -mu * n.v_prev_bias + (1 + mu) * n.v_bias


def momentum_update(neurons, lr, l2_reg=0, mu=np.float32(0.9)):
@@ -101,9 +129,13 @@ def momentum_update(neurons, lr, l2_reg=0, mu=np.float32(0.9)):
d_bias = np.average(n.delta)

n.v = mu * n.v - lr * dx
if l2_reg > 0:
n.weights += n.v - l2
else:
n.weights += n.v

n.weights += n.v - l2
n.b -= lr * d_bias
n.v_bias = mu * n.v_bias - lr * d_bias
n.b -= lr * n.v_bias


def vanila_update(neurons, lr, l2_reg=0):
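The new `rmsprop` updater implements the standard leaky average of squared gradients; note that it is the running `cache`, not a momentum velocity, that belongs under the square root in the denominator. A standalone restatement of one step on a single parameter array:

```python
import numpy as np

def rmsprop_step(w, dx, cache, lr=1e-3, decay_rate=0.9, eps=1e-8):
    # Leaky average of squared gradients, then a per-element scaled step.
    cache = decay_rate * cache + (1 - decay_rate) * dx ** 2
    w = w - lr * dx / (np.sqrt(cache) + eps)
    return w, cache

w = np.ones(4, dtype=np.float32)
cache = np.zeros(4, dtype=np.float32)
grad = np.full(4, 0.5, dtype=np.float32)
w, cache = rmsprop_step(w, grad, cache)
```

The reworked `adam_update` applies the same idea to the bias term as well, using the usual bias-corrected moments m / (1 - beta1**t) and v / (1 - beta2**t) before each step.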
example/cifar-10-batches-py/main.py: 8 changes (4 additions & 4 deletions)
@@ -52,9 +52,9 @@ def unpickle(file):
test_labels = data['labels']

lr = 1e-4
dropout_percent = 0.4
l2_reg = 3e-6
learning_rate_decay = np.float32(100e-2)
dropout_percent = 1
l2_reg = 6e-6
learning_rate_decay = np.float32(96e-2)
batch_size = 1

cnn = NeuralNetwork(train_images.shape[1:],
@@ -65,7 +65,7 @@ def unpickle(file):
{'type': 'pool'},
{'type': 'conv', 'k': 20, 'u_type': 'nag', 'f': 5, 's': 1, 'p': 2},
{'type': 'pool'},
{'type': 'output', 'k': len(le.classes_), 'u_type': 'adam'}
{'type': 'output', 'k': len(le.classes_), 'u_type': 'nag'}
]
, lr, l2_reg=l2_reg, dropout_p=dropout_percent)

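The example now trains without dropout (`dropout_p = 1` keeps every unit), doubles L2 regularization to 6e-6, decays the learning rate by a factor of 0.96, and switches the output layer from Adam to NAG. The training loop is not part of this diff, so where `learning_rate_decay` is applied is an assumption; a hypothetical per-epoch multiplicative schedule would look like:

```python
# Hypothetical sketch only: not taken from the repo's training loop.
lr = 1e-4
learning_rate_decay = 0.96

for epoch in range(20):
    # ... one full pass over the training set ...
    lr *= learning_rate_decay   # multiplicative per-epoch decay
```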
