CNN詳解-基於python基礎庫實現的簡單CNN

CNN,即卷積神經網絡,主要用於圖像識別,分類。由輸入層,卷積層,池化層,全連接層(Affine層),Softmax層疊加而成。卷積神經網絡中還有一個非常重要的結構:過濾器,它作用於層與層之間(卷積層與池化層),決定了怎樣對數據進行卷積和池化。下面先直觀理解下卷積和池化。

二維卷積

CNN詳解-基於python基礎庫實現的簡單CNN

即濾波器的每個格子與濾波器選中數據的對應格子相乘,再將所有乘積相加,得到輸出中的一個值。

三維卷積

CNN詳解-基於python基礎庫實現的簡單CNN

三維數據的話,濾波器也是三維。同時對每個通道(維度)進行卷積,最後將各通道的卷積結果相加,輸出二維。

池化

CNN詳解-基於python基礎庫實現的簡單CNN

池化分為最大池化層與平均池化層。最大池化層對濾波器選中的數據取最大值,平均池化層對濾波器選中的數據求平均值。一般使用最大池化層。池化層是單獨作用於每個維度,若是三維數據,即對每個維度上進行最大/平均操作,輸出結果也是三維。

卷積用於提取高層次特徵,池化用於縮小參數。一般為一層卷積加一層池化反覆疊加或多層卷積加一層池化。
全連接層用於卷積池化之後:先將數據展平(flatten)成一維向量,再經過一兩層全連接層,得出結果。
softmax用於最後的分類

好了,知道卷積池化,下面就來實現最簡單的一個卷積網絡:

CNN詳解-基於python基礎庫實現的簡單CNN

靈魂畫筆。(*^__^*) relu為激活函數,FC即全連接層,也即Affine層

CNN實現手寫數字識別

Package

import sys ,os 
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf #只是用來加載mnist數據集
from PIL import Image
import pandas as pd
import math

加載MNIST數據集

def one_hot_label(y, n_classes=10):
    """Convert integer class labels into a one-hot encoded matrix.

    Arguments:
    y -- array-like of integer class indices; it is flattened, so both
         shape (m,) and shape (m, 1) are accepted
    n_classes -- number of columns of the result (defaults to 10, the number
                 of MNIST digit classes)

    Return:
    encoded -- float array of shape (m, n_classes) with a single 1 per row,
               placed in the column given by the corresponding label
    """
    labels = np.asarray(y).reshape(-1)
    encoded = np.zeros((labels.shape[0], n_classes))
    # Fancy indexing: row i gets a 1 in column labels[i].
    encoded[np.arange(labels.shape[0]), labels] = 1
    return encoded


# load_data() returns (train images, train labels), (test images, test labels).
# Every MNIST image is 28*28 with a single channel.
(x_train_origin, t_train_origin), (x_test_origin, t_test_origin) = tf.keras.datasets.mnist.load_data()

# Scale pixel values by 1/255 and insert an explicit channel axis:
# (m, h, w) -> (m, 1, h, w).
m, h, w = x_train_origin.shape
X_train = (x_train_origin / 255.0).reshape((m, 1, h, w))
y_train = one_hot_label(t_train_origin)

m, h, w = x_test_origin.shape
X_test = (x_test_origin / 255.0).reshape((m, 1, h, w))
y_test = one_hot_label(t_test_origin)

print("shape of x_train is :" + repr(X_train.shape))
print("shape of t_train is :" + repr(y_train.shape))
print("shape of x_test is :" + repr(X_test.shape))
print("shape of t_test is :" + repr(y_test.shape))

shape of x_train is :(60000, 1, 28, 28)
shape of t_train is :(60000, 10)

shape of x_test is :(10000, 1, 28, 28)
shape of t_test is :(10000, 10)

顯示圖像

# Display one training image together with its label.
index = 0
sample_image = X_train[index].reshape((28, 28))
plt.imshow(sample_image, cmap=plt.cm.gray)
sample_label = np.argmax(y_train[index])
print("y is:" + str(sample_label))
y is:5
CNN詳解-基於python基礎庫實現的簡單CNN

output_7_1.png

激活函數

def relu(input_X):
    """Rectified linear unit applied element-wise.

    Arguments:
    input_X -- a numpy array

    Return:
    A -- a numpy array of the same shape in which every negative element of
         input_X is replaced by 0 and all other elements are kept
    """
    is_negative = input_X < 0
    return np.where(is_negative, 0, input_X)

def softmax(input_X):
    """Row-wise softmax.

    Arguments:
    input_X -- a numpy array whose axis 1 holds the class scores of a sample

    Return:
    A numpy array with the same shape as input_X; the values along axis 1
    are positive and sum to 1.
    """
    # Subtracting the per-row maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing to inf (which would turn
    # the division below into nan) when the scores are large.
    shifted = input_X - np.max(input_X, axis=1, keepdims=True)
    exp_a = np.exp(shifted)
    # keepdims=True keeps the sum broadcastable against exp_a.
    return exp_a / np.sum(exp_a, axis=1, keepdims=True)

損失函數

def cross_entropy_error(labels, logits):
    """Cross-entropy summed over every element of the batch.

    Arguments:
    labels -- one-hot encoded targets
    logits -- predicted probabilities with the same shape as labels (despite
              the name, the natural logarithm is applied to them directly)

    Return:
    The scalar -sum(labels * log(logits)).
    """
    # Floor the probabilities at a tiny positive value: log(0) is -inf, and
    # 0 * -inf is nan, which would otherwise poison the whole cost.
    eps = 1e-12
    return -np.sum(labels * np.log(np.maximum(logits, eps)))

卷積層

class Convolution:
    """Convolution layer evaluated as one matrix product over image patches.

    Relies on the helpers im2col2 / col2im2, which are defined outside this
    block.
    """

    def __init__(self, W, fb, stride=1, pad=0):
        """
        Arguments:
        W -- filter weights, shape (FN, NC, FH, FW); FN is the number of filters
        fb -- filter biases, shape (1, FN)
        stride -- stride of the sliding window
        pad -- amount of padding
        """
        self.W = W
        self.fb = fb
        self.stride = stride
        self.pad = pad

        # Values cached by forward() for use in backward().
        self.col_X = None
        self.X = None
        self.col_W = None
        self.out_shape = None

        # Gradients computed by the most recent backward() call.
        self.dW = None
        self.db = None

    def forward(self, input_X):
        """Forward pass.

        Arguments:
        input_X -- input of shape (m, nc, height, width)

        Return:
        out -- feature maps of shape (m, FN, out_h, out_w)
        """
        self.X = input_X
        FN, _, FH, FW = self.W.shape
        m, _, input_h, input_w = self.X.shape

        # Output height and width of the convolution.
        out_h = (input_h + 2 * self.pad - FH) // self.stride + 1
        out_w = (input_w + 2 * self.pad - FW) // self.stride + 1

        # Unfold the input into a 2-D array of patches, one patch per row.
        # NOTE(review): assumes im2col2 returns shape
        # (m*out_h*out_w, FH*FW*C) with rows ordered (sample, out_y, out_x)
        # -- confirm against the helper.
        self.col_X = col_X = im2col2(self.X, FH, FW, self.stride, self.pad)

        # One flattened filter per column: (FH*FW*C, FN).
        self.col_W = col_W = self.W.reshape(FN, -1).T

        out = np.dot(col_X, col_W) + self.fb  # (m*out_h*out_w, FN)

        # Rows are (sample, out_y, out_x) and columns are filters, so the
        # filter axis has to be moved next to the sample axis. A plain
        # transpose + reshape to (m, FN, out_h, out_w) would mix values of
        # different samples and filters whenever m > 1 and FN > 1.
        out = out.reshape(m, out_h, out_w, FN).transpose(0, 3, 1, 2)
        self.out_shape = out.shape
        return out

    def backward(self, dz, learning_rate):
        """Backward pass followed by one gradient-descent update of W and fb.

        Arguments:
        dz -- gradient with respect to the output of forward(), same shape
        learning_rate -- step size of the parameter update

        Return:
        dx -- gradient with respect to the input of forward(), same shape
        """
        assert dz.shape == self.out_shape

        FN, _, FH, FW = self.W.shape

        # Inverse of the layout change at the end of forward():
        # (m, FN, out_h, out_w) -> (m*out_h*out_w, FN).
        col_dz = dz.transpose(0, 2, 3, 1).reshape(-1, FN)

        # Gradients are summed over the batch (not divided by the batch size).
        self.dW = np.dot(self.col_X.T, col_dz)  # shape is (FH*FW*C, FN)
        self.db = np.sum(col_dz, axis=0, keepdims=True)

        self.dW = self.dW.T.reshape(self.W.shape)
        self.db = self.db.reshape(self.fb.shape)

        d_col_x = np.dot(col_dz, self.col_W.T)  # shape is (m*out_h*out_w, FH*FW*C)
        # Fold the patch gradients back with the stride used in forward().
        # NOTE(review): no padding is passed to col2im2 here; whether the
        # helper supports it is not visible -- confirm before using pad != 0.
        dx = col2im2(d_col_x, self.X.shape, FH, FW, stride=self.stride)

        assert dx.shape == self.X.shape

        # Update W and b.
        self.W = self.W - learning_rate * self.dW
        self.fb = self.fb - learning_rate * self.db

        return dx


池化層

class Pooling:
    """Max-pooling layer built on the im2col2 / col2im2 helpers.

    The helpers are defined outside this block.
    """

    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        """
        Arguments:
        pool_h -- height of the pooling window
        pool_w -- width of the pooling window
        stride -- stride of the pooling window
        pad -- amount of padding handed to im2col2
        """
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad
        # Values cached by forward() for use in backward().
        self.X = None
        self.arg_max = None

    def forward(self, input_X):
        """Forward pass.

        Arguments:
        input_X -- input of shape (m, nc, height, width)

        Return:
        out -- pooled maps of shape (m, nc, out_h, out_w)
        """
        self.X = input_X
        N, C, H, W = input_X.shape
        # The padding is included because im2col2 receives self.pad below;
        # leaving it out made the final reshape fail whenever pad != 0.
        out_h = (H + 2 * self.pad - self.pool_h) // self.stride + 1
        out_w = (W + 2 * self.pad - self.pool_w) // self.stride + 1

        # Unfold, then give every (position, channel) pair its own row.
        # NOTE(review): assumes im2col2 returns rows ordered
        # (sample, out_y, out_x) with channel-major columns -- confirm
        # against the helper.
        col = im2col2(input_X, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h * self.pool_w)

        # Position of the maximum inside each window, needed by backward().
        self.arg_max = np.argmax(col, axis=1)
        # Maximum of each window.
        out = np.max(col, axis=1)
        # Under that layout the rows run (sample, out_y, out_x, channel), so
        # the channel axis is moved forward explicitly; reshaping straight to
        # (N, C, out_h, out_w) would mislabel the axes whenever C > 1.
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)
        return out

    def backward(self, dz):
        """Backward pass.

        Arguments:
        dz -- gradient with respect to out, same shape as out

        Return:
        Gradient with respect to the input_X given to forward().
        """
        pool_size = self.pool_h * self.pool_w
        # Put dz into the same (sample, out_y, out_x, channel) row order that
        # self.arg_max was computed in.
        dz_rows = dz.transpose(0, 2, 3, 1).flatten()

        # Only the element that was the maximum of its window gets gradient.
        dmax = np.zeros((dz.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dz_rows

        dx = col2im2(dmax, out_shape=self.X.shape, fh=self.pool_h, fw=self.pool_w, stride=self.stride)
        return dx


Relu層

class Relu:
    """ReLU activation layer."""

    def __init__(self):
        # Boolean mask of the inputs that were <= 0 in the last forward().
        self.mask = None

    def forward(self, X):
        """Return a copy of X in which every element <= 0 is set to 0.

        The mask of zeroed positions is stored for backward().
        """
        self.mask = X <= 0
        # Work on a copy so the caller's array is not overwritten.
        out = X.copy()
        out[self.mask] = 0
        return out

    def backward(self, dz):
        """Return a copy of dz zeroed where the forward input was <= 0."""
        # Work on a copy so the upstream gradient is not overwritten.
        dx = dz.copy()
        dx[self.mask] = 0
        return dx

SoftMax層

class SoftMax:
    """Softmax output layer."""

    def __init__(self):
        # Probabilities produced by the most recent forward() call.
        self.y_hat = None

    def forward(self, X):
        """Apply softmax to X, cache the result and return it."""
        self.y_hat = softmax(X)
        return self.y_hat

    def backward(self, labels):
        """Return y_hat - labels.

        Arguments:
        labels -- one-hot targets with the same shape as the cached y_hat

        The result is not divided by the batch size.
        """
        return self.y_hat - labels

def compute_cost(logits, label):
    """Return the cross-entropy cost of a batch.

    Arguments:
    logits -- network output, forwarded as the `logits` argument of
              cross_entropy_error
    label -- targets, forwarded as the `labels` argument

    Thin wrapper that only swaps the argument order.
    """
    cost = cross_entropy_error(label, logits)
    return cost

Affine FC層

class Affine:
    """Fully connected layer computing X.W + b."""

    def __init__(self, W, b):
        self.W = W  # shape is (n_x, n_unit)
        self.b = b  # shape is (1, n_unit)

        # Values cached by forward() for use in backward().
        self.X = None
        self.origin_x_shape = None
        self.out_shape = None

        # Gradients computed by the most recent backward() call.
        self.dW = None
        self.db = None

    def forward(self, X):
        """Flatten every sample of X to a row and apply the affine map."""
        self.origin_x_shape = X.shape
        n_samples = X.shape[0]
        self.X = X.reshape(n_samples, -1)  # (m, n)
        result = np.dot(self.X, self.W) + self.b
        self.out_shape = result.shape
        return result

    def backward(self, dz, learning_rate):
        """Backward pass followed by one gradient-descent update of W and b.

        Arguments:
        dz -- gradient with respect to the output of forward()
        learning_rate -- step size of the parameter update

        Return:
        Gradient with respect to the input, in the input's original shape.
        """
        assert dz.shape == self.out_shape

        n_samples = self.X.shape[0]

        # Parameter gradients are averaged over the batch.
        self.dW = np.dot(self.X.T, dz) / n_samples
        self.db = np.sum(dz, axis=0, keepdims=True) / n_samples

        assert self.dW.shape == self.W.shape
        assert self.db.shape == self.b.shape

        # Computed with the weights from before the update below.
        grad_input = np.dot(dz, self.W.T)
        assert grad_input.shape == self.X.shape

        # Restore the shape the input had before it was flattened.
        grad_input = grad_input.reshape(self.origin_x_shape)

        # Update W and b (new arrays are bound; the old ones are not modified).
        self.W = self.W - learning_rate * self.dW
        self.b = self.b - learning_rate * self.db

        return grad_input

模型

class SimpleConvNet:
    """Minimal CNN: Conv -> ReLU -> MaxPool -> Affine -> ReLU -> Affine -> SoftMax.

    init_model() creates the layers and stores them in self.layers in forward
    order; every layer exposes forward() and backward().
    """

    # Text printed in front of each layer's output shape by
    # forward_progation(print_out=True); the order follows init_model().
    _LAYER_LABELS = (
        "卷積之後:",
        "Relu:",
        "池化:",
        "Affline 層的X:",
        "Relu 層的X:",
        "Affline 層的X:",
        "Softmax 層的X:",
    )

    def __init__(self):
        self.X = None
        self.Y = None
        self.layers = []

    def add_conv_layer(self, n_filter, n_c, f, stride=1, pad=0):
        """Create a convolution layer.

        Arguments:
        n_filter -- number of filters
        n_c -- number of input channels, i.e. the channel count of each filter
        f -- height/width of the (square) filter
        stride -- stride of the convolution
        pad -- amount of padding

        Return:
        Conv -- the convolution layer
        """
        # Small random weights, zero biases.
        W = np.random.randn(n_filter, n_c, f, f) * 0.01
        fb = np.zeros((1, n_filter))
        return Convolution(W, fb, stride=stride, pad=pad)

    def add_maxpool_layer(self, pool_shape, stride=1, pad=0):
        """Create a max-pooling layer.

        Arguments:
        pool_shape -- (height, width) of the pooling window
        stride -- stride of the pooling window
        pad -- amount of padding

        Return:
        pool -- the initialised Pooling object
        """
        pool_h, pool_w = pool_shape
        return Pooling(pool_h, pool_w, stride=stride, pad=pad)

    def add_affine(self, n_x, n_units):
        """Create a fully connected layer.

        Arguments:
        n_x -- number of inputs
        n_units -- number of neurons

        Return:
        fc_layer -- the Affine layer object
        """
        W = np.random.randn(n_x, n_units) * 0.01
        b = np.zeros((1, n_units))
        return Affine(W, b)

    def add_relu(self):
        """Create a ReLU layer."""
        return Relu()

    def add_softmax(self):
        """Create a softmax layer."""
        return SoftMax()

    def cacl_out_hw(self, HW, f, stride=1, pad=0):
        """Return the output height (or width) after a convolution or pooling.

        Floor division is used so the result is an int and matches the size
        the layers themselves compute when the window does not tile the
        input exactly.
        """
        return (HW + 2 * pad - f) // stride + 1

    def init_model(self, train_X, n_classes):
        """Build the layer stack for inputs shaped like train_X.

        Arguments:
        train_X -- data of shape (N, C, H, W); only the shape is used
        n_classes -- number of output classes
        """
        _, C, H, W = train_X.shape

        # Start from an empty stack so that calling fit() again rebuilds the
        # network instead of appending a second copy of every layer.
        self.layers = []

        # Convolution layer
        n_filter = 4
        f = 7
        self.layers.append(self.add_conv_layer(n_filter=n_filter, n_c=C, f=f, stride=1))
        out_h = self.cacl_out_hw(H, f)
        out_w = self.cacl_out_hw(W, f)
        out_ch = n_filter

        # Relu
        self.layers.append(self.add_relu())

        # Pooling (out_ch is unchanged)
        f = 2
        self.layers.append(self.add_maxpool_layer(pool_shape=(f, f), stride=2))
        out_h = self.cacl_out_hw(out_h, f, stride=2)
        out_w = self.cacl_out_hw(out_w, f, stride=2)

        # Affine layer fed with the flattened pooling output
        n_x = int(out_h * out_w * out_ch)
        n_units = 32
        self.layers.append(self.add_affine(n_x=n_x, n_units=n_units))

        # Relu
        self.layers.append(self.add_relu())

        # Affine layer producing one score per class
        self.layers.append(self.add_affine(n_x=n_units, n_units=n_classes))

        # SoftMax
        self.layers.append(self.add_softmax())

    def forward_progation(self, train_X, print_out=False):
        """Forward pass through every layer in order.

        Arguments:
        train_X -- input data
        print_out -- when True, print the output shape after each layer

        Return:
        A -- output of the last layer
        """
        X = train_X
        for index, layer in enumerate(self.layers):
            X = layer.forward(X)
            if print_out:
                if index < len(self._LAYER_LABELS):
                    label = self._LAYER_LABELS[index]
                else:
                    # Layers beyond the standard seven have no fixed label.
                    label = type(layer).__name__ + ":"
                print(label + str(X.shape))
        return X

    def back_progation(self, train_y, learning_rate):
        """Backward pass through every layer in reverse order.

        Arguments:
        train_y -- one-hot labels, consumed by the last (softmax) layer
        learning_rate -- step size handed to the layers that own parameters
        """
        dz = train_y
        for layer in reversed(self.layers):
            if isinstance(layer, (Convolution, Affine)):
                # Layers with parameters update themselves while
                # back-propagating.
                dz = layer.backward(dz, learning_rate=learning_rate)
            else:
                dz = layer.backward(dz)

    def get_minibatch(self, batch_data, minibatch_size, num):
        """Return mini-batch number `num` (0-based) of batch_data.

        Slicing clamps to the array bounds, so the last batch is simply
        shorter and an out-of-range `num` yields an empty slice.
        """
        start = num * minibatch_size
        return batch_data[start:start + minibatch_size]

    def optimize(self, train_X, train_y, minibatch_size, learning_rate=0.05, num_iters=500):
        """Train with mini-batch gradient descent and plot the recorded costs.

        Arguments:
        train_X -- training data
        train_y -- labels of the training data
        minibatch_size -- number of samples per mini-batch
        learning_rate -- learning rate
        num_iters -- number of passes over the training data
        """
        m = train_X.shape[0]
        num_batches = math.ceil(m / minibatch_size)

        costs = []
        for iteration in range(num_iters):
            # The cost is only recorded and printed every 100th pass.
            record_cost = iteration % 100 == 0
            iter_cost = 0
            for batch_num in range(num_batches):
                minibatch_X = self.get_minibatch(train_X, minibatch_size, batch_num)
                minibatch_y = self.get_minibatch(train_y, minibatch_size, batch_num)

                # Forward pass
                A = self.forward_progation(minibatch_X, print_out=False)
                # Cost
                cost = compute_cost(A, minibatch_y)
                # Backward pass (also updates the parameters)
                self.back_progation(minibatch_y, learning_rate)

                if record_cost:
                    iter_cost += cost / num_batches

            if record_cost:
                print("After %d iters ,cost is :%g" % (iteration, iter_cost))
                costs.append(iter_cost)

        # Plot the cost curve.
        plt.plot(costs)
        plt.xlabel("iterations/hundreds")
        plt.ylabel("costs")
        plt.show()

    def predicate(self, train_X):
        """Predict classes for train_X.

        Return:
        one_hot -- array shaped like the network output with a 1 at the most
                   probable class of every sample and 0 elsewhere
        """
        logits = self.forward_progation(train_X)
        one_hot = np.zeros_like(logits)
        one_hot[range(train_X.shape[0]), np.argmax(logits, axis=1)] = 1
        return one_hot

    def fit(self, train_X, train_y, minibatch_size=10, learning_rate=0.05, num_iters=800):
        """Build the model, train it and print the training accuracy.

        Arguments:
        train_X -- training data of shape (m, C, H, W)
        train_y -- one-hot labels of shape (m, n_classes)
        minibatch_size -- number of samples per mini-batch
        learning_rate -- learning rate
        num_iters -- number of passes over the training data
        """
        self.X = train_X
        self.Y = train_y
        n_y = train_y.shape[1]
        m = train_X.shape[0]

        # Initialise the model.
        self.init_model(train_X, n_classes=n_y)

        self.optimize(
            train_X,
            train_y,
            minibatch_size=minibatch_size,
            learning_rate=learning_rate,
            num_iters=num_iters,
        )

        predictions = self.predicate(train_X)

        accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(train_y, axis=1)) / m
        print("訓練集的準確率為:%g" % (accuracy))
convNet = SimpleConvNet()
# Experiment with only the first 10 training images.
train_X, train_y = X_train[:10], y_train[:10]
convNet.fit(train_X, train_y)
After 0 iters ,cost is :23.0254
After 100 iters ,cost is :14.5255
After 200 iters ,cost is :6.01782
After 300 iters ,cost is :5.71148
After 400 iters ,cost is :5.63212
After 500 iters ,cost is :5.45006
After 600 iters ,cost is :5.05849
After 700 iters ,cost is :4.29723
CNN詳解-基於python基礎庫實現的簡單CNN

output_32_1.png

訓練集的準確率為:0.9

預測

# Accuracy on the 10 images the network was trained on.
logits = convNet.predicate(X_train[:10])
m = 10
predicted_classes = np.argmax(logits, axis=1)
true_classes = np.argmax(y_train[:10], axis=1)
accuracy = np.sum(predicted_classes == true_classes) / m
print("訓練的準確率為:%g" % (accuracy))
訓練的準確率為:0.9
# Show one training image next to its true label and the predicted label.
index = 0
shown_image = X_train[index].reshape((28, 28))
plt.imshow(shown_image, cmap=plt.cm.gray)
true_digit = np.argmax(y_train[index])
predicted_digit = np.argmax(logits[index])
print("y is:" + str(true_digit))
print("your predicate result is :" + str(predicted_digit))
y is:5
your predicate result is :5
CNN詳解-基於python基礎庫實現的簡單CNN

output_35_1.png

# Accuracy on the whole test set.
logits = convNet.predicate(X_test)
m = X_test.shape[0]
predicted_classes = np.argmax(logits, axis=1)
true_classes = np.argmax(y_test, axis=1)
accuracy = np.sum(predicted_classes == true_classes) / m
print("測試的準確率為:%g" % (accuracy))
測試的準確率為:0.1031

因為訓練的數據只有10個,所以測試的準確率只有0.1。
本文的目的是實現CNN,瞭解CNN的過程。有一些輔助函數沒有顯示出來,用於將圖像轉成矩陣數據,方便卷積操作,然後再將其轉換成圖像用於後面的操作。如有興趣,可以查看完整代碼。

完整代碼鏈接:https://github.com/huanhuang/SimpleConvNet.git


作者:倔犟的貝殼
鏈接:https://www.jianshu.com/p/3286d4a061ca
來源:簡書
著作權歸作者所有。商業轉載請聯繫作者獲得授權,非商業轉載請註明出處。


分享到:


相關文章: