使用TensorFlow v2.0構建多層感知器

使用TensorFlow v2.0構建一個兩層隱藏層完全連接的神經網絡(多層感知器)。

這個例子使用低級方法來更好地理解構建神經網絡和訓練過程背後的所有機制。

神經網絡概述

使用TensorFlow v2.0構建多層感知器

MNIST 數據集概述

此示例使用手寫數字的MNIST數據集。該數據集包含60,000個用於訓練的示例和10,000個用於測試的示例。這些數字已經過尺寸標準化並位於圖像中心,圖像是固定大小(28x28像素),值為0到255。

在此示例中,每個圖像將轉換為float32並歸一化為[0,1],並展平為784個特徵的一維數組(28 * 28)。

使用TensorFlow v2.0構建多層感知器

更多信息請查看鏈接: http://yann.lecun.com/exdb/mnist/

from __future__ import absolute_import, division, print_function
import tensorflow as tf
import numpy as np
# MNIST dataset parameters.
num_classes = 10 # total number of classes (digits 0-9)
num_features = 784 # number of input features per image (image shape: 28*28)
# Training parameters.
learning_rate = 0.001 # step size handed to the SGD optimizer
training_steps = 3000 # number of batches taken from the training dataset
batch_size = 256 # examples per training batch
display_step = 100 # print loss/accuracy every this many steps
# Network parameters.
n_hidden_1 = 128 # number of neurons in the first hidden layer
n_hidden_2 = 256 # number of neurons in the second hidden layer
# Prepare the MNIST data.
from tensorflow.keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Convert each image set to float32, flatten every image into a 1-D vector of
# num_features (28 * 28 = 784) values, and scale pixel values from [0, 255]
# down to [0, 1].
x_train = x_train.astype(np.float32).reshape(-1, num_features) / 255.
x_test = x_test.astype(np.float32).reshape(-1, num_features) / 255.

# Use the tf.data API to shuffle and batch the training data. The stages are
# applied in this order: repeat, shuffle with a 5000-element buffer, batch,
# then prefetch one batch ahead.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.repeat()
train_data = train_data.shuffle(5000)
train_data = train_data.batch(batch_size)
train_data = train_data.prefetch(1)
# Storage for each layer's weights and biases.
# Weights are drawn from a random-normal initializer; biases start at zero.
random_normal = tf.initializers.RandomNormal()

weights = dict(
    h1=tf.Variable(random_normal([num_features, n_hidden_1])),
    h2=tf.Variable(random_normal([n_hidden_1, n_hidden_2])),
    out=tf.Variable(random_normal([n_hidden_2, num_classes])),
)

biases = dict(
    b1=tf.Variable(tf.zeros([n_hidden_1])),
    b2=tf.Variable(tf.zeros([n_hidden_2])),
    out=tf.Variable(tf.zeros([num_classes])),
)
# Build the model.
def neural_net(x):
    """Run a forward pass through the two-hidden-layer perceptron.

    Reads the module-level ``weights`` and ``biases`` dictionaries.

    Args:
        x: Batch of flattened inputs that can be matrix-multiplied with
            ``weights['h1']``.

    Returns:
        The output layer's activations after softmax normalisation.
    """
    # First hidden fully connected layer (n_hidden_1 = 128 neurons), with a
    # sigmoid applied for non-linearity.
    hidden_1 = tf.nn.sigmoid(tf.matmul(x, weights['h1']) + biases['b1'])

    # Second hidden fully connected layer (n_hidden_2 = 256 neurons), again
    # followed by a sigmoid.
    hidden_2 = tf.nn.sigmoid(tf.matmul(hidden_1, weights['h2']) + biases['b2'])

    # Output fully connected layer: one neuron per class.
    logits = tf.matmul(hidden_2, weights['out']) + biases['out']

    # Softmax normalises the outputs into a probability distribution.
    return tf.nn.softmax(logits)
# Cross-entropy loss function.
def cross_entropy(y_pred, y_true):
    """Compute the cross-entropy between predictions and integer labels.

    Args:
        y_pred: Predicted class probabilities (output of ``neural_net``).
        y_true: Integer class labels; one-hot encoded here with
            ``num_classes`` as the depth.

    Returns:
        A scalar loss tensor.
    """
    # Encode the labels as one-hot vectors.
    one_hot_labels = tf.one_hot(y_true, depth=num_classes)

    # Clip predictions into [1e-9, 1] to avoid taking log(0).
    clipped_pred = tf.clip_by_value(y_pred, 1e-9, 1.)

    # NOTE: reduce_sum is called without an axis, so it collapses every
    # dimension into a single scalar; the outer reduce_mean therefore leaves
    # the value unchanged. The loss is the cross-entropy summed over the whole
    # batch, not a per-example average.
    summed_loss = -tf.reduce_sum(one_hot_labels * tf.math.log(clipped_pred))
    return tf.reduce_mean(summed_loss)
# Accuracy metric.
def accuracy(y_pred, y_true):
    """Return the fraction of predictions whose argmax matches the label.

    Args:
        y_pred: Per-class scores for each example.
        y_true: Integer class labels; cast to int64 for the comparison.

    Returns:
        The mean of the 0/1 correctness indicators as a float32 tensor.
    """
    # The predicted class is the index of the highest score (argmax).
    predicted_class = tf.argmax(y_pred, 1)
    expected_class = tf.cast(y_true, tf.int64)
    hits = tf.cast(tf.equal(predicted_class, expected_class), tf.float32)
    return tf.reduce_mean(hits, axis=-1)
# Stochastic gradient descent optimizer, configured with learning_rate.
optimizer = tf.optimizers.SGD(learning_rate)
# Optimization step.
def run_optimization(x, y):
    """Run one gradient-descent update on a single batch.

    Computes the loss for the batch under a GradientTape, differentiates it
    with respect to every entry of the module-level ``weights`` and ``biases``
    dictionaries, and applies the gradients with the module-level
    ``optimizer``.

    Args:
        x: Batch of inputs passed to ``neural_net``.
        y: Matching integer labels passed to ``cross_entropy``.
    """
    # Wrap the forward computation in a GradientTape for automatic
    # differentiation.
    with tf.GradientTape() as g:
        pred = neural_net(x)
        loss = cross_entropy(pred, y)

    # Variables to update, i.e. the trainable variables. On Python 3,
    # dict.values() returns a view object that does not support "+", so both
    # views are materialised as lists before being concatenated.
    trainable_variables = list(weights.values()) + list(biases.values())

    # Compute the gradients of the loss with respect to those variables.
    gradients = g.gradient(loss, trainable_variables)

    # Update W and b using the gradients.
    optimizer.apply_gradients(zip(gradients, trainable_variables))
# Train for the configured number of steps.
for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):
    # Run one optimization step to update the W and b values.
    run_optimization(batch_x, batch_y)

    # Only report progress every display_step steps.
    if step % display_step != 0:
        continue

    # Re-evaluate the current batch with the updated parameters for reporting.
    pred = neural_net(batch_x)
    loss = cross_entropy(pred, batch_y)
    acc = accuracy(pred, batch_y)
    print("step: %i, loss: %f, accuracy: %f" % (step, loss, acc))

output:

step: 100, loss: 567.292969, accuracy: 0.136719
step: 200, loss: 398.614929, accuracy: 0.562500
step: 300, loss: 226.743774, accuracy: 0.753906
step: 400, loss: 193.384521, accuracy: 0.777344
step: 500, loss: 138.649963, accuracy: 0.886719
step: 600, loss: 109.713669, accuracy: 0.898438
step: 700, loss: 90.397217, accuracy: 0.906250
step: 800, loss: 104.545380, accuracy: 0.894531
step: 900, loss: 94.204697, accuracy: 0.890625
step: 1000, loss: 81.660645, accuracy: 0.906250

step: 1100, loss: 81.237137, accuracy: 0.902344
step: 1200, loss: 65.776703, accuracy: 0.925781
step: 1300, loss: 94.195862, accuracy: 0.910156
step: 1400, loss: 79.425507, accuracy: 0.917969
step: 1500, loss: 93.508163, accuracy: 0.914062
step: 1600, loss: 88.912506, accuracy: 0.917969
step: 1700, loss: 79.033607, accuracy: 0.929688
step: 1800, loss: 65.788315, accuracy: 0.898438
step: 1900, loss: 73.462387, accuracy: 0.937500
step: 2000, loss: 59.309540, accuracy: 0.917969
step: 2100, loss: 67.014008, accuracy: 0.917969
step: 2200, loss: 48.297115, accuracy: 0.949219
step: 2300, loss: 64.523148, accuracy: 0.910156
step: 2400, loss: 72.989517, accuracy: 0.925781
step: 2500, loss: 57.588585, accuracy: 0.929688
step: 2600, loss: 44.957100, accuracy: 0.960938
step: 2700, loss: 59.788242, accuracy: 0.937500
step: 2800, loss: 63.581337, accuracy: 0.937500
step: 2900, loss: 53.471252, accuracy: 0.941406
step: 3000, loss: 43.869728, accuracy: 0.949219
# Evaluate the trained model on the MNIST test set.
pred = neural_net(x_test)
test_accuracy = accuracy(pred, y_test)
print("Test Accuracy: %f" % test_accuracy)
# Visualize predictions.
import matplotlib.pyplot as plt

# Predict the first 5 images of the test set.
n_images = 5
test_images = x_test[:n_images]
predictions = neural_net(test_images)

# The predicted digit for each image is the index of its highest score.
predicted_digits = np.argmax(predictions.numpy(), axis=1)

# Show each image followed by the model's prediction for it.
for image, digit in zip(test_images, predicted_digits):
    plt.imshow(image.reshape(28, 28), cmap='gray')
    plt.show()
    print("Model prediction: %i" % digit)

output:

使用TensorFlow v2.0構建多層感知器

Model prediction: 7

使用TensorFlow v2.0構建多層感知器

Model prediction: 2

使用TensorFlow v2.0構建多層感知器

Model prediction: 1

使用TensorFlow v2.0構建多層感知器

Model prediction: 0

使用TensorFlow v2.0構建多層感知器

Model prediction: 4


分享到:


相關文章: