什麼是線性迴歸
通過訓練來得到線性方程係數 theta(θ)的過程叫做線性迴歸,這樣我們就可以預測某個 x 對應的 y 值
線性迴歸的數學表示
線性迴歸中的最小二乘法
通過對損失函數 L 關於 theta 求導並令導數為零,使 L 取得最小值,由此解出 theta = (X'X)^-1 X'Y
線性迴歸的梯度下降算法
複習numpy的向量操作
#coding=gbk
import numpy as np
from numpy.linalg import inv
from numpy import dot

# ``np.mat`` was removed in NumPy 2.0. ``np.asmatrix`` is the same constructor
# and exists in both NumPy 1.x and 2.x, so alias it to keep the short name.
mat = np.asmatrix

# inv: matrix inverse
# dot: matrix product
# mat: build a 2-D matrix object

A = mat([1, 1])
print('矩陣A:\n', A)
print("A的轉置:\n", A.T)
# reshape changes the shape of the matrix (here 1x2 -> 2x1)
print(A.reshape(2, 1))

B = mat([[1, 2], [2, 3]])
print(B.reshape(1, 4))
print("B的逆:\n", inv(B))
print("矩陣B:\n", B)
print("B的第一行:\n", B[0, :])
print("B的第一列:\n", B[:, 0])

# A is 1x2 and B is 2x2, so dot(A, B) is 1x2 and dot(B, A.T) is 2x1
print('A和B點乘', dot(A, B))
print("B點乘A的轉置:\n", dot(B, A.T))
實現最小二乘法
#coding=gbk
import numpy as np
from numpy.linalg import inv
from numpy import dot

# ``np.mat`` was removed in NumPy 2.0. ``np.asmatrix`` is the same constructor
# and exists in both NumPy 1.x and 2.x, so alias it to keep the short name.
mat = np.asmatrix

# Training data sampled from y = 2x, stored as 3x1 column matrices
X = mat([1, 2, 3]).reshape(3, 1)
Y = 2 * X

# Normal equation: theta = (X'X)^-1 X'Y
theta = dot(dot(inv(dot(X.T, X)), X.T), Y)
print(theta)
實現梯度下降
#coding=gbk
import numpy as np
from numpy.linalg import inv
from numpy import dot

# ``np.mat`` was removed in NumPy 2.0. ``np.asmatrix`` is the same constructor
# and exists in both NumPy 1.x and 2.x, so alias it to keep the short name.
mat = np.asmatrix

# Training data sampled from y = 2x, stored as 3x1 column matrices
X = mat([1, 2, 3]).reshape(3, 1)
Y = 2 * X
sample_count = X.shape[0]

# Update rule: theta = theta - alpha * mean((theta * X - Y) * X)
theta = 1.
alpha = 0.1  # learning rate
for i in range(100):
    # Residual of the current fit, one entry per sample (3x1).
    residual = Y - dot(X, theta)
    # Element-wise product pairs every residual with its own x. Multiplying
    # by a reshaped 1x3 row would build a 3x3 outer product instead, whose
    # sum is (sum of residuals) * (sum of x), not the gradient.
    theta = theta + alpha * np.sum(np.multiply(residual, X)) / sample_count
print(theta)
迴歸分析實戰
隨機數據生成代碼,生成data.csv文件
import random
def Y(X1, X2, X3):
    """Return the target value of the linear model for one sample.

    The model is ``0.65 * X1 + 0.70 * X2 - 0.55 * X3 + 1.95``: three feature
    weights plus an intercept of 1.95.
    """
    return 0.65 * X1 + 0.70 * X2 - 0.55 * X3 + 1.95
def Produce():
    """Write a header plus 200 random samples to ``data.csv``.

    Each row holds three features drawn uniformly from [0, 10) followed by
    the value ``Y`` computes for them. The file is opened in ``'w'`` mode, so
    an existing ``data.csv`` is overwritten. A failed row write is reported
    on stdout and the loop continues with the next row.
    """
    filename = 'data.csv'
    with open(filename, 'w') as file:
        file.write('X1,X2,X3,Y\n')
        # The random module seeds itself on import, so there is no need to
        # call random.seed() again before every sample.
        for _ in range(200):
            x1 = random.random() * 10
            x2 = random.random() * 10
            x3 = random.random() * 10
            y = Y(x1, x2, x3)
            try:
                file.write(str(x1) + ',' + str(x2) + ',' + str(x3) + ',' + str(y) + '\n')
            except OSError as e:
                print('Write Error')
                print(str(e))
# Run the generator so that data.csv is written when this script executes.
Produce()
分別用最小二乘法和梯度下降法計算theta
#coding=gbk
import numpy as np
from numpy.linalg import inv
from numpy import dot
import pandas as pd

# ``np.mat`` was removed in NumPy 2.0. ``np.asmatrix`` is the same constructor
# and exists in both NumPy 1.x and 2.x, so alias it to keep the short name.
mat = np.asmatrix

# Load the samples: the first three columns are features, the fourth is the target.
dataset = pd.read_csv('data.csv')
sample_count = len(dataset)

# Design matrix: a leading column of ones (the intercept term x0) followed by
# the three feature columns. Building it directly avoids assigning a new
# column onto a slice of the DataFrame.
features = dataset.iloc[:, 0:3].values
X = np.hstack([np.ones((sample_count, 1)), features])
Y = dataset.iloc[:, 3].values.reshape(sample_count, 1)

# Least squares via the normal equation: theta = (X'X)^-1 X'Y
theta = dot(dot(inv(dot(X.T, X)), X.T), Y)
print("最小二乘法計算theta:\n", theta.reshape(4, 1))

# Gradient descent
theta = np.array([1., 1., 1., 1.]).reshape(4, 1)
alpha = 0.02  # learning rate
for i in range(10000):
    # Compute the residual once from the current theta, then update all four
    # coefficients together. Binding a second name to theta does not copy the
    # array, so writing one coefficient at a time through such an alias would
    # change theta in the middle of the step.
    residual = Y - dot(X, theta)
    theta = theta + alpha * dot(X.T, residual) / sample_count
print("梯度下降法計算theta:", theta)
運行結果
閱讀更多 九雲IT 的文章