python之線性迴歸

什麼是線性迴歸

通過訓練來得到線性方程係數theta的過程叫做線性迴歸,這樣我們可以預測某個x對應的y值

python之線性迴歸

線性迴歸的數學表示

python之線性迴歸

線性迴歸中的最小二乘法

通過對損失函數L求導並令導數為零,求出使L最小的theta:$\theta = (X^{T}X)^{-1}X^{T}Y$

python之線性迴歸

線性迴歸的梯度下降算法

python之線性迴歸

python之線性迴歸

複習numpy的向量操作

# Review of the basic NumPy matrix operations used later for linear
# regression: transpose, reshape, inverse, row/column slicing and dot product.
# Plain 2-D ndarrays are used instead of numpy.mat, whose alias was removed
# in NumPy 2.0.

import numpy as np
from numpy import dot  # dot: matrix product
from numpy.linalg import inv  # inv: matrix inverse

# A is a 1x2 row vector; the nested list keeps it two-dimensional.
A = np.array([[1, 1]])

print('矩陣A:\n', A)
print("A的轉置:\n", A.T)

# reshape changes the shape without touching the data: 1x2 -> 2x1.
print(A.reshape(2, 1))

B = np.array([[1, 2], [2, 3]])

print(B.reshape(1, 4))
print("B的逆:\n", inv(B))
print("矩陣B:\n", B)

# Slices (0:1) rather than a bare index keep the row/column two-dimensional.
print("B的第一行:\n", B[0:1, :])
print("B的第一列:\n", B[:, 0:1])

# A is 1x2 and B is 2x2, so dot(A, B) is 1x2 and dot(B, A.T) is 2x1.
print('A和B點乘', dot(A, B))
print("B點乘A的轉置:\n", dot(B, A.T))

實現最小二乘法

# Closed-form least squares on a toy data set generated from y = 2x.
# Plain 2-D ndarrays are used instead of numpy.mat, whose alias was removed
# in NumPy 2.0.

import numpy as np
from numpy import dot
from numpy.linalg import inv

# Three samples as a 3x1 column vector; the targets follow y = 2x exactly.
X = np.array([1, 2, 3]).reshape(3, 1)
Y = 2 * X

# Normal equation: theta = (X'X)^-1 X'Y
theta = dot(dot(inv(dot(X.T, X)), X.T), Y)

print(theta)

實現梯度下降

# Batch gradient descent on a toy data set generated from y = 2x.
# Plain 2-D ndarrays are used instead of numpy.mat, whose alias was removed
# in NumPy 2.0.

import numpy as np
from numpy import dot
from numpy.linalg import inv

# Three samples as a 3x1 column vector; the targets follow y = 2x exactly.
X = np.array([1, 2, 3]).reshape(3, 1)
Y = 2 * X

# Update rule: theta = theta - alpha * mean((theta * x - y) * x)
theta = 1.
alpha = 0.1

for i in range(100):
    # Multiply each residual by its own x element-wise. With matrix operands
    # `*` was a matrix product, which summed a 3x3 outer product instead.
    theta = theta + alpha * np.sum((Y - dot(X, theta)) * X) / len(X)

print(theta)

迴歸分析實戰

隨機數據生成代碼,生成data.csv文件

import csv
import random


def Y(X1, X2, X3):
    """Return the noise-free target 0.65*X1 + 0.70*X2 - 0.55*X3 + 1.95."""
    return 0.65 * X1 + 0.70 * X2 - 0.55 * X3 + 1.95


def Produce(filename='data.csv', n_rows=200):
    """Write a CSV of random samples and their targets.

    The file starts with the header ``X1,X2,X3,Y`` followed by ``n_rows``
    data rows. Each feature is drawn uniformly from [0, 10) and the target
    is computed with ``Y``.

    Args:
        filename: Path of the CSV file to create (overwritten if present).
        n_rows: Number of data rows to generate.
    """
    # newline='' plus an explicit '\n' terminator keeps one '\n' per row.
    with open(filename, 'w', newline='') as out:
        writer = csv.writer(out, lineterminator='\n')
        writer.writerow(['X1', 'X2', 'X3', 'Y'])
        # The random module seeds itself on import, so it is not re-seeded
        # on every iteration.
        for _ in range(n_rows):
            x1 = random.random() * 10
            x2 = random.random() * 10
            x3 = random.random() * 10
            try:
                writer.writerow([x1, x2, x3, Y(x1, x2, x3)])
            except OSError as e:
                # Best effort: report the failed row and keep going.
                print('Write Error')
                print(str(e))


if __name__ == "__main__":
    Produce()

分別用最小二乘法和梯度下降法計算theta

# Fit a linear model to data.csv twice: with the closed-form normal equation
# and with batch gradient descent, then print both coefficient vectors.

import numpy as np
from numpy import dot
from numpy.linalg import inv
import pandas as pd


def least_squares(X, Y):
    """Solve the normal equation theta = (X'X)^-1 X'Y.

    Args:
        X: Design matrix of shape (m, n), including the intercept column.
        Y: Target column vector of shape (m, 1).

    Returns:
        Coefficient column vector of shape (n, 1).
    """
    return dot(dot(inv(dot(X.T, X)), X.T), Y)


def gradient_descent(X, Y, alpha=0.02, n_iter=10000):
    """Minimise the mean squared error by batch gradient descent.

    Every coefficient starts at 1. Each iteration computes the whole
    gradient from the current theta before any coefficient changes, so the
    update is truly simultaneous.

    Args:
        X: Design matrix of shape (m, n), including the intercept column.
        Y: Target column vector of shape (m, 1).
        alpha: Learning rate.
        n_iter: Number of update steps.

    Returns:
        Coefficient column vector of shape (n, 1).
    """
    m = len(Y)
    theta = np.ones((X.shape[1], 1))
    for _ in range(n_iter):
        # A new array is built each step; theta is never mutated in place.
        theta = theta + alpha * dot(X.T, Y - dot(X, theta)) / m
    return theta


def main():
    """Load data.csv, fit with both methods and print the coefficients."""
    dataset = pd.read_csv('data.csv')

    # First three columns are the features, the fourth is the target.
    features = dataset.iloc[:, 0:3].values
    # Prepend a column of ones so that theta[0] is the intercept.
    X = np.hstack([np.ones((len(features), 1)), features])
    Y = dataset.iloc[:, 3].values.reshape(-1, 1)

    theta = least_squares(X, Y)
    print("最小二乘法計算theta:\n", theta)

    theta = gradient_descent(X, Y)
    print("梯度下降法計算theta:", theta)


if __name__ == "__main__":
    main()

運行結果

python之線性迴歸


分享到:


相關文章: