Prob 3-1 with sklearn
In [21]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import scipy
In [22]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn import linear_model
Import data; build the training set and testing set
In [23]:
# Feature naming used in the assignment: x1 = T, x2 = V, x3 = AP, x4 = RH, x0 = 1.
# The target column is 'EP'.
N_TRAIN = 400  # rows [0, N_TRAIN) form the training set
N_TOTAL = 500  # rows [N_TRAIN, N_TOTAL) form the testing set

data3 = pd.read_excel("data.xlsx")
data3['x0'] = 1  # add a constant column to serve as x0
data = data3

# NOTE(review): 'x0' is appended as the LAST column, so the positional names that
# sklearn generates (x0, x1, ...) do not match the x0..x4 naming above -- the
# constant ends up last. PolynomialFeatures also adds its own bias term by
# default, so this column is presumably redundant for the sklearn models; confirm.

# Use `axis=1` as a keyword (the positional form is rejected by current pandas)
# and plain ndarrays instead of np.matrix (rejected by recent scikit-learn).
train_x = np.asarray(data.drop('EP', axis=1)[0:N_TRAIN])
train_y = np.array(data.EP[0:N_TRAIN])
test_x = np.asarray(data.drop('EP', axis=1)[N_TRAIN:N_TOTAL])
test_y = np.array(data.EP[N_TRAIN:N_TOTAL])

# Independent copies: prediction columns are added to these frames later, and
# writing into a slice of `data` triggers pandas' SettingWithCopyWarning.
train_df = data[0:N_TRAIN].copy()
test_df = data[N_TRAIN:N_TOTAL].copy()

D = 5  # dim: (T, V, AP, RH) is 4 dimensions; with x0 added it is 5
In [28]:
def _store_predictions_and_rms(est, x, frame, column):
    """Store est.predict(x) in frame[column], print frame.head(), and return the RMS error against frame['EP']."""
    n = frame['EP'].count()
    frame[column] = est.predict(x)
    rms = np.sqrt(np.sum((frame[column] - frame['EP']) ** 2) / n)
    print(frame.head())
    return rms


def Reg(degree):
    """Fit a polynomial least-squares model and report its train/test RMS error.

    Builds a ``PolynomialFeatures(degree)`` -> ``LinearRegression`` pipeline and
    fits it on the module-level ``train_x`` / ``train_y``. It then plots the
    absolute value of every fitted coefficient and prints the (up to) six
    largest ones together with their generated feature names.

    Side effects: shows a matplotlib figure, prints to stdout, and adds the
    columns ``pred_y_deg<degree>_byTest`` / ``pred_y_deg<degree>_byTrain`` to the
    module-level ``test_df`` / ``train_df``.

    Parameters
    ----------
    degree : int
        Polynomial degree passed to ``PolynomialFeatures``.

    Returns
    -------
    list
        ``[RMS_train, RMS_test]``.
    """
    top_k = 6  # number of largest-magnitude coefficients to report

    #---------build up model---------------------------
    reg = linear_model.LinearRegression()
    est = make_pipeline(PolynomialFeatures(degree), reg)
    est.fit(train_x, train_y)

    #---------find coeff---------------------------
    poly = est.steps[0][1]
    coef = np.ravel(est.steps[-1][1].coef_)
    abs_coef = np.abs(coef)
    # Newer scikit-learn replaced get_feature_names() with get_feature_names_out();
    # support both so the notebook runs on either.
    if hasattr(poly, 'get_feature_names_out'):
        feature_names = [str(name) for name in poly.get_feature_names_out()]
    else:
        feature_names = poly.get_feature_names()

    #---------plot coeff---------------------------
    print('plot:')
    plt.plot(range(coef.size), abs_coef, 'b-')
    plt.xlabel('coef order')
    plt.ylabel('abs(w)')
    plt.title('Coef Weight')
    plt.show()

    #---------show coeff---------------------------
    # sorted() works whether zip() returns a list or an iterator; slicing (rather
    # than indexing -1..-6) stays valid when fewer than top_k coefficients exist.
    ordered = sorted(zip(abs_coef, range(coef.size)), reverse=True)
    rank = [(float(weight), idx) for weight, idx in ordered[:top_k]]
    print('coef= {}'.format(rank))
    print('feature_names= {}'.format([feature_names[idx] for _, idx in rank]))
    print('*' * 64)

    #---------predict y from test_x---------------------
    RMS_test = _store_predictions_and_rms(
        est, test_x, test_df, 'pred_y_deg' + str(degree) + '_byTest')

    #---------predict y from train_x---------------------
    RMS_train = _store_predictions_and_rms(
        est, train_x, train_df, 'pred_y_deg' + str(degree) + '_byTrain')

    return [RMS_train, RMS_test]
In [29]:
# Degree-2 fit. print() is called with a single pre-formatted string so the
# output is the same under Python 2 and Python 3.
deg = 2
RMS = Reg(deg)  # result: 4.17413184555, very close to the 4.17413184559 obtained without sklearn
print('*' * 64)
print('RMS= {}'.format(RMS))
In [30]:
# Degree-3 fit. print() is called with a single pre-formatted string so the
# output is the same under Python 2 and Python 3.
deg = 3
RMS = Reg(deg)  # result: 4.08913247008, slightly different from the original 4.0766289726
print('*' * 64)
print('RMS= {}'.format(RMS))
Prob 3-2
For M = 3 (order = 3), use L1 regularization
In [ ]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn import linear_model
degree = 3  # polynomial order M used by L1_Reg


def L1_Reg(alpha):
    """Fit an L1-regularized (Lasso) polynomial model and report its largest coefficients.

    Builds a ``PolynomialFeatures(degree)`` -> ``Lasso(alpha=alpha)`` pipeline
    using the module-level ``degree``, and fits it on the module-level
    ``train_x`` / ``train_y``. It adds the absolute coefficient values as a line
    to the current matplotlib axes and prints the (up to) six
    largest-magnitude coefficients followed by their generated feature names.

    Parameters
    ----------
    alpha : float
        L1 penalty strength passed to ``linear_model.Lasso``.

    Returns
    -------
    None
    """
    top_k = 6  # number of largest-magnitude coefficients to report

    est = make_pipeline(PolynomialFeatures(degree), linear_model.Lasso(alpha=alpha))
    est.fit(train_x, train_y)

    coef = np.ravel(est.steps[-1][1].coef_)
    abs_coef = np.abs(coef)

    # plt.show() is deliberately not called here (it was commented out in the
    # original), so repeated calls within one cell draw onto the same axes.
    plt.plot(range(coef.size), abs_coef, 'b-')
    plt.xlabel('coef order')
    plt.ylabel('abs(w)')
    plt.title('Coef Weight')

    # sorted() works whether zip() returns a list or an iterator; slicing (rather
    # than indexing -1..-6) stays valid when fewer than top_k coefficients exist.
    ordered = sorted(zip(abs_coef, range(coef.size)), reverse=True)
    rank = [(float(weight), idx) for weight, idx in ordered[:top_k]]
    print('alpha= {} coef= {}'.format(alpha, rank))

    # Newer scikit-learn replaced get_feature_names() with get_feature_names_out();
    # support both so the notebook runs on either.
    poly = est.steps[0][1]
    if hasattr(poly, 'get_feature_names_out'):
        feature_names = [str(name) for name in poly.get_feature_names_out()]
    else:
        feature_names = poly.get_feature_names()
    print([feature_names[idx] for _, idx in rank])
    print('*' * 64)
In [ ]:
# Sweep the L1 penalty (lambda) over 1e-4, 2e-4, ..., 9e-4.
for step in range(1, 10):
    alpha = float(step * 1000) / 10000000
    L1_Reg(alpha)
In [ ]:
沒有留言:
張貼留言