Machine Learning Ensemble Learning Series: Implementing the Bagging and AdaBoost Algorithms in Python
Abstract
This article implements the Bagging and AdaBoost ensemble-learning algorithms in Python and wraps the code in classes so that readers can call them conveniently.
Bagging Algorithm

Bagging trains a number of copies of the base estimator, each on a bootstrap resample of the training set, and predicts by majority vote. The class below wraps this procedure around any scikit-learn style classifier.
import numpy as np
import pandas as pd
from sklearn.base import clone


class Cyrus_bagging(object):
    def __init__(self, estimator, n_estimators=20):
        self.estimator = estimator
        self.n_estimators = n_estimators
        self.models = None

    def fit(self, x, y):
        x = np.array(x)
        y = np.array(y).reshape((-1,))
        indices = np.arange(x.shape[0])
        self.models = []
        for i in range(self.n_estimators):
            # bootstrap sample: draw len(x) indices with replacement
            index = np.random.choice(indices, x.shape[0])
            x0 = x[index]
            y0 = y[index]
            # clone so that every round fits an independent copy of the base estimator
            self.models.append(clone(self.estimator).fit(x0, y0))

    def predict(self, x):
        pres = np.zeros([x.shape[0], self.n_estimators])
        for i in range(self.n_estimators):
            pres[:, i] = self.models[i].predict(x)
        rest = []
        for i in range(pres.shape[0]):
            # majority vote: pick the label predicted most often for sample i
            pd_s = pd.Series(pres[i, :]).value_counts()
            rest.append(int(pd_s.idxmax()))
        return np.array(rest)
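The usage example below calls the class on a pre-made train/test split (x_train, x_test, y_train, y_test); the original post does not say which dataset was used. A minimal sketch of how such a split might be produced, assuming for illustration the iris data restricted to two features:

# Sketch only: the dataset and split are assumptions, not necessarily those of the original post.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

data = load_iris()
x, y = data.data[:, :2], data.target   # keep only two of the four features
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)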
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

knn = KNeighborsClassifier()
model = Cyrus_bagging(knn)
model.fit(x_train, y_train)
y_pre = model.predict(x_test)
print(classification_report(y_test, y_pre))

To make the gap from a non-ensembled model visible, the example deliberately uses data with only a few features, so the accuracy is not especially high; compared with the same KNN without bagging, however, it is already noticeably better.
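For reference, that non-bagged baseline can be obtained as follows (a sketch, assuming the same split as above; its report is omitted here):

# Single KNN without bagging, for comparison (sketch, not part of the original code)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

knn_single = KNeighborsClassifier()
knn_single.fit(x_train, y_train)
print(classification_report(y_test, knn_single.predict(x_test)))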
Classification report of the bagged KNN:

             precision    recall  f1-score   support

          0       1.00      1.00      1.00        11
          1       0.67      0.67      0.67         9
          2       0.70      0.70      0.70        10

avg / total       0.80      0.80      0.80        30

AdaBoost Algorithm

AdaBoost trains its base estimators sequentially: after each round the sample weights are updated so that misclassified samples are drawn more often in the next round, and the rounds are finally combined by a weighted vote.
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.metrics import accuracy_score


class CyrusAdaBoost(object):
    def __init__(self, estimator, n_estimators=20):
        self.estimator = estimator
        self.n_estimators = n_estimators
        self.error_rate = None
        self.model = None

    def update_w(self, y, pre_y, w):
        # raise the weight of misclassified samples, lower that of correct ones
        error_rate = 1 - accuracy_score(y, pre_y)
        for i in range(w.shape[0]):
            if y[i] == pre_y[i]:
                w[i] = w[i] * np.exp(-error_rate)
            else:
                w[i] = w[i] * np.exp(error_rate)
        return w / w.sum()

    def cal_label(self, pres, alpha):
        # weighted vote: round j's prediction counts with weight alpha[j]
        label = []
        for i in range(pres.shape[0]):
            count = np.zeros(int(pres[i, :].max() + 1))
            for j in range(pres.shape[1]):
                count[int(pres[i, j])] += alpha[j]
            label.append(count.argmax())
        return np.array(label)
    def fit(self, x, y):
        x = np.array(x)
        y = np.array(y).reshape((-1,))
        self.error_rate = []
        self.model = []
        # start with uniform sample weights
        w0 = np.ones(x.shape[0])
        w0 = w0 / w0.sum()
        indices = np.arange(x.shape[0])
        for i in range(self.n_estimators):
            # resample the training set according to the current weights
            index = np.random.choice(indices, size=x.shape[0], p=w0)
            x0 = x[index]
            y0 = y[index]
            # fit an independent copy of the base estimator on the resampled data
            model0 = clone(self.estimator).fit(x0, y0)
            pre_y0 = model0.predict(x0)
            error_rate = 1 - accuracy_score(y0, pre_y0)
            self.error_rate.append(error_rate)
            self.model.append(model0)
            w0 = self.update_w(y0, pre_y0, w0)

    def predict(self, x):
        pres = np.zeros([x.shape[0], self.n_estimators])
        for i in range(self.n_estimators):
            pres[:, i] = self.model[i].predict(x)
        # each round votes with weight 1 - error_rate
        alpha = 1 - np.array(self.error_rate)
        return self.cal_label(pres, alpha)
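For reference: classic binary AdaBoost sets each round's weight to α_t = ½·ln((1 − ε_t)/ε_t), multiplies the weights of misclassified samples by e^{α_t} and of correctly classified ones by e^{−α_t}, and lets round t vote with weight α_t; the implementation above instead uses the simpler heuristic e^{±ε_t} for the sample-weight update and 1 − ε_t for the vote weights.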
from sklearn.tree import DecisionTreeClassifier

model = CyrusAdaBoost(estimator=DecisionTreeClassifier(), n_estimators=50)
model.fit(x_train, y_train)
y_pre = model.predict(x_test)
print(accuracy_score(y_pre, y_test))

The printed accuracy is 0.932.
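As a sanity check, the same split could also be fed to scikit-learn's built-in AdaBoost (a sketch; parameters other than n_estimators are left at their defaults):

# Reference run with scikit-learn's own implementation (sketch, same assumed split)
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score

ref = AdaBoostClassifier(n_estimators=50)
ref.fit(x_train, y_train)
print(accuracy_score(y_test, ref.predict(x_test)))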
by CyrusMay 2020.06.12

All the beauty of this world is nothing but your lovely look.