Hard Voting硬投票法
# --- Hard voting ensemble ---
# Each base classifier casts one vote; the majority class label wins.
import numpy as np
import warnings

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

warnings.filterwarnings("ignore")

# Reproducible two-class "moons" toy dataset.
X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

voting_clf = VotingClassifier(
    estimators=[
        ('log_clf', LogisticRegression()),
        ('svm_clf', SVC()),
        ('dt_clf', DecisionTreeClassifier(random_state=666)),
    ],
    voting='hard',
)
voting_clf.fit(X_train, y_train)
voting_clf.score(X_test, y_test)  # reported accuracy: 0.896
输出准确率:0.896
Soft Voting软投票法
# --- Soft voting ensemble ---
# Averages the predicted class probabilities of the base classifiers and
# picks the class with the highest mean probability.
import numpy as np
import warnings

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

warnings.filterwarnings("ignore")

# Same reproducible "moons" dataset as the hard-voting example.
X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

voting_clf2 = VotingClassifier(
    estimators=[
        ('log_clf', LogisticRegression()),
        # probability=True is required so SVC exposes predict_proba,
        # which soft voting needs.
        ('svm_clf', SVC(probability=True)),
        ('dt_clf', DecisionTreeClassifier(random_state=666)),
    ],
    voting='soft',
)
voting_clf2.fit(X_train, y_train)
voting_clf2.score(X_test, y_test)  # reported accuracy: 0.912
输出准确率:0.912
Bagging放回取样
n_estimators=500:基学习器的数量为500个;max_samples=100:训练每个基学习器的样本数量为100个
# --- Bagging (bootstrap sampling of the training set) ---
# 500 decision-tree base learners, each trained on 100 samples drawn
# with replacement (bootstrap=True).
import numpy as np
import warnings

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

from sklearn.ensemble import BaggingClassifier

bagging_clf = BaggingClassifier(
    DecisionTreeClassifier(random_state=666),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
)
bagging_clf.fit(X_train, y_train)
bagging_clf.score(X_test, y_test)  # reported accuracy: 0.916
输出准确率:0.916
Random Subspaces针对特征进行随机采样
max_features : The number of features to draw from X to train each base estimator.
# --- Random Subspaces (random sampling of features only) ---
# max_samples=1.0 keeps every training sample per learner; randomness
# comes from max_features=1 with bootstrap_features=True, i.e. each
# tree sees one feature drawn with replacement.
# oob_score=True evaluates on out-of-bag samples, so no train/test
# split is needed here.
import numpy as np
import warnings

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import BaggingClassifier

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)

# Fixed typo in the original variable name: "ramdom" -> "random".
random_subspaces_clf = BaggingClassifier(
    DecisionTreeClassifier(random_state=666),
    n_estimators=500,
    max_samples=1.0,
    bootstrap=True,
    oob_score=True,
    max_features=1,
    bootstrap_features=True,
)
random_subspaces_clf.fit(X, y)
random_subspaces_clf.oob_score_  # reported accuracy: 0.824
输出准确率:0.824
Random Patches针对样本和特征进行随机采样
# --- Random Patches (random sampling of both samples AND features) ---
# Each of the 500 trees is trained on 100 bootstrapped samples
# (max_samples=100, bootstrap=True) and on one feature drawn with
# replacement (max_features=1, bootstrap_features=True).
# oob_score=True evaluates on out-of-bag samples.
from sklearn.ensemble import BaggingClassifier
import numpy as np
import warnings

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)

# Fixed typo in the original variable name: "ramdom" -> "random".
random_patches_clf = BaggingClassifier(
    DecisionTreeClassifier(random_state=666),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    oob_score=True,
    max_features=1,
    bootstrap_features=True,
)
random_patches_clf.fit(X, y)
random_patches_clf.oob_score_  # reported accuracy: 0.86
输出准确率:0.86
Random Forest随机森林
# --- Random Forest ---
# Bagged decision trees with per-split feature randomness, evaluated
# via the out-of-bag score (oob_score=True) instead of a held-out set.
import numpy as np
import warnings

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)

rf_clf = RandomForestClassifier(
    n_estimators=500,
    random_state=666,
    oob_score=True,
)
rf_clf.fit(X, y)
rf_clf.oob_score_  # reported accuracy: 0.896
输出准确率:0.896
Extra-Trees极其随机森林
# --- Extra-Trees (extremely randomized trees) ---
# Like a random forest, but split thresholds are also randomized;
# evaluated via the out-of-bag score.
import numpy as np
import warnings

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)

from sklearn.ensemble import ExtraTreesClassifier

# bootstrap=True is required here so that out-of-bag samples exist
# for oob_score.
et_clf = ExtraTreesClassifier(
    n_estimators=500,
    bootstrap=True,
    oob_score=True,
    random_state=666,
)
et_clf.fit(X, y)
et_clf.oob_score_  # reported accuracy: 0.892
输出准确率:0.892
AdaBoosting
# --- AdaBoost ---
# Sequentially fits 500 shallow trees (max_depth=2), each reweighting
# the samples the previous learners misclassified.
import numpy as np
import warnings

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    n_estimators=500,
    random_state=666,
)
ada_clf.fit(X_train, y_train)
ada_clf.score(X_test, y_test)  # reported accuracy: 0.872
输出准确率:0.872
Gradient Boosting Decision Tree(GBDT)梯度提升树
# --- Gradient Boosting Decision Tree (GBDT) ---
# 30 depth-2 trees fitted sequentially, each one on the residual
# errors of the current ensemble.
import numpy as np
import warnings

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

gb_clf = GradientBoostingClassifier(max_depth=2, n_estimators=30)
gb_clf.fit(X_train, y_train)
gb_clf.score(X_test, y_test)  # reported accuracy: 0.912
输出准确率:0.912