Ensemble Learning


Hard Voting

import warnings
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Hard voting: each classifier casts one vote; the majority class wins
voting_clf = VotingClassifier(estimators=[
    ('log_clf', LogisticRegression()),
    ('svm_clf', SVC()),
    ('dt_clf', DecisionTreeClassifier(random_state=666))
], voting='hard')
voting_clf.fit(X_train, y_train)
voting_clf.score(X_test, y_test)

Output accuracy: 0.896
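With voting='hard', the ensemble simply predicts the class chosen by the majority of the base classifiers. As a quick comparison (a minimal sketch, not part of the original post), you can score the same three base classifiers individually and check them against the ensemble's 0.896:

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Score each base classifier on its own for comparison with the ensemble
for name, clf in [('log_clf', LogisticRegression()),
                  ('svm_clf', SVC()),
                  ('dt_clf', DecisionTreeClassifier(random_state=666))]:
    clf.fit(X_train, y_train)
    print(name, clf.score(X_test, y_test))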

Soft Voting

import warnings
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Soft voting averages predicted class probabilities, so every estimator
# must support predict_proba; SVC needs probability=True for that
voting_clf2 = VotingClassifier(estimators=[
    ('log_clf', LogisticRegression()),
    ('svm_clf', SVC(probability=True)),
    ('dt_clf', DecisionTreeClassifier(random_state=666))
], voting='soft')
voting_clf2.fit(X_train, y_train)
voting_clf2.score(X_test, y_test)

Output accuracy: 0.912
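Soft voting averages the class probabilities reported by each classifier and predicts the class with the highest mean probability, which is exactly why SVC needs probability=True above. The following sketch (an illustration, assuming the default equal estimator weights) reproduces the idea by hand with predict_proba:

import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

estimators = [LogisticRegression(), SVC(probability=True),
              DecisionTreeClassifier(random_state=666)]
probas = np.zeros((len(X_test), 2))
for clf in estimators:
    clf.fit(X_train, y_train)
    probas += clf.predict_proba(X_test)   # accumulate class probabilities
# Average the probabilities and take the most probable class (labels are 0/1)
y_pred = np.argmax(probas / len(estimators), axis=1)
print((y_pred == y_test).mean())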

Bagging (sampling with replacement)

n_estimators=500: use 500 base estimators. max_samples=100: train each base estimator on 100 samples.

import warnings
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# bootstrap=True: draw the 100 samples for each tree with replacement
bagging_clf = BaggingClassifier(DecisionTreeClassifier(random_state=666),
                                n_estimators=500, max_samples=100,
                                bootstrap=True)
bagging_clf.fit(X_train, y_train)
bagging_clf.score(X_test, y_test)

Output accuracy: 0.916
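Because sampling with replacement leaves every base estimator with samples it never saw (out-of-bag samples), BaggingClassifier can also estimate accuracy without a separate test split, which is exactly what the later sections do. A minimal sketch of that variant (the score will differ from 0.916 since it is measured on out-of-bag samples rather than the held-out test set):

from sklearn import datasets
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)

# oob_score=True: evaluate each tree on the samples it did not train on
bagging_clf = BaggingClassifier(DecisionTreeClassifier(random_state=666),
                                n_estimators=500, max_samples=100,
                                bootstrap=True, oob_score=True)
bagging_clf.fit(X, y)
print(bagging_clf.oob_score_)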

Random Subspaces (random sampling of features)

max_features: the number of features to draw from X to train each base estimator. (make_moons has only two features, so max_features=1 means each tree sees a single feature, which is why the score drops.)

import warnings
from sklearn import datasets
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)

# max_samples=1.0: keep all samples; max_features=1 with
# bootstrap_features=True: randomly sample features for each estimator
random_subspaces_clf = BaggingClassifier(DecisionTreeClassifier(random_state=666),
                                         n_estimators=500, max_samples=1.0,
                                         bootstrap=True, oob_score=True,
                                         max_features=1, bootstrap_features=True)
random_subspaces_clf.fit(X, y)
random_subspaces_clf.oob_score_

Output OOB accuracy: 0.824

Random Patches (random sampling of both samples and features)

import warnings
from sklearn import datasets
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)

# Random Patches: subsample both rows (max_samples=100) and
# columns (max_features=1, bootstrap_features=True)
random_patches_clf = BaggingClassifier(DecisionTreeClassifier(random_state=666),
                                       n_estimators=500, max_samples=100,
                                       bootstrap=True, oob_score=True,
                                       max_features=1, bootstrap_features=True)
random_patches_clf.fit(X, y)
random_patches_clf.oob_score_

Output OOB accuracy: 0.86

Random Forest

import warnings
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)

rf_clf = RandomForestClassifier(n_estimators=500, random_state=666,
                                oob_score=True)
rf_clf.fit(X, y)
rf_clf.oob_score_

Output OOB accuracy: 0.896
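A trained random forest also exposes feature_importances_, which is handy for inspecting what the model relies on; a short sketch (not part of the original example):

from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
rf_clf = RandomForestClassifier(n_estimators=500, random_state=666,
                                oob_score=True)
rf_clf.fit(X, y)
# Relative importance of the two input features of make_moons
print(rf_clf.feature_importances_)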

Extra-Trees (Extremely Randomized Trees)

import warnings
from sklearn import datasets
from sklearn.ensemble import ExtraTreesClassifier

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)

# Extra-Trees does not bootstrap by default, so bootstrap=True is
# required for an out-of-bag estimate
et_clf = ExtraTreesClassifier(n_estimators=500, bootstrap=True,
                              oob_score=True, random_state=666)
et_clf.fit(X, y)
et_clf.oob_score_

Output OOB accuracy: 0.892

AdaBoost

import warnings
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# max_depth=2 keeps each boosted base learner shallow (a weak learner)
ada_clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=2),
                             n_estimators=500, random_state=666)
ada_clf.fit(X_train, y_train)
ada_clf.score(X_test, y_test)

Output accuracy: 0.872
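AdaBoost adds its base estimators sequentially, reweighting misclassified samples at each round. To watch accuracy evolve as estimators are added (useful when choosing n_estimators), staged_score yields the test score after every boosting round; a minimal sketch:

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

ada_clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=2),
                             n_estimators=500, random_state=666)
ada_clf.fit(X_train, y_train)
# Print the test accuracy after every 100 boosting rounds
for i, score in enumerate(ada_clf.staged_score(X_test, y_test), start=1):
    if i % 100 == 0:
        print(i, score)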

Gradient Boosting Decision Tree (GBDT)

import warnings
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier

warnings.filterwarnings("ignore")

X, y = datasets.make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# GBDT always uses decision trees as its base learners, so no base
# estimator is passed in; each tree fits the previous ensemble's residuals
gb_clf = GradientBoostingClassifier(max_depth=2, n_estimators=30)
gb_clf.fit(X_train, y_train)
gb_clf.score(X_test, y_test)

Output accuracy: 0.912

