1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
| ''' Stacking方法 ''' from sklearn.datasets import load_iris from sklearn.preprocessing import StandardScaler from sklearn import model_selection from sklearn.linear_model import LogisticRegression from sklearn.neighbors import KNeighborsClassifier from sklearn.naive_bayes import GaussianNB from sklearn.ensemble import RandomForestClassifier from mlxtend.classifier import StackingClassifier from sklearn.metrics import accuracy_score from sklearn.metrics import confusion_matrix from sklearn.metrics import classification_report import warnings; warnings.filterwarnings(action='ignore')
# Load the iris data set bundled with scikit-learn.
iris = load_iris()

# Features and labels in one step. The column slice asks for up to five
# columns; slicing clamps to however many the data actually has, so every
# available feature column is kept.
X, y = iris.data[:, :5], iris.target

# Dump the raw inputs so the run log shows exactly what the models were fed.
print('feature=', X)
print('target=', y)
def StackingMethod(X, y, test_size=0.3, cv=5):
    """Train a stacked ensemble classifier and print its evaluation.

    Three base learners (logistic regression, random forest, Gaussian naive
    Bayes) are combined by ``StackingClassifier`` with a logistic-regression
    meta-classifier. The ensemble is fitted on a train split, evaluated on
    the held-out split (accuracy, confusion matrix, classification report),
    and then each base learner and the ensemble are cross-validated on the
    full ``X``/``y``.

    Args:
        X: Feature matrix.
        y: Class labels aligned with ``X``.
        test_size: Value forwarded to ``train_test_split`` as ``test_size``
            (default 0.3, the value that used to be hard-coded).
        cv: Value forwarded to ``cross_val_score`` as ``cv`` (default 5,
            the value that used to be hard-coded).

    Returns:
        The ``StackingClassifier`` instance fitted on the training split.
    """
    # Features are used unscaled. An earlier StandardScaler step computed a
    # scaled copy of X but never passed it to the split or the models, so it
    # was dead code and has been removed. If standardisation is wanted, put
    # the scaler inside a Pipeline so that it is fitted on training data only
    # and travels with the model.
    feature_train, feature_test, target_train, target_test = (
        model_selection.train_test_split(
            X, y, test_size=test_size, random_state=0
        )
    )

    # Base learners; fixed seeds keep the run reproducible.
    clf1 = LogisticRegression(random_state=1)
    clf2 = RandomForestClassifier(random_state=1)
    clf3 = GaussianNB()

    # Meta-classifier learns from the base learners' predictions.
    sclf = StackingClassifier(
        classifiers=[clf1, clf2, clf3],
        meta_classifier=LogisticRegression(),
    )
    sclf.fit(feature_train, target_train)

    # Hold-out evaluation. All metrics take (y_true, y_pred) in that order.
    predict_results = sclf.predict(feature_test)
    print(accuracy_score(target_test, predict_results))
    conf_mat = confusion_matrix(target_test, predict_results)
    print(conf_mat)
    print(classification_report(target_test, predict_results))

    # Cross-validate every base learner and the stacked model on the full
    # data for a side-by-side comparison.
    models = [clf1, clf2, clf3, sclf]
    labels = [
        'Logistic Regression',
        'Random Forest',
        'naive Bayes',
        'StackingModel',
    ]
    for clf, label in zip(models, labels):
        scores = model_selection.cross_val_score(
            clf, X, y, cv=cv, scoring='accuracy'
        )
        print("Accuracy: %0.2f (+/- %0.2f) [%s]"
              % (scores.mean(), scores.std(), label))

    return sclf
# Script entry point: train and evaluate the stacked model on the iris data
# loaded at module level, keeping the fitted model in `model`.
if '__main__' == __name__:
    model = StackingMethod(X=X, y=y)
|