运行结果如下:
以上示例均使用默认参数的KNN算法。下面通过设置不同的K值(n_neighbors,取1到20),比较各自的交叉验证准确率:
# KNN with custom parameters: sweep n_neighbors and plot the mean
# cross-validated accuracy obtained for each value.
import csv

import numpy as np


def load_dataset(path="ionosphere.data"):
    """Read a headerless CSV dataset into a feature matrix and a label vector.

    Every column except the last is parsed as a float feature; the last
    column is the class label, and a sample is marked True when that label
    is the string 'g'. Empty rows (e.g. a trailing blank line) are skipped.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A ``(features, labels)`` pair: ``features`` is a float array with one
        row per sample, ``labels`` is a bool array with one entry per sample.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a feature field cannot be converted to float.
    """
    features = []
    labels = []
    # newline="" is what the csv module expects from the file object it reads.
    # The with-block closes the file as soon as reading is finished.
    with open(path, "r", newline="", encoding="utf-8") as input_file:
        for row in csv.reader(input_file):
            if not row:
                continue
            features.append([float(datum) for datum in row[:-1]])
            labels.append(row[-1] == "g")
    # Arrays are sized from the file contents rather than a hard-coded shape.
    return np.array(features, dtype="float"), np.array(labels, dtype="bool")


if __name__ == "__main__":
    # Third-party imports used only by the experiment itself.
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    from sklearn.model_selection import cross_val_score
    from sklearn.neighbors import KNeighborsClassifier

    x, y = load_dataset("ionosphere.data")

    # No manual train/test split is done here: cross_val_score evaluates
    # each estimator by cross-validation on the full data set.
    avg_scores = []
    all_scores = []
    parameter_values = list(range(1, 21))
    for n_neighbors in parameter_values:
        estimator = KNeighborsClassifier(n_neighbors=n_neighbors)
        scores = cross_val_score(estimator, x, y, scoring="accuracy")
        avg_scores.append(np.mean(scores))
        all_scores.append(scores)

    # Use the SimHei font by default so the Chinese axis labels render
    # instead of showing up as garbled boxes.
    mpl.rcParams["font.sans-serif"] = ["SimHei"]
    # NOTE: if minus signs are drawn as boxes with this font, also set
    # mpl.rcParams["axes.unicode_minus"] to False.

    plt.plot(parameter_values, avg_scores, "-o")
    plt.title("scikit-learn")
    plt.xlabel("n_neighbors参数")
    plt.ylabel("accuracy精确度")
    plt.show()

# The output of running this script is shown below:
