python机器学习-预测分析核心算法3-2代码在python3下运行遇到的一些问题

xiaoxiao2021-02-28  72

"""Rocks-versus-mines classifier on the UCI sonar data (Python 3 version).

Downloads the sonar data set, fits an ordinary least-squares linear
regression to 0/1 labels, prints in-sample and out-of-sample confusion
matrices, and plots the corresponding ROC curves.
"""
import urllib.request

import numpy

# The book gives an https:// URL; the author of this listing could only open
# the address after dropping the "s", so plain http is used here.
target_url = ("http://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data")


def confusionMatrix(predicted, actual, threshold):
    """Count confusion-matrix cells for thresholded real-valued predictions.

    An example is an actual positive when its label is greater than 0.5.
    It is predicted positive when its prediction is strictly greater than
    ``threshold`` (for actual positives) or not strictly less than
    ``threshold`` (for actual negatives); a prediction exactly equal to the
    threshold therefore counts as ``fn`` or ``fp`` respectively.

    Args:
        predicted: Sequence of real-valued predictions.
        actual: Sequence of labels, same length as ``predicted``.
        threshold: Decision threshold applied to ``predicted``.

    Returns:
        ``[tp, fn, fp, tn]`` as floats, or ``-1`` when the two sequences
        differ in length.
    """
    if len(predicted) != len(actual):
        return -1
    tp = 0.0  # true positives
    fp = 0.0  # false positives
    tn = 0.0  # true negatives
    fn = 0.0  # false negatives
    for prediction, label in zip(predicted, actual):
        if label > 0.5:
            if prediction > threshold:
                tp += 1.0
            else:
                fn += 1.0
        else:
            if prediction < threshold:
                tn += 1.0
            else:
                fp += 1.0
    return [tp, fn, fp, tn]


def _loadSonar(url):
    """Download the sonar data and return ``(xList, labels)``."""
    xList = []
    labels = []
    # The context manager closes the HTTP response even if parsing fails.
    with urllib.request.urlopen(url) as data:
        for line in data:
            stripped = line.strip()
            if not stripped:
                # Skip blank lines so they do not become empty feature rows.
                continue
            # urlopen yields bytes in Python 3, so the separator must be
            # bytes as well; a str separator raises TypeError.
            row = stripped.split(",".encode(encoding='utf-8'))
            # The book compares against 'M'; with bytes rows the label is
            # b'M', otherwise every label silently becomes 0.0.
            label = row.pop()
            labels.append(1.0 if label == b'M' else 0.0)
            xList.append([float(num) for num in row])
    return xList, labels


def _printConfusion(confusionMat):
    """Print a ``[tp, fn, fp, tn]`` list in the listing's two-line layout."""
    tp, fn, fp, tn = confusionMat
    print("tp = " + str(tp) + "\tfn = " + str(fn) + "\n" +
          "fp = " + str(fp) + "\ttn = " + str(tn) + '\n')


def _plotRoc(fpr, tpr, roc_auc, title):
    """Plot one ROC curve with a diagonal reference line and show it."""
    import pylab as pl

    pl.clf()
    pl.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
    pl.plot([0, 1], [0, 1], 'k-')
    pl.xlim([0.0, 1.0])
    pl.ylim([0.0, 1.0])
    pl.xlabel('False Positive Rate')
    pl.ylabel('True Positive Rate')
    pl.title(title)
    pl.legend(loc="lower right")
    pl.show()


def main():
    """Run the full download / fit / evaluate / plot pipeline."""
    # Third-party modeling imports live here so that confusionMatrix can be
    # imported without scikit-learn installed.
    from sklearn import datasets, linear_model  # datasets kept from the original listing
    from sklearn.metrics import roc_curve, auc

    xList, labels = _loadSonar(target_url)

    # Every third example (index divisible by 3) is held out for testing.
    indices = range(len(xList))
    xListTest = [xList[i] for i in indices if i % 3 == 0]
    xListTrain = [xList[i] for i in indices if i % 3 != 0]
    labelsTest = [labels[i] for i in indices if i % 3 == 0]
    labelsTrain = [labels[i] for i in indices if i % 3 != 0]

    xTrain = numpy.array(xListTrain)
    yTrain = numpy.array(labelsTrain)
    xTest = numpy.array(xListTest)
    yTest = numpy.array(labelsTest)

    print("Shape of xTrain array", xTrain.shape)
    print("Shape of yTrain array", yTrain.shape)
    print("Shape of xTest array", xTest.shape)
    print("Shape of yTest array", yTest.shape)

    rocksVMinesModel = linear_model.LinearRegression()
    rocksVMinesModel.fit(xTrain, yTrain)

    trainingPredictions = rocksVMinesModel.predict(xTrain)
    print("Some values predicted by model", trainingPredictions[0:5],
          trainingPredictions[-5:-1])

    # In-sample confusion matrix.
    confusionMatTrain = confusionMatrix(trainingPredictions, yTrain, 0.5)
    _printConfusion(confusionMatTrain)

    # Out-of-sample confusion matrix; same 0.5 threshold as in-sample so the
    # two matrices are comparable (the listing previously passed 0 here).
    testPredictions = rocksVMinesModel.predict(xTest)
    conMatTest = confusionMatrix(testPredictions, yTest, 0.5)
    _printConfusion(conMatTest)

    # In-sample ROC curve.
    fpr, tpr, thresholds = roc_curve(yTrain, trainingPredictions)
    roc_auc = auc(fpr, tpr)
    print('AUC for in-sample ROC curve: %f' % roc_auc)
    _plotRoc(fpr, tpr, roc_auc, 'In sample ROC rocks versus mines')

    # Out-of-sample ROC curve. The result must be bound to roc_auc: the
    # earlier "roc_ayc" typo made this section report the in-sample AUC.
    fpr, tpr, thresholds = roc_curve(yTest, testPredictions)
    roc_auc = auc(fpr, tpr)
    print('AUC for out-of-sample ROC curve: %f' % roc_auc)
    _plotRoc(fpr, tpr, roc_auc, 'Out-of-sample ROC rocks versus mines')


if __name__ == "__main__":
    main()

由于我的是python3版本,如果按照源代码row = line.strip().split(",")运行程序就会出现如下问题,只需要在源代码上做相应的修改:row = line.strip().split(",".encode(encoding='utf-8')),也就是需要把原来的字符串类型转换成bytes类型。

#-------------------------------------------------------------------------------------
Traceback (most recent call last):
  File "D:\Python362\a_机器学习及实战\3_2.py", line 38, in <module>
    row = line.strip().split(",")
TypeError: a bytes-like object is required, not 'str'
#--------------------------------------------------------------------------------------

修改后得到的row[-1]是这样的,那么相应的也要修改接下来的程序:

----------------------------------------------------------------------------------------------
b'R' b'R' b'R' b'R' b'R' b'R' b'R' ...
-------------------------------------------------------------------------------------------------

'''

紧接着下面的源代码if(row[-1] == 'M'):也要做相应的修改:if(row[-1] == b'M'):。若不修改,那么接下来得到的labels值全部是零,运行代码虽然不会出错,但是预测效果也就毫无意义了。

修改两处代码之后运行得到的结果是:

Shape of xTrain array (138, 60)
Shape of yTrain array (138,)
Shape of xTest array (70, 60)
Shape of yTest array (70,)
Some values predicted by model [-0.10240253 0.42090698 0.38593034 0.36094537 0.31520494] [ 1.12242751 0.77626699 1.02016858 0.66338081]
tp = 68.0    fn = 6.0
fp = 7.0    tn = 57.0

tp = 36.0    fn = 1.0
fp = 19.0    tn = 14.0

AUC for in-sample ROC curve: 0.979519
AUC for out-of-sample ROC curve: 0.979519
>>>

注:以上输出来自最初发布的代码。该版本在计算测试集AUC时把结果赋给了roc_ayc(拼写错误),打印的仍是roc_auc,因此两个AUC数值完全相同;另外测试集混淆矩阵使用的阈值是0而不是0.5。

转载请注明原文地址: https://www.6miu.com/read-70756.html

最新回复(0)