MNIST 是手写数字识别样本库,包含 0 到 9 的手写数字图像及其标签。共有三个数据集:训练集(55000个样本)、验证集(5000个样本)和测试集(10000个样本)。这些数据包含在四个文件里:train-images-idx3-ubyte.gz、train-labels-idx1-ubyte.gz、t10k-images-idx3-ubyte.gz 和 t10k-labels-idx1-ubyte.gz。
可以使用下面的代码将样本库下载到当前目录的 data 子目录,然后遍历每个样本并生成 png 文件,文件名由样本编号和标签组成。运行前需要先建立 data、bmp、trainImg、validationImg 和 testImg 这些子目录,并且把一个 28x28 的 8 位灰度 BMP 模板文件下载到 bmp 目录,存为 28x28x8bGray.bmp(代码只读取它开头的 54 + 1024 字节作为文件头)。比如 train 集里的 0 号样本会被存到名为 train_0_(7).png 的 png 文件里:0 代表样本编号(从 0 开始,即第一个样本),(7) 代表这个文件是数字 7 的手写图。
import os

import cv2
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
# Side length, in pixels, of the square images handled by this script.
_IMAGE_SIDE = 28

# Number of bytes copied from the template BMP and reused as the header of
# every temporary BMP. Presumably 54 bytes of BMP headers plus a 256-entry
# (1024-byte) palette -- confirm against the template file.
_BMP_HEADER_SIZE = 54 + 1024

# Factor applied to the raw pixel values before they are converted to uint8.
# NOTE(review): assumes the dataset delivers pixels in [0, 1] -- confirm.
_PIXEL_SCALE = 200


def findLabel(label):
    """Return the index of the largest strictly positive entry of ``label``.

    Args:
        label: Sequence of numbers, for example a one-hot label row.

    Returns:
        The index of the first maximal entry that is greater than zero, or
        -1 when ``label`` is empty or contains no positive entry.
    """
    best_index = -1
    best_value = 0
    for index, value in enumerate(label):
        # Strict comparison keeps the first of several equal maxima.
        if value > best_value:
            best_value = value
            best_index = index
    return best_index


def _export_png(pixels, header, temp_bmp, png_name):
    """Save one flattened uint8 image as ``png_name`` via a temporary BMP.

    Args:
        pixels: Flat uint8 array holding ``_IMAGE_SIDE * _IMAGE_SIDE`` values.
        header: Bytes written in front of the pixel data in the BMP file.
        temp_bmp: Path of the scratch BMP file (overwritten on every call).
        png_name: Path of the PNG file to create.

    Raises:
        OSError: If the temporary BMP cannot be read back or the PNG cannot
            be written.
    """
    # The rows are written last-to-first, exactly as the original loop did.
    flipped_rows = pixels.reshape(_IMAGE_SIDE, _IMAGE_SIDE)[::-1]
    with open(temp_bmp, "wb") as bmp_file:
        bmp_file.write(header)
        bmp_file.write(flipped_rows.tobytes())

    # Flag 0 asks OpenCV to load the file as a single-channel grayscale image.
    decoded = cv2.imread(temp_bmp, 0)
    if decoded is None:
        raise OSError("cannot read temporary bitmap: " + temp_bmp)
    if not cv2.imwrite(png_name, decoded):
        raise OSError("cannot write image: " + png_name)


def main():
    """Load MNIST from ``data/`` and export every sample as a PNG file.

    Each sample of the train, validation and test subsets is written to
    ``<prefix><index>_(<label>).png``. The output directories are created
    when they do not exist yet; ``bmp/28x28x8bGray.bmp`` must already exist.

    Raises:
        ValueError: If the template BMP is shorter than the expected header.
        OSError: If an image cannot be converted or written.
    """
    mnist = input_data.read_data_sets('data/', one_hot=True)

    with open("bmp/28x28x8bGray.bmp", "rb") as template:
        head = template.read(_BMP_HEADER_SIZE)
    if len(head) < _BMP_HEADER_SIZE:
        raise ValueError("template bitmap is too short to hold the header")

    subsets = [
        (mnist.train, 'trainImg/train_'),
        (mnist.validation, 'validationImg/valid_'),
        (mnist.test, 'testImg/test_'),
    ]
    temp_bmp = "bmp/mnistTemp.bmp"

    for subset, prefix in subsets:
        # cv2.imwrite cannot create directories, so make sure the target exists.
        os.makedirs(os.path.dirname(prefix), exist_ok=True)
        labels = subset.labels
        images = subset.images
        for i in range(subset.num_examples):
            print('=', end='')
            digit = findLabel(labels[i])
            png_name = prefix + str(i) + '_(' + str(digit) + ').png'
            pixels = (images[i] * _PIXEL_SCALE).astype(np.uint8)
            _export_png(pixels, head, temp_bmp, png_name)
            # Break the progress bar every 100 samples.
            if i % 100 == 0:
                print('\n')


if __name__ == "__main__":
    main()