trainGraph (CNN sentiment analysis, Yoon Kim example 6)


Code:

""" Train convolutional network for sentiment analysis. Based on "Convolutional Neural Networks for Sentence Classification" by Yoon Kim http://arxiv.org/pdf/1408.5882v2.pdf For 'CNN-non-static' gets to 82.1% after 61 epochs with following settings: embedding_dim = 20 filter_sizes = (3, 4) num_filters = 3 dropout_prob = (0.7, 0.8) hidden_dims = 100 For 'CNN-rand' gets to 78-79% after 7-8 epochs with following settings: embedding_dim = 20 filter_sizes = (3, 4) num_filters = 150 dropout_prob = (0.25, 0.5) hidden_dims = 150 For 'CNN-static' gets to 75.4% after 7 epochs with following settings: embedding_dim = 100 filter_sizes = (3, 4) num_filters = 150 dropout_prob = (0.25, 0.5) hidden_dims = 150 * it turns out that such a small data set as "Movie reviews with one sentence per review" (Pang and Lee, 2005) requires much smaller network than the one introduced in the original article: - embedding dimension is only 20 (instead of 300; 'CNN-static' still requires ~100) - 2 filter sizes (instead of 3) - higher dropout probabilities and - 3 filters per filter size is enough for 'CNN-non-static' (instead of 100) - embedding initialization does not require prebuilt Google Word2Vec data. Training Word2Vec on the same "Movie reviews" data set is enough to achieve performance reported in the article (81.6%) ** Another distinct difference is slidind(changed) MaxPooling window of length=2 instead of MaxPooling over whole feature map as in the article """ import numpy as np import data_helpers from w2v import train_word2vec from keras.models import Sequential, Model from keras.layers import Activation, Dense, Dropout, Embedding, Flatten, Input, Merge, Convolution1D, MaxPooling1D np.random.seed(2) # Parameters # ================================================== # # Model Variations. See Kim Yoon's Convolutional Neural Networks for # Sentence Classification, Section 3 for detail. model_variation = 'CNN-non-static' # CNN-rand | CNN-non-static | CNN-static print('Model variation is %s' % model_variation) # Model Hyperparameters sequence_length = 56 embedding_dim = 20 filter_sizes = (3, 4) num_filters = 150 dropout_prob = (0.25, 0.5) hidden_dims = 150 # Training parameters batch_size = 32 num_epochs = 100 val_split = 0.1 # Word2Vec parameters, see train_word2vec min_word_count = 1 # Minimum word count context = 10 # Context window size # Data Preparatopn # ================================================== # # Load data print("Loading data...") x, y, vocabulary, vocabulary_inv = data_helpers.load_data() print(x) if model_variation=='CNN-non-static' or model_variation=='CNN-static': embedding_weights = train_word2vec(x, vocabulary_inv, embedding_dim, min_word_count, context) if model_variation=='CNN-static': x = embedding_weights[0][x] #x's form:[[1,571,7,...,0],[2,61,2,3...,5]..........] #translate train set to word vector.. #form:[[[0.2562049 0.23179828 -0.32274666 ,,20dimensionality],],] elif model_variation=='CNN-rand': embedding_weights = None else: raise ValueError('Unknown model variation') # Shuffle data shuffle_indices = np.random.permutation(np.arange(len(y))) #form: [ 5362 10648 6574 ..., 6637 2575 7336] x_shuffled = x[shuffle_indices] y_shuffled = y[shuffle_indices].argmax(axis=1) #print(y[shuffle_indices]) #[[1 0] # ... 
# [1 0]] #print(y_shuffled) #the max of 1 column,[0 0 0 ..., 0 1 0] #print(len(y_shuffled)) #10662 print("Vocabulary Size: {:d}".format(len(vocabulary))) # Building model # ================================================== # # graph subnet with one input and one output, # convolutional layers concateneted in parallel graph_in = Input(shape=(sequence_length, embedding_dim)) convs = [] for fsz in filter_sizes: conv = Convolution1D(nb_filter=num_filters, #150 filter_length=fsz, #3 ! 4 border_mode='valid', activation='relu', subsample_length=1)(graph_in) pool = MaxPooling1D(pool_length=2)(conv) flatten = Flatten()(pool) convs.append(flatten) if len(filter_sizes)>1: out = Merge(mode='concat')(convs) else: out = convs[0] graph = Model(input=graph_in, output=out) # main sequential model model = Sequential() if not model_variation=='CNN-static': model.add(Embedding(len(vocabulary), embedding_dim, input_length=sequence_length, weights=embedding_weights)) #model.add(Embedding(18765, 20, input_length=56, #weights=embedding_weights)) model.add(Dropout(dropout_prob[0], input_shape=(sequence_length, embedding_dim))) #model.add(Dropout(dropout_prob[0]=0.25, input_shape=(56, 20))) model.add(graph) model.add(Dense(hidden_dims)) model.add(Dropout(dropout_prob[1])) model.add(Activation('relu')) model.add(Dense(1)) model.add(Activation('sigmoid')) model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy']) # Training model # ================================================== model.fit(x_shuffled, y_shuffled, batch_size=batch_size, epochs=num_epochs, validation_split=val_split, verbose=2)
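To see what the sliding pooling buys, it helps to trace the shapes: with sequence_length = 56, a 'valid' convolution with filter length 3 yields 54 time steps; MaxPooling1D with a window of 2 halves that to 27, and Flatten gives 27 × 150 = 4050 features. The length-4 filter yields 53 → 26 → 3900 features, so the concatenated output has 7950 features. Pooling over the whole feature map, as in the original article, would instead give just num_filters features per filter size (300 total here), discarding the positional information that the length-2 window keeps.

Note also that Merge, nb_filter, border_mode, subsample_length and pool_length are Keras 1.x API that later Keras releases removed. A minimal sketch of the same parallel-filter block in the Keras 2 functional API might look like the following (a hypothetical port with the same hyperparameters, not the author's code):

import numpy as np
from keras.models import Model
from keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Concatenate

sequence_length, embedding_dim = 56, 20
filter_sizes, num_filters = (3, 4), 150

graph_in = Input(shape=(sequence_length, embedding_dim))
convs = []
for fsz in filter_sizes:
    # Conv1D/kernel_size/padding/strides replace the Keras 1 names
    conv = Conv1D(filters=num_filters, kernel_size=fsz,
                  padding='valid', activation='relu', strides=1)(graph_in)
    pool = MaxPooling1D(pool_size=2)(conv)  # sliding window, not global pooling
    convs.append(Flatten()(pool))

# Concatenate replaces the removed Merge(mode='concat') layer
out = Concatenate()(convs) if len(filter_sizes) > 1 else convs[0]
graph = Model(inputs=graph_in, outputs=out)
graph.summary()  # output shape should be (None, 7950), matching the arithmetic above

The rest of the script (Embedding, Dropout, Dense layers, compile and fit) carries over with the same renames (epochs instead of nb_epoch).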

