基于Keras构建RNN模型

2020-06-10  本文已影响0人  还闹不闹

参考:
https://bbs.huaweicloud.com/blogs/127160
https://www.jianshu.com/p/3596d55dfaa5
https://www.jianshu.com/p/b440a62f3c3d

#!/usr/bin/python
# coding=utf-8
import numpy as np
import pandas as pd
from keras.datasets import imdb
from matplotlib import pyplot as plt
from keras.preprocessing import sequence
from keras.layers.embeddings import Embedding
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.layers import Dense, Flatten,Dropout
from keras.models import Sequential

# Pandas display configuration: disable row/column truncation and widen the
# output line so wide DataFrames print in one piece. The two unicode options
# make CJK (full-width) characters align correctly in printed tables.
_DISPLAY_OPTIONS = {
    'display.max_columns': None,                  # show all columns
    'display.max_rows': None,                     # show all rows
    'display.width': 10000,                       # value display width (default 50)
    'display.unicode.ambiguous_as_wide': True,
    'display.unicode.east_asian_width': True,
}
for _name, _value in _DISPLAY_OPTIONS.items():
    pd.set_option(_name, _value)

# Keep numpy array printing on one long line as well.
np.set_printoptions(linewidth=1000)

# Load the raw IMDB review data (each review is a list of word indices)
# from a local copy of the dataset file.
# NOTE(review): the path mixes '\\' with single '\' — '\i' and '\d' are not
# valid escape sequences and only work by accident; a raw string would be safer.
(x_train, y_train), (x_validation, y_validation) = imdb.load_data(path='G:\\rnn\\rnn\imdb_movieReview_emotion_analysis\dataset\imdb.npz')
print(type(x_train), type(x_train[0]))
print(x_train[0:3])

# Merge the training and validation splits to compute dataset-level statistics.
x = np.concatenate((x_train, x_validation), axis=0)
y = np.concatenate((y_train, y_validation), axis=0)
print('x shape is %s, y shape is %s' % (x.shape, y.shape))
# Lengths of the first two reviews (reviews have variable length).
print(len(x[0:1][0]), len(x[1:2][0]))
print(x[0])
print(y[0:3])
# Distinct labels (binary sentiment) and total vocabulary size.
print('Classes: %s' % np.unique(y))
print('Total words: %s' % len(np.unique(np.hstack(x))))
# Per-review length (in tokens); `word` is actually one whole review here.
result = [len(word) for word in x]
print('Mean: %.2f words (STD: %.2f)' %(np.mean(result), np.std(result)))

# Visualise the review-length distribution: box plot and histogram side by side.
plt.subplot(221)
plt.boxplot(result)
plt.subplot(222)
plt.hist(result)
plt.show()

print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
# Convert integer sequences back into text.
# A dictionary mapping words to an integer index
word_index = imdb.get_word_index(path='G:\\rnn\\rnn\imdb_movieReview_emotion_analysis\dataset\imdb_word_index.json')
# The first indices are reserved
# for k,v in word_index.items():
#     print(k)
#     print(v)
# Shift every index up by 3 to make room for the 4 special tokens below.
word_index = {k:(v+3) for k,v in word_index.items()}
# print(word_index.items())
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2  # unknown
word_index["<UNUSED>"] = 3
# Invert the mapping: integer index -> word, for decoding reviews.
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
print(reverse_word_index[0])
def decode_review(text):
    # Map each index back to its word; unknown indices become '?'.
    return ' '.join([reverse_word_index.get(i, '?') for i in text])

# An out-of-vocabulary index decodes to the '?' placeholder.
movie_review_zh = decode_review([9999999999999])
print(movie_review_zh)

# ----------------------------------------------------------------------
# Hyper-parameters
# ----------------------------------------------------------------------
seed = 7            # RNG seed for reproducibility
top_words = 5000    # vocabulary cap: keep only the 5000 most frequent words
max_words = 500     # pad / truncate every review to 500 tokens
out_dimension = 32  # embedding vector size per word
batch_size = 512    # samples per gradient update
epochs = 10         # full passes over the training set
def create_model():
    """Build and compile the CNN-based binary sentiment classifier.

    Architecture: word embedding -> 1-D convolution -> max pooling ->
    flatten -> dense hidden layer -> sigmoid output.

    Returns:
        A compiled Keras ``Sequential`` model (summary printed as a side effect).
    """
    net = Sequential()
    for layer in (
        # Embedding: map each of the `top_words` indices to an
        # `out_dimension`-long dense vector; inputs are `max_words` tokens long.
        Embedding(top_words, out_dimension, input_length=max_words),
        # 1-D convolution over the sequence; 'same' padding keeps its length.
        Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(250, activation='relu'),
        Dense(1, activation='sigmoid'),   # probability of positive sentiment
    ):
        net.add(layer)
    net.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    net.summary()
    return net

if __name__ == '__main__':
    np.random.seed(seed=seed)
    # Load the dataset, keeping only the `top_words` most frequent words
    # (rarer words map to the out-of-vocabulary index).
    # Raw string avoids the invalid '\i'/'\d' escape sequences the original
    # single-backslash path relied on (same path value, no SyntaxWarning).
    (x_train, y_train), (x_validation, y_validation) = imdb.load_data(
        num_words=top_words,
        path=r'G:\rnn\rnn\imdb_movieReview_emotion_analysis\dataset\imdb.npz')
    # Pad / truncate every review to exactly `max_words` tokens so the
    # Embedding layer sees fixed-length inputs.
    x_train = sequence.pad_sequences(x_train, maxlen=max_words)
    x_validation = sequence.pad_sequences(x_validation, maxlen=max_words)
    # Build, compile and train the model; the optimizer ('adam') is already
    # set inside create_model(), so no extra optimizer object is needed here.
    model = create_model()
    history = model.fit(x_train, y_train,
                        validation_data=(x_validation, y_validation),
                        batch_size=batch_size, epochs=epochs, verbose=2)

# 训练集和验证集上的accuracy和loss
# 

上一篇 下一篇

猜你喜欢

热点阅读