构建词汇表方法
2018-08-01 本文已影响0人
思君颜如玉
from collections import Counter

# Build a word -> integer-index vocabulary from the segmented text in
# train['word_seg'].
# NOTE(review): `train` is defined outside this snippet — presumably a pandas
# DataFrame whose 'word_seg' column holds whitespace-separated tokens; confirm.

# Upper bound on the number of distinct words kept in the vocabulary.
MAX_VOCAB_SIZE = 40000

# Flatten every entry into one list of tokens; each entry is split on
# whitespace.
all_words = [word for item in train['word_seg'] for word in item.split()]

# Occurrence count for every token.
voc_info = Counter(all_words)

# Keep the most frequent words, most frequent first. Passing the limit to
# most_common() yields the same entries as slicing the full result, without
# sorting the whole counter.
voc = [word for word, _count in voc_info.most_common(MAX_VOCAB_SIZE)]

# Index 0 is assigned to the 'unk' key; the kept words are numbered from 1 in
# frequency order.
voc_index = {'unk': 0}
voc_index.update((word, index) for index, word in enumerate(voc, start=1))