Python 统计一个 Word 文档里单词的频率

2020-05-06  本文已影响0人  潇湘demi

#coding:utf-8

#import string

# -*- coding: utf-8 -*-

import docx

import re

# Count how often each word occurs in a .docx file and print the mapping.
#
# Relies on the file-level `import docx` and `import re`.

# Maps each lower-cased word to its number of occurrences in the document.
# Deliberately not named `dict`, which would shadow the builtin type.
word_counts = {}

file_path = 'dear.docx'

doc = docx.Document(file_path)

# Characters replaced by a space before splitting into words. Compiled once
# because the pattern does not change between paragraphs.
# NOTE(review): the double quote appears twice in the class, which is
# harmless; possibly the original had curly quotes here -- confirm.
punctuation = re.compile(r'[,.""?!]')

for paragraph in doc.paragraphs:
    # Replace the listed punctuation with spaces and fold case so that
    # "Dear," and "dear" are counted as the same word.
    cleaned = punctuation.sub(" ", paragraph.text).lower()

    for word in cleaned.split():
        # One lookup per word: start at 0 for unseen words, then add one.
        word_counts[word] = word_counts.get(word, 0) + 1

# Parenthesised single-argument form works under both Python 2 and Python 3.
print(word_counts)

上一篇 下一篇

猜你喜欢

热点阅读