python 单词出现频率统计功能

2018-05-09 本文已影响0人 ReidLee

本文介绍如何用 Python 统计一个文本中每个单词出现的次数，并按出现次数从高到低排序，代码如下：

# -- coding: utf-8 --
import sys
from collections import Counter

def write2File(filePath, msg):
    """Append ``msg`` followed by a newline to the file at ``filePath``.

    The file is opened in append mode, so existing content is kept and each
    call adds one more line at the end.

    Args:
        filePath: Path of the file to append to.
        msg: Text to write; a trailing newline is added after it.
    """
    # The context manager flushes and closes the file even if a write fails,
    # so the handle is never leaked.
    with open(filePath, "a") as out_file:
        out_file.write(msg)
        out_file.write('\n')

# Substrings treated as word separators. They are replaced one after another,
# in exactly this order, so the two-character dash patterns also match spaces
# produced by the earlier replacements.
_WORD_SEPARATORS = (',', '(', ')', '&', '/', '\\', '"', '- ', ' -', '\'')


def countWords(contents):
    """Count how often each word occurs in ``contents``.

    Every separator in ``_WORD_SEPARATORS`` is replaced by a space, then the
    text is split on whitespace. Words are compared case-sensitively.

    Args:
        contents: The text to analyse.

    Returns:
        A list of ``(word, count)`` tuples sorted by count, highest first.
        On Python 3.7+ words with equal counts keep first-seen order.
    """
    for separator in _WORD_SEPARATORS:
        contents = contents.replace(separator, ' ')
    return Counter(contents.split()).most_common()


## Entry point
if __name__ == "__main__":

    # The first command-line argument is the input file path.
    if len(sys.argv) < 2:
        sys.exit("usage: python " + sys.argv[0] + " <input file>")
    filePath = sys.argv[1]

    # Read the whole file; the with-block closes it as soon as we are done.
    with open(filePath) as file_object:
        contents = file_object.read()

    # Emit one "word,count" line per distinct word, most frequent first.
    for word, count in countWords(contents):
        write2File("result.CSV", word + "," + str(count))

python 单词出现频率统计功能

猜你喜欢

热点阅读