用python找多音字

2020-04-01  本文已影响0人  姜附

# coding=utf-8

import sys, getopt

import pypinyin

# Command-line arguments with the script name (argv[0]) stripped off.
inputStr = sys.argv[1:]

# The same arguments glued together into one string, with no separator.
inputStr2 = str.join('', inputStr)

#print(inputStr2)

class DuYin:
    """Record for one character: the character, its readings, and a counter.

    Attributes:
        ch: the character this record describes.
        duyin: the readings passed in by the caller, stored as given.
        ref: occurrence counter; starts at 0 and grows by one per incr() call.
    """

    def __init__(self, ch, duyin):
        """Store the character and its readings with the counter at zero."""
        super().__init__()
        self.ch = ch
        self.duyin = duyin
        self.ref = 0

    def incr(self):
        """Bump the counter by one and return this record for chaining."""
        self.ref += 1
        return self

# Maps each polyphonic character found in the text to its DuYin record.
# Insertion order is the order of first appearance in the file.
duoYinZi = {}

# Characters already looked up and found to have at most one reading, so
# pypinyin is queried only once per distinct character.
_singleReading = set()

# Pass the encoding explicitly; otherwise open() falls back to the platform's
# locale encoding (the original author notes GBK on their system).
with open("test.txt", "r", encoding='utf-8') as f:
    data = f.read()

# The whole file is in memory now, so the scan runs after the file is closed.
for ch in data:
    record = duoYinZi.get(ch)
    if record is None:
        if ch in _singleReading:
            continue
        # heteronym=True asks pypinyin for every reading of the character;
        # p[0] is the list of readings for this single character.
        p = pypinyin.pinyin(ch, heteronym=True)
        if not p or len(p[0]) <= 1:
            _singleReading.add(ch)
            continue
        # First sighting of a polyphonic character: create its record once
        # instead of building a throwaway DuYin on every occurrence.
        record = duoYinZi[ch] = DuYin(ch, p)
    record.incr()

print("一共"+str(len(duoYinZi))+"个多音字:")

# One "char:count:readings" line per polyphonic character, assembled with a
# single join rather than repeated string concatenation.
outputStr = ''.join(
    "{}:{}:{}\n".format(v.ch, v.ref, v.duyin[0]) for v in duoYinZi.values()
)

print(outputStr)

# sys.exit() instead of the bare exit() helper, which is injected by the
# `site` module for interactive use and is absent under `python -S`.
sys.exit()

上一篇 下一篇

猜你喜欢

热点阅读