# Learning jieba Chinese word segmentation in Python
# (This header line was bare prose in the original, which is a SyntaxError;
# it must be a comment for the script to run.)
import jieba

# Sample sentence to segment.
text = "姚家沟是东北某个偏僻山沟里只有二十几户人家的小村子,偏僻到有时候人们忽略了它的存在。"

# Full mode: scans out every word the dictionary can form (fast, ambiguous).
fullMode = jieba.cut(text, cut_all=True)
# Default (accurate) mode: the most precise segmentation for text analysis.
defaultMode = jieba.cut(text, cut_all=False)
# Search-engine mode: further splits long words to improve recall.
searchMode = jieba.cut_for_search(text)

# cut() returns generators; join consumes each one exactly once.
print("fullMode: \n", " ".join(fullMode))
print("defaultMode: \n", " ".join(defaultMode))
print("searchMode: \n", " ".join(searchMode))
import jieba.analyse

# Extract the top-5 TF-IDF keywords from the sentence, keeping their weights.
findWord = jieba.analyse.extract_tags(text, topK=5, withWeight=True)

# extract_tags returns a list of (keyword, weight) pairs — print them one per line.
for keyword, score in findWord:
    print(keyword, score)

# Also show the raw result list in one shot.
print(findWord)
import jieba.posseg as pesg

# Part-of-speech tagging: each token carries the word and its POS flag.
for token in pesg.cut(text):
    print(token.word, token.flag)
# tokenize() yields (word, start_offset, end_offset) triples.
result = jieba.tokenize(text)
print("word start end")
for word, start, end in result:
    print(word, " ", start, " ", end)
from collections import Counter

# Count how often each segmented token occurs.
# Counter is O(n) over the token stream; the original built a set and called
# list.count() per distinct token, which is O(n^2), and used try/except
# KeyError for missing keys — Counter returns 0 for absent keys natively.
resultForstatistic = Counter(jieba.cut(text))

# Report the frequency of a few keywords of interest (0 if never seen).
keywords = ["姚家沟","东北"]
for keyword in keywords:
    keyCounts = resultForstatistic[keyword]
    print(keyword, keyCounts)