Python项目之企业审批流绩效分析应用1

xiaoxiao2021-02-28  35

结合词频统计的功能,联想到可以应用于企业审批流程回退意见的词频分析,是企业流程绩效分析的扩展之一。

技术路线:jieba分词,wordcloud绘制特定形状词云

# bpmRejectAnalyzeV1.py
"""Word-frequency analysis of approval-workflow rejection comments.

The script reads rejection comments (one per line), keeps the words that
jieba's part-of-speech tagger labels as nouns, prints the most frequent ones
and renders them as a word cloud shaped by a mask image.
"""
from collections import Counter
from os import path

# NOTE: jieba, wordcloud and matplotlib are imported inside the functions that
# use them, so the plain-Python helpers (getTxt, countWords) stay usable even
# when the NLP / plotting packages are not installed.


def getTxt(txt):
    """Return every line of the UTF-8 text file *txt*.

    Line terminators are kept, exactly as ``readlines()`` produces them.
    """
    with open(txt, 'r', encoding='utf-8') as f:
        return f.readlines()


def segmentWords(txtlist, stopwords_path='stopwords.txt'):
    """Return the nouns found in *txtlist*, in order, duplicates included.

    Args:
        txtlist: iterable of text lines; whitespace-only lines are skipped.
        stopwords_path: UTF-8 file with one stop word per line. Words listed
            there are dropped from the result.

    Returns:
        A list of the words tagged ``'n'`` by ``jieba.posseg`` that are not
        stop words.
    """
    import jieba.posseg as pseg

    # Read the stop words inside a context manager so the file handle is
    # released deterministically (the bare open() used before never closed it).
    with open(stopwords_path, encoding='utf-8') as f:
        stop_words = {line.strip() for line in f}

    newslist = []
    for subject in txtlist:
        if subject.isspace():
            continue
        for word, flag in pseg.cut(subject):
            if flag == 'n' and word not in stop_words:
                newslist.append(word)
    return newslist


def drawPlant(newslist):
    """Render *newslist* as a masked word cloud, save it and display it.

    The mask is read from ``mickey.png`` next to this script, the picture is
    written to ``wordcloud.jpg`` in the working directory, and a matplotlib
    window showing the cloud is opened.
    """
    import matplotlib.pyplot as plt
    from wordcloud import WordCloud

    d = path.dirname(__file__)
    # scipy.misc.imread no longer exists in current SciPy releases; pyplot's
    # imread is used instead. It yields floats in [0, 1] for PNG files, while
    # the word cloud mask is expected to be 0-255 bytes, so convert if needed.
    mask_image = plt.imread(path.join(d, "mickey.png"))
    if mask_image.dtype.kind == 'f':
        mask_image = (mask_image * 255).round().astype('uint8')

    content = ' '.join(newslist)
    wordcloud = WordCloud(
        font_path='simhei.ttf',
        background_color="white",
        mask=mask_image,
        max_words=40,
    ).generate(content)

    # Display the generated image:
    plt.imshow(wordcloud)
    plt.axis("off")
    wordcloud.to_file('wordcloud.jpg')
    plt.show()


def countWords(newslist, top_n=100):
    """Print the *top_n* most frequent words of *newslist* as ``word:count``.

    Words are listed by descending count; words with equal counts keep the
    order in which they first appear. When there are fewer than *top_n*
    distinct words, all of them are printed (previously this raised
    ``IndexError``).
    """
    counts = Counter(newslist)
    # sorted() is stable, so ties stay in first-seen order.
    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    for word, count in ranked[:top_n]:
        print("{}:{}".format(word, count))


def main():
    """Run the pipeline: load comments, extract nouns, report and plot."""
    txtlist = getTxt('bpmreject.txt')
    wordlist = segmentWords(txtlist)
    countWords(wordlist)
    drawPlant(wordlist)


if __name__ == "__main__":
    main()

转载请注明原文地址: https://www.6miu.com/read-2631996.html

最新回复(0)