【python】 统计汉字的频率

xiaoxiao2021-03-01  11

"""Count how often each Chinese character occurs in a text file.

Reads ``123.txt`` (UTF-8), keeps only the Chinese characters, prints them
together with per-character counts, and finally echoes a previously saved
``1234_key_value_3.txt`` file of "<char> <count>" lines.
"""
import re
from collections import Counter

SOURCE_PATH = r'123.txt'
COUNTS_PATH = '1234_key_value_3.txt'

# Regular expression matching runs of Chinese characters by code-point range
# (U+4E00 through U+9FA5). Compiled once so it is not rebuilt on every call.
HANZI_PATTERN = re.compile(r'[\u4e00-\u9fa5]+')


def extract_hanzi(text):
    """Return every character of *text* in U+4E00..U+9FA5, joined in order."""
    return ''.join(HANZI_PATTERN.findall(text))


def count_chars(chars):
    """Return a dict mapping each character in *chars* to its occurrence count.

    Keys appear in first-seen order, so iterating or printing the result
    follows the order in which characters first occur in *chars*.
    """
    # Counter does the tallying; convert back to a plain dict so that
    # printing shows ``{...}`` rather than ``Counter({...})``.
    return dict(Counter(chars))


def save_counts(counts, path):
    """Write *counts* to *path*, one "<char> <count>" line per entry.

    Not called by ``main``; call it once to (re)generate the file that
    ``print_saved_counts`` reads back.
    """
    # Platform default encoding, so it stays consistent with the reader below.
    with open(path, mode='w') as fp:
        fp.writelines(f"{key} {value}\n" for key, value in counts.items())


def print_saved_counts(path):
    """Print each "<char> <count>" line stored in *path* as "<char> ----- <count>".

    Raises:
        FileNotFoundError: if *path* does not exist.
        IndexError: if a non-blank line contains no space separator.
    """
    # No explicit encoding: the file is read with the platform default, the
    # same setting the writer uses, so already-saved files keep working.
    with open(path) as fp:
        for line in fp:
            stripped = line.strip()
            if not stripped:
                # A blank (e.g. trailing) line carries no key/value pair.
                continue
            fields = stripped.split(" ")
            print(fields[0], "-----", fields[1])


def main():
    """Run the full report: extract, count, print, then echo the saved file."""
    with open(SOURCE_PATH, encoding='utf8') as fp:
        text = fp.read()

    hanzi = extract_hanzi(text)
    print(hanzi)
    print(len(hanzi))

    counts = count_chars(hanzi)
    print(counts)
    print(len(counts))
    for key, value in counts.items():
        print(key, "--", value)

    # COUNTS_PATH is expected to exist already; it is not rewritten here.
    print("------------")
    print_saved_counts(COUNTS_PATH)


if __name__ == "__main__":
    main()

 

转载请注明原文地址: https://www.6miu.com/read-3350046.html

最新回复(0)