# -*- coding: utf-8 -*-
# codecs and os are not needed by the code below; they are kept because other
# code may rely on this module importing them.
import codecs
import os
import sys

from janome.tokenizer import Tokenizer

# Part-of-speech markers whose tokens are left out of the word list:
# "記号" (symbol) and "人名" (person name).
_EXCLUDED_POS = ("記号", "人名")


def analyze(path):
    """Write the unique base forms of the words found in a UTF-8 text file.

    Every line of ``path`` is tokenized with Janome. Tokens whose
    part-of-speech string contains one of ``_EXCLUDED_POS`` are skipped; the
    ``base_form`` of each remaining token is collected into a set. The
    collected words are then written, sorted and one per line with CRLF line
    endings, to ``path + "x"`` encoded as UTF-8.

    Args:
        path: Path of the UTF-8 encoded input text file.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        UnicodeDecodeError: If the input is not valid UTF-8.
    """
    tokenizer = Tokenizer()
    words = set()

    with open(path, encoding="utf-8") as src:
        for line in src:
            for token in tokenizer.tokenize(line):
                # Test only the part-of-speech field: matching against the
                # whole str(token) would also drop ordinary words whose
                # surface or reading merely contains one of the markers.
                pos = token.part_of_speech
                if any(marker in pos for marker in _EXCLUDED_POS):
                    continue
                # Read the attribute rather than splitting str(token) on
                # commas, which breaks when the surface contains a comma.
                words.add(token.base_form)

    # newline="\r\n" yields exactly one CRLF per line on every platform
    # (writing "\r\n" to a default text-mode file becomes "\r\r\n" on
    # Windows). The explicit encoding keeps the output independent of the
    # locale, matching the UTF-8 input.
    with open(path + "x", "w", encoding="utf-8", newline="\r\n") as dst:
        # Sorted so the output is reproducible; set order varies per run.
        dst.writelines(word + "\n" for word in sorted(words))


if __name__ == "__main__":
    # An optional first command-line argument overrides the default input.
    analyze(sys.argv[1] if len(sys.argv) > 1
            else "C:\\Users\\70485528\\mymail.txt")