# -*- coding: utf-8 -*-
import jieba
import jieba.posseg as pseg  # used for part-of-speech tagging

# Word segmentation
def part_word(fid1, fid3):
    for i in fid1.readlines():
        data_line = i.strip()
        wordList = jieba.cut(data_line.decode('utf-8'))  # jieba.cut returns a generator
        outStr = ''
        for word in wordList:
            outStr += word
            outStr += ' '
        # write one segmented line, words separated by spaces
        fid3.write(outStr.strip().encode('utf-8') + '\n')
        #print(type(wordList))

# Part-of-speech tagging
def ci_xing(fid1, fid3):
    for i in fid1.readlines():
        data_line = i.strip()
        words = pseg.cut(data_line.decode('utf-8'))  # pseg.cut returns a generator of word/flag pairs
        outStr = ''
        for w in words:
            outStr += w.word
            outStr += '/'
            outStr += w.flag
            outStr += ' '
        # write one line in the form word/flag word/flag ...
        fid3.write(outStr.strip().encode('utf-8') + '\n')

# Stop-word filtering
def stop_word(fid1, fid2, fid3):
    stopword = []
    for j in fid2.readlines():
        stopword.append(j.strip().decode('utf-8'))  # store the stop-word list
        #print j
    for i in fid1.readlines():
        data_line = i.strip()
        wordList = jieba.cut(data_line.decode('utf-8'))  # jieba.cut returns a generator
        outStr = ''
        for word in wordList:
            if word not in stopword:
                outStr += word
                outStr += ' '
        # write one line with the stop words removed
        fid3.write(outStr.strip().encode('utf-8') + '\n')

# Main routine
def main():
    fid1 = open('myInput.txt', 'r')   # input file to read
    fid2 = open('stopkey.txt', 'r')   # stop-word list
    fid3 = open('myOutput.txt', 'w')  # output file to write
    #stop_word(fid1, fid2, fid3)  # stop-word filtering
    part_word(fid1, fid2)  # word segmentation
    #ci_xing(fid1, fid2)  # part-of-speech tagging
    fid1.close()
    fid2.close()
    fid3.close()

main()
The error is:
Traceback (most recent call last):
  File "/Users/xuxiangjun/Desktop/test/test.py", line 55, in <module>
    main()
  File "/Users/xuxiangjun/Desktop/test/test.py", line 50, in main
    part_word(fid1, fid2)  # word segmentation
  File "/Users/xuxiangjun/Desktop/test/test.py", line 14, in part_word
    fid3.write(outStr.strip().encode('utf-8') + '\n')
IOError: File not open for writing
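
From the traceback, part_word treats its second argument as the writable output handle (it calls fid3.write on it), but main() hands it fid2, the stop-word file opened with 'r', so the write fails with "File not open for writing". A minimal sketch of main() with the writable handle fid3 passed instead, assuming myOutput.txt is the intended destination:

# Sketch of the likely fix: pass the file opened with 'w' as each function's output handle.
def main():
    fid1 = open('myInput.txt', 'r')   # input text
    fid2 = open('stopkey.txt', 'r')   # stop-word list (only stop_word needs it)
    fid3 = open('myOutput.txt', 'w')  # writable output file
    part_word(fid1, fid3)  # segmentation output goes to fid3, not fid2
    #ci_xing(fid1, fid3)  # part-of-speech tagging
    #stop_word(fid1, fid2, fid3)  # stop-word filtering
    fid1.close()
    fid2.close()
    fid3.close()

Note that readlines() exhausts fid1, so only one of the three processing calls can consume the input per run unless the file is reopened or seek(0) is called between calls.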
