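'''
Read and check every XML file in the orderFile folder.
The XML files mainly look like this:
<object><name>car</name>...</object>
<object><name>car</name>...</object>
...
Approach: iterate over all XML files and check whether each name value is one of
['car','bicycle','motorcycle','person','truck','bus','taxi'].
Names that are not in the list are reported; known misspellings are corrected in the file.
'''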
import os
import xml.dom.minidom
from xml.etree.ElementTree
import ElementTree,Element
# --- Configuration ---------------------------------------------------------
# Folder that holds the XML files to check (hard-coded Windows path).
folderPath = 'D:\\BaiduNetdiskDownload\\第三次操作结果\\orderFile(已修改)'
# Text log that receives every reported problem and the final summary.
errorFilePath = 'D:\\BaiduNetdiskDownload\\第三次操作结果\\xmlErrorLog2.txt'

# Every log message contains non-ASCII text, so the encoding is pinned instead
# of relying on the platform default, which cannot encode it on all systems.
file = open(errorFilePath, 'w', encoding='utf-8')

# Accepted values for <object><name>.
data = ['car', 'bicycle', 'motorcycle', 'person', 'truck', 'bus', 'taxi']

# --- Running state, updated by the processing loop --------------------------
errorFileList = []       # names of files containing at least one unknown name
errorStudentNumber = []  # int(file index / 150) for each of those files
errorNumber = 0          # total number of unknown <name> nodes
errorFileNumber = 0      # number of files containing an unknown name
mark = False             # True while the current file has an unknown name
# Abort early when the target folder is missing.
if not os.path.exists(folderPath):
    print('ERROR:目标文件夹不存在')
    raise SystemExit(1)

# List the folder once; fileList is what the processing loop iterates over.
try:
    fileList = os.listdir(folderPath)
except OSError as error:
    # str() is required here: concatenating the exception object itself to a
    # string raises TypeError and would hide the real failure.
    print("ERROR:读取目录失败,错误信息如下\n" + str(error))
    raise SystemExit(1)
else:
    print('成功读取目录 ' + folderPath + "下所有文件,文件名列表如下:")
    print(fileList)
def modifyNode(nodeStr):
    """Return the corrected label for a known misspelling, or None.

    'trunk' and 'track' are both corrected to 'truck'. Any other value has
    no known correction and yields None.
    """
    return 'truck' if nodeStr in ('trunk', 'track') else None
# Walk every XML file in the folder: auto-correct known misspellings of the
# <object><name> value and report each name that is neither valid nor fixable.
for i, baseName in enumerate(fileList):
    # Only *.xml entries are processed. Testing the full ".xml" extension
    # keeps this check consistent with the [:-4] stem slice used further down.
    if not baseName.endswith('.xml'):
        continue
    fileName = os.path.join(folderPath, baseName)

    try:
        tree = ElementTree()
        tree.parse(fileName)
    except Exception:
        # Best effort: an unreadable or unparsable file is logged and skipped
        # so the remaining files are still checked.
        message = "ERROR:发现一个文件打开错误,可能是内容为空没找到节点,文件名为" + fileName
        print(message)
        file.write(message + "\n ")
        continue

    node = tree.findall("object/name")
    for x, nameElement in enumerate(node):
        tempName = nameElement.text
        if tempName in data:
            continue

        rightNode = modifyNode(tempName)
        if rightNode is not None:
            # Known misspelling: fix it in the tree, nothing to report.
            nameElement.text = rightNode
            continue

        errorNumber += 1
        mark = True
        # An empty <name/> element has text None; show it as an empty name
        # instead of failing on the string concatenation.
        shownName = '' if tempName is None else tempName
        message = ("ERROR:发现一个未知命名为【" + shownName +
                   "】,位于文件名 " + fileName +
                   " 的文件中,它是该文件的第 " + str(x) +
                   "个【name】节点")
        print(message)
        file.write(message + "\n")

    if mark:
        errorFileNumber += 1
        errorFileList.append(baseName)
        fileIndex = baseName[:-4]  # file name without the ".xml" extension
        try:
            errorStudentNumber.append(int(int(fileIndex) / 150))
        except ValueError:
            # The stem is not a number, so no value can be derived from it.
            # The file is still recorded in errorFileList above.
            pass
        mark = False

    # Save the tree back to the same path so corrected names are persisted;
    # this runs for every file that parsed successfully.
    tree.write(fileName)
# De-duplicate while keeping first-seen order. dict.fromkeys gives the same
# result as sorted(set(x), key=x.index) in a single linear pass.
errorFileListSort = list(dict.fromkeys(errorFileList))
errorStudentNumberSort = list(dict.fromkeys(errorStudentNumber))

# Final summary, printed to the console and appended to the log file.
summary = ("查找结束,共发现问题文件" + str(errorFileNumber) +
           "个,共计" + str(errorNumber) +
           "个错误node!")
print(summary)
print("问题文件名列表如下:")
print(errorFileListSort)

file.write(summary + "\n")
file.write("问题文件名列表如下:\n")
file.write(str(errorFileListSort))
# Last write to the log: close it so the buffered content is flushed to disk
# and the handle is released.
file.close()