先看看xml文件
<?xml version="1.0" encoding="UTF-8"?> <configuration config="user.xml"> <property weburl="www.xzy.com"> <name>mapreduce.jobhistory.jhist.format</name> <value>json</value> <source>mapred-default.xml</source> <source>job.xml</source> </property> <property> <name>hadoop.proxyuser.hive.groups</name> <value>users</value> <source>core-site.xml</source> </property> </configuration>对上述 XML 的结构进行分析:如果打印根节点(root node)的子节点,共有 5 个,分别是 <root text node>、<first element node>、<first element text node>、<second element node>、<second element text node>(其中的 text node 是元素之间的空白文本)。这个结构很重要,在很大程度上影响编程。解析的步骤大体是:创建工厂,生成解析器,得到 xml 文件,调用 parse。代码如下:
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.File;

/**
 * Minimal DOM-parsing demo: create a factory, build a parser, load the XML file and call
 * {@code parse}, then dump the root element's direct children to standard output.
 *
 * <p>The code is only a simple test and is meant as a reference. For the general layout of
 * an XML document see https://www.w3schools.com/xml/xml_tree.asp
 */
public class docParse {

    /**
     * Parses {@code src/main/resources/5.xml} and prints, for every direct child of the root
     * element, its node name and node type. For element children it additionally prints the
     * names of their own children, and the text of any child called {@code name}. The
     * {@code weburl} attribute of the first element child is printed on its own line when
     * that attribute exists.
     *
     * @param args unused
     * @throws Exception if the parser cannot be created or the file cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {
        File hostlist = new File("src/main/resources/5.xml"); // locate the file
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); // create the factory
        DocumentBuilder dBuilder = dbFactory.newDocumentBuilder(); // build the parser
        Document doc = dBuilder.parse(hostlist); // parse the file
        doc.getDocumentElement().normalize();

        Element root = doc.getDocumentElement();
        System.out.println("root of xml file:" + root.getTagName());

        // With pretty-printed XML the children alternate:
        // <whitespace text> <first element> <whitespace text> <second element> <whitespace text>.
        // Compact XML has no whitespace text nodes, so the first element is found by node
        // type rather than by a fixed index.
        NodeList childNodes = root.getChildNodes();
        boolean firstElementSeen = false;
        for (int i = 0; i < childNodes.getLength(); i++) {
            Node node = childNodes.item(i);
            boolean isElement = node.getNodeType() == Node.ELEMENT_NODE;

            if (isElement && !firstElementSeen) {
                firstElementSeen = true;
                printAttributeIfPresent((Element) node, "weburl");
            }

            System.out.print(node.getNodeName());
            System.out.print(" type:" + node.getNodeType());
            if (isElement) {
                printChildren(node);
            }
            System.out.println();
        }
    }

    /** Prints the value of {@code attributeName} on its own line, or nothing if it is absent. */
    private static void printAttributeIfPresent(Element element, String attributeName) {
        if (element.hasAttribute(attributeName)) {
            System.out.println(element.getAttribute(attributeName));
        }
    }

    /** Prints the name of every child of {@code parent}, plus the text of {@code name} children. */
    private static void printChildren(Node parent) {
        NodeList children = parent.getChildNodes();
        for (int j = 0; j < children.getLength(); j++) {
            Node child = children.item(j);
            System.out.print(">>child nods:" + child.getNodeName());
            if ("name".equals(child.getNodeName())) {
                // An empty <name/> has no first child; print an empty value instead of failing.
                Node firstChild = child.getFirstChild();
                System.out.print("--" + (firstChild == null ? "" : firstChild.getNodeValue()));
            }
        }
    }
}