[Java开发之路](11)SAX解析XML文档

最后更新于:2022-04-01 09:59:48

### 1.简介

DOM解析功能强大,可增删改查,但操作时会将整个XML文档读到内存,因此适用于小文档;

SAX解析是从头到尾顺序地逐个元素解析,不会把整个文档读到内存,修改较为不便,但适用于只读的大文档;

SAX采用事件驱动的方式解析XML。套用网友的解释:如同在电影院看电影一样,从头到尾看一遍,不能回退(DOM可来来回回读取),在看电影的过程中,每遇到一个情节,都会调用大脑去接收处理这些信息。SAX也是相同的原理,每遇到一个元素节点,都会调用相应的方法来处理。在SAX的解析过程中,读取到文档开头、文档结尾、元素的开头和元素的结尾时都会调用相应方法,我们可以在这些方法中进行相应事件处理。

对应方法:

~~~
public void startDocument() throws SAXException { }
public void endDocument() throws SAXException { }
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { }
public void endElement(String uri, String localName, String qName) throws SAXException { }
~~~

我们还需一个方法来处理元素节点中间的文本节点(我们常误以为是元素节点的文本值)。需要注意:解析器可能把同一段连续文本分成多次回调传给该方法,因此完整的文本值应当由各次回调的内容拼接而成。

~~~
public void characters(char[] ch, int start, int length) throws SAXException { }
~~~

### 2.解析

解析步骤:

(1)通过SAXParserFactory的静态方法newInstance()获取SAXParserFactory实例对象factory

~~~
SAXParserFactory factory = SAXParserFactory.newInstance();
~~~

(2)通过SAXParserFactory实例的newSAXParser()方法返回SAXParser实例parser

~~~
SAXParser parser = factory.newSAXParser();
~~~

(3)创建一个类继承DefaultHandler,重写其中的一些方法进行业务处理

~~~
package com.qunar.handler;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

public class SAXParserHandler extends DefaultHandler{
    // 用来标示解析开始
    @Override
    public void startDocument() throws SAXException { }

    // 用来标示解析结束
    @Override
    public void endDocument() throws SAXException { }

    // 用来遍历XML文件的开始标签
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        super.startElement(uri, localName, qName, attributes);
    }

    // 用来遍历XML文件的结束标签
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        super.endElement(uri, localName, qName);
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        super.characters(ch, start, length);
    }
}
~~~

(4)创建Handler类对象实例

~~~
// 定义SAXParserHandler对象
SAXParserHandler handler = new SAXParserHandler();
~~~

(5)解析XML文档:调用parser的parse()方法,解析过程中会依次回调handler中重写的方法

~~~
parser.parse(new File("D:\\bookstore.xml"), handler);
~~~

待解析的XML文档(bookstore.xml)内容如下:

~~~
<?xml
version="1.0" encoding="utf-8"?><bookstore> <book category="Java"> <title lang="chi">Java多线程编程核心技术</title> <author>高洪岩</author> <year>2015</year> <price>69.00</price> </book> <book category="C++"> <title lang="en">Effective C++: 55 Specific Ways to Improve Your Programs and Designs</title> <author>Scott Meyers</author> <year>2006</year> <price>58.00</price> </book> <book category="Web"> <title lang="en">Learning XML</title> <author>Erik T. Ray</author> <year>2016</year> <price>39.95</price> </book> </bookstore> ~~~ ### 3.具体实例: ~~~ package com.qunar.handler; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; public class SAXParserHandler extends DefaultHandler{ private int bookIndex = 0; // 用来标示解析开始 @Override public void startDocument() throws SAXException { System.out.println("SAX解析开始..."); } // 用来标示解析结束 @Override public void endDocument() throws SAXException { System.out.println("SAX解析结束..."); } // 用来遍历XML文件的开始标签 @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { // 调用DefaultHandler类的startElement方法 super.startElement(uri, localName, qName, attributes); // 开始解析book元素节点 if(qName.equals("book")){ ++ bookIndex; System.out.println("开始解析第" + bookIndex + "本书..."); // 已知book元素节点下的属性名称,根据属性名称获取属性值 /*String value = attributes.getValue("category"); System.out.println("value->"+value);*/ // 不知道book元素节点下的属性名称以及个数 int size = attributes.getLength(); for(int i = 0;i < size;++i){ System.out.println(attributes.getQName(i) + ":" + attributes.getValue(i)); }//for }//if else if(!qName.equals("bookstore")){ System.out.print(qName + ":"); }//else } // 用来遍历XML文件的结束标签 @Override public void endElement(String uri, String localName, String qName) throws SAXException { super.endElement(uri, localName, qName); // 判断一本书是否解析完 if(qName.equals("book")){ System.out.println("结束解析第" + bookIndex + "本书..."); }//if } @Override public void characters(char[] ch, int start, int 
length) throws SAXException { super.characters(ch, start, length); String text = new String(ch, start, length); if(!text.trim().equals("")){ System.out.println(text); }//if } } ~~~ ~~~ package com.qunar.xml; import java.io.IOException; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.xml.sax.SAXException; import com.qunar.handler.SAXParserHandler; /** * SAX方式解析XML文档 * @author sjf0115 * */ public class SAXXMLCode { public static void main(String[] args) { String path = "D:\\bookstore.xml"; try { // 通过SAXParserFactory的静态方法newInstance()方法获取SAXParserFactory实例对象factory SAXParserFactory factory = SAXParserFactory.newInstance(); // 通过SAXParserFactory实例的newSAXParser()方法返回SAXParser实例parser SAXParser saxParser = factory.newSAXParser(); // 定义SAXParserHandler对象 SAXParserHandler handler = new SAXParserHandler(); // 解析XML文档 saxParser.parse(path, handler); } catch (ParserConfigurationException e) { e.printStackTrace(); } catch (SAXException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } } ~~~ **运行结果:** <table cellspacing="0" cellpadding="0" style="border-collapse:collapse; border:1px solid rgb(187,187,187); width:1050px"><tbody><tr><td style="border-collapse:collapse; border:1px solid rgb(187,187,187); width:1049px"><br/><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">SAX解析开始...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">开始解析第1本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">category:Java</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">title:Java多线程编程核心技术</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">author:高洪岩</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">year:2015</span></div><div><span style="font-size:14pt; color:windowtext; 
font-family:微软雅黑">price:69.00</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">结束解析第1本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">开始解析第2本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">category:C++</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">title:Effective C++: 55 Specific Ways to Improve Your Programs and Designs</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">author:Scott Meyers</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">year:2006</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">price:58.00</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">结束解析第2本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">开始解析第3本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">category:Web</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">title:Learning XML</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">author:Erik T. 
Ray</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">year:2016</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">price:39.95</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">结束解析第3本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">SAX解析结束...</span></div></td></tr></tbody></table>

### 4.解析并储存于对象中

~~~
package com.qunar.bean;

/**
 * Book entity holding the values read from one {@code book} element.
 *
 * <p>All values are kept as the raw strings found in the document.
 *
 * @author sjf0115
 */
public class Book {

    /** Value of the {@code category} attribute of {@code book}. */
    private String category;
    /** Text of the {@code title} element. */
    private String title;
    /** Text of the {@code author} element. */
    private String author;
    /** Text of the {@code year} element. */
    private String year;
    /** Text of the {@code price} element. */
    private String price;
    /** Value of the {@code lang} attribute of {@code title}. */
    private String lang;

    public String getCategory() {
        return category;
    }

    public void setCategory(String category) {
        this.category = category;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getAuthor() {
        return author;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    public String getYear() {
        return year;
    }

    public void setYear(String year) {
        this.year = year;
    }

    public String getPrice() {
        return price;
    }

    public void setPrice(String price) {
        this.price = price;
    }

    public String getLang() {
        return lang;
    }

    public void setLang(String lang) {
        this.lang = lang;
    }

    /** Returns all fields as {@code name:value} pairs on a single line. */
    @Override
    public String toString() {
        return "category:" + category + " lang:" + lang + " title:" + title + " author:" + author + " year:" + year + " price:" + price;
    }
}
~~~

~~~
package com.qunar.handler;

import java.util.ArrayList;
import java.util.List;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import com.qunar.bean.Book;

/**
 * SAX handler that turns every {@code book} element into a {@link Book} and
 * collects the results in document order.
 */
public class SAXParserHandler extends DefaultHandler {

    /** Book currently being filled; {@code null} outside a {@code book} element. */
    private Book book;

    /** 1-based number of the book currently being parsed. */
    private int bookIndex = 0;

    /** Character data collected since the most recent start or end tag. */
    private final StringBuilder text = new StringBuilder();

    /** Books completed so far. */
    private List<Book> bookList = new ArrayList<Book>();

    /**
     * Returns the books collected so far.
     *
     * @return the handler's own list, in document order
     */
    public List<Book> getBookList() {
        return bookList;
    }

    /** Announces the beginning of parsing. */
    @Override
    public void startDocument() throws SAXException {
        System.out.println("SAX解析开始...");
    }

    /** Announces the end of parsing. */
    @Override
    public void endDocument() throws SAXException {
        System.out.println("SAX解析结束...");
    }

    /**
     * Handles a start tag: creates a new {@link Book} for {@code book} and reads
     * its {@code category} attribute, or reads the {@code lang} attribute of a
     * {@code title} inside a book.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        super.startElement(uri, localName, qName, attributes);
        // Drop whitespace collected between the previous tag and this one.
        text.setLength(0);

        if ("book".equals(qName)) {
            book = new Book();
            ++bookIndex;
            System.out.println("开始解析第" + bookIndex + "本书...");
            // getValue returns null when the attribute is absent, which leaves the field unset.
            book.setCategory(attributes.getValue("category"));
        } else if ("title".equals(qName) && book != null) {
            // Attribute "lang" of the title element.
            book.setLang(attributes.getValue("lang"));
        }
    }

    /**
     * Handles an end tag: stores the collected text in the matching field of the
     * current book, or adds the finished book to the result list.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        super.endElement(uri, localName, qName);

        if ("book".equals(qName)) {
            bookList.add(book);
            book = null;
            System.out.println("结束解析第" + bookIndex + "本书...");
        } else if (book != null) {
            // Elements outside a book are ignored instead of failing on a null book.
            String value = text.toString();
            if ("title".equals(qName)) {
                book.setTitle(value);
            } else if ("author".equals(qName)) {
                book.setAuthor(value);
            } else if ("year".equals(qName)) {
                book.setYear(value);
            } else if ("price".equals(qName)) {
                book.setPrice(value);
            }
        }
        text.setLength(0);
    }

    /**
     * Collects character data. The parser may deliver one run of text in several
     * calls, so the chunks are appended rather than overwriting each other; the
     * complete value is consumed in {@link #endElement}.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        super.characters(ch, start, length);
        text.append(ch, start, length);
    }
}
~~~

~~~
package com.qunar.xml;

import java.io.IOException;
import java.util.List;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.SAXException;

import com.qunar.bean.Book;
import
com.qunar.handler.SAXParserHandler; /** * SAX方式解析XML文档 * @author sjf0115 * */ public class SAXXMLCode { public static void main(String[] args) { String path = "D:\\bookstore.xml"; try { // 通过SAXParserFactory的静态方法newInstance()方法获取SAXParserFactory实例对象factory SAXParserFactory factory = SAXParserFactory.newInstance(); // 通过SAXParserFactory实例的newSAXParser()方法返回SAXParser实例parser SAXParser saxParser = factory.newSAXParser(); // 定义SAXParserHandler对象 SAXParserHandler handler = new SAXParserHandler(); // 解析XML文档 saxParser.parse(path, handler); // 得到遍历结果 List<Book> bookList = handler.getBookList(); System.out.println("遍历结果:"); for (Book book : bookList) { System.out.println(book); }//for } catch (ParserConfigurationException e) { e.printStackTrace(); } catch (SAXException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } } ~~~ **运行结果:** <table cellspacing="0" cellpadding="0" style="border-collapse:collapse; border:1px solid rgb(187,187,187); width:1050px"><tbody><tr><td style="border-collapse:collapse; border:1px solid rgb(187,187,187); width:1049px"><br/><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">SAX解析开始...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">开始解析第1本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">结束解析第1本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">开始解析第2本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">结束解析第2本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">开始解析第3本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">结束解析第3本书...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">SAX解析结束...</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">遍历结果:</span></div><div><span style="font-size:14pt; color:windowtext; 
font-family:微软雅黑">category:Java  lang:chi   title:Java多线程编程核心技术   author:高洪岩   year:2015   price:69.00</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">category:C++  lang:en   title:Effective C++: 55 Specific Ways to Improve Your Programs and Designs   author:Scott Meyers   year:2006   price:58.00</span></div><div><span style="font-size:14pt; color:windowtext; font-family:微软雅黑">category:Web  lang:en   title:Learning XML   author:Erik T. Ray   year:2016   price:39.95</span></div></td></tr></tbody></table>
';