View Javadoc

1   /*
2    * Copyright 2010-2013 Capgemini
3    * Licensed under the Apache License, Version 2.0 (the "License"); 
4    * you may not use this file except in compliance with the License. 
5    * You may obtain a copy of the License at 
6    * 
7    * http://www.apache.org/licenses/LICENSE-2.0 
8    * 
9    * Unless required by applicable law or agreed to in writing, software 
10   * distributed under the License is distributed on an "AS IS" BASIS, 
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12   * See the License for the specific language governing permissions and 
13   * limitations under the License. 
14   * 
15   */
16  package org.xmlfield.core.impl.dom;
17  
18  import java.io.IOException;
19  import java.io.InputStream;
20  import java.io.StringReader;
21  import java.io.StringWriter;
22  import java.io.Writer;
23  import java.util.HashMap;
24  import java.util.Map;
25  
26  import javax.xml.parsers.DocumentBuilder;
27  import javax.xml.parsers.DocumentBuilderFactory;
28  import javax.xml.parsers.ParserConfigurationException;
29  import javax.xml.transform.OutputKeys;
30  import javax.xml.transform.Transformer;
31  import javax.xml.transform.TransformerConfigurationException;
32  import javax.xml.transform.TransformerException;
33  import javax.xml.transform.TransformerFactory;
34  import javax.xml.transform.TransformerFactoryConfigurationError;
35  import javax.xml.transform.dom.DOMSource;
36  import javax.xml.transform.stream.StreamResult;
37  
38  import org.slf4j.Logger;
39  import org.slf4j.LoggerFactory;
40  import org.w3c.dom.Document;
41  import org.w3c.dom.Node;
42  import org.xml.sax.InputSource;
43  import org.xml.sax.SAXException;
44  import org.xmlfield.core.api.XmlFieldNode;
45  import org.xmlfield.core.api.XmlFieldNodeParser;
46  import org.xmlfield.core.exception.XmlFieldParsingException;
47  import org.xmlfield.core.impl.dom.cleanup.EntitySanitizingInputStream;
48  import org.xmlfield.core.impl.dom.cleanup.InputSanitizer;
49  
50  /**
51   * Default xml field node parser. This implementation deal with a {@link Node}
52   * object.
53   * <p>
54   * <p>
55   * DomNodeParser is not thread safe.
56   * 
57   * @author Guillaume Mary <guillaume.mary@capgemini.com>
58   * @author Nicolas Richeton
59   */
60  public class DomNodeParser implements XmlFieldNodeParser {
61  
62  	/**
63  	 * Remove invalid XML numeric entities before parsing XML document. The
64  	 * underlying XML parser will fail if invalid entity are used. With this
65  	 * option enabled, XmlField will replace invalid entity by an
66  	 * "unknown character".
67  	 * <p>
68  	 * This option have a small performance impact but should be disabled only
69  	 * if is content is known to perfectly valid.
70  	 */
71  	public static final String CONFIG_CLEANUP_XML = "xmlfield.dom.cleanupXmlFirst";
72  
73  	/**
74  	 * Switch XML implementation.
75  	 * 
76  	 * @deprecated
77  	 * @see OutputKeys
78  	 */
79  	@Deprecated
80  	public static String CONFIG_INDENT_XML = OutputKeys.INDENT;
81  
82  	private static final Logger logger = LoggerFactory
83  			.getLogger(DomNodeParser.class);
84  
85  	/**
86  	 * When enabled, a global replacement of invalid characters is performed
87  	 * before sending the XML input to the parser.
88  	 */
89  	boolean cleanupXmlFirst = false;
90  	Map<String, String> configuration = null;
91  	DocumentBuilder documentBuilder = null;
92  	boolean indent = false;
93  
94  	Transformer t = null;
95  
96  	public DomNodeParser() throws TransformerConfigurationException,
97  			TransformerFactoryConfigurationError {
98  		this(null);
99  	}
100 
101 	/**
102 	 * Create document parser and writer.
103 	 * 
104 	 * @param configurationParam
105 	 *            configure XML output (Transformer). Allowed values are
106 	 *            {@link OutputKeys} constants.
107 	 * @throws TransformerConfigurationException
108 	 * @throws TransformerFactoryConfigurationError
109 	 */
110 	public DomNodeParser(Map<String, String> configurationParam)
111 			throws TransformerConfigurationException,
112 			TransformerFactoryConfigurationError {
113 
114 		if (configurationParam != null) {
115 			// Apply configuration
116 			this.configuration = new HashMap<String, String>(configurationParam);
117 
118 			// Warn for deprecated parameter value.
119 			// Should be removed eventually.
120 			if ("true".equals(configuration.get(OutputKeys.INDENT))) {
121 				logger.warn("Use of deprecated value \"true\" for configuration OutputKeys.INDENT. "
122 						+ "Please use \"yes\" instead");
123 				configuration.put(OutputKeys.INDENT, "yes");
124 			}
125 
126 			// Process CONFIG_CLEANUP_XML and remove key to prevent forwarding
127 			// to the underlying parser.
128 			if ("true".equals(configuration.get(CONFIG_CLEANUP_XML))) {
129 				cleanupXmlFirst = true;
130 				configuration.remove(CONFIG_CLEANUP_XML);
131 			}
132 
133 		}
134 	}
135 
136 	private void ensureBuilder() throws ParserConfigurationException {
137 
138 		if (documentBuilder == null) {
139 			DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory
140 					.newInstance();
141 
142 			documentBuilderFactory.setNamespaceAware(true);
143 			// Prevent XXE
144 			documentBuilderFactory.setExpandEntityReferences(false);
145 			documentBuilderFactory.setValidating(false);
146 
147 			documentBuilder = documentBuilderFactory.newDocumentBuilder();
148 
149 		}
150 	}
151 
152 	private void ensureTransformer() throws TransformerConfigurationException,
153 			TransformerFactoryConfigurationError {
154 		if (t == null) {
155 			t = TransformerFactory.newInstance().newTransformer();
156 
157 			// Default for this key
158 			t.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
159 
160 			// Apply configuration
161 			if (configuration != null) {
162 				for (String key : configuration.keySet()) {
163 					t.setOutputProperty(key, configuration.get(key));
164 				}
165 			}
166 		}
167 	}
168 
169 	@Override
170 	public String nodeToXml(XmlFieldNode node) throws XmlFieldParsingException {
171 		StringWriter sw;
172 		try {
173 
174 			sw = new StringWriter();
175 			ensureTransformer();
176 			t.transform(new DOMSource((Node) node.getNode()), new StreamResult(
177 					sw));
178 		} catch (TransformerConfigurationException e) {
179 			throw new XmlFieldParsingException(e);
180 		} catch (IllegalArgumentException e) {
181 			throw new XmlFieldParsingException(e);
182 		} catch (TransformerFactoryConfigurationError e) {
183 			throw new XmlFieldParsingException(e);
184 		} catch (TransformerException e) {
185 			throw new XmlFieldParsingException(e);
186 		}
187 
188 		return sw.toString();
189 	}
190 
191 	@Override
192 	public void nodeToXml(XmlFieldNode node, Writer writer)
193 			throws XmlFieldParsingException {
194 		try {
195 			ensureTransformer();
196 
197 			t.transform(new DOMSource((Node) node.getNode()), new StreamResult(
198 					writer));
199 		} catch (TransformerConfigurationException e) {
200 			throw new XmlFieldParsingException(e);
201 		} catch (IllegalArgumentException e) {
202 			throw new XmlFieldParsingException(e);
203 		} catch (TransformerFactoryConfigurationError e) {
204 			throw new XmlFieldParsingException(e);
205 		} catch (TransformerException e) {
206 			throw new XmlFieldParsingException(e);
207 		}
208 
209 	}
210 
211 	/**
212 	 * Loads xml content from the input source and create XML DOM object.
213 	 * 
214 	 * @param xmlInputSource
215 	 * @return
216 	 * @throws XmlFieldParsingException
217 	 */
218 	private Node xmlToNode(final InputSource xmlInputSource)
219 			throws XmlFieldParsingException {
220 
221 		new InputSource() {
222 
223 		};
224 		Document document = null;
225 		try {
226 			ensureBuilder();
227 			document = documentBuilder.parse(xmlInputSource);
228 		} catch (ParserConfigurationException e) {
229 			throw new XmlFieldParsingException(e);
230 		} catch (SAXException e) {
231 			throw new XmlFieldParsingException(e);
232 		} catch (IOException e) {
233 			throw new XmlFieldParsingException(e);
234 		}
235 
236 		return document.getDocumentElement();
237 	}
238 
239 	@Override
240 	public XmlFieldNode xmlToNode(InputStream xmlContent)
241 			throws XmlFieldParsingException {
242 
243 		InputStream stream = xmlContent;
244 		if (cleanupXmlFirst) {
245 			stream = new EntitySanitizingInputStream(stream);
246 		}
247 
248 		return new DomNode(xmlToNode(new InputSource(stream)));
249 	}
250 
251 	@Override
252 	public XmlFieldNode xmlToNode(String xml) throws XmlFieldParsingException {
253 		String xmlData = xml;
254 		if (cleanupXmlFirst) {
255 			xmlData = InputSanitizer.sanitizeXml(xml);
256 		}
257 
258 		return new DomNode(
259 				xmlToNode(new InputSource(new StringReader(xmlData))));
260 	}
261 }