1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package no.sintef.xml;
22
23 import java.io.BufferedOutputStream;
24 import java.io.BufferedWriter;
25 import java.io.ByteArrayOutputStream;
26 import java.io.FileNotFoundException;
27 import java.io.IOException;
28 import java.io.InputStream;
29 import java.io.OutputStream;
30 import java.io.OutputStreamWriter;
31 import java.io.StringReader;
32 import java.io.Writer;
33 import java.lang.ref.SoftReference;
34 import java.net.MalformedURLException;
35 import java.net.URL;
36 import java.net.URLConnection;
37 import java.util.ArrayList;
38 import java.util.Collections;
39 import java.util.HashMap;
40 import java.util.List;
41
42 import javax.xml.parsers.DocumentBuilder;
43 import javax.xml.parsers.DocumentBuilderFactory;
44 import javax.xml.parsers.ParserConfigurationException;
45 import javax.xml.transform.ErrorListener;
46 import javax.xml.transform.OutputKeys;
47 import javax.xml.transform.Source;
48 import javax.xml.transform.Transformer;
49 import javax.xml.transform.TransformerConfigurationException;
50 import javax.xml.transform.TransformerException;
51 import javax.xml.transform.TransformerFactory;
52 import javax.xml.transform.dom.DOMSource;
53 import javax.xml.transform.stream.StreamResult;
54 import javax.xml.transform.stream.StreamSource;
55
56 import org.apache.log4j.Logger;
57 import org.apache.xalan.processor.TransformerFactoryImpl;
58 import org.doomdark.uuid.UUIDGenerator;
59 import org.w3c.dom.DOMImplementation;
60 import org.w3c.dom.Document;
61 import org.w3c.dom.DocumentType;
62 import org.w3c.dom.Element;
63 import org.w3c.dom.Node;
64 import org.w3c.dom.NodeList;
65 import org.w3c.dom.Text;
66 import org.xml.sax.EntityResolver;
67 import org.xml.sax.InputSource;
68 import org.xml.sax.helpers.DefaultHandler;
69
70 /***
71 * XML helper class.
72 *
73 * @author Fredrik Vraalsen
74 */
75 public final class XmlHelper {
76
77 private static final Logger LOGGER = Logger.getLogger(XmlHelper.class);
78
79 private static final DocumentCache DOCUMENT_CACHE = new DocumentCache();
80 private static final TransformerCache TRANSFORMER_CACHE = new TransformerCache(DOCUMENT_CACHE);
81 private static final DocumentBuilderFactory DBF;
82 private static final EntityResolver ER = new DefaultHandler() {
83 public InputSource resolveEntity(String publicId, String systemId) {
84 LOGGER.debug("publicId = " + publicId + ", systemId = " + systemId);
85 return new InputSource(new StringReader(""));
86 }
87 };
88
89 /***
90 * ThreadLocal identityTransformation to avoid reentrancy issues.
91 */
92 private static ThreadLocal identityTransformation = new ThreadLocal() {
93 protected synchronized Object initialValue() {
94 try {
95 Transformer t = TransformerFactory.newInstance().newTransformer();
96 t.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
97 t.setOutputProperty(OutputKeys.INDENT, "yes");
98 return t;
99 } catch (TransformerConfigurationException e) {
100 e.printStackTrace();
101 return null;
102 }
103 }
104 };
105
106 static {
107 DBF = DocumentBuilderFactory.newInstance();
108 DBF.setNamespaceAware(true);
109 DBF.setValidating(false);
110
111 }
112
113 /***
114 * Prevent instantiation of helper class.
115 */
116 private XmlHelper() {
117 }
118
119 /***
120 * Get document root element if Node is Document, otherwise cast node to
121 * Element if appropriate.
122 *
123 * TODO rename to getElement?
124 *
125 * @param node
126 * node to get Element for
127 * @return Element corresponding to node, or null if node is not Document or
128 * Element instance
129 */
130 public static Element getRootElement(Node node) {
131 if (node instanceof Element) {
132 return (Element) node;
133 } else if (node instanceof Document) {
134 Document doc = (Document) node;
135 return doc.getDocumentElement();
136 } else {
137 return null;
138 }
139 }
140
141 /***
142 * Get first child Element matching the specified name and namespace.
143 *
144 * @param parent
145 * parent Element
146 * @param namespaceURI
147 * the URI of the namespace to match
148 * @param localName
149 * the local name to match
150 * @return the first child Element matching local name and namespace, or
151 * null if none exists
152 */
153 public static Element getFirstElement(Element parent, String namespaceURI, String localName) {
154 List elements = getElements(parent, namespaceURI, localName);
155 if (elements.size() == 0) {
156 return null;
157 } else {
158 return (Element) elements.get(0);
159 }
160 }
161
162 /***
163 * Get child Elements matching the specified name and namespace.
164 *
165 * @param parent
166 * parent Element
167 * @param namespaceURI
168 * the URI of the namespace to match
169 * @param localName
170 * the local name to match
171 * @return List of matching child elements, possibly empty (never null)
172 */
173 public static List getElements(Element parent, String namespaceURI, String localName) {
174 if (parent == null) {
175 return Collections.EMPTY_LIST;
176 }
177
178 ArrayList result = new ArrayList();
179 NodeList nodes = parent.getChildNodes();
180 int length = nodes.getLength();
181 for (int i = 0; i < length; i++) {
182 Node n = nodes.item(i);
183 if (!(n instanceof Element)) {
184 continue;
185 }
186 Element e = (Element) n;
187 if (namespaceURI == null && localName != null && !localName.equals(e.getTagName())) {
188 continue;
189 } else if (namespaceURI != null && localName != null && !localName.equals(e.getLocalName())) {
190 continue;
191 } else if (namespaceURI != null && !namespaceURI.equals(e.getNamespaceURI())) {
192 continue;
193 }
194 result.add(e);
195 }
196
197 return result;
198 }
199
200 /***
201 * Get text content of element.
202 *
203 * @param element
204 * the element
205 * @return the text content of element (possibly empty), or null if element
206 * is null
207 */
208 public static String getText(Element element) {
209 if (element == null) {
210 return null;
211 }
212 StringBuffer sb = new StringBuffer();
213 NodeList children = element.getChildNodes();
214 for (int i = 0; i < children.getLength(); i++) {
215 Node n = children.item(i);
216 if (n instanceof Text) {
217 sb.append(n.getNodeValue());
218 } else if (n instanceof Element) {
219 sb.append(getText((Element) n));
220 }
221 }
222 return sb.toString();
223 }
224
225 /***
226 * Get text content of first child element matching the specified name and
227 * namespace.
228 *
229 * @param parent
230 * parent element
231 * @param namespaceURI
232 * the URI of the namespace to match
233 * @param localName
234 * the local name to match
235 * @return the text content of element, or null if no element matches or
236 * matching element is mixed or does not have text node child
237 */
238 public static String getText(Element parent, String namespaceURI, String localName) {
239 Element e = getFirstElement(parent, namespaceURI, localName);
240 return getText(e);
241 }
242
243 /***
244 * Parse XML Document from InputSource.
245 *
246 * @param is the InputSource to parse from.
247 * @return resulting XML Document, never null
248 * @throws XmlException if XML document was malformed or other XML parsing error occured
249 * @throws IOException if IO error occured reading from InputSource
250 */
251 public static Document parse(InputSource is) throws XmlException, IOException {
252 Document result = null;
253 try {
254 DocumentBuilder db = DBF.newDocumentBuilder();
255
256 db.setEntityResolver(ER);
257 result = db.parse(is);
258 } catch (IOException e) {
259 throw e;
260 } catch (Exception e) {
261 throw new XmlException("Unable to parse XML", e);
262 }
263 if (result == null) {
264
265 throw new XmlException("Null XML document");
266 }
267 return result;
268 }
269
270 /***
271 * Generate unique identifier.
272 *
273 * @return the unique identifier
274 */
275 public static String genID() {
276 return "I" + UUIDGenerator.getInstance().generateTimeBasedUUID();
277 }
278
279 /***
280 * Create XML Document of the specified DocumentType and with a root Element
281 * with the specified namespace, namespace prefix and name.
282 *
283 * @param namespaceUri
284 * URI of namespace for root element
285 * @param namespacePrefix
286 * prefix to use for root element namespace
287 * @param rootElement
288 * local name of root element
289 * @param doctype
290 * DocumentType of document
291 * @return the XML Document, or null if error occurs
292 */
293 public static Document createDocument(String namespaceUri,
294 String namespacePrefix, String rootElement, DocumentType doctype) {
295 try {
296 DocumentBuilder db = DBF.newDocumentBuilder();
297 DOMImplementation di = db.getDOMImplementation();
298 String qName = namespacePrefix != null ? namespacePrefix + ":" + rootElement : rootElement;
299 Document doc = di.createDocument(namespaceUri, qName, doctype);
300 return doc;
301 } catch (ParserConfigurationException e) {
302 e.printStackTrace();
303 return null;
304 }
305 }
306
307 /***
308 * Create XML Document with the specified root element.
309 *
310 * @param elem
311 * the root element
312 * @return the XML Document, or null if error occurs
313 */
314 public static Document createDocument(Element elem) {
315 try {
316 DocumentBuilder db = DBF.newDocumentBuilder();
317 Document doc = db.newDocument();
318 if (elem != null) {
319 Node n = doc.importNode(elem, true);
320 doc.appendChild(n);
321 }
322 return doc;
323 } catch (ParserConfigurationException e) {
324 e.printStackTrace();
325 return null;
326 }
327 }
328
329 /***
330 * Get the XSLT transformation from the specified URL.
331 *
332 * @param url
333 * URL to get the XSLT transformation from, or null to get identity (null) transformation
334 * @param logger
335 * Logger to use for error messages
336 * @return the Transformer, never null
337 * @throws XmlException
338 * if error occurs
339 */
340 public static Transformer getTransformer(URL url, final Logger logger) throws XmlException {
341 Transformer result = TRANSFORMER_CACHE.get(url);
342
343 if (result == null) {
344 throw new XmlException("THIS SHOULD NEVER HAPPEN: transformer is null");
345 }
346 if (logger != null) {
347 result.setErrorListener(new ErrorListener() {
348 public void warning(TransformerException e) throws TransformerException {
349 if (logger.isDebugEnabled()) {
350 logger.debug(e.getMessage());
351 }
352 }
353 public void error(TransformerException e) throws TransformerException {
354 logger.warn(e.getMessage(), e);
355 }
356 public void fatalError(TransformerException e) throws TransformerException {
357 logger.error(e.getMessage(), e);
358 }
359 });
360 }
361 return result;
362 }
363
364 /***
365 * Get the identity (null) transformation.
366 *
367 * @return the identity (null) transformation
368 */
369 public static Transformer getIdentityTransformation() {
370 return (Transformer) identityTransformation.get();
371 }
372
373 /***
374 * Serialize XML node to byte array in UTF-8 format.
375 *
376 * @param node
377 * the Node to serialize
378 * @param outputHeader
379 * true if XML header should be added, false otherwise
380 * @return byte array containing the UTF-8 bytes
381 */
382 public static byte[] xmlToUtf8(Node node, boolean outputHeader) {
383 ByteArrayOutputStream baos = new ByteArrayOutputStream();
384 BufferedOutputStream bos = new BufferedOutputStream(baos);
385 xmlToUtf8(node, bos, outputHeader);
386 try {
387 bos.close();
388 } catch (IOException e) {
389
390 e.printStackTrace();
391 }
392 return baos.toByteArray();
393 }
394
395 /***
396 * Serialize XML node to specified output stream in UTF-8 format.
397 *
398 * @param node
399 * the Node to serialize
400 * @param os
401 * the OutputStream to serialize to
402 * @param outputHeader
403 * true if XML header should be added, false otherwise
404 */
405 public static void xmlToUtf8(Node node, OutputStream os, boolean outputHeader) {
406 try {
407 Transformer t = getIdentityTransformation();
408 t.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, outputHeader ? "no" : "yes");
409 DOMSource source = new DOMSource(node);
410 Writer w = new BufferedWriter(new OutputStreamWriter(os, "UTF-8"));
411 StreamResult result = new StreamResult(w);
412 t.transform(source, result);
413 } catch (Exception e) {
414 e.printStackTrace();
415 }
416 }
417
418 /***
419 * Get XML Document from the specified URL.
420 *
421 * @param url
422 * URL to parse from
423 * @return the XML Document
424 * @throws XmlException
425 * if error occurs
426 */
427 public static Document getDocument(URL url) throws XmlException {
428 return DOCUMENT_CACHE.get(url);
429 }
430
431 }
432
433
434
435 /***
436 * XML Document cache.
437 *
438 * @author Fredrik Vraalsen
439 */
440 class DocumentCache implements javax.xml.transform.URIResolver {
441
442 private static final Logger LOGGER = Logger.getLogger(DocumentCache.class);
443 private HashMap documents = new HashMap();
444
445 /***
446 * Cached get of the XML Document from the specified URL.
447 *
448 * @param url
449 * the URL of the document
450 * @return the Document, or null if document could not be retrieved from URL
451 * @throws XmlException
452 * if error in parsing XML document
453 */
454 public Document get(URL url) throws XmlException {
455 DocumentEntry entry = getEntry(url);
456 return entry != null ? entry.getDoc() : null;
457 }
458
459 /***
460 * Get cache entry for XML document with specified URL, retrieving document
461 * if not in cache or not up to date.
462 *
463 * @param url
464 * the URL of the document
465 * @return the cache entry for the document, or null if document could not
466 * be retrieved from URL
467 * @throws XmlException
468 * if error in parsing XML document
469 */
470 private DocumentEntry getEntry(URL url) throws XmlException {
471 if (url == null) {
472 return null;
473 }
474 String urlString = url.toString();
475 long lastModified = 0;
476 URLConnection conn = null;
477 try {
478 conn = url.openConnection();
479 lastModified = conn.getLastModified();
480 } catch (IOException e) {
481
482 if (LOGGER.isDebugEnabled()) {
483 LOGGER.debug("IOException when loading document: " + urlString);
484 }
485 }
486 LOGGER.debug("last modified: " + lastModified);
487 DocumentEntry result = getEntry(urlString, lastModified);
488 if (result != null) {
489 return result;
490 }
491 if (conn != null) {
492 synchronized (this) {
493 try {
494 Document doc = XmlHelper.parse(new InputSource(conn.getInputStream()));
495 return setEntry(urlString, lastModified, doc);
496 } catch (IOException e) {
497 throw new XmlException("Unable to read XML: " + url, e);
498 }
499 }
500 } else {
501 return null;
502 }
503 }
504
505 /***
506 * Get cache entry for XML document with specified URL if up to date,
507 * otherwise remove entry from cache.
508 *
509 * @param url
510 * the URL of the document
511 * @param lastModified
512 * last modification time of document
513 * @return the cache entry for the document, or null if document is not up
514 * to date
515 */
516 private DocumentEntry getEntry(String url, long lastModified) {
517 SoftReference ref = (SoftReference) documents.get(url);
518 if (ref == null) {
519 LOGGER.debug("Document cache MISS: " + url);
520 return null;
521 }
522 Object o = ref.get();
523 if (!(o instanceof DocumentEntry)) {
524 LOGGER.debug("Document cache MISS: " + url);
525 return null;
526 }
527 DocumentEntry entry = (DocumentEntry) o;
528 if (lastModified <= entry.getLastModified()) {
529 LOGGER.debug("Document cache HIT: " + url);
530 return entry;
531 } else {
532 LOGGER.debug("Document cache MISS (outdated): " + url);
533 documents.remove(url);
534 return null;
535 }
536 }
537
538 /***
539 * Set cache entry for XML document.
540 *
541 * @param url
542 * URL of document
543 * @param lastModified
544 * last modification time of document
545 * @param doc
546 * the XML document
547 * @return the cache entry
548 */
549 private synchronized DocumentEntry setEntry(String url, long lastModified, Document doc) {
550 if (doc != null) {
551 DocumentEntry entry = new DocumentEntry(doc, lastModified);
552 SoftReference ref = new SoftReference(entry);
553 documents.put(url, ref);
554 return entry;
555 } else {
556 documents.remove(url);
557 return null;
558 }
559 }
560
561 /***
562 * Resolve the XML document at the specified URI (base + href) into an XML
563 * Source object.
564 *
565 * @param href
566 * relative URI
567 * @param base
568 * base URI
569 * @return the Source object, or null if the document cannot be resolved
570 * @see javax.xml.transform.URIResolver#resolve(java.lang.String,
571 * java.lang.String)
572 */
573 public Source resolve(String href, String base) {
574 LOGGER.debug("resolve(" + href + ", " + base + ")");
575 URL url = null;
576 if (base != null) {
577 try {
578 URL baseUrl = new URL(base);
579 url = new URL(baseUrl, href);
580 } catch (MalformedURLException e1) {
581
582 e1.printStackTrace();
583 }
584 }
585 if (url == null) {
586 ClassLoader cl = getClass().getClassLoader();
587 url = cl.getResource(href);
588 if (url == null) {
589 url = cl.getResource("coras/uml/" + href);
590 }
591 }
592 LOGGER.debug("url = " + url);
593 try {
594 DocumentEntry entry = getEntry(url);
595 return entry != null ? entry.getSource() : null;
596 } catch (XmlException e) {
597 return null;
598 }
599 }
600 }
601
/**
 * Entry of the XML document cache: a parsed document, the modification time
 * it was cached with, and a lazily created Source view of the document.
 *
 * @author Fredrik Vraalsen
 */
final class DocumentEntry {

    private final long lastModified;
    private final Document doc;

    /** Created on the first call to getSource() and reused afterwards. */
    private Source source;

    /**
     * @param doc
     *            the XML document
     * @param lastModified
     *            last modification time of document
     */
    protected DocumentEntry(Document doc, long lastModified) {
        this.lastModified = lastModified;
        this.doc = doc;
    }

    /**
     * @return the XML document
     */
    public Document getDoc() {
        return this.doc;
    }

    /**
     * @return the last modification time of the document
     */
    public long getLastModified() {
        return this.lastModified;
    }

    /**
     * @return the document as a Source object; the same DOMSource instance is
     *         returned on every call
     */
    public Source getSource() {
        if (this.source != null) {
            return this.source;
        }
        this.source = new DOMSource(this.doc);
        return this.source;
    }

}
649
650
651 /***
652 * XSLT Transformer cache implemented on top of DocumentCache.
653 *
654 * @author Fredrik Vraalsen
655 */
656 class TransformerCache {
657
658 private static final Logger LOGGER = Logger.getLogger(TransformerCache.class);
659 private static final TransformerFactory TF = new TransformerFactoryImpl();
660 private HashMap transformations = new HashMap();
661
662 /***
663 * @param documentCache the document cache used to cache XSLT documents
664 */
665 protected TransformerCache(final DocumentCache documentCache) {
666 if (documentCache != null) {
667 final javax.xml.transform.URIResolver defaultResolver = TF.getURIResolver();
668 TF.setURIResolver(new javax.xml.transform.URIResolver() {
669 public Source resolve(String href, String base) throws TransformerException {
670 Source source = documentCache.resolve(href, base);
671 if (source == null) {
672 source = defaultResolver.resolve(href, base);
673 }
674 return source;
675 }
676
677 });
678 }
679 }
680
681 /***
682 * Cached get of the XSLT transformer from the specified URL.
683 *
684 * @param url
685 * the URL of the XSLT transformation
686 * @return the XSLT Transformer, or null if an error occurs
687 * @throws XmlException
688 * if error parsing XSTL transformation
689 */
690 public Transformer get(URL url) throws XmlException {
691 String urlString = url != null ? url.toString() : null;
692 LOGGER.debug("URL: " + urlString);
693 long lastModified = 0;
694 URLConnection conn = null;
695 if (url != null) {
696 try {
697 conn = url.openConnection();
698 lastModified = conn.getLastModified();
699 } catch (IOException e) {
700
701 if (LOGGER.isDebugEnabled()) {
702 LOGGER.debug("IOException when getting transformer: " + urlString);
703 }
704 }
705 }
706 LOGGER.debug("last modified: " + lastModified);
707 TransformerEntry entry = getEntry(urlString, lastModified);
708 Transformer result = entry != null ? entry.getTransformer() : null;
709 if (result == null) {
710 synchronized (this) {
711 try {
712 result = createTransformer(conn != null ? conn.getInputStream() : null, url);
713 } catch (IOException e) {
714 throw new XmlException("Unable to read XSLT: " + url, e);
715 }
716 setEntry(urlString, lastModified, result);
717 }
718 }
719 return result;
720 }
721
722 /***
723 * Get the cache entry for the XSLT transformer.
724 *
725 * @param url
726 * the URL of the XSLT transformation
727 * @param lastModified
728 * the last modification time of the XSLT transformation
729 * @return the cache entry for the transformer, or null if transformer is
730 * not up to date
731 */
732 private TransformerEntry getEntry(String url, long lastModified) {
733 SoftReference ref = (SoftReference) transformations.get(url);
734 if (ref == null) {
735 LOGGER.debug("Transformer cache MISS: " + url);
736 return null;
737 }
738 Object o = ref.get();
739 if (!(o instanceof TransformerEntry)) {
740 LOGGER.debug("Transformer cache MISS: " + url);
741 return null;
742 }
743 TransformerEntry entry = (TransformerEntry) o;
744 if (lastModified <= entry.getLastModified()) {
745 LOGGER.debug("Transformer cache HIT: " + url);
746 return entry;
747 } else {
748 LOGGER.debug("Transformer cache MISS (outdated): " + url);
749 transformations.remove(url);
750 return null;
751 }
752 }
753
754 /***
755 * Set the cache entry for the XSLT transformer.
756 *
757 * @param url
758 * URL of the XSLT transformation
759 * @param lastModified
760 * last modification time of XSLT transformation
761 * @param transformer
762 * the XSLT transformer
763 */
764 private synchronized void setEntry(String url, long lastModified, Transformer transformer) {
765 if (transformer != null) {
766 TransformerEntry entry = new TransformerEntry(transformer, lastModified);
767 SoftReference ref = new SoftReference(entry);
768 transformations.put(url, ref);
769 } else {
770 transformations.remove(url);
771 }
772 }
773
774 /***
775 * Create transformer from InputStream.
776 *
777 * @param is
778 * InputStream to parse XSLT transformation from
779 * @param url
780 * URL of the XSLT transformation
781 * @return the Transformer, or null if error creating Transformer
782 * @throws IOException
783 * if error reading from the InputStream
784 */
785 private static Transformer createTransformer(InputStream is, URL url) throws IOException {
786 try {
787 Transformer t = null;
788 if (is != null) {
789 Source source = new StreamSource(is, url != null ? url.toString() : null);
790 t = TF.newTransformer(source);
791 } else {
792 throw new FileNotFoundException("Unable to find XSLT file " + url);
793 }
794 t.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
795 t.setOutputProperty(OutputKeys.INDENT, "yes");
796 return t;
797 } catch (TransformerConfigurationException e) {
798 e.printStackTrace();
799 return null;
800 }
801 }
802
803 }
804
/**
 * Entry of the transformer cache: a Transformer together with the
 * modification time it was cached with.
 *
 * @author Fredrik Vraalsen
 */
final class TransformerEntry {

    private final long lastModified;
    private final Transformer transformer;

    /**
     * @param transformer the Transformer
     * @param lastModified last modification time of transformer
     */
    protected TransformerEntry(Transformer transformer, long lastModified) {
        this.lastModified = lastModified;
        this.transformer = transformer;
    }

    /**
     * @return the Transformer
     */
    public Transformer getTransformer() {
        return this.transformer;
    }

    /**
     * @return last modification time of the transformer
     */
    public long getLastModified() {
        return this.lastModified;
    }

}