XMLEncoder.java |
/*
 * $Id: XMLEncoder.java,v 1.195 2003/09/30 15:36:46 znerd Exp $
 */
package org.znerd.xmlenc;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.io.Writer;

/**
 * Encodes character streams for an XML document.
 *
 * <p>The markup characters <code>&lt;</code>, <code>&gt;</code> and
 * (optionally) <code>&amp;</code> are replaced by their predefined entity
 * references. All other characters are delegated to
 * {@link #text(Writer,char)}, which is implemented per encoding.
 *
 * @version $Revision: 1.195 $ $Date: 2003/09/30 15:36:46 $
 * @author Ernst de Haan (<a href="mailto:znerd@FreeBSD.org">znerd@FreeBSD.org</a>)
 * @author Jochen Schwoerer (j.schwoerer [at] web.de)
 *
 * @since xmlenc 0.1
 */
public abstract class XMLEncoder extends Object {

   // Unicode values:
   //
   // Hex   Dec   Char  Description
   // _____ _____ _____ ___________________
   //
   // 0x22  34    "     Quote
   // 0x26  38    &     Ampersand
   // 0x27  39    '     Apostrophe
   // 0x3c  60    <     Less than
   // 0x3e  62    >     Greater than
   //
   // The following characters are invalid according to the XML 1.0
   // Specification:
   //
   // Hex   Dec   Char  Description
   // _____ _____ _____ ___________________
   //
   // 0x00  0     NUL   Null character
   // 0x01  1     SOH
   // 0x02  2     STX
   // 0x03  3     ETX
   // 0x04  4     EOT   End of transmission
   // 0x05  5     ENQ
   // 0x06  6     ACK
   // 0x07  7     BEL   Beep
   // 0x08  8     BS    Backspace
   // 0x0b  11    VT    Home
   // 0x0c  12    FF    Form feed
   // 0x0e  14    SO
   // 0x0f  15    SI
   // 0x10  16    DLE
   // 0x11  17    DC1
   // 0x12  18    DC2
   // 0x13  19    DC3
   // 0x14  20    DC4
   // 0x15  21    NAK
   // 0x16  22    SYN
   // 0x17  23    ETB
   // 0x18  24    CAN
   // 0x19  25    EM
   // 0x1a  26    SUB
   // 0x1b  27    ESC
   // 0x1c  28    FS    Cursor right
   // 0x1d  29    GS    Cursor left
   // 0x1e  30    RS    Cursor up
   // 0x1f  31    US    Cursor down
   //
   // So the following ranges are invalid: 0x00-0x08, 0x0b-0x0c and 0x0e-0x1f.
   // As decimals: 00-08, 11-12 and 14-31.
   //
   // See:
   // http://www.w3.org/TR/REC-xml
   // http://www.jimprice.com/ascii-0-127.gif

   //-------------------------------------------------------------------------
   // Class functions
   //-------------------------------------------------------------------------

   /**
    * Retrieves an <code>XMLEncoder</code> for the specified encoding. If no
    * suitable encoder can be found, then an exception is thrown.
    *
    * <p>Encoding names are matched case-insensitively. Any name that starts
    * with <code>"ISO-8859-"</code> is accepted, whatever follows the prefix.
    *
    * @param encoding
    *    the name of the encoding, not <code>null</code>.
    *
    * @return
    *    an encoder for the specified encoding, never <code>null</code>.
    *
    * @throws IllegalArgumentException
    *    if <code>encoding == null</code>.
    *
    * @throws UnsupportedEncodingException
    *    if the specified encoding is not supported.
    */
   public static final XMLEncoder getEncoder(String encoding)
   throws IllegalArgumentException, UnsupportedEncodingException {

      // Check argument
      if (encoding == null) {
         throw new IllegalArgumentException("encoding == null");
      }

      // Check if the encoding supports all Unicode characters
      if (supportsAllUnicode(encoding)) {
         return new UnicodeXMLEncoder(encoding);

      // Check if this is an ISO 646-based character set (7-bit ASCII)
      } else if (isIso646Based(encoding)) {
         return new SevenBitXMLEncoder(encoding);

      // Otherwise fail
      } else {
         throw new UnsupportedEncodingException(encoding);
      }
   }

   /**
    * Determines if the named encoding is one of the known encodings that can
    * represent every Unicode character.
    *
    * @param encoding
    *    the name of the encoding, not <code>null</code>.
    *
    * @return
    *    <code>true</code> if the name matches, ignoring case, one of the
    *    recognized Unicode encodings.
    */
   private static boolean supportsAllUnicode(String encoding) {
      return "UTF-8".equalsIgnoreCase(encoding)
          || "UTF-16".equalsIgnoreCase(encoding)
          || "ISO-10646-UCS-2".equalsIgnoreCase(encoding)
          || "ISO-10646-UCS-4".equalsIgnoreCase(encoding)
          || "ISO-10646-UTF-1".equalsIgnoreCase(encoding);
   }

   /**
    * Determines if the named encoding is handled by the seven-bit encoder.
    * This is the case for ASCII and for every name with the prefix
    * <code>"ISO-8859-"</code>.
    *
    * @param encoding
    *    the name of the encoding, not <code>null</code>.
    *
    * @return
    *    <code>true</code> if the name matches, ignoring case.
    */
   private static boolean isIso646Based(String encoding) {
      return "US-ASCII".equalsIgnoreCase(encoding)
          || "ASCII".equalsIgnoreCase(encoding)
          || encoding.regionMatches(true, 0, ISO_8859_PREFIX, 0, ISO_8859_PREFIX.length());
   }

   /**
    * Checks that the specified range lies within the specified array. The
    * comparison is written so that <code>start + length</code> cannot
    * overflow. A negative <code>length</code> is not rejected; it denotes an
    * empty range.
    *
    * @param ch
    *    the array, not <code>null</code>.
    *
    * @param start
    *    the start index into <code>ch</code>.
    *
    * @param length
    *    the number of characters in the range.
    *
    * @throws NullPointerException
    *    if <code>ch == null</code>.
    *
    * @throws IndexOutOfBoundsException
    *    if <code>start &lt; 0 || start + length &gt; ch.length</code>.
    */
   private static void checkRange(char[] ch, int start, int length)
   throws NullPointerException, IndexOutOfBoundsException {
      if (start < 0 || length > ch.length - start) {
         throw new IndexOutOfBoundsException("start == " + start + "; length == " + length + "; ch.length == " + ch.length);
      }
   }


   //-------------------------------------------------------------------------
   // Class fields
   //-------------------------------------------------------------------------

   /**
    * The prefix shared by all ISO 8859 encoding names.
    */
   private static final String ISO_8859_PREFIX = "ISO-8859-";

   /**
    * The tab character, <code>'\t'</code>.
    */
   private static final char CHAR_TAB_9 = '\t';

   /**
    * The line feed character, <code>'\n'</code>.
    */
   private static final char CHAR_LF_10 = '\n';

   /**
    * The carriage return character, <code>'\r'</code>.
    */
   private static final char CHAR_CR_13 = '\r';

   /**
    * The character with Unicode value 31 (U+001F), the highest control
    * character below the space character.
    */
   private static final char CHAR_31 = '\u001f';

   /**
    * The quote character, <code>'"'</code>.
    */
   private static final char CHAR_QUOTE_34 = '"';

   /**
    * The ampersand character, <code>'&amp;'</code>.
    */
   private static final char CHAR_AMPERSAND_38 = '&';

   /**
    * The apostrophe character, <code>'\''</code>.
    */
   private static final char CHAR_APOSTROPHE_39 = '\'';

   /**
    * The less than character, <code>'&lt;'</code>.
    */
   private static final char CHAR_LESS_THAN_60 = '<';

   /**
    * The greater than character, <code>'&gt;'</code>.
    */
   private static final char CHAR_GREATER_THAN_62 = '>';

   /**
    * The entity reference that replaces a quote character.
    */
   private static final String ESC_QUOTE = "&quot;";

   /**
    * The entity reference that replaces an ampersand character.
    */
   private static final String ESC_AMPERSAND = "&amp;";

   /**
    * The entity reference that replaces an apostrophe character.
    */
   private static final String ESC_APOSTROPHE = "&apos;";

   /**
    * The entity reference that replaces a less than character.
    */
   private static final String ESC_LESS_THAN = "&lt;";

   /**
    * The entity reference that replaces a greater than character.
    */
   private static final String ESC_GREATER_THAN = "&gt;";


   //-------------------------------------------------------------------------
   // Constructor
   //-------------------------------------------------------------------------

   /**
    * Constructs a new <code>XMLEncoder</code>.
    */
   protected XMLEncoder() {
      // empty
   }


   //-------------------------------------------------------------------------
   // Fields
   //-------------------------------------------------------------------------

   //-------------------------------------------------------------------------
   // Methods
   //-------------------------------------------------------------------------

   /**
    * Returns the encoding of this encoder.
    *
    * @return
    *    the encoding of this encoder, never <code>null</code>.
    */
   public abstract String getEncoding();

   /**
    * Writes the specified text to the output stream. Any characters that are
    * non-printable in this character set will be escaped.
    *
    * <p>This method does not explicitly check if <code>text == null</code>;
    * a <code>null</code> text results in a {@link NullPointerException}.
    *
    * @param out
    *    the output stream to print to, not <code>null</code>.
    *
    * @param text
    *    the text to be printed, not <code>null</code>.
    *
    * @param escapeAmpersands
    *    flag that indicates if ampersands should be escaped.
    *
    * @throws NullPointerException
    *    if <code>out == null || text == null</code>.
    *
    * @throws InvalidXMLException
    *    if the specified text contains an invalid character.
    *
    * @throws IOException
    *    if an I/O error occurs.
    */
   public void text(Writer out, String text, boolean escapeAmpersands)
   throws NullPointerException, InvalidXMLException, IOException {
      char[] ch = text.toCharArray();
      text(out, ch, 0, ch.length, escapeAmpersands);
   }

   /**
    * Writes the text in the specified character array to the output stream.
    * Any characters that are non-printable in this character set will be
    * escaped.
    *
    * <p>The characters <code>&lt;</code> and <code>&gt;</code> are always
    * written as entity references. An ampersand is written as an entity
    * reference only if <code>escapeAmpersands</code> is set; otherwise it is
    * written unchanged. Control characters below the space character, other
    * than tab, line feed and carriage return, are rejected.
    *
    * <p>The range is validated before any output is produced.
    *
    * @param out
    *    the output stream to print to, not <code>null</code>.
    *
    * @param ch
    *    the array from which to retrieve the text to be printed.
    *
    * @param start
    *    the start index into <code>ch</code>, must be &gt;= 0.
    *
    * @param length
    *    the number of characters to take from <code>ch</code>, starting at
    *    the <code>start</code> index.
    *
    * @param escapeAmpersands
    *    flag that indicates if ampersands should be escaped.
    *
    * @throws NullPointerException
    *    if <code>out == null || ch == null</code>.
    *
    * @throws IndexOutOfBoundsException
    *    if <code>start &lt; 0
    *          || start + length &gt; ch.length</code>.
    *
    * @throws InvalidXMLException
    *    if the specified text contains an invalid character.
    *
    * @throws IOException
    *    if an I/O error occurs.
    */
   public void text(Writer out, char[] ch, int start, int length, boolean escapeAmpersands)
   throws NullPointerException, IndexOutOfBoundsException, InvalidXMLException, IOException {

      checkRange(ch, start, length);

      // The range to process is [start, start + length), not [start, length)
      int end = start + length;

      for (int i = start; i < end; i++) {
         char c = ch[i];

         // Everything above '>' never needs markup escaping
         if (c > CHAR_GREATER_THAN_62) {
            text(out, c);

         // Printable range that contains the markup characters
         } else if (c > CHAR_31) {
            if (c == CHAR_LESS_THAN_60) {
               out.write(ESC_LESS_THAN);
            } else if (c == CHAR_GREATER_THAN_62) {
               out.write(ESC_GREATER_THAN);
            } else if (c == CHAR_AMPERSAND_38) {
               if (escapeAmpersands) {
                  out.write(ESC_AMPERSAND);
               } else {
                  out.write(c);
               }
            } else {
               text(out, c);
            }

         // The only control characters that are valid in XML 1.0
         } else if (c == CHAR_TAB_9 || c == CHAR_LF_10 || c == CHAR_CR_13) {
            text(out, c);

         } else {
            // XXX: We could cache the messages in an array...
            throw new InvalidXMLException("The character 0x" + Integer.toHexString((int) c) + " is not valid.");
         }
      }
   }

   /**
    * Writes the specified character to the output stream. If the character is
    * non-printable in this character set, then it will be escaped.
    *
    * <p>It is safe for this method to assume that the specified character
    * does not need to be escaped unless the encoding does not support the
    * character.
    *
    * @param out
    *    the output stream to print to, not <code>null</code>.
    *
    * @param c
    *    the character to be printed.
    *
    * @throws IOException
    *    if an I/O error occurs.
    */
   public abstract void text(Writer out, char c)
   throws IOException;

   /**
    * Writes an XML declaration.
    *
    * @param out
    *    the <code>Writer</code> to write the declaration to, not
    *    <code>null</code>.
    *
    * @throws NullPointerException
    *    if <code>out == null</code>.
    *
    * @throws IOException
    *    if an I/O error occurs.
    */
   public abstract void declaration(Writer out)
   throws NullPointerException, IOException;

   /**
    * Writes the whitespace in the specified character string to the output
    * stream.
    *
    * @param out
    *    the output stream to print to, not <code>null</code>.
    *
    * @param s
    *    the character string to print, not <code>null</code>.
    *
    * @throws NullPointerException
    *    if <code>out == null || s == null</code>.
    *
    * @throws InvalidXMLException
    *    if the specified character string contains a character that is
    *    invalid as whitespace.
    *
    * @throws IOException
    *    if an I/O error occurs.
    */
   public void whitespace(Writer out, String s)
   throws NullPointerException, InvalidXMLException, IOException {
      char[] ch = s.toCharArray();
      whitespace(out, ch, 0, ch.length);
   }

   /**
    * Writes the whitespace in the specified character array to the output
    * stream. Only the characters space (0x20), tab (0x9), carriage return
    * (0xD) and line feed (0xA) are accepted.
    *
    * <p>The range is validated before any output is produced.
    *
    * @param out
    *    the output stream to print to, not <code>null</code>.
    *
    * @param ch
    *    the array from which to retrieve the text to be printed.
    *
    * @param start
    *    the start index into <code>ch</code>, must be &gt;= 0.
    *
    * @param length
    *    the number of characters to take from <code>ch</code>, starting at
    *    the <code>start</code> index.
    *
    * @throws NullPointerException
    *    if <code>out == null || ch == null</code>.
    *
    * @throws IndexOutOfBoundsException
    *    if <code>start &lt; 0
    *          || start + length &gt; ch.length</code>.
    *
    * @throws InvalidXMLException
    *    if the specified character array contains a character that is invalid
    *    as whitespace.
    *
    * @throws IOException
    *    if an I/O error occurs.
    */
   public void whitespace(Writer out, char[] ch, int start, int length)
   throws NullPointerException, IndexOutOfBoundsException, InvalidXMLException, IOException {

      checkRange(ch, start, length);

      // The range to process is [start, start + length), not [start, length)
      int end = start + length;

      // See:
      // http://www.w3.org/TR/REC-xml#NT-S
      for (int i = start; i < end; i++) {
         char c = ch[i];

         if (c == 0x20 || c == 0x9 || c == 0xD || c == 0xA) {
            out.write(c);
         } else {
            throw new InvalidXMLException("The character 0x" + Integer.toHexString((int) c) + " is not valid white space.");
         }
      }
   }

   /**
    * Writes an attribute assignment. The output consists of a space, the
    * name, an equals sign and the quoted, escaped value.
    *
    * <p>In the value, <code>&lt;</code> and <code>&gt;</code> are always
    * written as entity references, as is the character that is used as the
    * quotation mark. An ampersand is written as an entity reference only if
    * <code>escapeAmpersands</code> is set. All other characters are passed
    * to {@link #text(Writer,char)}; unlike
    * {@link #text(Writer,char[],int,int,boolean)}, this method does not
    * reject control characters.
    *
    * @param out
    *    the <code>Writer</code> to write the declaration to, not
    *    <code>null</code>.
    *
    * @param name
    *    the name of the attribute, should not be <code>null</code>.
    *
    * @param value
    *    the value of the attribute, should not be <code>null</code>.
    *
    * @param quotationMarkApostrophe
    *    flag that indicates if the quotation mark is the apostrophe character
    *    (<code>'\''</code>); if <code>false</code> the quotation mark is the
    *    quote character (<code>'"'</code>).
    *
    * @param escapeAmpersands
    *    flag that indicates if ampersands in the value should be escaped.
    *
    * @throws NullPointerException
    *    if <code>out == null || name == null || value == null</code>.
    *
    * @throws IOException
    *    if an I/O error occurs.
    */
   public void attribute(Writer out, String name, String value, boolean quotationMarkApostrophe, boolean escapeAmpersands)
   throws NullPointerException, IOException {

      // Dereference the value first, so nothing is written if it is null
      char[] chars = value.toCharArray();
      int length = chars.length;

      // Only the quotation mark that is actually used needs to be escaped
      char quotationMark = quotationMarkApostrophe ? CHAR_APOSTROPHE_39 : CHAR_QUOTE_34;
      String escapedQuotationMark = quotationMarkApostrophe ? ESC_APOSTROPHE : ESC_QUOTE;

      out.write(' ');
      out.write(name);
      out.write('=');
      out.write(quotationMark);

      for (int i = 0; i < length; i++) {
         char c = chars[i];

         if (c == CHAR_LESS_THAN_60) {
            out.write(ESC_LESS_THAN);
         } else if (c == CHAR_GREATER_THAN_62) {
            out.write(ESC_GREATER_THAN);
         } else if (c == quotationMark) {
            out.write(escapedQuotationMark);
         } else if (c == CHAR_AMPERSAND_38 && escapeAmpersands) {
            out.write(ESC_AMPERSAND);
         } else {
            text(out, c);
         }
      }

      out.write(quotationMark);
   }
}