// XMLEncoder.java
/*
* $Id: XMLEncoder.java,v 1.195 2003/09/30 15:36:46 znerd Exp $
*/
package org.znerd.xmlenc;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
/**
* Encodes character streams for an XML document.
*
* @version $Revision: 1.195 $ $Date: 2003/09/30 15:36:46 $
* @author Ernst de Haan (<a href="mailto:znerd@FreeBSD.org">znerd@FreeBSD.org</a>)
* @author Jochen Schwoerer (j.schwoerer [at] web.de)
*
* @since xmlenc 0.1
*/
public abstract class XMLEncoder extends Object {
// Unicode values:
//
// Hex Dec Char Description
// _____ _____ _____ ___________________
//
// 0x22 34 " Quote
// 0x26 38 & Ampersand
// 0x27 39 ' Apostrophe
// 0x3c 60 < Less than
// 0x3e 62 > Greater than
//
// The following characters are invalid according to the XML 1.0
// Specification:
//
// Hex Dec Char Description
// _____ _____ _____ ___________________
//
// 0x00 0 NUL Null character
// 0x01 1 SOH
// 0x02 2 STX
// 0x03 3 ETX
// 0x04 4 EOT End of transmission
// 0x05 5 ENQ
// 0x06 6 ACK
// 0x07 7 BEL Beep
// 0x08 8 BS Backspace
// 0x0b 11 VT Home
// 0x0c 12 FF Form feed
// 0x0e 14 SO
// 0x0f 15 SI
// 0x10 16 DLE
// 0x11 17 DC1
// 0x12 18 DC2
// 0x13 19 DC3
// 0x14 20 DC4
// 0x15 21 NAK
// 0x16 22 SYN
// 0x17 23 ETB
// 0x18 24 CAN
// 0x19 25 EM
// 0x1a 26 SUB
// 0x1b 27 ESC
// 0x1c 28 FS Cursor right
// 0x1d 29 GS Cursor left
// 0x1e 30 RS Cursor up
// 0x1f 31 US Cursor down
//
// So the following ranges are invalid: 0x00-0x08, 0x0b-0x0c and 0x0e-0x1f.
// As decimals: 00-08, 11-12 and 14-31.
//
// See:
// http://www.w3.org/TR/REC-xml
// http://www.jimprice.com/ascii-0-127.gif
//-------------------------------------------------------------------------
// Class functions
//-------------------------------------------------------------------------
/**
 * Looks up the <code>XMLEncoder</code> implementation that handles the
 * specified encoding.
 *
 * <p>The names <code>UTF-8</code>, <code>UTF-16</code>,
 * <code>ISO-10646-UCS-2</code>, <code>ISO-10646-UCS-4</code> and
 * <code>ISO-10646-UTF-1</code> (compared case-insensitively) yield a
 * <code>UnicodeXMLEncoder</code>. The names <code>US-ASCII</code> and
 * <code>ASCII</code> (compared case-insensitively), as well as every name
 * that starts with <code>ISO-8859-</code> (the letters in any case), yield
 * a <code>SevenBitXMLEncoder</code>. Any other name is rejected.
 *
 * @param encoding
 *    the name of the encoding, not <code>null</code>.
 *
 * @return
 *    an encoder for the specified encoding, never <code>null</code>.
 *
 * @throws IllegalArgumentException
 *    if <code>encoding == null</code>.
 *
 * @throws UnsupportedEncodingException
 *    if the specified encoding is not supported.
 */
public static final XMLEncoder getEncoder(String encoding)
throws IllegalArgumentException, UnsupportedEncodingException {

    // Reject a missing encoding name up front
    if (encoding == null) {
        throw new IllegalArgumentException("encoding == null");
    }

    // Encodings that can represent every Unicode character
    final boolean fullUnicode =
           "UTF-8".equalsIgnoreCase(encoding)
        || "UTF-16".equalsIgnoreCase(encoding)
        || "ISO-10646-UCS-2".equalsIgnoreCase(encoding)
        || "ISO-10646-UCS-4".equalsIgnoreCase(encoding)
        || "ISO-10646-UTF-1".equalsIgnoreCase(encoding);
    if (fullUnicode) {
        return new UnicodeXMLEncoder(encoding);
    }

    // Plain ASCII names
    if ("US-ASCII".equalsIgnoreCase(encoding) || "ASCII".equalsIgnoreCase(encoding)) {
        return new SevenBitXMLEncoder(encoding);
    }

    // The ISO-8859 family: "-8859-" must follow the letters i, s, o, each of
    // which may be in either case. startsWith(prefix, 3) can only succeed
    // when the name has at least 9 characters, so the charAt calls are safe.
    if (encoding.startsWith("-8859-", 3)) {
        final char first  = encoding.charAt(0);
        final char second = encoding.charAt(1);
        final char third  = encoding.charAt(2);
        if ((first  == 'i' || first  == 'I')
         && (second == 's' || second == 'S')
         && (third  == 'o' || third  == 'O')) {
            return new SevenBitXMLEncoder(encoding);
        }
    }

    // Nothing matched
    throw new UnsupportedEncodingException(encoding);
}
//-------------------------------------------------------------------------
// Class fields
//-------------------------------------------------------------------------
/**
* The horizontal tab character (Unicode value 9), <code>'\t'</code>. It is
* one of the three characters below 32 that the character-array
* <code>text</code> method accepts.
*/
private static final char CHAR_TAB_9 = '\t';
/**
* The line feed character (Unicode value 10), <code>'\n'</code>. It is one
* of the three characters below 32 that the character-array
* <code>text</code> method accepts.
*/
private static final char CHAR_LF_10 = '\n';
/**
* The carriage return character (Unicode value 13), <code>'\r'</code>. It
* is one of the three characters below 32 that the character-array
* <code>text</code> method accepts.
*/
private static final char CHAR_CR_13 = '\r';
/**
* The character with Unicode value 31 (hexadecimal 1f). The
* character-array <code>text</code> method uses it as the upper bound of
* the range in which only tab, line feed and carriage return are accepted.
*/
private static final char CHAR_31 = '\u001f';
/**
* The quote character (Unicode value 34), <code>'"'</code>.
*/
private static final char CHAR_QUOTE_34 = '"';
/**
* The ampersand character (Unicode value 38), <code>'&amp;'</code>.
*/
private static final char CHAR_AMPERSAND_38 = '&';
/**
* The apostrophe character (Unicode value 39), <code>'\''</code>.
*/
private static final char CHAR_APOSTROPHE_39 = '\'';
/**
* The less than character (Unicode value 60), <code>'&lt;'</code>.
*/
private static final char CHAR_LESS_THAN_60 = '<';
/**
* The greater than character (Unicode value 62), <code>'&gt;'</code>. The
* character-array <code>text</code> method passes every character above
* this value straight to the per-character <code>text</code> method.
*/
private static final char CHAR_GREATER_THAN_62 = '>';
//-------------------------------------------------------------------------
// Constructor
//-------------------------------------------------------------------------
/**
* Constructs a new <code>XMLEncoder</code>. This base class declares no
* instance fields, so there is nothing to initialize. The constructor is
* <code>protected</code> because the class is abstract and is only
* instantiated through subclasses.
*/
protected XMLEncoder() {
// empty
}
//-------------------------------------------------------------------------
// Fields
//-------------------------------------------------------------------------
//-------------------------------------------------------------------------
// Methods
//-------------------------------------------------------------------------
/**
* Returns the encoding of this encoder.
*
* @return
* the encoding of this encoder, never <code>null</code>.
*/
public abstract String getEncoding();
/**
 * Writes the specified text to the output stream by converting it to a
 * character array and delegating to
 * {@link #text(Writer, char[], int, int, boolean)} for the complete array.
 * Any characters that are non-printable in this character set will be
 * escaped.
 *
 * <p>This method performs no explicit <code>null</code> check on
 * <code>text</code>; a <code>null</code> value fails when the string is
 * converted to a character array.
 *
 * @param out
 *    the output stream to print to, not <code>null</code>.
 *
 * @param text
 *    the text to be printed.
 *
 * @param escapeAmpersands
 *    flag that indicates if ampersands should be escaped.
 *
 * @throws NullPointerException
 *    if <code>out == null || text == null</code>.
 *
 * @throws InvalidXMLException
 *    if the specified text contains an invalid character.
 *
 * @throws IOException
 *    if an I/O error occurs.
 */
public void text(Writer out, String text, boolean escapeAmpersands)
throws NullPointerException, InvalidXMLException, IOException {
    final char[] characters = text.toCharArray();
    text(out, characters, 0, characters.length, escapeAmpersands);
}
/**
 * Writes a range of the specified character array to the output stream as
 * XML character data.
 *
 * <p>Each character in the range
 * <code>ch[start] .. ch[start + length - 1]</code> is handled as follows:
 *
 * <ul>
 * <li><code>'&lt;'</code> and <code>'&gt;'</code> are always written as
 *     the entity references <code>&amp;lt;</code> and
 *     <code>&amp;gt;</code>;
 * <li><code>'&amp;'</code> is written as <code>&amp;amp;</code> if
 *     <code>escapeAmpersands</code> is <code>true</code>, otherwise it is
 *     written unchanged;
 * <li>tab, line feed, carriage return and all other characters above
 *     Unicode value 31 are passed to {@link #text(Writer, char)}, which
 *     escapes them if the encoding cannot represent them;
 * <li>any other character with a Unicode value of 31 or lower is not
 *     allowed in XML 1.0 and causes an <code>InvalidXMLException</code>.
 * </ul>
 *
 * <p>The range is validated before anything is written, so an invalid
 * range never results in partial output. An invalid character, however,
 * is detected only when it is reached; the characters before it have
 * already been written at that point.
 *
 * @param out
 *    the output stream to print to, not <code>null</code>.
 *
 * @param ch
 *    the array from which to retrieve the text to be printed.
 *
 * @param start
 *    the start index into <code>ch</code>, must be &gt;= 0.
 *
 * @param length
 *    the number of characters to take from <code>ch</code>, starting at
 *    the <code>start</code> index.
 *
 * @param escapeAmpersands
 *    flag that indicates if ampersands should be escaped.
 *
 * @throws NullPointerException
 *    if <code>out == null || ch == null</code>.
 *
 * @throws IndexOutOfBoundsException
 *    if <code>start &lt; 0
 *          || start + length &gt; ch.length</code>.
 *
 * @throws InvalidXMLException
 *    if the specified text contains an invalid character.
 *
 * @throws IOException
 *    if an I/O error occurs.
 */
public void text(Writer out, char[] ch, int start, int length, boolean escapeAmpersands)
throws NullPointerException, IndexOutOfBoundsException, InvalidXMLException, IOException {

    // Validate the range first. The comparison is written as
    // "length > ch.length - start" so that it cannot overflow.
    if (start < 0 || length > ch.length - start) {
        throw new IndexOutOfBoundsException("start = " + start + ", length = " + length + ", ch.length = " + ch.length);
    }

    // The range ends at start + length, not at length
    final int end = start + length;
    for (int i = start; i < end; i++) {
        char c = ch[i];

        // Nothing above '>' needs markup escaping
        if (c > CHAR_GREATER_THAN_62) {
            text(out, c);

        // Printable range that contains the markup characters
        } else if (c > CHAR_31) {
            if (c == CHAR_LESS_THAN_60) {
                out.write("&lt;");
            } else if (c == CHAR_GREATER_THAN_62) {
                out.write("&gt;");
            } else if (c == CHAR_AMPERSAND_38) {
                if (escapeAmpersands) {
                    out.write("&amp;");
                } else {
                    // The caller has asked for ampersands to pass through
                    out.write(c);
                }
            } else {
                text(out, c);
            }

        // Control characters: only tab, line feed and carriage return
        // are valid
        } else if (c == CHAR_TAB_9 || c == CHAR_LF_10 || c == CHAR_CR_13) {
            text(out, c);
        } else {
            // XXX: We could cache the messages in an array...
            throw new InvalidXMLException("The character 0x" + Integer.toHexString((int) c) + " is not valid.");
        }
    }
}
/**
* Writes the specified character to the output stream. If the character is
* non-printable in this character set, then it will be escaped.
*
* <p>It is safe for this method to assume that the specified character
* does not need to be escaped unless the encoding does not support the
* character. The other methods in this class call it only for characters
* they have not already written themselves as an entity reference.
*
* @param out
* the output stream to print to, not <code>null</code>.
*
* @param c
* the character to be printed.
*
* @throws IOException
* if an I/O error occurs.
*/
public abstract void text(Writer out, char c) throws IOException;
/**
* Writes an XML declaration. The exact text written is determined by the
* implementing subclass.
*
* @param out
* the <code>Writer</code> to write the declaration to, not
* <code>null</code>.
*
* @throws NullPointerException
* if <code>out == null</code>.
*
* @throws IOException
* if an I/O error occurs.
*/
public abstract void declaration(Writer out)
throws NullPointerException, IOException;
/**
 * Writes the whitespace in the specified character string to the output
 * stream by converting the string to a character array and delegating to
 * {@link #whitespace(Writer, char[], int, int)} for the complete array.
 *
 * @param out
 *    the output stream to print to, not <code>null</code>.
 *
 * @param s
 *    the character string to print.
 *
 * @throws NullPointerException
 *    if <code>out == null || s == null</code>.
 *
 * @throws InvalidXMLException
 *    if the specified character string contains a character that is
 *    invalid as whitespace.
 *
 * @throws IOException
 *    if an I/O error occurs.
 */
public void whitespace(Writer out, String s)
throws NullPointerException, InvalidXMLException, IOException {
    final char[] characters = s.toCharArray();
    whitespace(out, characters, 0, characters.length);
}
/**
 * Writes the whitespace in a range of the specified character array to the
 * output stream.
 *
 * <p>Only the four XML whitespace characters are accepted: space (0x20),
 * tab (0x9), carriage return (0xD) and line feed (0xA). They are written
 * unchanged. Any other character in the range
 * <code>ch[start] .. ch[start + length - 1]</code> causes an
 * <code>InvalidXMLException</code>; the characters before it have already
 * been written at that point.
 *
 * <p>The range is validated before anything is written.
 *
 * @param out
 *    the output stream to print to, not <code>null</code>.
 *
 * @param ch
 *    the array from which to retrieve the text to be printed.
 *
 * @param start
 *    the start index into <code>ch</code>, must be &gt;= 0.
 *
 * @param length
 *    the number of characters to take from <code>ch</code>, starting at
 *    the <code>start</code> index.
 *
 * @throws NullPointerException
 *    if <code>out == null || ch == null</code>.
 *
 * @throws IndexOutOfBoundsException
 *    if <code>start &lt; 0
 *          || start + length &gt; ch.length</code>.
 *
 * @throws InvalidXMLException
 *    if the specified character array contains a character that is invalid
 *    as whitespace.
 *
 * @throws IOException
 *    if an I/O error occurs.
 */
public void whitespace(Writer out, char[] ch, int start, int length)
throws NullPointerException, IndexOutOfBoundsException, InvalidXMLException, IOException {

    // Validate the range first. The comparison is written as
    // "length > ch.length - start" so that it cannot overflow.
    if (start < 0 || length > ch.length - start) {
        throw new IndexOutOfBoundsException("start = " + start + ", length = " + length + ", ch.length = " + ch.length);
    }

    // See:
    // http://www.w3.org/TR/REC-xml#NT-S

    // The range ends at start + length, not at length
    final int end = start + length;
    for (int i = start; i < end; i++) {
        char c = ch[i];
        if (c == 0x20 || c == 0x9 || c == 0xD || c == 0xA) {
            out.write(c);
        } else {
            throw new InvalidXMLException("The character 0x" + Integer.toHexString((int) c) + " is not valid white space.");
        }
    }
}
/**
* Writes an attribute assignment.
*
* @param out
* the <code>Writer</code> to write the declaration to, not
* <code>null</code>.
*
* @param name
* the name of the attribute, should not be <code>null</code>.
*
* @param value
* the value of the attribute, should not be <code>null</code>.
*
* @param quotationMarkApostrophe
* flag that indicates if the quotation mark is the apostrophe character
* (<code>'\''</code>; if <code>false</code> the quotation mark is the
* quote character (<code>'"'</code>).
*
* @throws NullPointerException
* if <code>out == null || value == null</code>.
*
* @throws IOException
* if an I/O error occurs.
*/
public void attribute(Writer out, String name, String value, boolean quotationMarkApostrophe, boolean escapeAmpersands)
throws NullPointerException, IOException {
char[] chars = value.toCharArray();
int length = chars.length;
out.write(' ');
out.write(name);
if (quotationMarkApostrophe) {
out.write("='");
if (escapeAmpersands) {
for (int i = 0; i < length; i++) {
char c = chars[i];
if (c >= '"' && c <= '>') {
if (c == '&') {
out.write("&");
} else if (c == '<') {
out.write("<");
} else if (c == '>') {
out.write(">");
} else if (c == '\'') {
out.write("'");
} else {
text(out, c);
}
} else {
text(out, c);
}
}
} else {
for (int i = 0; i < length; i++) {
char c = chars[i];
if (c >= '"' && c <= '>') {
if (c == '<') {
out.write("<");
} else if (c == '>') {
out.write(">");
} else if (c == '\'') {
out.write("'");
} else {
text(out, c);
}
} else {
text(out, c);
}
}
}
out.write('\'');
} else {
out.write("=\"");
if (escapeAmpersands) {
for (int i = 0; i < length; i++) {
char c = chars[i];
if (c >= '"' && c <= '>') {
if (c == '&') {
out.write("&");
} else if (c == '<') {
out.write("<");
} else if (c == '>') {
out.write(">");
} else if (c == '"') {
out.write(""");
} else {
text(out, c);
}
} else {
text(out, c);
}
}
} else {
for (int i = 0; i < length; i++) {
char c = chars[i];
if (c >= '"' && c <= '>') {
if (c == '<') {
out.write("<");
} else if (c == '>') {
out.write(">");
} else if (c == '"') {
out.write(""");
} else {
text(out, c);
}
} else {
text(out, c);
}
}
}
out.write('"');
}
}
}