/*
 * $Id: XMLEncoder.java,v 1.195 2003/09/30 15:36:46 znerd Exp $
 */
package org.znerd.xmlenc;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.io.Writer;

/**
 * Encodes character streams for an XML document.
 *
 * @version $Revision: 1.195 $ $Date: 2003/09/30 15:36:46 $
 * @author Ernst de Haan (<a href="mailto:znerd@FreeBSD.org">znerd@FreeBSD.org</a>)
 * @author Jochen Schwoerer (j.schwoerer [at] web.de)
 *
 * @since xmlenc 0.1
 */
public abstract class XMLEncoder extends Object {

   // Characters that have a special meaning in XML markup:
   //
   //  Hex   Dec  Char  Description
   // _____ _____ _____ ___________________
   //
   //  0x22    34 "     Quote
   //  0x26    38 &     Ampersand
   //  0x27    39 '     Apostrophe
   //  0x3c    60 <     Less than
   //  0x3e    62 >     Greater than
   //
   // The following characters are invalid according to the XML 1.0
   // Specification:
   //
   //  Hex   Dec  Char  Description
   // _____ _____ _____ ___________________
   //
   //  0x00     0 NUL   Null character
   //  0x01     1 SOH   Start of heading
   //  0x02     2 STX   Start of text
   //  0x03     3 ETX   End of text
   //  0x04     4 EOT   End of transmission
   //  0x05     5 ENQ   Enquiry
   //  0x06     6 ACK   Acknowledge
   //  0x07     7 BEL   Bell
   //  0x08     8 BS    Backspace
   //  0x0b    11 VT    Vertical tab
   //  0x0c    12 FF    Form feed
   //  0x0e    14 SO    Shift out
   //  0x0f    15 SI    Shift in
   //  0x10    16 DLE   Data link escape
   //  0x11    17 DC1   Device control 1
   //  0x12    18 DC2   Device control 2
   //  0x13    19 DC3   Device control 3
   //  0x14    20 DC4   Device control 4
   //  0x15    21 NAK   Negative acknowledge
   //  0x16    22 SYN   Synchronous idle
   //  0x17    23 ETB   End of transmission block
   //  0x18    24 CAN   Cancel
   //  0x19    25 EM    End of medium
   //  0x1a    26 SUB   Substitute
   //  0x1b    27 ESC   Escape
   //  0x1c    28 FS    File separator
   //  0x1d    29 GS    Group separator
   //  0x1e    30 RS    Record separator
   //  0x1f    31 US    Unit separator
   //
   // So the following ranges are invalid: 0x00-0x08, 0x0b-0x0c and 0x0e-0x1f.
   // As decimals: 00-08, 11-12 and 14-31.
   //
   // See:
   // http://www.w3.org/TR/REC-xml
   // http://www.jimprice.com/ascii-0-127.gif


   //-------------------------------------------------------------------------
   // Class functions
   //-------------------------------------------------------------------------

   /**
    * Retrieves an <code>XMLEncoder</code> for the specified encoding. If no
    * suitable encoder can be found, then an exception is thrown.
    *
    * <p>Encoding names are matched case-insensitively. The Unicode encodings
    * (<code>UTF-8</code>, <code>UTF-16</code>, <code>ISO-10646-UCS-2</code>,
    * <code>ISO-10646-UCS-4</code> and <code>ISO-10646-UTF-1</code>) yield a
    * <code>UnicodeXMLEncoder</code>; <code>US-ASCII</code>,
    * <code>ASCII</code> and every name starting with <code>ISO-8859-</code>
    * yield a <code>SevenBitXMLEncoder</code>.
    *
    * @param encoding
    *    the name of the encoding, not <code>null</code>.
    *
    * @return
    *    an encoder for the specified encoding, never <code>null</code>.
    *
    * @throws IllegalArgumentException
    *    if <code>encoding == null</code>.
    *
    * @throws UnsupportedEncodingException
    *    if the specified encoding is not supported.
    */
   public static final XMLEncoder getEncoder(String encoding)
   throws IllegalArgumentException, UnsupportedEncodingException {

      // Check argument
      if (encoding == null) {
         throw new IllegalArgumentException("encoding == null");
      }

      // Check if the encoding supports all Unicode characters
      if ("UTF-8".equalsIgnoreCase(encoding)
       || "UTF-16".equalsIgnoreCase(encoding)
       || "ISO-10646-UCS-2".equalsIgnoreCase(encoding)
       || "ISO-10646-UCS-4".equalsIgnoreCase(encoding)
       || "ISO-10646-UTF-1".equalsIgnoreCase(encoding)) {
         return new UnicodeXMLEncoder(encoding);

      // ASCII and the ISO-8859 family are all served by the seven-bit
      // encoder. The prefix test is case-insensitive, just like the
      // equalsIgnoreCase checks around it.
      } else if ("US-ASCII".equalsIgnoreCase(encoding)
       || "ASCII".equalsIgnoreCase(encoding)
       || encoding.regionMatches(true, 0, ISO_8859_PREFIX, 0, ISO_8859_PREFIX.length())) {
         return new SevenBitXMLEncoder(encoding);

      // Otherwise fail
      } else {
         throw new UnsupportedEncodingException(encoding);
      }
   }


   //-------------------------------------------------------------------------
   // Class fields
   //-------------------------------------------------------------------------

   /**
    * The prefix shared by the names of all ISO-8859 encodings.
    */
   private static final String ISO_8859_PREFIX = "ISO-8859-";

   /**
    * The tab character, <code>'\t'</code>.
    */
   private static final char CHAR_TAB_9 = '\t';

   /**
    * The line feed character, <code>'\n'</code>.
    */
   private static final char CHAR_LF_10 = '\n';

   /**
    * The carriage return character, <code>'\r'</code>.
    */
   private static final char CHAR_CR_13 = '\r';

   /**
    * The character with Unicode value 31 (U+001F, unit separator). This is
    * the highest of the control characters below the space character.
    */
   private static final char CHAR_31 = 31;

   /**
    * The space character, <code>' '</code>.
    */
   private static final char CHAR_SPACE_32 = ' ';

   /**
    * The quote character, <code>'"'</code>.
    */
   private static final char CHAR_QUOTE_34 = '"';

   /**
    * The ampersand character, <code>'&amp;'</code>.
    */
   private static final char CHAR_AMPERSAND_38 = '&';

   /**
    * The apostrophe character, <code>'\''</code>.
    */
   private static final char CHAR_APOSTROPHE_39 = '\'';

   /**
    * The less than character, <code>'&lt;'</code>.
    */
   private static final char CHAR_LESS_THAN_60 = '<';

   /**
    * The greater than character, <code>'&gt;'</code>.
    */
   private static final char CHAR_GREATER_THAN_62 = '>';


   //-------------------------------------------------------------------------
   // Constructor
   //-------------------------------------------------------------------------

   /**
    * Constructs a new <code>XMLEncoder</code>.
    */
   protected XMLEncoder() {
      // empty
   }


   //-------------------------------------------------------------------------
   // Methods
   //-------------------------------------------------------------------------

   /**
    * Returns the encoding of this encoder.
    *
    * @return
    *    the encoding of this encoder, never <code>null</code>.
    */
   public abstract String getEncoding();

   /**
    * Writes the specified text to the output stream. The complete string is
    * passed on to
    * {@link #text(Writer, char[], int, int, boolean)}, which describes the
    * escaping rules.
    *
    * <p>This method does not explicitly check if <code>text == null</code>;
    * a <code>null</code> text results in a {@link NullPointerException}.
    *
    * @param out
    *    the output stream to print to, not <code>null</code>.
    *
    * @param text
    *    the text to be printed, not <code>null</code>.
    *
    * @param escapeAmpersands
    *    flag that indicates if ampersands should be escaped.
    *
    * @throws NullPointerException
    *    if <code>out == null || text == null</code>.
    *
    * @throws InvalidXMLException
    *    if the specified text contains an invalid character.
    *
    * @throws IOException
    *    if an I/O error occurs.
    */
   public void text(Writer out, String text, boolean escapeAmpersands)
   throws NullPointerException, InvalidXMLException, IOException {

      char[] ch = text.toCharArray();
      text(out, ch, 0, ch.length, escapeAmpersands);
   }

   /**
    * Writes the text in the specified character array to the output stream.
    * The characters <code>'&lt;'</code> and <code>'&gt;'</code> are always
    * written as <code>&amp;lt;</code> and <code>&amp;gt;</code>. An ampersand
    * is written as <code>&amp;amp;</code> only if
    * <code>escapeAmpersands</code> is <code>true</code>. All other accepted
    * characters are delegated to {@link #text(Writer, char)}.
    *
    * <p>Control characters below the space character, other than tab, line
    * feed and carriage return, are rejected. No other validity checks are
    * performed by this method.
    *
    * <p>Characters are processed one at a time, so if an exception is thrown
    * all preceding characters may already have been written.
    *
    * @param out
    *    the output stream to print to, not <code>null</code>.
    *
    * @param ch
    *    the array from which to retrieve the text to be printed.
    *
    * @param start
    *    the start index into <code>ch</code>, must be &gt;= 0.
    *
    * @param length
    *    the number of characters to take from <code>ch</code>, starting at
    *    the <code>start</code> index.
    *
    * @param escapeAmpersands
    *    flag that indicates if ampersands should be escaped.
    *
    * @throws NullPointerException
    *    if <code>out == null || ch == null</code>.
    *
    * @throws IndexOutOfBoundsException
    *    if <code>start &lt; 0
    *          || start + length &gt; ch.length</code>.
    *
    * @throws InvalidXMLException
    *    if the specified text contains an invalid character.
    *
    * @throws IOException
    *    if an I/O error occurs.
    */
   public void text(Writer out, char[] ch, int start, int length, boolean escapeAmpersands)
   throws NullPointerException, IndexOutOfBoundsException, InvalidXMLException, IOException {

      // The range to process is [start, start + length). The loop must run
      // up to the end index; comparing against the length alone would drop
      // characters whenever start is greater than zero.
      int end = start + length;

      for (int i = start; i < end; i++) {
         char c = ch[i];

         // Fast path: nothing above '>' is escaped or rejected here
         if (c > CHAR_GREATER_THAN_62) {
            text(out, c);

         // Printable range that contains the markup characters
         } else if (c > CHAR_31) {
            if (c == CHAR_LESS_THAN_60) {
               out.write("&lt;");
            } else if (c == CHAR_GREATER_THAN_62) {
               out.write("&gt;");
            } else if (c == CHAR_AMPERSAND_38 && escapeAmpersands) {
               out.write("&amp;");
            } else {
               // Includes the ampersand when escaping is switched off
               text(out, c);
            }

         // Control characters: only tab, line feed and carriage return are
         // accepted
         } else if (c == CHAR_TAB_9 || c == CHAR_LF_10 || c == CHAR_CR_13) {
            text(out, c);
         } else {
            // XXX: We could cache the messages in an array...
            throw new InvalidXMLException("The character 0x" + Integer.toHexString((int) c) + " is not valid.");
         }
      }
   }

   /**
    * Writes the specified character to the output stream. If the character is
    * non-printable in this character set, then it will be escaped.
    *
    * <p>It is safe for this method to assume that the specified character
    * does not need to be escaped unless the encoding does not support the
    * character.
    *
    * @param out
    *    the output stream to print to, not <code>null</code>.
    *
    * @param c
    *    the character to be printed.
    *
    * @throws IOException
    *    if an I/O error occurs.
    */
   public abstract void text(Writer out, char c) throws IOException;

   /**
    * Writes an XML declaration.
    *
    * @param out
    *    the <code>Writer</code> to write the declaration to, not
    *    <code>null</code>.
    *
    * @throws NullPointerException
    *    if <code>out == null</code>.
    *
    * @throws IOException
    *    if an I/O error occurs.
    */
   public abstract void declaration(Writer out)
   throws NullPointerException, IOException;

   /**
    * Writes the whitespace in the specified character string to the output
    * stream. The complete string is passed on to
    * {@link #whitespace(Writer, char[], int, int)}.
    *
    * @param out
    *    the output stream to print to, not <code>null</code>.
    *
    * @param s
    *    the character string to print, not <code>null</code>.
    *
    * @throws NullPointerException
    *    if <code>out == null || s == null</code>.
    *
    * @throws InvalidXMLException
    *    if the specified character string contains a character that is
    *    invalid as whitespace.
    *
    * @throws IOException
    *    if an I/O error occurs.
    */
   public void whitespace(Writer out, String s)
   throws NullPointerException, InvalidXMLException, IOException {

      char[] ch = s.toCharArray();
      whitespace(out, ch, 0, ch.length);
   }

   /**
    * Writes the whitespace in the specified character array to the output
    * stream. Only space, tab, carriage return and line feed are accepted;
    * they are written unchanged, directly to the <code>Writer</code>.
    *
    * <p>Characters are processed one at a time, so if an exception is thrown
    * all preceding characters may already have been written.
    *
    * @param out
    *    the output stream to print to, not <code>null</code>.
    *
    * @param ch
    *    the array from which to retrieve the text to be printed.
    *
    * @param start
    *    the start index into <code>ch</code>, must be &gt;= 0.
    *
    * @param length
    *    the number of characters to take from <code>ch</code>, starting at
    *    the <code>start</code> index.
    *
    * @throws NullPointerException
    *    if <code>out == null || ch == null</code>.
    *
    * @throws IndexOutOfBoundsException
    *    if <code>start &lt; 0
    *          || start + length &gt; ch.length</code>.
    *
    * @throws InvalidXMLException
    *    if the specified character array contains a character that is invalid
    *    as whitespace.
    *
    * @throws IOException
    *    if an I/O error occurs.
    */
   public void whitespace(Writer out, char[] ch, int start, int length)
   throws NullPointerException, IndexOutOfBoundsException, InvalidXMLException, IOException {

      // See:
      // http://www.w3.org/TR/REC-xml#NT-S

      // Process the range [start, start + length), not [start, length)
      int end = start + length;

      for (int i = start; i < end; i++) {
         char c = ch[i];

         if (c == CHAR_SPACE_32 || c == CHAR_TAB_9 || c == CHAR_CR_13 || c == CHAR_LF_10) {
            out.write(c);
         } else {
            throw new InvalidXMLException("The character 0x" + Integer.toHexString((int) c) + " is not valid white space.");
         }
      }
   }

   /**
    * Writes an attribute assignment: a space, the attribute name, an equals
    * sign and the quoted attribute value.
    *
    * <p>Within the value, <code>'&lt;'</code> and <code>'&gt;'</code> are
    * always written as <code>&amp;lt;</code> and <code>&amp;gt;</code>, and
    * the character that is used as the quotation mark is written as
    * <code>&amp;apos;</code> or <code>&amp;quot;</code>. An ampersand is
    * written as <code>&amp;amp;</code> only if <code>escapeAmpersands</code>
    * is <code>true</code>. All other characters are delegated to
    * {@link #text(Writer, char)}.
    *
    * <p>The name is written as is. Unlike
    * {@link #text(Writer, char[], int, int, boolean)}, this method does not
    * reject control characters in the value.
    *
    * @param out
    *    the <code>Writer</code> to write the declaration to, not
    *    <code>null</code>.
    *
    * @param name
    *    the name of the attribute, should not be <code>null</code>.
    *
    * @param value
    *    the value of the attribute, should not be <code>null</code>.
    *
    * @param quotationMarkApostrophe
    *    flag that indicates if the quotation mark is the apostrophe character
    *    (<code>'\''</code>); if <code>false</code> the quotation mark is the
    *    quote character (<code>'"'</code>).
    *
    * @param escapeAmpersands
    *    flag that indicates if ampersands in the value should be escaped.
    *
    * @throws NullPointerException
    *    if <code>out == null || value == null</code>.
    *
    * @throws IOException
    *    if an I/O error occurs.
    */
   public void attribute(Writer out, String name, String value, boolean quotationMarkApostrophe, boolean escapeAmpersands)
   throws NullPointerException, IOException {

      // Convert first, so a null value fails before anything is written
      char[] chars = value.toCharArray();
      int length = chars.length;

      // Determine the quotation mark and the entity reference that replaces
      // it inside the value
      char quotationMark;
      String quotationMarkEntity;
      String opening;
      if (quotationMarkApostrophe) {
         quotationMark       = CHAR_APOSTROPHE_39;
         quotationMarkEntity = "&apos;";
         opening             = "='";
      } else {
         quotationMark       = CHAR_QUOTE_34;
         quotationMarkEntity = "&quot;";
         opening             = "=\"";
      }

      out.write(' ');
      out.write(name);
      out.write(opening);

      for (int i = 0; i < length; i++) {
         char c = chars[i];

         // Fast path: all characters that may need an entity reference lie
         // in the range from '"' (34) up to '>' (62)
         if (c < CHAR_QUOTE_34 || c > CHAR_GREATER_THAN_62) {
            text(out, c);
         } else if (c == CHAR_AMPERSAND_38 && escapeAmpersands) {
            out.write("&amp;");
         } else if (c == CHAR_LESS_THAN_60) {
            out.write("&lt;");
         } else if (c == CHAR_GREATER_THAN_62) {
            out.write("&gt;");
         } else if (c == quotationMark) {
            out.write(quotationMarkEntity);
         } else {
            // Includes the other quotation character and, when escaping is
            // switched off, the ampersand
            text(out, c);
         }
      }

      out.write(quotationMark);
   }
}