diff options
Diffstat (limited to 'src/org/apache/commons/codec/net/QuotedPrintableCodec.java')
-rw-r--r-- | src/org/apache/commons/codec/net/QuotedPrintableCodec.java | 387 |
1 files changed, 387 insertions, 0 deletions
diff --git a/src/org/apache/commons/codec/net/QuotedPrintableCodec.java b/src/org/apache/commons/codec/net/QuotedPrintableCodec.java new file mode 100644 index 0000000..c2fcd27 --- /dev/null +++ b/src/org/apache/commons/codec/net/QuotedPrintableCodec.java @@ -0,0 +1,387 @@ +/* + * Copyright 2001-2004 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.codec.net; + +import java.io.ByteArrayOutputStream; +import java.io.UnsupportedEncodingException; +import java.util.BitSet; +import org.apache.commons.codec.BinaryDecoder; +import org.apache.commons.codec.BinaryEncoder; +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.EncoderException; +import org.apache.commons.codec.StringDecoder; +import org.apache.commons.codec.StringEncoder; + +/** + * <p> + * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521 </a>. + * </p> + * <p> + * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to + * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are + * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the + * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable + * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping + * gateway. + * </p> + * + * <p> + * Note: + * </p> + * <p> + * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec + * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the steamable codec + * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy + * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec. + * </p> + * + * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One: + * Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a> + * + * @author Apache Software Foundation + * @since 1.3 + * @version $Id: QuotedPrintableCodec.java,v 1.7 2004/04/09 22:21:07 ggregory Exp $ + */ +public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder { + /** + * The default charset used for string decoding and encoding. + */ + private String charset = StringEncodings.UTF8; + + /** + * BitSet of printable characters as defined in RFC 1521. + */ + private static final BitSet PRINTABLE_CHARS = new BitSet(256); + + private static byte ESCAPE_CHAR = '='; + + private static byte TAB = 9; + + private static byte SPACE = 32; + // Static initializer for printable chars collection + static { + // alpha characters + for (int i = 33; i <= 60; i++) { + PRINTABLE_CHARS.set(i); + } + for (int i = 62; i <= 126; i++) { + PRINTABLE_CHARS.set(i); + } + PRINTABLE_CHARS.set(TAB); + PRINTABLE_CHARS.set(SPACE); + } + + /** + * Default constructor. + */ + public QuotedPrintableCodec() { + super(); + } + + /** + * Constructor which allows for the selection of a default charset + * + * @param charset + * the default string charset to use. + */ + public QuotedPrintableCodec(String charset) { + super(); + this.charset = charset; + } + + /** + * Encodes byte into its quoted-printable representation. + * + * @param b + * byte to encode + * @param buffer + * the buffer to write to + */ + private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) { + buffer.write(ESCAPE_CHAR); + char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16)); + char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16)); + buffer.write(hex1); + buffer.write(hex2); + } + + /** + * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped. + * + * <p> + * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in + * RFC 1521 and is suitable for encoding binary data and unformatted text. + * </p> + * + * @param printable + * bitset of characters deemed quoted-printable + * @param bytes + * array of bytes to be encoded + * @return array of bytes containing quoted-printable data + */ + public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes) { + if (bytes == null) { + return null; + } + if (printable == null) { + printable = PRINTABLE_CHARS; + } + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + for (int i = 0; i < bytes.length; i++) { + int b = bytes[i]; + if (b < 0) { + b = 256 + b; + } + if (printable.get(b)) { + buffer.write(b); + } else { + encodeQuotedPrintable(b, buffer); + } + } + return buffer.toByteArray(); + } + + /** + * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted + * back to their original representation. + * + * <p> + * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in + * RFC 1521. + * </p> + * + * @param bytes + * array of quoted-printable characters + * @return array of original bytes + * @throws DecoderException + * Thrown if quoted-printable decoding is unsuccessful + */ + public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderException { + if (bytes == null) { + return null; + } + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + for (int i = 0; i < bytes.length; i++) { + int b = bytes[i]; + if (b == ESCAPE_CHAR) { + try { + int u = Character.digit((char) bytes[++i], 16); + int l = Character.digit((char) bytes[++i], 16); + if (u == -1 || l == -1) { + throw new DecoderException("Invalid quoted-printable encoding"); + } + buffer.write((char) ((u << 4) + l)); + } catch (ArrayIndexOutOfBoundsException e) { + throw new DecoderException("Invalid quoted-printable encoding"); + } + } else { + buffer.write(b); + } + } + return buffer.toByteArray(); + } + + /** + * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped. + * + * <p> + * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in + * RFC 1521 and is suitable for encoding binary data and unformatted text. + * </p> + * + * @param bytes + * array of bytes to be encoded + * @return array of bytes containing quoted-printable data + */ + public byte[] encode(byte[] bytes) { + return encodeQuotedPrintable(PRINTABLE_CHARS, bytes); + } + + /** + * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted + * back to their original representation. + * + * <p> + * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in + * RFC 1521. + * </p> + * + * @param bytes + * array of quoted-printable characters + * @return array of original bytes + * @throws DecoderException + * Thrown if quoted-printable decoding is unsuccessful + */ + public byte[] decode(byte[] bytes) throws DecoderException { + return decodeQuotedPrintable(bytes); + } + + /** + * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped. + * + * <p> + * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in + * RFC 1521 and is suitable for encoding binary data. + * </p> + * + * @param pString + * string to convert to quoted-printable form + * @return quoted-printable string + * + * @throws EncoderException + * Thrown if quoted-printable encoding is unsuccessful + * + * @see #getDefaultCharset() + */ + public String encode(String pString) throws EncoderException { + if (pString == null) { + return null; + } + try { + return encode(pString, getDefaultCharset()); + } catch (UnsupportedEncodingException e) { + throw new EncoderException(e.getMessage()); + } + } + + /** + * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters + * are converted back to their original representation. + * + * @param pString + * quoted-printable string to convert into its original form + * @param charset + * the original string charset + * @return original string + * @throws DecoderException + * Thrown if quoted-printable decoding is unsuccessful + * @throws UnsupportedEncodingException + * Thrown if charset is not supported + */ + public String decode(String pString, String charset) throws DecoderException, UnsupportedEncodingException { + if (pString == null) { + return null; + } + return new String(decode(pString.getBytes(StringEncodings.US_ASCII)), charset); + } + + /** + * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are + * converted back to their original representation. + * + * @param pString + * quoted-printable string to convert into its original form + * @return original string + * @throws DecoderException + * Thrown if quoted-printable decoding is unsuccessful + * @throws UnsupportedEncodingException + * Thrown if charset is not supported + * @see #getDefaultCharset() + */ + public String decode(String pString) throws DecoderException { + if (pString == null) { + return null; + } + try { + return decode(pString, getDefaultCharset()); + } catch (UnsupportedEncodingException e) { + throw new DecoderException(e.getMessage()); + } + } + + /** + * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped. + * + * @param pObject + * string to convert to a quoted-printable form + * @return quoted-printable object + * @throws EncoderException + * Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is + * unsuccessful + */ + public Object encode(Object pObject) throws EncoderException { + if (pObject == null) { + return null; + } else if (pObject instanceof byte[]) { + return encode((byte[]) pObject); + } else if (pObject instanceof String) { + return encode((String) pObject); + } else { + throw new EncoderException("Objects of type " + + pObject.getClass().getName() + + " cannot be quoted-printable encoded"); + } + } + + /** + * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original + * representation. + * + * @param pObject + * quoted-printable object to convert into its original form + * @return original object + * @throws DecoderException + * Thrown if quoted-printable decoding is not applicable to objects of this type if decoding is + * unsuccessful + */ + public Object decode(Object pObject) throws DecoderException { + if (pObject == null) { + return null; + } else if (pObject instanceof byte[]) { + return decode((byte[]) pObject); + } else if (pObject instanceof String) { + return decode((String) pObject); + } else { + throw new DecoderException("Objects of type " + + pObject.getClass().getName() + + " cannot be quoted-printable decoded"); + } + } + + /** + * Returns the default charset used for string decoding and encoding. + * + * @return the default string charset. + */ + public String getDefaultCharset() { + return this.charset; + } + + /** + * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped. + * + * <p> + * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in + * RFC 1521 and is suitable for encoding binary data and unformatted text. + * </p> + * + * @param pString + * string to convert to quoted-printable form + * @param charset + * the charset for pString + * @return quoted-printable string + * + * @throws UnsupportedEncodingException + * Thrown if the charset is not supported + */ + public String encode(String pString, String charset) throws UnsupportedEncodingException { + if (pString == null) { + return null; + } + return new String(encode(pString.getBytes(charset)), StringEncodings.US_ASCII); + } +} |