diff options
Diffstat (limited to 'src/org/apache/commons/codec')
28 files changed, 0 insertions, 5172 deletions
diff --git a/src/org/apache/commons/codec/BinaryDecoder.java b/src/org/apache/commons/codec/BinaryDecoder.java deleted file mode 100644 index 7aebabf..0000000 --- a/src/org/apache/commons/codec/BinaryDecoder.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec; - -/** - * Defines common decoding methods for byte array decoders. - * - * @author Apache Software Foundation - * @version $Id: BinaryDecoder.java,v 1.10 2004/06/15 18:14:15 ggregory Exp $ - */ -public interface BinaryDecoder extends Decoder { - - /** - * Decodes a byte array and returns the results as a byte array. - * - * @param pArray A byte array which has been encoded with the - * appropriate encoder - * - * @return a byte array that contains decoded content - * - * @throws DecoderException A decoder exception is thrown - * if a Decoder encounters a failure condition during - * the decode process. - */ - byte[] decode(byte[] pArray) throws DecoderException; -} - diff --git a/src/org/apache/commons/codec/BinaryEncoder.java b/src/org/apache/commons/codec/BinaryEncoder.java deleted file mode 100644 index 52859ed..0000000 --- a/src/org/apache/commons/codec/BinaryEncoder.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec; - -/** - * Defines common encoding methods for byte array encoders. - * - * @author Apache Software Foundation - * @version $Id: BinaryEncoder.java,v 1.10 2004/02/29 04:08:31 tobrien Exp $ - */ -public interface BinaryEncoder extends Encoder { - - /** - * Encodes a byte array and return the encoded data - * as a byte array. - * - * @param pArray Data to be encoded - * - * @return A byte array containing the encoded data - * - * @throws EncoderException thrown if the Encoder - * encounters a failure condition during the - * encoding process. - */ - byte[] encode(byte[] pArray) throws EncoderException; -} - diff --git a/src/org/apache/commons/codec/Decoder.java b/src/org/apache/commons/codec/Decoder.java deleted file mode 100644 index 184920c..0000000 --- a/src/org/apache/commons/codec/Decoder.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec; - -/** - * <p>Provides the highest level of abstraction for Decoders. - * This is the sister interface of {@link Encoder}. All - * Decoders implement this common generic interface.</p> - * - * <p>Allows a user to pass a generic Object to any Decoder - * implementation in the codec package.</p> - * - * <p>One of the two interfaces at the center of the codec package.</p> - * - * @author Apache Software Foundation - * @version $Id: Decoder.java,v 1.9 2004/02/29 04:08:31 tobrien Exp $ - */ -public interface Decoder { - - /** - * Decodes an "encoded" Object and returns a "decoded" - * Object. Note that the implementation of this - * interface will try to cast the Object parameter - * to the specific type expected by a particular Decoder - * implementation. If a {@link java.lang.ClassCastException} occurs - * this decode method will throw a DecoderException. - * - * @param pObject an object to "decode" - * - * @return a 'decoded" object - * - * @throws DecoderException a decoder exception can - * be thrown for any number of reasons. Some good - * candidates are that the parameter passed to this - * method is null, a param cannot be cast to the - * appropriate type for a specific encoder. - */ - Object decode(Object pObject) throws DecoderException; -} - diff --git a/src/org/apache/commons/codec/DecoderException.java b/src/org/apache/commons/codec/DecoderException.java deleted file mode 100644 index f35c016..0000000 --- a/src/org/apache/commons/codec/DecoderException.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec; - -/** - * Thrown when a Decoder has encountered a failure condition during a decode. - * - * @author Apache Software Foundation - * @version $Id: DecoderException.java,v 1.9 2004/02/29 04:08:31 tobrien Exp $ - */ -public class DecoderException extends Exception { - - /** - * Creates a DecoderException - * - * @param pMessage A message with meaning to a human - */ - public DecoderException(String pMessage) { - super(pMessage); - } - -} - diff --git a/src/org/apache/commons/codec/Encoder.java b/src/org/apache/commons/codec/Encoder.java deleted file mode 100644 index fa339ee..0000000 --- a/src/org/apache/commons/codec/Encoder.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec; - -/** - * <p>Provides the highest level of abstraction for Encoders. - * This is the sister interface of {@link Decoder}. 
Every implementation of - * Encoder provides this common generic interface whic allows a user to pass a - * generic Object to any Encoder implementation in the codec package.</p> - * - * @author Apache Software Foundation - * @version $Id: Encoder.java,v 1.10 2004/02/29 04:08:31 tobrien Exp $ - */ -public interface Encoder { - - /** - * Encodes an "Object" and returns the encoded content - * as an Object. The Objects here may just be <code>byte[]</code> - * or <code>String</code>s depending on the implementation used. - * - * @param pObject An object ot encode - * - * @return An "encoded" Object - * - * @throws EncoderException an encoder exception is - * thrown if the encoder experiences a failure - * condition during the encoding process. - */ - Object encode(Object pObject) throws EncoderException; -} - diff --git a/src/org/apache/commons/codec/EncoderException.java b/src/org/apache/commons/codec/EncoderException.java deleted file mode 100644 index 0e202c1..0000000 --- a/src/org/apache/commons/codec/EncoderException.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec; - -/** - * Thrown when there is a failure condition during the encoding process. 
This - * exception is thrown when an Encoder encounters a encoding specific exception - * such as invalid data, inability to calculate a checksum, characters outside of the - * expected range. - * - * @author Apache Software Foundation - * @version $Id: EncoderException.java,v 1.10 2004/02/29 04:08:31 tobrien Exp $ - */ -public class EncoderException extends Exception { - - /** - * Creates a new instance of this exception with an useful message. - * - * @param pMessage a useful message relating to the encoder specific error. - */ - public EncoderException(String pMessage) { - super(pMessage); - } -} - diff --git a/src/org/apache/commons/codec/StringDecoder.java b/src/org/apache/commons/codec/StringDecoder.java deleted file mode 100644 index 9b1a0cd..0000000 --- a/src/org/apache/commons/codec/StringDecoder.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec; - -/** - * Decodes a String into a String. - * - * @author Apache Software Foundation - * @version $Id: StringDecoder.java,v 1.9 2004/02/29 04:08:31 tobrien Exp $ - */ -public interface StringDecoder extends Decoder { - - /** - * Decodes a String and returns a String. - * - * @param pString a String to encode - * - * @return the encoded String - * - * @throws DecoderException thrown if there is - * an error conidition during the Encoding process. 
- */ - String decode(String pString) throws DecoderException; -} - diff --git a/src/org/apache/commons/codec/StringEncoder.java b/src/org/apache/commons/codec/StringEncoder.java deleted file mode 100644 index 46f5404..0000000 --- a/src/org/apache/commons/codec/StringEncoder.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec; - -/** - * Encodes a String into a String. - * - * @author Apache Software Foundation - * @version $Id: StringEncoder.java,v 1.9 2004/02/29 04:08:31 tobrien Exp $ - */ -public interface StringEncoder extends Encoder { - - /** - * Encodes a String and returns a String. - * - * @param pString a String to encode - * - * @return the encoded String - * - * @throws EncoderException thrown if there is - * an error conidition during the Encoding process. - */ - String encode(String pString) throws EncoderException; -} - diff --git a/src/org/apache/commons/codec/StringEncoderComparator.java b/src/org/apache/commons/codec/StringEncoderComparator.java deleted file mode 100644 index 6d29af2..0000000 --- a/src/org/apache/commons/codec/StringEncoderComparator.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec; - -import java.util.Comparator; - -/** - * Strings are comparable, and this comparator allows - * you to configure it with an instance of a class - * which implements StringEncoder. This comparator - * is used to sort Strings by an encoding scheme such - * as Soundex, Metaphone, etc. This class can come in - * handy if one need to sort Strings by an encoded - * form of a name such as Soundex. - * - * @author Apache Software Foundation - * @version $Id: StringEncoderComparator.java,v 1.14 2004/06/21 23:24:17 ggregory Exp $ - */ -public class StringEncoderComparator implements Comparator { - - /** - * Internal encoder instance. - */ - private StringEncoder stringEncoder; - - /** - * Constructs a new instance. - */ - public StringEncoderComparator() { - // no init. - } - - /** - * Constructs a new instance with the given algorithm. - * @param stringEncoder the StringEncoder used for comparisons. - */ - public StringEncoderComparator(StringEncoder stringEncoder) { - this.stringEncoder = stringEncoder; - } - - /** - * Compares two strings based not on the strings - * themselves, but on an encoding of the two - * strings using the StringEncoder this Comparator - * was created with. - * - * If an {@link EncoderException} is encountered, return <code>0</code>. - * - * @param o1 the object to compare - * @param o2 the object to compare to - * @return the Comparable.compareTo() return code or 0 if an encoding error was caught. 
- * @see Comparable - */ - public int compare(Object o1, Object o2) { - - int compareCode = 0; - - try { - Comparable s1 = (Comparable) ((Encoder) this.stringEncoder).encode(o1); - Comparable s2 = (Comparable) ((Encoder) this.stringEncoder).encode(o2); - compareCode = s1.compareTo(s2); - } - catch (EncoderException ee) { - compareCode = 0; - } - return compareCode; - } - -} diff --git a/src/org/apache/commons/codec/binary/Base64.java b/src/org/apache/commons/codec/binary/Base64.java deleted file mode 100644 index ea479e9..0000000 --- a/src/org/apache/commons/codec/binary/Base64.java +++ /dev/null @@ -1,524 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec.binary; - -import org.apache.commons.codec.BinaryDecoder; -import org.apache.commons.codec.BinaryEncoder; -import org.apache.commons.codec.DecoderException; -import org.apache.commons.codec.EncoderException; - -/** - * Provides Base64 encoding and decoding as defined by RFC 2045. - * - * <p>This class implements section <cite>6.8. 
Base64 Content-Transfer-Encoding</cite> - * from RFC 2045 <cite>Multipurpose Internet Mail Extensions (MIME) Part One: - * Format of Internet Message Bodies</cite> by Freed and Borenstein.</p> - * - * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a> - * @author Apache Software Foundation - * @since 1.0-dev - * @version $Id: Base64.java,v 1.20 2004/05/24 00:21:24 ggregory Exp $ - */ -public class Base64 implements BinaryEncoder, BinaryDecoder { - - /** - * Chunk size per RFC 2045 section 6.8. - * - * <p>The {@value} character limit does not count the trailing CRLF, but counts - * all other characters, including any equal signs.</p> - * - * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a> - */ - static final int CHUNK_SIZE = 76; - - /** - * Chunk separator per RFC 2045 section 2.1. - * - * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a> - */ - static final byte[] CHUNK_SEPARATOR = "\r\n".getBytes(); - - /** - * The base length. - */ - static final int BASELENGTH = 255; - - /** - * Lookup length. - */ - static final int LOOKUPLENGTH = 64; - - /** - * Used to calculate the number of bits in a byte. - */ - static final int EIGHTBIT = 8; - - /** - * Used when encoding something which has fewer than 24 bits. - */ - static final int SIXTEENBIT = 16; - - /** - * Used to determine how many bits data contains. - */ - static final int TWENTYFOURBITGROUP = 24; - - /** - * Used to get the number of Quadruples. - */ - static final int FOURBYTE = 4; - - /** - * Used to test the sign of a byte. - */ - static final int SIGN = -128; - - /** - * Byte used to pad output. 
- */ - static final byte PAD = (byte) '='; - - // Create arrays to hold the base64 characters and a - // lookup for base64 chars - private static byte[] base64Alphabet = new byte[BASELENGTH]; - private static byte[] lookUpBase64Alphabet = new byte[LOOKUPLENGTH]; - - // Populating the lookup and character arrays - static { - for (int i = 0; i < BASELENGTH; i++) { - base64Alphabet[i] = (byte) -1; - } - for (int i = 'Z'; i >= 'A'; i--) { - base64Alphabet[i] = (byte) (i - 'A'); - } - for (int i = 'z'; i >= 'a'; i--) { - base64Alphabet[i] = (byte) (i - 'a' + 26); - } - for (int i = '9'; i >= '0'; i--) { - base64Alphabet[i] = (byte) (i - '0' + 52); - } - - base64Alphabet['+'] = 62; - base64Alphabet['/'] = 63; - - for (int i = 0; i <= 25; i++) { - lookUpBase64Alphabet[i] = (byte) ('A' + i); - } - - for (int i = 26, j = 0; i <= 51; i++, j++) { - lookUpBase64Alphabet[i] = (byte) ('a' + j); - } - - for (int i = 52, j = 0; i <= 61; i++, j++) { - lookUpBase64Alphabet[i] = (byte) ('0' + j); - } - - lookUpBase64Alphabet[62] = (byte) '+'; - lookUpBase64Alphabet[63] = (byte) '/'; - } - - private static boolean isBase64(byte octect) { - if (octect == PAD) { - return true; - } else if (base64Alphabet[octect] == -1) { - return false; - } else { - return true; - } - } - - /** - * Tests a given byte array to see if it contains - * only valid characters within the Base64 alphabet. - * - * @param arrayOctect byte array to test - * @return true if all bytes are valid characters in the Base64 - * alphabet or if the byte array is empty; false, otherwise - */ - public static boolean isArrayByteBase64(byte[] arrayOctect) { - - arrayOctect = discardWhitespace(arrayOctect); - - int length = arrayOctect.length; - if (length == 0) { - // shouldn't a 0 length array be valid base64 data? 
- // return false; - return true; - } - for (int i = 0; i < length; i++) { - if (!isBase64(arrayOctect[i])) { - return false; - } - } - return true; - } - - /** - * Encodes binary data using the base64 algorithm but - * does not chunk the output. - * - * @param binaryData binary data to encode - * @return Base64 characters - */ - public static byte[] encodeBase64(byte[] binaryData) { - return encodeBase64(binaryData, false); - } - - /** - * Encodes binary data using the base64 algorithm and chunks - * the encoded output into 76 character blocks - * - * @param binaryData binary data to encode - * @return Base64 characters chunked in 76 character blocks - */ - public static byte[] encodeBase64Chunked(byte[] binaryData) { - return encodeBase64(binaryData, true); - } - - - /** - * Decodes an Object using the base64 algorithm. This method - * is provided in order to satisfy the requirements of the - * Decoder interface, and will throw a DecoderException if the - * supplied object is not of type byte[]. - * - * @param pObject Object to decode - * @return An object (of type byte[]) containing the - * binary data which corresponds to the byte[] supplied. - * @throws DecoderException if the parameter supplied is not - * of type byte[] - */ - public Object decode(Object pObject) throws DecoderException { - if (!(pObject instanceof byte[])) { - throw new DecoderException("Parameter supplied to Base64 decode is not a byte[]"); - } - return decode((byte[]) pObject); - } - - /** - * Decodes a byte[] containing containing - * characters in the Base64 alphabet. - * - * @param pArray A byte array containing Base64 character data - * @return a byte array containing binary data - */ - public byte[] decode(byte[] pArray) { - return decodeBase64(pArray); - } - - /** - * Encodes binary data using the base64 algorithm, optionally - * chunking the output into 76 character blocks. - * - * @param binaryData Array containing binary data to encode. 
- * @param isChunked if isChunked is true this encoder will chunk - * the base64 output into 76 character blocks - * @return Base64-encoded data. - */ - public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) { - int lengthDataBits = binaryData.length * EIGHTBIT; - int fewerThan24bits = lengthDataBits % TWENTYFOURBITGROUP; - int numberTriplets = lengthDataBits / TWENTYFOURBITGROUP; - byte encodedData[] = null; - int encodedDataLength = 0; - int nbrChunks = 0; - - if (fewerThan24bits != 0) { - //data not divisible by 24 bit - encodedDataLength = (numberTriplets + 1) * 4; - } else { - // 16 or 8 bit - encodedDataLength = numberTriplets * 4; - } - - // If the output is to be "chunked" into 76 character sections, - // for compliance with RFC 2045 MIME, then it is important to - // allow for extra length to account for the separator(s) - if (isChunked) { - - nbrChunks = - (CHUNK_SEPARATOR.length == 0 ? 0 : (int) Math.ceil((float) encodedDataLength / CHUNK_SIZE)); - encodedDataLength += nbrChunks * CHUNK_SEPARATOR.length; - } - - encodedData = new byte[encodedDataLength]; - - byte k = 0, l = 0, b1 = 0, b2 = 0, b3 = 0; - - int encodedIndex = 0; - int dataIndex = 0; - int i = 0; - int nextSeparatorIndex = CHUNK_SIZE; - int chunksSoFar = 0; - - //log.debug("number of triplets = " + numberTriplets); - for (i = 0; i < numberTriplets; i++) { - dataIndex = i * 3; - b1 = binaryData[dataIndex]; - b2 = binaryData[dataIndex + 1]; - b3 = binaryData[dataIndex + 2]; - - //log.debug("b1= " + b1 +", b2= " + b2 + ", b3= " + b3); - - l = (byte) (b2 & 0x0f); - k = (byte) (b1 & 0x03); - - byte val1 = - ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0); - byte val2 = - ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0); - byte val3 = - ((b3 & SIGN) == 0) ? 
(byte) (b3 >> 6) : (byte) ((b3) >> 6 ^ 0xfc); - - encodedData[encodedIndex] = lookUpBase64Alphabet[val1]; - //log.debug( "val2 = " + val2 ); - //log.debug( "k4 = " + (k<<4) ); - //log.debug( "vak = " + (val2 | (k<<4)) ); - encodedData[encodedIndex + 1] = - lookUpBase64Alphabet[val2 | (k << 4)]; - encodedData[encodedIndex + 2] = - lookUpBase64Alphabet[(l << 2) | val3]; - encodedData[encodedIndex + 3] = lookUpBase64Alphabet[b3 & 0x3f]; - - encodedIndex += 4; - - // If we are chunking, let's put a chunk separator down. - if (isChunked) { - // this assumes that CHUNK_SIZE % 4 == 0 - if (encodedIndex == nextSeparatorIndex) { - System.arraycopy( - CHUNK_SEPARATOR, - 0, - encodedData, - encodedIndex, - CHUNK_SEPARATOR.length); - chunksSoFar++; - nextSeparatorIndex = - (CHUNK_SIZE * (chunksSoFar + 1)) + - (chunksSoFar * CHUNK_SEPARATOR.length); - encodedIndex += CHUNK_SEPARATOR.length; - } - } - } - - // form integral number of 6-bit groups - dataIndex = i * 3; - - if (fewerThan24bits == EIGHTBIT) { - b1 = binaryData[dataIndex]; - k = (byte) (b1 & 0x03); - //log.debug("b1=" + b1); - //log.debug("b1<<2 = " + (b1>>2) ); - byte val1 = - ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0); - encodedData[encodedIndex] = lookUpBase64Alphabet[val1]; - encodedData[encodedIndex + 1] = lookUpBase64Alphabet[k << 4]; - encodedData[encodedIndex + 2] = PAD; - encodedData[encodedIndex + 3] = PAD; - } else if (fewerThan24bits == SIXTEENBIT) { - - b1 = binaryData[dataIndex]; - b2 = binaryData[dataIndex + 1]; - l = (byte) (b2 & 0x0f); - k = (byte) (b1 & 0x03); - - byte val1 = - ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0); - byte val2 = - ((b2 & SIGN) == 0) ? 
(byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0); - - encodedData[encodedIndex] = lookUpBase64Alphabet[val1]; - encodedData[encodedIndex + 1] = - lookUpBase64Alphabet[val2 | (k << 4)]; - encodedData[encodedIndex + 2] = lookUpBase64Alphabet[l << 2]; - encodedData[encodedIndex + 3] = PAD; - } - - if (isChunked) { - // we also add a separator to the end of the final chunk. - if (chunksSoFar < nbrChunks) { - System.arraycopy( - CHUNK_SEPARATOR, - 0, - encodedData, - encodedDataLength - CHUNK_SEPARATOR.length, - CHUNK_SEPARATOR.length); - } - } - - return encodedData; - } - - /** - * Decodes Base64 data into octects - * - * @param base64Data Byte array containing Base64 data - * @return Array containing decoded data. - */ - public static byte[] decodeBase64(byte[] base64Data) { - // RFC 2045 requires that we discard ALL non-Base64 characters - base64Data = discardNonBase64(base64Data); - - // handle the edge case, so we don't have to worry about it later - if (base64Data.length == 0) { - return new byte[0]; - } - - int numberQuadruple = base64Data.length / FOURBYTE; - byte decodedData[] = null; - byte b1 = 0, b2 = 0, b3 = 0, b4 = 0, marker0 = 0, marker1 = 0; - - // Throw away anything not in base64Data - - int encodedIndex = 0; - int dataIndex = 0; - { - // this sizes the output array properly - rlw - int lastData = base64Data.length; - // ignore the '=' padding - while (base64Data[lastData - 1] == PAD) { - if (--lastData == 0) { - return new byte[0]; - } - } - decodedData = new byte[lastData - numberQuadruple]; - } - - for (int i = 0; i < numberQuadruple; i++) { - dataIndex = i * 4; - marker0 = base64Data[dataIndex + 2]; - marker1 = base64Data[dataIndex + 3]; - - b1 = base64Alphabet[base64Data[dataIndex]]; - b2 = base64Alphabet[base64Data[dataIndex + 1]]; - - if (marker0 != PAD && marker1 != PAD) { - //No PAD e.g 3cQl - b3 = base64Alphabet[marker0]; - b4 = base64Alphabet[marker1]; - - decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4); - decodedData[encodedIndex + 1] = 
- (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf)); - decodedData[encodedIndex + 2] = (byte) (b3 << 6 | b4); - } else if (marker0 == PAD) { - //Two PAD e.g. 3c[Pad][Pad] - decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4); - } else if (marker1 == PAD) { - //One PAD e.g. 3cQ[Pad] - b3 = base64Alphabet[marker0]; - - decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4); - decodedData[encodedIndex + 1] = - (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf)); - } - encodedIndex += 3; - } - return decodedData; - } - - /** - * Discards any whitespace from a base-64 encoded block. - * - * @param data The base-64 encoded data to discard the whitespace - * from. - * @return The data, less whitespace (see RFC 2045). - */ - static byte[] discardWhitespace(byte[] data) { - byte groomedData[] = new byte[data.length]; - int bytesCopied = 0; - - for (int i = 0; i < data.length; i++) { - switch (data[i]) { - case (byte) ' ' : - case (byte) '\n' : - case (byte) '\r' : - case (byte) '\t' : - break; - default: - groomedData[bytesCopied++] = data[i]; - } - } - - byte packedData[] = new byte[bytesCopied]; - - System.arraycopy(groomedData, 0, packedData, 0, bytesCopied); - - return packedData; - } - - /** - * Discards any characters outside of the base64 alphabet, per - * the requirements on page 25 of RFC 2045 - "Any characters - * outside of the base64 alphabet are to be ignored in base64 - * encoded data." - * - * @param data The base-64 encoded data to groom - * @return The data, less non-base64 characters (see RFC 2045). 
- */ - static byte[] discardNonBase64(byte[] data) { - byte groomedData[] = new byte[data.length]; - int bytesCopied = 0; - - for (int i = 0; i < data.length; i++) { - if (isBase64(data[i])) { - groomedData[bytesCopied++] = data[i]; - } - } - - byte packedData[] = new byte[bytesCopied]; - - System.arraycopy(groomedData, 0, packedData, 0, bytesCopied); - - return packedData; - } - - - // Implementation of the Encoder Interface - - /** - * Encodes an Object using the base64 algorithm. This method - * is provided in order to satisfy the requirements of the - * Encoder interface, and will throw an EncoderException if the - * supplied object is not of type byte[]. - * - * @param pObject Object to encode - * @return An object (of type byte[]) containing the - * base64 encoded data which corresponds to the byte[] supplied. - * @throws EncoderException if the parameter supplied is not - * of type byte[] - */ - public Object encode(Object pObject) throws EncoderException { - if (!(pObject instanceof byte[])) { - throw new EncoderException( - "Parameter supplied to Base64 encode is not a byte[]"); - } - return encode((byte[]) pObject); - } - - /** - * Encodes a byte[] containing binary data, into a byte[] containing - * characters in the Base64 alphabet. - * - * @param pArray a byte array containing binary data - * @return A byte array containing only Base64 character data - */ - public byte[] encode(byte[] pArray) { - return encodeBase64(pArray, false); - } - -} diff --git a/src/org/apache/commons/codec/binary/BinaryCodec.java b/src/org/apache/commons/codec/binary/BinaryCodec.java deleted file mode 100644 index 98c6409..0000000 --- a/src/org/apache/commons/codec/binary/BinaryCodec.java +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec.binary; - -import org.apache.commons.codec.BinaryDecoder; -import org.apache.commons.codec.BinaryEncoder; -import org.apache.commons.codec.DecoderException; -import org.apache.commons.codec.EncoderException; - -/** - * Translates between byte arrays and strings of "0"s and "1"s. - * - * <b>TODO:</b> may want to add more bit vector functions like and/or/xor/nand. - * <B>TODO:</b> also might be good to generate boolean[] - * from byte[] et. cetera. - * - * @author Apache Software Foundation - * @since 1.3 - * @version $Id $ - */ -public class BinaryCodec implements BinaryDecoder, BinaryEncoder { - /* - * tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth - * it. - */ - /** Empty char array. */ - private static final char[] EMPTY_CHAR_ARRAY = new char[0]; - - /** Empty byte array. */ - private static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; - - /** Mask for bit 0 of a byte. */ - private static final int BIT_0 = 1; - - /** Mask for bit 1 of a byte. */ - private static final int BIT_1 = 0x02; - - /** Mask for bit 2 of a byte. */ - private static final int BIT_2 = 0x04; - - /** Mask for bit 3 of a byte. */ - private static final int BIT_3 = 0x08; - - /** Mask for bit 4 of a byte. */ - private static final int BIT_4 = 0x10; - - /** Mask for bit 5 of a byte. */ - private static final int BIT_5 = 0x20; - - /** Mask for bit 6 of a byte. */ - private static final int BIT_6 = 0x40; - - /** Mask for bit 7 of a byte. 
*/ - private static final int BIT_7 = 0x80; - - private static final int[] BITS = {BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7}; - - /** - * Converts an array of raw binary data into an array of ascii 0 and 1 characters. - * - * @param raw - * the raw binary data to convert - * @return 0 and 1 ascii character bytes one for each bit of the argument - * @see org.apache.commons.codec.BinaryEncoder#encode(byte[]) - */ - public byte[] encode(byte[] raw) { - return toAsciiBytes(raw); - } - - /** - * Converts an array of raw binary data into an array of ascii 0 and 1 chars. - * - * @param raw - * the raw binary data to convert - * @return 0 and 1 ascii character chars one for each bit of the argument - * @throws EncoderException - * if the argument is not a byte[] - * @see org.apache.commons.codec.Encoder#encode(java.lang.Object) - */ - public Object encode(Object raw) throws EncoderException { - if (!(raw instanceof byte[])) { - throw new EncoderException("argument not a byte array"); - } - return toAsciiChars((byte[]) raw); - } - - /** - * Decodes a byte array where each byte represents an ascii '0' or '1'. - * - * @param ascii - * each byte represents an ascii '0' or '1' - * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument - * @throws DecoderException - * if argument is not a byte[], char[] or String - * @see org.apache.commons.codec.Decoder#decode(java.lang.Object) - */ - public Object decode(Object ascii) throws DecoderException { - if (ascii == null) { - return EMPTY_BYTE_ARRAY; - } - if (ascii instanceof byte[]) { - return fromAscii((byte[]) ascii); - } - if (ascii instanceof char[]) { - return fromAscii((char[]) ascii); - } - if (ascii instanceof String) { - return fromAscii(((String) ascii).toCharArray()); - } - throw new DecoderException("argument not a byte array"); - } - - /** - * Decodes a byte array where each byte represents an ascii '0' or '1'. 
- * - * @param ascii - * each byte represents an ascii '0' or '1' - * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument - * @see org.apache.commons.codec.Decoder#decode(Object) - */ - public byte[] decode(byte[] ascii) { - return fromAscii(ascii); - } - - /** - * Decodes a String where each char of the String represents an ascii '0' or '1'. - * - * @param ascii - * String of '0' and '1' characters - * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument - * @see org.apache.commons.codec.Decoder#decode(Object) - */ - public byte[] toByteArray(String ascii) { - if (ascii == null) { - return EMPTY_BYTE_ARRAY; - } - return fromAscii(ascii.toCharArray()); - } - - // ------------------------------------------------------------------------ - // - // static codec operations - // - // ------------------------------------------------------------------------ - /** - * Decodes a byte array where each char represents an ascii '0' or '1'. - * - * @param ascii - * each char represents an ascii '0' or '1' - * @return the raw encoded binary where each bit corresponds to a char in the char array argument - */ - public static byte[] fromAscii(char[] ascii) { - if (ascii == null || ascii.length == 0) { - return EMPTY_BYTE_ARRAY; - } - // get length/8 times bytes with 3 bit shifts to the right of the length - byte[] l_raw = new byte[ascii.length >> 3]; - /* - * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the - * loop. - */ - for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) { - for (int bits = 0; bits < BITS.length; ++bits) { - if (ascii[jj - bits] == '1') { - l_raw[ii] |= BITS[bits]; - } - } - } - return l_raw; - } - - /** - * Decodes a byte array where each byte represents an ascii '0' or '1'. 
- * - * @param ascii - * each byte represents an ascii '0' or '1' - * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument - */ - public static byte[] fromAscii(byte[] ascii) { - if (ascii == null || ascii.length == 0) { - return EMPTY_BYTE_ARRAY; - } - // get length/8 times bytes with 3 bit shifts to the right of the length - byte[] l_raw = new byte[ascii.length >> 3]; - /* - * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the - * loop. - */ - for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) { - for (int bits = 0; bits < BITS.length; ++bits) { - if (ascii[jj - bits] == '1') { - l_raw[ii] |= BITS[bits]; - } - } - } - return l_raw; - } - - /** - * Converts an array of raw binary data into an array of ascii 0 and 1 character bytes - each byte is a truncated - * char. - * - * @param raw - * the raw binary data to convert - * @return an array of 0 and 1 character bytes for each bit of the argument - * @see org.apache.commons.codec.BinaryEncoder#encode(byte[]) - */ - public static byte[] toAsciiBytes(byte[] raw) { - if (raw == null || raw.length == 0) { - return EMPTY_BYTE_ARRAY; - } - // get 8 times the bytes with 3 bit shifts to the left of the length - byte[] l_ascii = new byte[raw.length << 3]; - /* - * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the - * loop. - */ - for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) { - for (int bits = 0; bits < BITS.length; ++bits) { - if ((raw[ii] & BITS[bits]) == 0) { - l_ascii[jj - bits] = '0'; - } else { - l_ascii[jj - bits] = '1'; - } - } - } - return l_ascii; - } - - /** - * Converts an array of raw binary data into an array of ascii 0 and 1 characters. 
- * - * @param raw - * the raw binary data to convert - * @return an array of 0 and 1 characters for each bit of the argument - * @see org.apache.commons.codec.BinaryEncoder#encode(byte[]) - */ - public static char[] toAsciiChars(byte[] raw) { - if (raw == null || raw.length == 0) { - return EMPTY_CHAR_ARRAY; - } - // get 8 times the bytes with 3 bit shifts to the left of the length - char[] l_ascii = new char[raw.length << 3]; - /* - * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the - * loop. - */ - for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) { - for (int bits = 0; bits < BITS.length; ++bits) { - if ((raw[ii] & BITS[bits]) == 0) { - l_ascii[jj - bits] = '0'; - } else { - l_ascii[jj - bits] = '1'; - } - } - } - return l_ascii; - } - - /** - * Converts an array of raw binary data into a String of ascii 0 and 1 characters. - * - * @param raw - * the raw binary data to convert - * @return a String of 0 and 1 characters representing the binary data - * @see org.apache.commons.codec.BinaryEncoder#encode(byte[]) - */ - public static String toAsciiString(byte[] raw) { - return new String(toAsciiChars(raw)); - } -} diff --git a/src/org/apache/commons/codec/binary/Hex.java b/src/org/apache/commons/codec/binary/Hex.java deleted file mode 100644 index 78f5510..0000000 --- a/src/org/apache/commons/codec/binary/Hex.java +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec.binary; - -import org.apache.commons.codec.BinaryDecoder; -import org.apache.commons.codec.BinaryEncoder; -import org.apache.commons.codec.DecoderException; -import org.apache.commons.codec.EncoderException; - -/** - * Hex encoder and decoder. - * - * @since 1.1 - * @author Apache Software Foundation - * @version $Id: Hex.java,v 1.13 2004/04/18 18:22:33 ggregory Exp $ - */ -public class Hex implements BinaryEncoder, BinaryDecoder { - - /** - * Used building output as Hex - */ - private static final char[] DIGITS = { - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' - }; - - /** - * Converts an array of characters representing hexidecimal values into an - * array of bytes of those same values. The returned array will be half the - * length of the passed array, as it takes two characters to represent any - * given byte. An exception is thrown if the passed char array has an odd - * number of elements. - * - * @param data An array of characters containing hexidecimal digits - * @return A byte array containing binary data decoded from - * the supplied char array. - * @throws DecoderException Thrown if an odd number or illegal of characters - * is supplied - */ - public static byte[] decodeHex(char[] data) throws DecoderException { - - int len = data.length; - - if ((len & 0x01) != 0) { - throw new DecoderException("Odd number of characters."); - } - - byte[] out = new byte[len >> 1]; - - // two characters form the hex value. - for (int i = 0, j = 0; j < len; i++) { - int f = toDigit(data[j], j) << 4; - j++; - f = f | toDigit(data[j], j); - j++; - out[i] = (byte) (f & 0xFF); - } - - return out; - } - - /** - * Converts a hexadecimal character to an integer. 
- * - * @param ch A character to convert to an integer digit - * @param index The index of the character in the source - * @return An integer - * @throws DecoderException Thrown if ch is an illegal hex character - */ - protected static int toDigit(char ch, int index) throws DecoderException { - int digit = Character.digit(ch, 16); - if (digit == -1) { - throw new DecoderException("Illegal hexadecimal charcter " + ch + " at index " + index); - } - return digit; - } - - /** - * Converts an array of bytes into an array of characters representing the hexidecimal values of each byte in order. - * The returned array will be double the length of the passed array, as it takes two characters to represent any - * given byte. - * - * @param data - * a byte[] to convert to Hex characters - * @return A char[] containing hexidecimal characters - */ - public static char[] encodeHex(byte[] data) { - - int l = data.length; - - char[] out = new char[l << 1]; - - // two characters form the hex value. - for (int i = 0, j = 0; i < l; i++) { - out[j++] = DIGITS[(0xF0 & data[i]) >>> 4 ]; - out[j++] = DIGITS[ 0x0F & data[i] ]; - } - - return out; - } - - /** - * Converts an array of character bytes representing hexidecimal values into an - * array of bytes of those same values. The returned array will be half the - * length of the passed array, as it takes two characters to represent any - * given byte. An exception is thrown if the passed char array has an odd - * number of elements. - * - * @param array An array of character bytes containing hexidecimal digits - * @return A byte array containing binary data decoded from - * the supplied byte array (representing characters). 
- * @throws DecoderException Thrown if an odd number of characters is supplied - * to this function - * @see #decodeHex(char[]) - */ - public byte[] decode(byte[] array) throws DecoderException { - return decodeHex(new String(array).toCharArray()); - } - - /** - * Converts a String or an array of character bytes representing hexidecimal values into an - * array of bytes of those same values. The returned array will be half the - * length of the passed String or array, as it takes two characters to represent any - * given byte. An exception is thrown if the passed char array has an odd - * number of elements. - * - * @param object A String or, an array of character bytes containing hexidecimal digits - * @return A byte array containing binary data decoded from - * the supplied byte array (representing characters). - * @throws DecoderException Thrown if an odd number of characters is supplied - * to this function or the object is not a String or char[] - * @see #decodeHex(char[]) - */ - public Object decode(Object object) throws DecoderException { - try { - char[] charArray = object instanceof String ? ((String) object).toCharArray() : (char[]) object; - return decodeHex(charArray); - } catch (ClassCastException e) { - throw new DecoderException(e.getMessage()); - } - } - - /** - * Converts an array of bytes into an array of bytes for the characters representing the - * hexidecimal values of each byte in order. The returned array will be - * double the length of the passed array, as it takes two characters to - * represent any given byte. - * - * @param array a byte[] to convert to Hex characters - * @return A byte[] containing the bytes of the hexidecimal characters - * @see #encodeHex(byte[]) - */ - public byte[] encode(byte[] array) { - return new String(encodeHex(array)).getBytes(); - } - - /** - * Converts a String or an array of bytes into an array of characters representing the - * hexidecimal values of each byte in order. 
The returned array will be - * double the length of the passed String or array, as it takes two characters to - * represent any given byte. - * - * @param object a String, or byte[] to convert to Hex characters - * @return A char[] containing hexidecimal characters - * @throws EncoderException Thrown if the given object is not a String or byte[] - * @see #encodeHex(byte[]) - */ - public Object encode(Object object) throws EncoderException { - try { - byte[] byteArray = object instanceof String ? ((String) object).getBytes() : (byte[]) object; - return encodeHex(byteArray); - } catch (ClassCastException e) { - throw new EncoderException(e.getMessage()); - } - } - -} - diff --git a/src/org/apache/commons/codec/binary/package.html b/src/org/apache/commons/codec/binary/package.html deleted file mode 100644 index 844d918..0000000 --- a/src/org/apache/commons/codec/binary/package.html +++ /dev/null @@ -1,20 +0,0 @@ -<!-- -Copyright 2003-2004 The Apache Software Foundation. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. ---> -<html> - <body> - Base64, Binary, and Hexadecimal String encoding and decoding. - </body> -</html> diff --git a/src/org/apache/commons/codec/language/DoubleMetaphone.java b/src/org/apache/commons/codec/language/DoubleMetaphone.java deleted file mode 100644 index 1cad991..0000000 --- a/src/org/apache/commons/codec/language/DoubleMetaphone.java +++ /dev/null @@ -1,1103 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec.language; - -import org.apache.commons.codec.EncoderException; -import org.apache.commons.codec.StringEncoder; - -/** - * Encodes a string into a double metaphone value. - * This Implementation is based on the algorithm by <CITE>Lawrence Philips</CITE>. - * <ul> - * <li>Original Article: <a - * href="http://www.cuj.com/documents/s=8038/cuj0006philips/"> - * http://www.cuj.com/documents/s=8038/cuj0006philips/</a></li> - * <li>Original Source Code: <a href="ftp://ftp.cuj.com/pub/2000/1806/philips.zip"> - * ftp://ftp.cuj.com/pub/2000/1806/philips.zip</a></li> - * </ul> - * - * @author Apache Software Foundation - * @version $Id: DoubleMetaphone.java,v 1.24 2004/06/05 18:32:04 ggregory Exp $ - */ -public class DoubleMetaphone implements StringEncoder { - - /** - * "Vowels" to test for - */ - private static final String VOWELS = "AEIOUY"; - - /** - * Prefixes when present which are not pronounced - */ - private static final String[] SILENT_START = - { "GN", "KN", "PN", "WR", "PS" }; - private static final String[] L_R_N_M_B_H_F_V_W_SPACE = - { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " }; - private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER = - { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" }; - private static final String[] L_T_K_S_N_M_B_Z = - { "L", "T", "K", "S", "N", "M", "B", "Z" }; - - /** - * Maximum length of an encoding, default 
is 4 - */ - protected int maxCodeLen = 4; - - /** - * Creates an instance of this DoubleMetaphone encoder - */ - public DoubleMetaphone() { - super(); - } - - /** - * Encode a value with Double Metaphone - * - * @param value String to encode - * @return an encoded string - */ - public String doubleMetaphone(String value) { - return doubleMetaphone(value, false); - } - - /** - * Encode a value with Double Metaphone, optionally using the alternate - * encoding. - * - * @param value String to encode - * @param alternate use alternate encode - * @return an encoded string - */ - public String doubleMetaphone(String value, boolean alternate) { - value = cleanInput(value); - if (value == null) { - return null; - } - - boolean slavoGermanic = isSlavoGermanic(value); - int index = isSilentStart(value) ? 1 : 0; - - DoubleMetaphoneResult result = new DoubleMetaphoneResult(this.getMaxCodeLen()); - - while (!result.isComplete() && index <= value.length() - 1) { - switch (value.charAt(index)) { - case 'A': - case 'E': - case 'I': - case 'O': - case 'U': - case 'Y': - index = handleAEIOUY(value, result, index); - break; - case 'B': - result.append('P'); - index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1; - break; - case '\u00C7': - // A C with a Cedilla - result.append('S'); - index++; - break; - case 'C': - index = handleC(value, result, index); - break; - case 'D': - index = handleD(value, result, index); - break; - case 'F': - result.append('F'); - index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1; - break; - case 'G': - index = handleG(value, result, index, slavoGermanic); - break; - case 'H': - index = handleH(value, result, index); - break; - case 'J': - index = handleJ(value, result, index, slavoGermanic); - break; - case 'K': - result.append('K'); - index = charAt(value, index + 1) == 'K' ? 
index + 2 : index + 1; - break; - case 'L': - index = handleL(value, result, index); - break; - case 'M': - result.append('M'); - index = conditionM0(value, index) ? index + 2 : index + 1; - break; - case 'N': - result.append('N'); - index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1; - break; - case '\u00D1': - // N with a tilde (spanish ene) - result.append('N'); - index++; - break; - case 'P': - index = handleP(value, result, index); - break; - case 'Q': - result.append('K'); - index = charAt(value, index + 1) == 'Q' ? index + 2 : index + 1; - break; - case 'R': - index = handleR(value, result, index, slavoGermanic); - break; - case 'S': - index = handleS(value, result, index, slavoGermanic); - break; - case 'T': - index = handleT(value, result, index); - break; - case 'V': - result.append('F'); - index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1; - break; - case 'W': - index = handleW(value, result, index); - break; - case 'X': - index = handleX(value, result, index); - break; - case 'Z': - index = handleZ(value, result, index, slavoGermanic); - break; - default: - index++; - break; - } - } - - return alternate ? result.getAlternate() : result.getPrimary(); - } - - /** - * Encode the value using DoubleMetaphone. It will only work if - * <code>obj</code> is a <code>String</code> (like <code>Metaphone</code>). - * - * @param obj Object to encode (should be of type String) - * @return An encoded Object (will be of type String) - * @throws EncoderException encode parameter is not of type String - */ - public Object encode(Object obj) throws EncoderException { - if (!(obj instanceof String)) { - throw new EncoderException("DoubleMetaphone encode parameter is not of type String"); - } - return doubleMetaphone((String) obj); - } - - /** - * Encode the value using DoubleMetaphone. 
- * - * @param value String to encode - * @return An encoded String - */ - public String encode(String value) { - return doubleMetaphone(value); - } - - /** - * Check if the Double Metaphone values of two <code>String</code> values - * are equal. - * - * @param value1 The left-hand side of the encoded {@link String#equals(Object)}. - * @param value2 The right-hand side of the encoded {@link String#equals(Object)}. - * @return <code>true</code> if the encoded <code>String</code>s are equal; - * <code>false</code> otherwise. - * @see #isDoubleMetaphoneEqual(String,String,boolean) - */ - public boolean isDoubleMetaphoneEqual(String value1, String value2) { - return isDoubleMetaphoneEqual(value1, value2, false); - } - - /** - * Check if the Double Metaphone values of two <code>String</code> values - * are equal, optionally using the alternate value. - * - * @param value1 The left-hand side of the encoded {@link String#equals(Object)}. - * @param value2 The right-hand side of the encoded {@link String#equals(Object)}. - * @param alternate use the alternate value if <code>true</code>. - * @return <code>true</code> if the encoded <code>String</code>s are equal; - * <code>false</code> otherwise. - */ - public boolean isDoubleMetaphoneEqual(String value1, - String value2, - boolean alternate) { - return doubleMetaphone(value1, alternate).equals(doubleMetaphone - (value2, alternate)); - } - - /** - * Returns the maxCodeLen. - * @return int - */ - public int getMaxCodeLen() { - return this.maxCodeLen; - } - - /** - * Sets the maxCodeLen. 
- * @param maxCodeLen The maxCodeLen to set - */ - public void setMaxCodeLen(int maxCodeLen) { - this.maxCodeLen = maxCodeLen; - } - - //-- BEGIN HANDLERS --// - - /** - * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases - */ - private int handleAEIOUY(String value, DoubleMetaphoneResult result, int - index) { - if (index == 0) { - result.append('A'); - } - return index + 1; - } - - /** - * Handles 'C' cases - */ - private int handleC(String value, - DoubleMetaphoneResult result, - int index) { - if (conditionC0(value, index)) { // very confusing, moved out - result.append('K'); - index += 2; - } else if (index == 0 && contains(value, index, 6, "CAESAR")) { - result.append('S'); - index += 2; - } else if (contains(value, index, 2, "CH")) { - index = handleCH(value, result, index); - } else if (contains(value, index, 2, "CZ") && - !contains(value, index - 2, 4, "WICZ")) { - //-- "Czerny" --// - result.append('S', 'X'); - index += 2; - } else if (contains(value, index + 1, 3, "CIA")) { - //-- "focaccia" --// - result.append('X'); - index += 3; - } else if (contains(value, index, 2, "CC") && - !(index == 1 && charAt(value, 0) == 'M')) { - //-- double "cc" but not "McClelland" --// - return handleCC(value, result, index); - } else if (contains(value, index, 2, "CK", "CG", "CQ")) { - result.append('K'); - index += 2; - } else if (contains(value, index, 2, "CI", "CE", "CY")) { - //-- Italian vs. 
English --// - if (contains(value, index, 3, "CIO", "CIE", "CIA")) { - result.append('S', 'X'); - } else { - result.append('S'); - } - index += 2; - } else { - result.append('K'); - if (contains(value, index + 1, 2, " C", " Q", " G")) { - //-- Mac Caffrey, Mac Gregor --// - index += 3; - } else if (contains(value, index + 1, 1, "C", "K", "Q") && - !contains(value, index + 1, 2, "CE", "CI")) { - index += 2; - } else { - index++; - } - } - - return index; - } - - /** - * Handles 'CC' cases - */ - private int handleCC(String value, - DoubleMetaphoneResult result, - int index) { - if (contains(value, index + 2, 1, "I", "E", "H") && - !contains(value, index + 2, 2, "HU")) { - //-- "bellocchio" but not "bacchus" --// - if ((index == 1 && charAt(value, index - 1) == 'A') || - contains(value, index - 1, 5, "UCCEE", "UCCES")) { - //-- "accident", "accede", "succeed" --// - result.append("KS"); - } else { - //-- "bacci", "bertucci", other Italian --// - result.append('X'); - } - index += 3; - } else { // Pierce's rule - result.append('K'); - index += 2; - } - - return index; - } - - /** - * Handles 'CH' cases - */ - private int handleCH(String value, - DoubleMetaphoneResult result, - int index) { - if (index > 0 && contains(value, index, 4, "CHAE")) { // Michael - result.append('K', 'X'); - return index + 2; - } else if (conditionCH0(value, index)) { - //-- Greek roots ("chemistry", "chorus", etc.) 
--// - result.append('K'); - return index + 2; - } else if (conditionCH1(value, index)) { - //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --// - result.append('K'); - return index + 2; - } else { - if (index > 0) { - if (contains(value, 0, 2, "MC")) { - result.append('K'); - } else { - result.append('X', 'K'); - } - } else { - result.append('X'); - } - return index + 2; - } - } - - /** - * Handles 'D' cases - */ - private int handleD(String value, - DoubleMetaphoneResult result, - int index) { - if (contains(value, index, 2, "DG")) { - //-- "Edge" --// - if (contains(value, index + 2, 1, "I", "E", "Y")) { - result.append('J'); - index += 3; - //-- "Edgar" --// - } else { - result.append("TK"); - index += 2; - } - } else if (contains(value, index, 2, "DT", "DD")) { - result.append('T'); - index += 2; - } else { - result.append('T'); - index++; - } - return index; - } - - /** - * Handles 'G' cases - */ - private int handleG(String value, - DoubleMetaphoneResult result, - int index, - boolean slavoGermanic) { - if (charAt(value, index + 1) == 'H') { - index = handleGH(value, result, index); - } else if (charAt(value, index + 1) == 'N') { - if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) { - result.append("KN", "N"); - } else if (!contains(value, index + 2, 2, "EY") && - charAt(value, index + 1) != 'Y' && !slavoGermanic) { - result.append("N", "KN"); - } else { - result.append("KN"); - } - index = index + 2; - } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) { - result.append("KL", "L"); - index += 2; - } else if (index == 0 && (charAt(value, index + 1) == 'Y' || contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) { - //-- -ges-, -gep-, -gel-, -gie- at beginning --// - result.append('K', 'J'); - index += 2; - } else if ((contains(value, index + 1, 2, "ER") || - charAt(value, index + 1) == 'Y') && - !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") && - !contains(value, index - 1, 1, "E", "I") && - 
!contains(value, index - 1, 3, "RGY", "OGY")) { - //-- -ger-, -gy- --// - result.append('K', 'J'); - index += 2; - } else if (contains(value, index + 1, 1, "E", "I", "Y") || - contains(value, index - 1, 4, "AGGI", "OGGI")) { - //-- Italian "biaggi" --// - if ((contains(value, 0 ,4, "VAN ", "VON ") || contains(value, 0, 3, "SCH")) || contains(value, index + 1, 2, "ET")) { - //-- obvious germanic --// - result.append('K'); - } else if (contains(value, index + 1, 4, "IER")) { - result.append('J'); - } else { - result.append('J', 'K'); - } - index += 2; - } else if (charAt(value, index + 1) == 'G') { - index += 2; - result.append('K'); - } else { - index++; - result.append('K'); - } - return index; - } - - /** - * Handles 'GH' cases - */ - private int handleGH(String value, - DoubleMetaphoneResult result, - int index) { - if (index > 0 && !isVowel(charAt(value, index - 1))) { - result.append('K'); - index += 2; - } else if (index == 0) { - if (charAt(value, index + 2) == 'I') { - result.append('J'); - } else { - result.append('K'); - } - index += 2; - } else if ((index > 1 && contains(value, index - 2, 1, "B", "H", "D")) || - (index > 2 && contains(value, index - 3, 1, "B", "H", "D")) || - (index > 3 && contains(value, index - 4, 1, "B", "H"))) { - //-- Parker's rule (with some further refinements) - "hugh" - index += 2; - } else { - if (index > 2 && charAt(value, index - 1) == 'U' && - contains(value, index - 3, 1, "C", "G", "L", "R", "T")) { - //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough" - result.append('F'); - } else if (index > 0 && charAt(value, index - 1) != 'I') { - result.append('K'); - } - index += 2; - } - return index; - } - - /** - * Handles 'H' cases - */ - private int handleH(String value, - DoubleMetaphoneResult result, - int index) { - //-- only keep if first & before vowel or between 2 vowels --// - if ((index == 0 || isVowel(charAt(value, index - 1))) && - isVowel(charAt(value, index + 1))) { - result.append('H'); - index += 2; - 
//-- also takes car of "HH" --// - } else { - index++; - } - return index; - } - - /** - * Handles 'J' cases - */ - private int handleJ(String value, DoubleMetaphoneResult result, int index, - boolean slavoGermanic) { - if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) { - //-- obvious Spanish, "Jose", "San Jacinto" --// - if ((index == 0 && (charAt(value, index + 4) == ' ') || - value.length() == 4) || contains(value, 0, 4, "SAN ")) { - result.append('H'); - } else { - result.append('J', 'H'); - } - index++; - } else { - if (index == 0 && !contains(value, index, 4, "JOSE")) { - result.append('J', 'A'); - } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic && - (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) { - result.append('J', 'H'); - } else if (index == value.length() - 1) { - result.append('J', ' '); - } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) && !contains(value, index - 1, 1, "S", "K", "L")) { - result.append('J'); - } - - if (charAt(value, index + 1) == 'J') { - index += 2; - } else { - index++; - } - } - return index; - } - - /** - * Handles 'L' cases - */ - private int handleL(String value, - DoubleMetaphoneResult result, - int index) { - result.append('L'); - if (charAt(value, index + 1) == 'L') { - if (conditionL0(value, index)) { - result.appendAlternate(' '); - } - index += 2; - } else { - index++; - } - return index; - } - - /** - * Handles 'P' cases - */ - private int handleP(String value, - DoubleMetaphoneResult result, - int index) { - if (charAt(value, index + 1) == 'H') { - result.append('F'); - index += 2; - } else { - result.append('P'); - index = contains(value, index + 1, 1, "P", "B") ? 
index + 2 : index + 1; - } - return index; - } - - /** - * Handles 'R' cases - */ - private int handleR(String value, - DoubleMetaphoneResult result, - int index, - boolean slavoGermanic) { - if (index == value.length() - 1 && !slavoGermanic && - contains(value, index - 2, 2, "IE") && - !contains(value, index - 4, 2, "ME", "MA")) { - result.appendAlternate('R'); - } else { - result.append('R'); - } - return charAt(value, index + 1) == 'R' ? index + 2 : index + 1; - } - - /** - * Handles 'S' cases - */ - private int handleS(String value, - DoubleMetaphoneResult result, - int index, - boolean slavoGermanic) { - if (contains(value, index - 1, 3, "ISL", "YSL")) { - //-- special cases "island", "isle", "carlisle", "carlysle" --// - index++; - } else if (index == 0 && contains(value, index, 5, "SUGAR")) { - //-- special case "sugar-" --// - result.append('X', 'S'); - index++; - } else if (contains(value, index, 2, "SH")) { - if (contains(value, index + 1, 4, - "HEIM", "HOEK", "HOLM", "HOLZ")) { - //-- germanic --// - result.append('S'); - } else { - result.append('X'); - } - index += 2; - } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) { - //-- Italian and Armenian --// - if (slavoGermanic) { - result.append('S'); - } else { - result.append('S', 'X'); - } - index += 3; - } else if ((index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W")) || contains(value, index + 1, 1, "Z")) { - //-- german & anglicisations, e.g. "smith" match "schmidt" // - // "snider" match "schneider" --// - //-- also, -sz- in slavic language altho in hungarian it // - // is pronounced "s" --// - result.append('S', 'X'); - index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1; - } else if (contains(value, index, 2, "SC")) { - index = handleSC(value, result, index); - } else { - if (index == value.length() - 1 && contains(value, index - 2, - 2, "AI", "OI")){ - //-- french e.g. 
"resnais", "artois" --// - result.appendAlternate('S'); - } else { - result.append('S'); - } - index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1; - } - return index; - } - - /** - * Handles 'SC' cases - */ - private int handleSC(String value, - DoubleMetaphoneResult result, - int index) { - if (charAt(value, index + 2) == 'H') { - //-- Schlesinger's rule --// - if (contains(value, index + 3, - 2, "OO", "ER", "EN", "UY", "ED", "EM")) { - //-- Dutch origin, e.g. "school", "schooner" --// - if (contains(value, index + 3, 2, "ER", "EN")) { - //-- "schermerhorn", "schenker" --// - result.append("X", "SK"); - } else { - result.append("SK"); - } - } else { - if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') { - result.append('X', 'S'); - } else { - result.append('X'); - } - } - } else if (contains(value, index + 2, 1, "I", "E", "Y")) { - result.append('S'); - } else { - result.append("SK"); - } - return index + 3; - } - - /** - * Handles 'T' cases - */ - private int handleT(String value, - DoubleMetaphoneResult result, - int index) { - if (contains(value, index, 4, "TION")) { - result.append('X'); - index += 3; - } else if (contains(value, index, 3, "TIA", "TCH")) { - result.append('X'); - index += 3; - } else if (contains(value, index, 2, "TH") || contains(value, index, - 3, "TTH")) { - if (contains(value, index + 2, 2, "OM", "AM") || - //-- special case "thomas", "thames" or germanic --// - contains(value, 0, 4, "VAN ", "VON ") || - contains(value, 0, 3, "SCH")) { - result.append('T'); - } else { - result.append('0', 'T'); - } - index += 2; - } else { - result.append('T'); - index = contains(value, index + 1, 1, "T", "D") ? 
index + 2 : index + 1; - } - return index; - } - - /** - * Handles 'W' cases - */ - private int handleW(String value, - DoubleMetaphoneResult result, - int index) { - if (contains(value, index, 2, "WR")) { - //-- can also be in middle of word --// - result.append('R'); - index += 2; - } else { - if (index == 0 && (isVowel(charAt(value, index + 1)) || - contains(value, index, 2, "WH"))) { - if (isVowel(charAt(value, index + 1))) { - //-- Wasserman should match Vasserman --// - result.append('A', 'F'); - } else { - //-- need Uomo to match Womo --// - result.append('A'); - } - index++; - } else if ((index == value.length() - 1 && isVowel(charAt(value, index - 1))) || - contains(value, index - 1, - 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") || - contains(value, 0, 3, "SCH")) { - //-- Arnow should match Arnoff --// - result.appendAlternate('F'); - index++; - } else if (contains(value, index, 4, "WICZ", "WITZ")) { - //-- Polish e.g. "filipowicz" --// - result.append("TS", "FX"); - index += 4; - } else { - index++; - } - } - return index; - } - - /** - * Handles 'X' cases - */ - private int handleX(String value, - DoubleMetaphoneResult result, - int index) { - if (index == 0) { - result.append('S'); - index++; - } else { - if (!((index == value.length() - 1) && - (contains(value, index - 3, 3, "IAU", "EAU") || - contains(value, index - 2, 2, "AU", "OU")))) { - //-- French e.g. breaux --// - result.append("KS"); - } - index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1; - } - return index; - } - - /** - * Handles 'Z' cases - */ - private int handleZ(String value, DoubleMetaphoneResult result, int index, - boolean slavoGermanic) { - if (charAt(value, index + 1) == 'H') { - //-- Chinese pinyin e.g. 
"zhao" or Angelina "Zhang" --// - result.append('J'); - index += 2; - } else { - if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") || (slavoGermanic && (index > 0 && charAt(value, index - 1) != 'T'))) { - result.append("S", "TS"); - } else { - result.append('S'); - } - index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1; - } - return index; - } - - //-- BEGIN CONDITIONS --// - - /** - * Complex condition 0 for 'C' - */ - private boolean conditionC0(String value, int index) { - if (contains(value, index, 4, "CHIA")) { - return true; - } else if (index <= 1) { - return false; - } else if (isVowel(charAt(value, index - 2))) { - return false; - } else if (!contains(value, index - 1, 3, "ACH")) { - return false; - } else { - char c = charAt(value, index + 2); - return (c != 'I' && c != 'E') - || contains(value, index - 2, 6, "BACHER", "MACHER"); - } - } - - /** - * Complex condition 0 for 'CH' - */ - private boolean conditionCH0(String value, int index) { - if (index != 0) { - return false; - } else if (!contains(value, index + 1, 5, "HARAC", "HARIS") && - !contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) { - return false; - } else if (contains(value, 0, 5, "CHORE")) { - return false; - } else { - return true; - } - } - - /** - * Complex condition 1 for 'CH' - */ - private boolean conditionCH1(String value, int index) { - return ((contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0, - 3, "SCH")) || - contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") || - contains(value, index + 2, 1, "T", "S") || - ((contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) && - (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1))); - } - - /** - * Complex condition 0 for 'L' - */ - private boolean conditionL0(String value, int index) { - if (index == value.length() - 3 && - contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) { - return true; - } else if ((contains(value, index - 1, 2, "AS", "OS") 
|| - contains(value, value.length() - 1, 1, "A", "O")) && - contains(value, index - 1, 4, "ALLE")) { - return true; - } else { - return false; - } - } - - /** - * Complex condition 0 for 'M' - */ - private boolean conditionM0(String value, int index) { - if (charAt(value, index + 1) == 'M') { - return true; - } - return contains(value, index - 1, 3, "UMB") - && ((index + 1) == value.length() - 1 || contains(value, - index + 2, 2, "ER")); - } - - //-- BEGIN HELPER FUNCTIONS --// - - /** - * Determines whether or not a value is of slavo-germanic orgin. A value is - * of slavo-germanic origin if it contians any of 'W', 'K', 'CZ', or 'WITZ'. - */ - private boolean isSlavoGermanic(String value) { - return value.indexOf('W') > -1 || value.indexOf('K') > -1 || - value.indexOf("CZ") > -1 || value.indexOf("WITZ") > -1; - } - - /** - * Determines whether or not a character is a vowel or not - */ - private boolean isVowel(char ch) { - return VOWELS.indexOf(ch) != -1; - } - - /** - * Determines whether or not the value starts with a silent letter. It will - * return <code>true</code> if the value starts with any of 'GN', 'KN', - * 'PN', 'WR' or 'PS'. 
- */ - private boolean isSilentStart(String value) { - boolean result = false; - for (int i = 0; i < SILENT_START.length; i++) { - if (value.startsWith(SILENT_START[i])) { - result = true; - break; - } - } - return result; - } - - /** - * Cleans the input - */ - private String cleanInput(String input) { - if (input == null) { - return null; - } - input = input.trim(); - if (input.length() == 0) { - return null; - } - return input.toUpperCase(); - } - - /** - * Gets the character at index <code>index</code> if available, otherwise - * it returns <code>Character.MIN_VALUE</code> so that there is some sort - * of a default - */ - protected char charAt(String value, int index) { - if (index < 0 || index >= value.length()) { - return Character.MIN_VALUE; - } - return value.charAt(index); - } - - /** - * Shortcut method with 1 criteria - */ - private static boolean contains(String value, int start, int length, - String criteria) { - return contains(value, start, length, - new String[] { criteria }); - } - - /** - * Shortcut method with 2 criteria - */ - private static boolean contains(String value, int start, int length, - String criteria1, String criteria2) { - return contains(value, start, length, - new String[] { criteria1, criteria2 }); - } - - /** - * Shortcut method with 3 criteria - */ - private static boolean contains(String value, int start, int length, - String criteria1, String criteria2, - String criteria3) { - return contains(value, start, length, - new String[] { criteria1, criteria2, criteria3 }); - } - - /** - * Shortcut method with 4 criteria - */ - private static boolean contains(String value, int start, int length, - String criteria1, String criteria2, - String criteria3, String criteria4) { - return contains(value, start, length, - new String[] { criteria1, criteria2, criteria3, - criteria4 }); - } - - /** - * Shortcut method with 5 criteria - */ - private static boolean contains(String value, int start, int length, - String criteria1, String 
criteria2, - String criteria3, String criteria4, - String criteria5) { - return contains(value, start, length, - new String[] { criteria1, criteria2, criteria3, - criteria4, criteria5 }); - } - - /** - * Shortcut method with 6 criteria - */ - private static boolean contains(String value, int start, int length, - String criteria1, String criteria2, - String criteria3, String criteria4, - String criteria5, String criteria6) { - return contains(value, start, length, - new String[] { criteria1, criteria2, criteria3, - criteria4, criteria5, criteria6 }); - } - - /** - * Determines whether <code>value</code> contains any of the criteria - starting - * at index <code>start</code> and matching up to length <code>length</code> - */ - protected static boolean contains(String value, int start, int length, - String[] criteria) { - boolean result = false; - if (start >= 0 && start + length <= value.length()) { - String target = value.substring(start, start + length); - - for (int i = 0; i < criteria.length; i++) { - if (target.equals(criteria[i])) { - result = true; - break; - } - } - } - return result; - } - - //-- BEGIN INNER CLASSES --// - - /** - * Inner class for storing results, since there is the optional alternate - * encoding. 
- */ - public class DoubleMetaphoneResult { - - private StringBuffer primary = new StringBuffer(getMaxCodeLen()); - private StringBuffer alternate = new StringBuffer(getMaxCodeLen()); - private int maxLength; - - public DoubleMetaphoneResult(int maxLength) { - this.maxLength = maxLength; - } - - public void append(char value) { - appendPrimary(value); - appendAlternate(value); - } - - public void append(char primary, char alternate) { - appendPrimary(primary); - appendAlternate(alternate); - } - - public void appendPrimary(char value) { - if (this.primary.length() < this.maxLength) { - this.primary.append(value); - } - } - - public void appendAlternate(char value) { - if (this.alternate.length() < this.maxLength) { - this.alternate.append(value); - } - } - - public void append(String value) { - appendPrimary(value); - appendAlternate(value); - } - - public void append(String primary, String alternate) { - appendPrimary(primary); - appendAlternate(alternate); - } - - public void appendPrimary(String value) { - int addChars = this.maxLength - this.primary.length(); - if (value.length() <= addChars) { - this.primary.append(value); - } else { - this.primary.append(value.substring(0, addChars)); - } - } - - public void appendAlternate(String value) { - int addChars = this.maxLength - this.alternate.length(); - if (value.length() <= addChars) { - this.alternate.append(value); - } else { - this.alternate.append(value.substring(0, addChars)); - } - } - - public String getPrimary() { - return this.primary.toString(); - } - - public String getAlternate() { - return this.alternate.toString(); - } - - public boolean isComplete() { - return this.primary.length() >= this.maxLength && - this.alternate.length() >= this.maxLength; - } - } -} diff --git a/src/org/apache/commons/codec/language/Metaphone.java b/src/org/apache/commons/codec/language/Metaphone.java deleted file mode 100644 index dce2c72..0000000 --- a/src/org/apache/commons/codec/language/Metaphone.java +++ /dev/null @@ 
-1,399 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec.language; - -import org.apache.commons.codec.EncoderException; -import org.apache.commons.codec.StringEncoder; - -/** - * Encodes a string into a metaphone value. - * <p> - * Initial Java implementation by <CITE>William B. Brogden. December, 1997</CITE>. - * Permission given by <CITE>wbrogden</CITE> for code to be used anywhere. - * </p> - * <p> - * <CITE>Hanging on the Metaphone</CITE> by <CITE>Lawrence Philips</CITE> in <CITE>Computer Language of Dec. 1990, p - * 39.</CITE> - * </p> - * - * @author Apache Software Foundation - * @version $Id: Metaphone.java,v 1.20 2004/06/05 18:32:04 ggregory Exp $ - */ -public class Metaphone implements StringEncoder { - - /** - * Five values in the English language - */ - private String vowels = "AEIOU" ; - - /** - * Variable used in Metaphone algorithm - */ - private String frontv = "EIY" ; - - /** - * Variable used in Metaphone algorithm - */ - private String varson = "CSPTG" ; - - /** - * The max code length for metaphone is 4 - */ - private int maxCodeLen = 4 ; - - /** - * Creates an instance of the Metaphone encoder - */ - public Metaphone() { - super(); - } - - /** - * Find the metaphone value of a String. This is similar to the - * soundex algorithm, but better at finding similar sounding words. - * All input is converted to upper case. 
- * Limitations: Input format is expected to be a single ASCII word - * with only characters in the A - Z range, no punctuation or numbers. - * - * @param txt String to find the metaphone code for - * @return A metaphone code corresponding to the String supplied - */ - public String metaphone(String txt) { - boolean hard = false ; - if ((txt == null) || (txt.length() == 0)) { - return "" ; - } - // single character is itself - if (txt.length() == 1) { - return txt.toUpperCase() ; - } - - char[] inwd = txt.toUpperCase().toCharArray() ; - - StringBuffer local = new StringBuffer(40); // manipulate - StringBuffer code = new StringBuffer(10) ; // output - // handle initial 2 characters exceptions - switch(inwd[0]) { - case 'K' : - case 'G' : - case 'P' : /* looking for KN, etc*/ - if (inwd[1] == 'N') { - local.append(inwd, 1, inwd.length - 1); - } else { - local.append(inwd); - } - break; - case 'A': /* looking for AE */ - if (inwd[1] == 'E') { - local.append(inwd, 1, inwd.length - 1); - } else { - local.append(inwd); - } - break; - case 'W' : /* looking for WR or WH */ - if (inwd[1] == 'R') { // WR -> R - local.append(inwd, 1, inwd.length - 1); - break ; - } - if (inwd[1] == 'H') { - local.append(inwd, 1, inwd.length - 1); - local.setCharAt(0, 'W'); // WH -> W - } else { - local.append(inwd); - } - break; - case 'X' : /* initial X becomes S */ - inwd[0] = 'S'; - local.append(inwd); - break ; - default : - local.append(inwd); - } // now local has working string with initials fixed - - int wdsz = local.length(); - int n = 0 ; - - while ((code.length() < this.getMaxCodeLen()) && - (n < wdsz) ) { // max code size of 4 works well - char symb = local.charAt(n) ; - // remove duplicate letters except C - if ((symb != 'C') && (isPreviousChar( local, n, symb )) ) { - n++ ; - } else { // not dup - switch(symb) { - case 'A' : case 'E' : case 'I' : case 'O' : case 'U' : - if (n == 0) { - code.append(symb); - } - break ; // only use vowel if leading char - case 'B' : - if ( 
isPreviousChar(local, n, 'M') && - isLastChar(wdsz, n) ) { // B is silent if word ends in MB - break; - } - code.append(symb); - break; - case 'C' : // lots of C special cases - /* discard if SCI, SCE or SCY */ - if ( isPreviousChar(local, n, 'S') && - !isLastChar(wdsz, n) && - (this.frontv.indexOf(local.charAt(n + 1)) >= 0) ) { - break; - } - if (regionMatch(local, n, "CIA")) { // "CIA" -> X - code.append('X'); - break; - } - if (!isLastChar(wdsz, n) && - (this.frontv.indexOf(local.charAt(n + 1)) >= 0)) { - code.append('S'); - break; // CI,CE,CY -> S - } - if (isPreviousChar(local, n, 'S') && - isNextChar(local, n, 'H') ) { // SCH->sk - code.append('K') ; - break ; - } - if (isNextChar(local, n, 'H')) { // detect CH - if ((n == 0) && - (wdsz >= 3) && - isVowel(local,2) ) { // CH consonant -> K consonant - code.append('K'); - } else { - code.append('X'); // CHvowel -> X - } - } else { - code.append('K'); - } - break ; - case 'D' : - if (!isLastChar(wdsz, n + 1) && - isNextChar(local, n, 'G') && - (this.frontv.indexOf(local.charAt(n + 2)) >= 0)) { // DGE DGI DGY -> J - code.append('J'); n += 2 ; - } else { - code.append('T'); - } - break ; - case 'G' : // GH silent at end or before consonant - if (isLastChar(wdsz, n + 1) && - isNextChar(local, n, 'H')) { - break; - } - if (!isLastChar(wdsz, n + 1) && - isNextChar(local,n,'H') && - !isVowel(local,n+2)) { - break; - } - if ((n > 0) && - ( regionMatch(local, n, "GN") || - regionMatch(local, n, "GNED") ) ) { - break; // silent G - } - if (isPreviousChar(local, n, 'G')) { - hard = true ; - } else { - hard = false ; - } - if (!isLastChar(wdsz, n) && - (this.frontv.indexOf(local.charAt(n + 1)) >= 0) && - (!hard)) { - code.append('J'); - } else { - code.append('K'); - } - break ; - case 'H': - if (isLastChar(wdsz, n)) { - break ; // terminal H - } - if ((n > 0) && - (this.varson.indexOf(local.charAt(n - 1)) >= 0)) { - break; - } - if (isVowel(local,n+1)) { - code.append('H'); // Hvowel - } - break; - case 'F': - case 'J' : 
- case 'L' : - case 'M': - case 'N' : - case 'R' : - code.append(symb); - break; - case 'K' : - if (n > 0) { // not initial - if (!isPreviousChar(local, n, 'C')) { - code.append(symb); - } - } else { - code.append(symb); // initial K - } - break ; - case 'P' : - if (isNextChar(local,n,'H')) { - // PH -> F - code.append('F'); - } else { - code.append(symb); - } - break ; - case 'Q' : - code.append('K'); - break; - case 'S' : - if (regionMatch(local,n,"SH") || - regionMatch(local,n,"SIO") || - regionMatch(local,n,"SIA")) { - code.append('X'); - } else { - code.append('S'); - } - break; - case 'T' : - if (regionMatch(local,n,"TIA") || - regionMatch(local,n,"TIO")) { - code.append('X'); - break; - } - if (regionMatch(local,n,"TCH")) { - // Silent if in "TCH" - break; - } - // substitute numeral 0 for TH (resembles theta after all) - if (regionMatch(local,n,"TH")) { - code.append('0'); - } else { - code.append('T'); - } - break ; - case 'V' : - code.append('F'); break ; - case 'W' : case 'Y' : // silent if not followed by vowel - if (!isLastChar(wdsz,n) && - isVowel(local,n+1)) { - code.append(symb); - } - break ; - case 'X' : - code.append('K'); code.append('S'); - break ; - case 'Z' : - code.append('S'); break ; - } // end switch - n++ ; - } // end else from symb != 'C' - if (code.length() > this.getMaxCodeLen()) { - code.setLength(this.getMaxCodeLen()); - } - } - return code.toString(); - } - - private boolean isVowel(StringBuffer string, int index) { - return (this.vowels.indexOf(string.charAt(index)) >= 0); - } - - private boolean isPreviousChar(StringBuffer string, int index, char c) { - boolean matches = false; - if( index > 0 && - index < string.length() ) { - matches = string.charAt(index - 1) == c; - } - return matches; - } - - private boolean isNextChar(StringBuffer string, int index, char c) { - boolean matches = false; - if( index >= 0 && - index < string.length() - 1 ) { - matches = string.charAt(index + 1) == c; - } - return matches; - } - - private 
boolean regionMatch(StringBuffer string, int index, String test) { - boolean matches = false; - if( index >= 0 && - (index + test.length() - 1) < string.length() ) { - String substring = string.substring( index, index + test.length()); - matches = substring.equals( test ); - } - return matches; - } - - private boolean isLastChar(int wdsz, int n) { - return n + 1 == wdsz; - } - - - /** - * Encodes an Object using the metaphone algorithm. This method - * is provided in order to satisfy the requirements of the - * Encoder interface, and will throw an EncoderException if the - * supplied object is not of type java.lang.String. - * - * @param pObject Object to encode - * @return An object (or type java.lang.String) containing the - * metaphone code which corresponds to the String supplied. - * @throws EncoderException if the parameter supplied is not - * of type java.lang.String - */ - public Object encode(Object pObject) throws EncoderException { - if (!(pObject instanceof java.lang.String)) { - throw new EncoderException("Parameter supplied to Metaphone encode is not of type java.lang.String"); - } - return metaphone((String) pObject); - } - - /** - * Encodes a String using the Metaphone algorithm. - * - * @param pString String object to encode - * @return The metaphone code corresponding to the String supplied - */ - public String encode(String pString) { - return metaphone(pString); - } - - /** - * Tests is the metaphones of two strings are identical. - * - * @param str1 First of two strings to compare - * @param str2 Second of two strings to compare - * @return true if the metaphones of these strings are identical, - * false otherwise. - */ - public boolean isMetaphoneEqual(String str1, String str2) { - return metaphone(str1).equals(metaphone(str2)); - } - - /** - * Returns the maxCodeLen. - * @return int - */ - public int getMaxCodeLen() { return this.maxCodeLen; } - - /** - * Sets the maxCodeLen. 
- * @param maxCodeLen The maxCodeLen to set - */ - public void setMaxCodeLen(int maxCodeLen) { this.maxCodeLen = maxCodeLen; } - -} diff --git a/src/org/apache/commons/codec/language/RefinedSoundex.java b/src/org/apache/commons/codec/language/RefinedSoundex.java deleted file mode 100644 index dbf60fe..0000000 --- a/src/org/apache/commons/codec/language/RefinedSoundex.java +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec.language; - -import org.apache.commons.codec.EncoderException; -import org.apache.commons.codec.StringEncoder; - -/** - * Encodes a string into a Refined Soundex value. A refined soundex code is - * optimized for spell checking words. Soundex method originally developed by - * <CITE>Margaret Odell</CITE> and <CITE>Robert Russell</CITE>. - * - * @author Apache Software Foundation - * @version $Id: RefinedSoundex.java,v 1.21 2004/06/05 18:32:04 ggregory Exp $ - */ -public class RefinedSoundex implements StringEncoder { - - /** - * This static variable contains an instance of the RefinedSoundex using - * the US_ENGLISH mapping. - */ - public static final RefinedSoundex US_ENGLISH = new RefinedSoundex(); - - /** - * RefinedSoundex is *refined* for a number of reasons one being that the - * mappings have been altered. This implementation contains default - * mappings for US English. 
- */ - public static final char[] US_ENGLISH_MAPPING = "01360240043788015936020505".toCharArray(); - - /** - * Every letter of the alphabet is "mapped" to a numerical value. This char - * array holds the values to which each letter is mapped. This - * implementation contains a default map for US_ENGLISH - */ - private char[] soundexMapping; - - /** - * Creates an instance of the RefinedSoundex object using the default US - * English mapping. - */ - public RefinedSoundex() { - this(US_ENGLISH_MAPPING); - } - - /** - * Creates a refined soundex instance using a custom mapping. This - * constructor can be used to customize the mapping, and/or possibly - * provide an internationalized mapping for a non-Western character set. - * - * @param mapping - * Mapping array to use when finding the corresponding code for - * a given character - */ - public RefinedSoundex(char[] mapping) { - this.soundexMapping = mapping; - } - - // BEGIN android-note - // Removed @see reference to SoundexUtils below, since the class isn't - // public. - // END android-note - /** - * Returns the number of characters in the two encoded Strings that are the - * same. This return value ranges from 0 to the length of the shortest - * encoded String: 0 indicates little or no similarity, and 4 out of 4 (for - * example) indicates strong similarity or identical values. For refined - * Soundex, the return value can be greater than 4. - * - * @param s1 - * A String that will be encoded and compared. - * @param s2 - * A String that will be encoded and compared. - * @return The number of characters in the two encoded Strings that are the - * same from 0 to to the length of the shortest encoded String. 
- * - * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> - * MS T-SQL DIFFERENCE</a> - * - * @throws EncoderException - * if an error occurs encoding one of the strings - * @since 1.3 - */ - public int difference(String s1, String s2) throws EncoderException { - return SoundexUtils.difference(this, s1, s2); - } - - /** - * Encodes an Object using the refined soundex algorithm. This method is - * provided in order to satisfy the requirements of the Encoder interface, - * and will throw an EncoderException if the supplied object is not of type - * java.lang.String. - * - * @param pObject - * Object to encode - * @return An object (or type java.lang.String) containing the refined - * soundex code which corresponds to the String supplied. - * @throws EncoderException - * if the parameter supplied is not of type java.lang.String - */ - public Object encode(Object pObject) throws EncoderException { - if (!(pObject instanceof java.lang.String)) { - throw new EncoderException("Parameter supplied to RefinedSoundex encode is not of type java.lang.String"); - } - return soundex((String) pObject); - } - - /** - * Encodes a String using the refined soundex algorithm. - * - * @param pString - * A String object to encode - * @return A Soundex code corresponding to the String supplied - */ - public String encode(String pString) { - return soundex(pString); - } - - /** - * Returns the mapping code for a given character. The mapping codes are - * maintained in an internal char array named soundexMapping, and the - * default values of these mappings are US English. - * - * @param c - * char to get mapping for - * @return A character (really a numeral) to return for the given char - */ - char getMappingCode(char c) { - if (!Character.isLetter(c)) { - return 0; - } - return this.soundexMapping[Character.toUpperCase(c) - 'A']; - } - - /** - * Retreives the Refined Soundex code for a given String object. 
- * - * @param str - * String to encode using the Refined Soundex algorithm - * @return A soundex code for the String supplied - */ - public String soundex(String str) { - if (str == null) { - return null; - } - str = SoundexUtils.clean(str); - if (str.length() == 0) { - return str; - } - - StringBuffer sBuf = new StringBuffer(); - sBuf.append(str.charAt(0)); - - char last, current; - last = '*'; - - for (int i = 0; i < str.length(); i++) { - - current = getMappingCode(str.charAt(i)); - if (current == last) { - continue; - } else if (current != 0) { - sBuf.append(current); - } - - last = current; - - } - - return sBuf.toString(); - } -} diff --git a/src/org/apache/commons/codec/language/Soundex.java b/src/org/apache/commons/codec/language/Soundex.java deleted file mode 100644 index 61ce440..0000000 --- a/src/org/apache/commons/codec/language/Soundex.java +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec.language; - -import org.apache.commons.codec.EncoderException; -import org.apache.commons.codec.StringEncoder; - -/** - * Encodes a string into a Soundex value. Soundex is an encoding used to relate similar names, but can also be used as a - * general purpose scheme to find word with similar phonemes. 
- * - * @author Apache Software Foundation - * @version $Id: Soundex.java,v 1.26 2004/07/07 23:15:24 ggregory Exp $ - */ -public class Soundex implements StringEncoder { - - /** - * An instance of Soundex using the US_ENGLISH_MAPPING mapping. - * - * @see #US_ENGLISH_MAPPING - */ - public static final Soundex US_ENGLISH = new Soundex(); - - /** - * This is a default mapping of the 26 letters used in US English. A value of <code>0</code> for a letter position - * means do not encode. - * <p> - * (This constant is provided as both an implementation convenience and to allow Javadoc to pick - * up the value for the constant values page.) - * </p> - * - * @see #US_ENGLISH_MAPPING - */ - public static final String US_ENGLISH_MAPPING_STRING = "01230120022455012623010202"; - - /** - * This is a default mapping of the 26 letters used in US English. A value of <code>0</code> for a letter position - * means do not encode. - * - * @see Soundex#Soundex(char[]) - */ - public static final char[] US_ENGLISH_MAPPING = US_ENGLISH_MAPPING_STRING.toCharArray(); - - // BEGIN android-note - // Removed @see reference to SoundexUtils below, since the class isn't - // public. - // END android-note - /** - * Encodes the Strings and returns the number of characters in the two encoded Strings that are the same. This - * return value ranges from 0 through 4: 0 indicates little or no similarity, and 4 indicates strong similarity or - * identical values. - * - * @param s1 - * A String that will be encoded and compared. - * @param s2 - * A String that will be encoded and compared. - * @return The number of characters in the two encoded Strings that are the same from 0 to 4. 
- * - * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> MS - * T-SQL DIFFERENCE </a> - * - * @throws EncoderException - * if an error occurs encoding one of the strings - * @since 1.3 - */ - public int difference(String s1, String s2) throws EncoderException { - return SoundexUtils.difference(this, s1, s2); - } - - /** - * The maximum length of a Soundex code - Soundex codes are only four characters by definition. - * - * @deprecated This feature is not needed since the encoding size must be constant. Will be removed in 2.0. - */ - private int maxLength = 4; - - /** - * Every letter of the alphabet is "mapped" to a numerical value. This char array holds the values to which each - * letter is mapped. This implementation contains a default map for US_ENGLISH - */ - private char[] soundexMapping; - - /** - * Creates an instance using US_ENGLISH_MAPPING - * - * @see Soundex#Soundex(char[]) - * @see Soundex#US_ENGLISH_MAPPING - */ - public Soundex() { - this(US_ENGLISH_MAPPING); - } - - /** - * Creates a soundex instance using the given mapping. This constructor can be used to provide an internationalized - * mapping for a non-Western character set. - * - * Every letter of the alphabet is "mapped" to a numerical value. This char array holds the values to which each - * letter is mapped. This implementation contains a default map for US_ENGLISH - * - * @param mapping - * Mapping array to use when finding the corresponding code for a given character - */ - public Soundex(char[] mapping) { - this.setSoundexMapping(mapping); - } - - /** - * Encodes an Object using the soundex algorithm. This method is provided in order to satisfy the requirements of - * the Encoder interface, and will throw an EncoderException if the supplied object is not of type java.lang.String. 
- * - * @param pObject - * Object to encode - * @return An object (or type java.lang.String) containing the soundex code which corresponds to the String - * supplied. - * @throws EncoderException - * if the parameter supplied is not of type java.lang.String - * @throws IllegalArgumentException - * if a character is not mapped - */ - public Object encode(Object pObject) throws EncoderException { - if (!(pObject instanceof String)) { - throw new EncoderException("Parameter supplied to Soundex encode is not of type java.lang.String"); - } - return soundex((String) pObject); - } - - /** - * Encodes a String using the soundex algorithm. - * - * @param pString - * A String object to encode - * @return A Soundex code corresponding to the String supplied - * @throws IllegalArgumentException - * if a character is not mapped - */ - public String encode(String pString) { - return soundex(pString); - } - - /** - * Used internally by the SoundEx algorithm. - * - * Consonants from the same code group separated by W or H are treated as one. - * - * @param str - * the cleaned working string to encode (in upper case). - * @param index - * the character position to encode - * @return Mapping code for a particular character - * @throws IllegalArgumentException - * if the character is not mapped - */ - private char getMappingCode(String str, int index) { - char mappedChar = this.map(str.charAt(index)); - // HW rule check - if (index > 1 && mappedChar != '0') { - char hwChar = str.charAt(index - 1); - if ('H' == hwChar || 'W' == hwChar) { - char preHWChar = str.charAt(index - 2); - char firstCode = this.map(preHWChar); - if (firstCode == mappedChar || 'H' == preHWChar || 'W' == preHWChar) { - return 0; - } - } - } - return mappedChar; - } - - /** - * Returns the maxLength. Standard Soundex - * - * @deprecated This feature is not needed since the encoding size must be constant. Will be removed in 2.0. 
- * @return int - */ - public int getMaxLength() { - return this.maxLength; - } - - /** - * Returns the soundex mapping. - * - * @return soundexMapping. - */ - private char[] getSoundexMapping() { - return this.soundexMapping; - } - - /** - * Maps the given upper-case character to it's Soudex code. - * - * @param ch - * An upper-case character. - * @return A Soundex code. - * @throws IllegalArgumentException - * Thrown if <code>ch</code> is not mapped. - */ - private char map(char ch) { - int index = ch - 'A'; - if (index < 0 || index >= this.getSoundexMapping().length) { - throw new IllegalArgumentException("The character is not mapped: " + ch); - } - return this.getSoundexMapping()[index]; - } - - /** - * Sets the maxLength. - * - * @deprecated This feature is not needed since the encoding size must be constant. Will be removed in 2.0. - * @param maxLength - * The maxLength to set - */ - public void setMaxLength(int maxLength) { - this.maxLength = maxLength; - } - - /** - * Sets the soundexMapping. - * - * @param soundexMapping - * The soundexMapping to set. - */ - private void setSoundexMapping(char[] soundexMapping) { - this.soundexMapping = soundexMapping; - } - - /** - * Retreives the Soundex code for a given String object. 
- * - * @param str - * String to encode using the Soundex algorithm - * @return A soundex code for the String supplied - * @throws IllegalArgumentException - * if a character is not mapped - */ - public String soundex(String str) { - if (str == null) { - return null; - } - str = SoundexUtils.clean(str); - if (str.length() == 0) { - return str; - } - char out[] = {'0', '0', '0', '0'}; - char last, mapped; - int incount = 1, count = 1; - out[0] = str.charAt(0); - last = getMappingCode(str, 0); - while ((incount < str.length()) && (count < out.length)) { - mapped = getMappingCode(str, incount++); - if (mapped != 0) { - if ((mapped != '0') && (mapped != last)) { - out[count++] = mapped; - } - last = mapped; - } - } - return new String(out); - } - -} diff --git a/src/org/apache/commons/codec/language/SoundexUtils.java b/src/org/apache/commons/codec/language/SoundexUtils.java deleted file mode 100644 index 48f2d87..0000000 --- a/src/org/apache/commons/codec/language/SoundexUtils.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec.language; - -import org.apache.commons.codec.EncoderException; -import org.apache.commons.codec.StringEncoder; - -/** - * Utility methods for {@link Soundex} and {@link RefinedSoundex} classes. 
- * - * @author Apache Software Foundation - * @version $Id: SoundexUtils.java,v 1.5 2004/03/17 18:31:35 ggregory Exp $ - * @since 1.3 - */ -final class SoundexUtils { - - /** - * Cleans up the input string before Soundex processing by only returning - * upper case letters. - * - * @param str - * The String to clean. - * @return A clean String. - */ - static String clean(String str) { - if (str == null || str.length() == 0) { - return str; - } - int len = str.length(); - char[] chars = new char[len]; - int count = 0; - for (int i = 0; i < len; i++) { - if (Character.isLetter(str.charAt(i))) { - chars[count++] = str.charAt(i); - } - } - if (count == len) { - return str.toUpperCase(); - } - return new String(chars, 0, count).toUpperCase(); - } - - /** - * Encodes the Strings and returns the number of characters in the two - * encoded Strings that are the same. - * <ul> - * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates - * little or no similarity, and 4 indicates strong similarity or identical - * values.</li> - * <li>For refined Soundex, the return value can be greater than 4.</li> - * </ul> - * - * @param encoder - * The encoder to use to encode the Strings. - * @param s1 - * A String that will be encoded and compared. - * @param s2 - * A String that will be encoded and compared. - * @return The number of characters in the two Soundex encoded Strings that - * are the same. - * - * @see #differenceEncoded(String,String) - * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> - * MS T-SQL DIFFERENCE</a> - * - * @throws EncoderException - * if an error occurs encoding one of the strings - */ - static int difference(StringEncoder encoder, String s1, String s2) throws EncoderException { - return differenceEncoded(encoder.encode(s1), encoder.encode(s2)); - } - - /** - * Returns the number of characters in the two Soundex encoded Strings that - * are the same. 
- * <ul> - * <li>For Soundex, this return value ranges from 0 through 4: 0 indicates - * little or no similarity, and 4 indicates strong similarity or identical - * values.</li> - * <li>For refined Soundex, the return value can be greater than 4.</li> - * </ul> - * - * @param es1 - * An encoded String. - * @param es2 - * An encoded String. - * @return The number of characters in the two Soundex encoded Strings that - * are the same. - * - * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> - * MS T-SQL DIFFERENCE</a> - */ - static int differenceEncoded(String es1, String es2) { - - if (es1 == null || es2 == null) { - return 0; - } - int lengthToMatch = Math.min(es1.length(), es2.length()); - int diff = 0; - for (int i = 0; i < lengthToMatch; i++) { - if (es1.charAt(i) == es2.charAt(i)) { - diff++; - } - } - return diff; - } - -} diff --git a/src/org/apache/commons/codec/language/package.html b/src/org/apache/commons/codec/language/package.html deleted file mode 100644 index fab8e4c..0000000 --- a/src/org/apache/commons/codec/language/package.html +++ /dev/null @@ -1,20 +0,0 @@ -<!-- -Copyright 2003-2004 The Apache Software Foundation. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. ---> -<html> - <body> - Language and phonetic encoders. 
- </body> -</html> diff --git a/src/org/apache/commons/codec/net/BCodec.java b/src/org/apache/commons/codec/net/BCodec.java deleted file mode 100644 index b164100..0000000 --- a/src/org/apache/commons/codec/net/BCodec.java +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec.net; - -import java.io.UnsupportedEncodingException; -import org.apache.commons.codec.DecoderException; -import org.apache.commons.codec.EncoderException; -import org.apache.commons.codec.StringDecoder; -import org.apache.commons.codec.StringEncoder; -import org.apache.commons.codec.binary.Base64; - -/** - * <p> - * Identical to the Base64 encoding defined by <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC - * 1521</a> and allows a character set to be specified. - * </p> - * - * <p> - * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the encoding of non-ASCII - * text in various portions of a RFC 822 [2] message header, in a manner which is unlikely to confuse existing message - * handling software. 
- * </p> - * - * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: Message - * Header Extensions for Non-ASCII Text</a> - * - * @author Apache Software Foundation - * @since 1.3 - * @version $Id: BCodec.java,v 1.5 2004/04/13 22:46:37 ggregory Exp $ - */ -public class BCodec extends RFC1522Codec implements StringEncoder, StringDecoder { - /** - * The default charset used for string decoding and encoding. - */ - private String charset = StringEncodings.UTF8; - - /** - * Default constructor. - */ - public BCodec() { - super(); - } - - /** - * Constructor which allows for the selection of a default charset - * - * @param charset - * the default string charset to use. - * - * @see <a href="http://java.sun.com/j2se/1.3/docs/api/java/lang/package-summary.html#charenc">JRE character - * encoding names</a> - */ - public BCodec(final String charset) { - super(); - this.charset = charset; - } - - protected String getEncoding() { - return "B"; - } - - protected byte[] doEncoding(byte[] bytes) throws EncoderException { - if (bytes == null) { - return null; - } - return Base64.encodeBase64(bytes); - } - - protected byte[] doDecoding(byte[] bytes) throws DecoderException { - if (bytes == null) { - return null; - } - return Base64.decodeBase64(bytes); - } - - /** - * Encodes a string into its Base64 form using the specified charset. Unsafe characters are escaped. - * - * @param value - * string to convert to Base64 form - * @param charset - * the charset for pString - * @return Base64 string - * - * @throws EncoderException - * thrown if a failure condition is encountered during the encoding process. 
- */ - public String encode(final String value, final String charset) throws EncoderException { - if (value == null) { - return null; - } - try { - return encodeText(value, charset); - } catch (UnsupportedEncodingException e) { - throw new EncoderException(e.getMessage()); - } - } - - /** - * Encodes a string into its Base64 form using the default charset. Unsafe characters are escaped. - * - * @param value - * string to convert to Base64 form - * @return Base64 string - * - * @throws EncoderException - * thrown if a failure condition is encountered during the encoding process. - */ - public String encode(String value) throws EncoderException { - if (value == null) { - return null; - } - return encode(value, getDefaultCharset()); - } - - /** - * Decodes a Base64 string into its original form. Escaped characters are converted back to their original - * representation. - * - * @param value - * Base64 string to convert into its original form - * - * @return original string - * - * @throws DecoderException - * A decoder exception is thrown if a failure condition is encountered during the decode process. - */ - public String decode(String value) throws DecoderException { - if (value == null) { - return null; - } - try { - return decodeText(value); - } catch (UnsupportedEncodingException e) { - throw new DecoderException(e.getMessage()); - } - } - - /** - * Encodes an object into its Base64 form using the default charset. Unsafe characters are escaped. - * - * @param value - * object to convert to Base64 form - * @return Base64 object - * - * @throws EncoderException - * thrown if a failure condition is encountered during the encoding process. 
- */ - public Object encode(Object value) throws EncoderException { - if (value == null) { - return null; - } else if (value instanceof String) { - return encode((String) value); - } else { - throw new EncoderException("Objects of type " - + value.getClass().getName() - + " cannot be encoded using BCodec"); - } - } - - /** - * Decodes a Base64 object into its original form. Escaped characters are converted back to their original - * representation. - * - * @param value - * Base64 object to convert into its original form - * - * @return original object - * - * @throws DecoderException - * A decoder exception is thrown if a failure condition is encountered during the decode process. - */ - public Object decode(Object value) throws DecoderException { - if (value == null) { - return null; - } else if (value instanceof String) { - return decode((String) value); - } else { - throw new DecoderException("Objects of type " - + value.getClass().getName() - + " cannot be decoded using BCodec"); - } - } - - /** - * The default charset used for string decoding and encoding. - * - * @return the default string charset. - */ - public String getDefaultCharset() { - return this.charset; - } -} diff --git a/src/org/apache/commons/codec/net/QCodec.java b/src/org/apache/commons/codec/net/QCodec.java deleted file mode 100644 index 5736080..0000000 --- a/src/org/apache/commons/codec/net/QCodec.java +++ /dev/null @@ -1,309 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec.net; - -import java.io.UnsupportedEncodingException; -import java.util.BitSet; - -import org.apache.commons.codec.DecoderException; -import org.apache.commons.codec.EncoderException; -import org.apache.commons.codec.StringDecoder; -import org.apache.commons.codec.StringEncoder; - -/** - * <p> - * Similar to the Quoted-Printable content-transfer-encoding defined in <a - * href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a> and designed to allow text containing mostly ASCII - * characters to be decipherable on an ASCII terminal without decoding. - * </p> - * - * <p> - * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the encoding of non-ASCII - * text in various portions of a RFC 822 [2] message header, in a manner which is unlikely to confuse existing message - * handling software. - * </p> - * - * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: Message - * Header Extensions for Non-ASCII Text</a> - * - * @author Apache Software Foundation - * @since 1.3 - * @version $Id: QCodec.java,v 1.6 2004/05/24 00:24:32 ggregory Exp $ - */ -public class QCodec extends RFC1522Codec implements StringEncoder, StringDecoder { - /** - * The default charset used for string decoding and encoding. - */ - private String charset = StringEncodings.UTF8; - - /** - * BitSet of printable characters as defined in RFC 1522. 
- */ - private static final BitSet PRINTABLE_CHARS = new BitSet(256); - // Static initializer for printable chars collection - static { - // alpha characters - PRINTABLE_CHARS.set(' '); - PRINTABLE_CHARS.set('!'); - PRINTABLE_CHARS.set('"'); - PRINTABLE_CHARS.set('#'); - PRINTABLE_CHARS.set('$'); - PRINTABLE_CHARS.set('%'); - PRINTABLE_CHARS.set('&'); - PRINTABLE_CHARS.set('\''); - PRINTABLE_CHARS.set('('); - PRINTABLE_CHARS.set(')'); - PRINTABLE_CHARS.set('*'); - PRINTABLE_CHARS.set('+'); - PRINTABLE_CHARS.set(','); - PRINTABLE_CHARS.set('-'); - PRINTABLE_CHARS.set('.'); - PRINTABLE_CHARS.set('/'); - for (int i = '0'; i <= '9'; i++) { - PRINTABLE_CHARS.set(i); - } - PRINTABLE_CHARS.set(':'); - PRINTABLE_CHARS.set(';'); - PRINTABLE_CHARS.set('<'); - PRINTABLE_CHARS.set('>'); - PRINTABLE_CHARS.set('@'); - for (int i = 'A'; i <= 'Z'; i++) { - PRINTABLE_CHARS.set(i); - } - PRINTABLE_CHARS.set('['); - PRINTABLE_CHARS.set('\\'); - PRINTABLE_CHARS.set(']'); - PRINTABLE_CHARS.set('^'); - PRINTABLE_CHARS.set('`'); - for (int i = 'a'; i <= 'z'; i++) { - PRINTABLE_CHARS.set(i); - } - PRINTABLE_CHARS.set('{'); - PRINTABLE_CHARS.set('|'); - PRINTABLE_CHARS.set('}'); - PRINTABLE_CHARS.set('~'); - } - - private static byte BLANK = 32; - - private static byte UNDERSCORE = 95; - - private boolean encodeBlanks = false; - - /** - * Default constructor. - */ - public QCodec() { - super(); - } - - /** - * Constructor which allows for the selection of a default charset - * - * @param charset - * the default string charset to use. 
- * - * @see <a href="http://java.sun.com/j2se/1.3/docs/api/java/lang/package-summary.html#charenc">JRE character - * encoding names</a> - */ - public QCodec(final String charset) { - super(); - this.charset = charset; - } - - protected String getEncoding() { - return "Q"; - } - - protected byte[] doEncoding(byte[] bytes) throws EncoderException { - if (bytes == null) { - return null; - } - byte[] data = QuotedPrintableCodec.encodeQuotedPrintable(PRINTABLE_CHARS, bytes); - if (this.encodeBlanks) { - for (int i = 0; i < data.length; i++) { - if (data[i] == BLANK) { - data[i] = UNDERSCORE; - } - } - } - return data; - } - - protected byte[] doDecoding(byte[] bytes) throws DecoderException { - if (bytes == null) { - return null; - } - boolean hasUnderscores = false; - for (int i = 0; i < bytes.length; i++) { - if (bytes[i] == UNDERSCORE) { - hasUnderscores = true; - break; - } - } - if (hasUnderscores) { - byte[] tmp = new byte[bytes.length]; - for (int i = 0; i < bytes.length; i++) { - byte b = bytes[i]; - if (b != UNDERSCORE) { - tmp[i] = b; - } else { - tmp[i] = BLANK; - } - } - return QuotedPrintableCodec.decodeQuotedPrintable(tmp); - } - return QuotedPrintableCodec.decodeQuotedPrintable(bytes); - } - - /** - * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped. - * - * @param pString - * string to convert to quoted-printable form - * @param charset - * the charset for pString - * @return quoted-printable string - * - * @throws EncoderException - * thrown if a failure condition is encountered during the encoding process. - */ - public String encode(final String pString, final String charset) throws EncoderException { - if (pString == null) { - return null; - } - try { - return encodeText(pString, charset); - } catch (UnsupportedEncodingException e) { - throw new EncoderException(e.getMessage()); - } - } - - /** - * Encodes a string into its quoted-printable form using the default charset. 
Unsafe characters are escaped. - * - * @param pString - * string to convert to quoted-printable form - * @return quoted-printable string - * - * @throws EncoderException - * thrown if a failure condition is encountered during the encoding process. - */ - public String encode(String pString) throws EncoderException { - if (pString == null) { - return null; - } - return encode(pString, getDefaultCharset()); - } - - /** - * Decodes a quoted-printable string into its original form. Escaped characters are converted back to their original - * representation. - * - * @param pString - * quoted-printable string to convert into its original form - * - * @return original string - * - * @throws DecoderException - * A decoder exception is thrown if a failure condition is encountered during the decode process. - */ - public String decode(String pString) throws DecoderException { - if (pString == null) { - return null; - } - try { - return decodeText(pString); - } catch (UnsupportedEncodingException e) { - throw new DecoderException(e.getMessage()); - } - } - - /** - * Encodes an object into its quoted-printable form using the default charset. Unsafe characters are escaped. - * - * @param pObject - * object to convert to quoted-printable form - * @return quoted-printable object - * - * @throws EncoderException - * thrown if a failure condition is encountered during the encoding process. - */ - public Object encode(Object pObject) throws EncoderException { - if (pObject == null) { - return null; - } else if (pObject instanceof String) { - return encode((String) pObject); - } else { - throw new EncoderException("Objects of type " - + pObject.getClass().getName() - + " cannot be encoded using Q codec"); - } - } - - /** - * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original - * representation. 
- * - * @param pObject - * quoted-printable object to convert into its original form - * - * @return original object - * - * @throws DecoderException - * A decoder exception is thrown if a failure condition is encountered during the decode process. - */ - public Object decode(Object pObject) throws DecoderException { - if (pObject == null) { - return null; - } else if (pObject instanceof String) { - return decode((String) pObject); - } else { - throw new DecoderException("Objects of type " - + pObject.getClass().getName() - + " cannot be decoded using Q codec"); - } - } - - /** - * The default charset used for string decoding and encoding. - * - * @return the default string charset. - */ - public String getDefaultCharset() { - return this.charset; - } - - /** - * Tests if optional tranformation of SPACE characters is to be used - * - * @return <code>true</code> if SPACE characters are to be transformed, <code>false</code> otherwise - */ - public boolean isEncodeBlanks() { - return this.encodeBlanks; - } - - /** - * Defines whether optional tranformation of SPACE characters is to be used - * - * @param b - * <code>true</code> if SPACE characters are to be transformed, <code>false</code> otherwise - */ - public void setEncodeBlanks(boolean b) { - this.encodeBlanks = b; - } -} diff --git a/src/org/apache/commons/codec/net/QuotedPrintableCodec.java b/src/org/apache/commons/codec/net/QuotedPrintableCodec.java deleted file mode 100644 index c2fcd27..0000000 --- a/src/org/apache/commons/codec/net/QuotedPrintableCodec.java +++ /dev/null @@ -1,387 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec.net; - -import java.io.ByteArrayOutputStream; -import java.io.UnsupportedEncodingException; -import java.util.BitSet; -import org.apache.commons.codec.BinaryDecoder; -import org.apache.commons.codec.BinaryEncoder; -import org.apache.commons.codec.DecoderException; -import org.apache.commons.codec.EncoderException; -import org.apache.commons.codec.StringDecoder; -import org.apache.commons.codec.StringEncoder; - -/** - * <p> - * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521 </a>. - * </p> - * <p> - * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to - * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are - * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the - * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable - * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping - * gateway. - * </p> - * - * <p> - * Note: - * </p> - * <p> - * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec - * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the steamable codec - * framework is ready. 
The motivation behind providing the codec in a partial form is that it can already come in handy - * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec. - * </p> - * - * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One: - * Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a> - * - * @author Apache Software Foundation - * @since 1.3 - * @version $Id: QuotedPrintableCodec.java,v 1.7 2004/04/09 22:21:07 ggregory Exp $ - */ -public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder { - /** - * The default charset used for string decoding and encoding. - */ - private String charset = StringEncodings.UTF8; - - /** - * BitSet of printable characters as defined in RFC 1521. - */ - private static final BitSet PRINTABLE_CHARS = new BitSet(256); - - private static byte ESCAPE_CHAR = '='; - - private static byte TAB = 9; - - private static byte SPACE = 32; - // Static initializer for printable chars collection - static { - // alpha characters - for (int i = 33; i <= 60; i++) { - PRINTABLE_CHARS.set(i); - } - for (int i = 62; i <= 126; i++) { - PRINTABLE_CHARS.set(i); - } - PRINTABLE_CHARS.set(TAB); - PRINTABLE_CHARS.set(SPACE); - } - - /** - * Default constructor. - */ - public QuotedPrintableCodec() { - super(); - } - - /** - * Constructor which allows for the selection of a default charset - * - * @param charset - * the default string charset to use. - */ - public QuotedPrintableCodec(String charset) { - super(); - this.charset = charset; - } - - /** - * Encodes byte into its quoted-printable representation. 
- * - * @param b - * byte to encode - * @param buffer - * the buffer to write to - */ - private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) { - buffer.write(ESCAPE_CHAR); - char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16)); - char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16)); - buffer.write(hex1); - buffer.write(hex2); - } - - /** - * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped. - * - * <p> - * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in - * RFC 1521 and is suitable for encoding binary data and unformatted text. - * </p> - * - * @param printable - * bitset of characters deemed quoted-printable - * @param bytes - * array of bytes to be encoded - * @return array of bytes containing quoted-printable data - */ - public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes) { - if (bytes == null) { - return null; - } - if (printable == null) { - printable = PRINTABLE_CHARS; - } - ByteArrayOutputStream buffer = new ByteArrayOutputStream(); - for (int i = 0; i < bytes.length; i++) { - int b = bytes[i]; - if (b < 0) { - b = 256 + b; - } - if (printable.get(b)) { - buffer.write(b); - } else { - encodeQuotedPrintable(b, buffer); - } - } - return buffer.toByteArray(); - } - - /** - * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted - * back to their original representation. - * - * <p> - * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in - * RFC 1521. 
- * </p> - * - * @param bytes - * array of quoted-printable characters - * @return array of original bytes - * @throws DecoderException - * Thrown if quoted-printable decoding is unsuccessful - */ - public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderException { - if (bytes == null) { - return null; - } - ByteArrayOutputStream buffer = new ByteArrayOutputStream(); - for (int i = 0; i < bytes.length; i++) { - int b = bytes[i]; - if (b == ESCAPE_CHAR) { - try { - int u = Character.digit((char) bytes[++i], 16); - int l = Character.digit((char) bytes[++i], 16); - if (u == -1 || l == -1) { - throw new DecoderException("Invalid quoted-printable encoding"); - } - buffer.write((char) ((u << 4) + l)); - } catch (ArrayIndexOutOfBoundsException e) { - throw new DecoderException("Invalid quoted-printable encoding"); - } - } else { - buffer.write(b); - } - } - return buffer.toByteArray(); - } - - /** - * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped. - * - * <p> - * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in - * RFC 1521 and is suitable for encoding binary data and unformatted text. - * </p> - * - * @param bytes - * array of bytes to be encoded - * @return array of bytes containing quoted-printable data - */ - public byte[] encode(byte[] bytes) { - return encodeQuotedPrintable(PRINTABLE_CHARS, bytes); - } - - /** - * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted - * back to their original representation. - * - * <p> - * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in - * RFC 1521. 
- * </p> - * - * @param bytes - * array of quoted-printable characters - * @return array of original bytes - * @throws DecoderException - * Thrown if quoted-printable decoding is unsuccessful - */ - public byte[] decode(byte[] bytes) throws DecoderException { - return decodeQuotedPrintable(bytes); - } - - /** - * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped. - * - * <p> - * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in - * RFC 1521 and is suitable for encoding binary data. - * </p> - * - * @param pString - * string to convert to quoted-printable form - * @return quoted-printable string - * - * @throws EncoderException - * Thrown if quoted-printable encoding is unsuccessful - * - * @see #getDefaultCharset() - */ - public String encode(String pString) throws EncoderException { - if (pString == null) { - return null; - } - try { - return encode(pString, getDefaultCharset()); - } catch (UnsupportedEncodingException e) { - throw new EncoderException(e.getMessage()); - } - } - - /** - * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters - * are converted back to their original representation. - * - * @param pString - * quoted-printable string to convert into its original form - * @param charset - * the original string charset - * @return original string - * @throws DecoderException - * Thrown if quoted-printable decoding is unsuccessful - * @throws UnsupportedEncodingException - * Thrown if charset is not supported - */ - public String decode(String pString, String charset) throws DecoderException, UnsupportedEncodingException { - if (pString == null) { - return null; - } - return new String(decode(pString.getBytes(StringEncodings.US_ASCII)), charset); - } - - /** - * Decodes a quoted-printable string into its original form using the default string charset. 
Escaped characters are - * converted back to their original representation. - * - * @param pString - * quoted-printable string to convert into its original form - * @return original string - * @throws DecoderException - * Thrown if quoted-printable decoding is unsuccessful - * @throws UnsupportedEncodingException - * Thrown if charset is not supported - * @see #getDefaultCharset() - */ - public String decode(String pString) throws DecoderException { - if (pString == null) { - return null; - } - try { - return decode(pString, getDefaultCharset()); - } catch (UnsupportedEncodingException e) { - throw new DecoderException(e.getMessage()); - } - } - - /** - * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped. - * - * @param pObject - * string to convert to a quoted-printable form - * @return quoted-printable object - * @throws EncoderException - * Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is - * unsuccessful - */ - public Object encode(Object pObject) throws EncoderException { - if (pObject == null) { - return null; - } else if (pObject instanceof byte[]) { - return encode((byte[]) pObject); - } else if (pObject instanceof String) { - return encode((String) pObject); - } else { - throw new EncoderException("Objects of type " - + pObject.getClass().getName() - + " cannot be quoted-printable encoded"); - } - } - - /** - * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original - * representation. 
- * - * @param pObject - * quoted-printable object to convert into its original form - * @return original object - * @throws DecoderException - * Thrown if quoted-printable decoding is not applicable to objects of this type if decoding is - * unsuccessful - */ - public Object decode(Object pObject) throws DecoderException { - if (pObject == null) { - return null; - } else if (pObject instanceof byte[]) { - return decode((byte[]) pObject); - } else if (pObject instanceof String) { - return decode((String) pObject); - } else { - throw new DecoderException("Objects of type " - + pObject.getClass().getName() - + " cannot be quoted-printable decoded"); - } - } - - /** - * Returns the default charset used for string decoding and encoding. - * - * @return the default string charset. - */ - public String getDefaultCharset() { - return this.charset; - } - - /** - * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped. - * - * <p> - * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in - * RFC 1521 and is suitable for encoding binary data and unformatted text. - * </p> - * - * @param pString - * string to convert to quoted-printable form - * @param charset - * the charset for pString - * @return quoted-printable string - * - * @throws UnsupportedEncodingException - * Thrown if the charset is not supported - */ - public String encode(String pString, String charset) throws UnsupportedEncodingException { - if (pString == null) { - return null; - } - return new String(encode(pString.getBytes(charset)), StringEncodings.US_ASCII); - } -} diff --git a/src/org/apache/commons/codec/net/RFC1522Codec.java b/src/org/apache/commons/codec/net/RFC1522Codec.java deleted file mode 100644 index 0acf921..0000000 --- a/src/org/apache/commons/codec/net/RFC1522Codec.java +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec.net; - -import java.io.UnsupportedEncodingException; - -import org.apache.commons.codec.DecoderException; -import org.apache.commons.codec.EncoderException; - -/** - * <p> - * Implements methods common to all codecs defined in RFC 1522. - * </p> - * - * <p> - * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> - * describes techniques to allow the encoding of non-ASCII text in - * various portions of a RFC 822 [2] message header, in a manner which - * is unlikely to confuse existing message handling software. - * </p> - * - * @see <a href="http://www.ietf.org/rfc/rfc1522.txt"> - * MIME (Multipurpose Internet Mail Extensions) Part Two: - * Message Header Extensions for Non-ASCII Text</a> - * - * @author Apache Software Foundation - * @since 1.3 - * @version $Id: RFC1522Codec.java,v 1.2 2004/04/09 22:21:43 ggregory Exp $ - */ -abstract class RFC1522Codec { - - /** - * Applies an RFC 1522 compliant encoding scheme to the given string of text with the - * given charset. This method constructs the "encoded-word" header common to all the - * RFC 1522 codecs and then invokes {@link #doEncoding(byte [])} method of a concrete - * class to perform the specific encoding. 
- * - * @param text a string to encode - * @param charset a charset to be used - * - * @return RFC 1522 compliant "encoded-word" - * - * @throws EncoderException thrown if there is an error conidition during the Encoding - * process. - * @throws UnsupportedEncodingException thrown if charset is not supported - * - * @see <a href="http://java.sun.com/j2se/1.3/docs/api/java/lang/package-summary.html#charenc">JRE character - * encoding names</a> - */ - protected String encodeText(final String text, final String charset) - throws EncoderException, UnsupportedEncodingException - { - if (text == null) { - return null; - } - StringBuffer buffer = new StringBuffer(); - buffer.append("=?"); - buffer.append(charset); - buffer.append('?'); - buffer.append(getEncoding()); - buffer.append('?'); - byte [] rawdata = doEncoding(text.getBytes(charset)); - buffer.append(new String(rawdata, StringEncodings.US_ASCII)); - buffer.append("?="); - return buffer.toString(); - } - - /** - * Applies an RFC 1522 compliant decoding scheme to the given string of text. This method - * processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes - * {@link #doEncoding(byte [])} method of a concrete class to perform the specific deconding. - * - * @param text a string to decode - * - * @throws DecoderException thrown if there is an error conidition during the Decoding - * process. 
- * @throws UnsupportedEncodingException thrown if charset specified in the "encoded-word" - * header is not supported - */ - protected String decodeText(final String text) - throws DecoderException, UnsupportedEncodingException - { - if (text == null) { - return null; - } - if ((!text.startsWith("=?")) || (!text.endsWith("?="))) { - throw new DecoderException("RFC 1522 violation: malformed encoded content"); - } - int termnator = text.length() - 2; - int from = 2; - int to = text.indexOf("?", from); - if ((to == -1) || (to == termnator)) { - throw new DecoderException("RFC 1522 violation: charset token not found"); - } - String charset = text.substring(from, to); - if (charset.equals("")) { - throw new DecoderException("RFC 1522 violation: charset not specified"); - } - from = to + 1; - to = text.indexOf("?", from); - if ((to == -1) || (to == termnator)) { - throw new DecoderException("RFC 1522 violation: encoding token not found"); - } - String encoding = text.substring(from, to); - if (!getEncoding().equalsIgnoreCase(encoding)) { - throw new DecoderException("This codec cannot decode " + - encoding + " encoded content"); - } - from = to + 1; - to = text.indexOf("?", from); - byte[] data = text.substring(from, to).getBytes(StringEncodings.US_ASCII); - data = doDecoding(data); - return new String(data, charset); - } - - /** - * Returns the codec name (referred to as encoding in the RFC 1522) - * - * @return name of the codec - */ - protected abstract String getEncoding(); - - /** - * Encodes an array of bytes using the defined encoding scheme - * - * @param bytes Data to be encoded - * - * @return A byte array containing the encoded data - * - * @throws EncoderException thrown if the Encoder encounters a failure condition - * during the encoding process. 
- */ - protected abstract byte[] doEncoding(byte[] bytes) throws EncoderException; - - /** - * Decodes an array of bytes using the defined encoding scheme. - * decodeText passes in the US-ASCII bytes of the encoded-text portion of an "encoded-word". - * - * @param bytes Data to be decoded - * - * @return a byte array that contains decoded data - * - * @throws DecoderException A decoder exception is thrown if a Decoder encounters a - * failure condition during the decode process. - */ - protected abstract byte[] doDecoding(byte[] bytes) throws DecoderException; -} diff --git a/src/org/apache/commons/codec/net/StringEncodings.java b/src/org/apache/commons/codec/net/StringEncodings.java deleted file mode 100644 index e7f6bb8..0000000 --- a/src/org/apache/commons/codec/net/StringEncodings.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.codec.net; - -/** - * String encodings used in this package. - * Package-private holder for the charset names that the codecs in this package pass to - * <code>String.getBytes</code> and to the <code>String</code> constructor. - * - * @author Apache Software Foundation - * @since 1.3 - * @version $Id: StringEncodings.java,v 1.2 2004/04/09 22:21:07 ggregory Exp $ - */ -interface StringEncodings { - /** - * <p> - * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set. - * </p> - * <p> - * Every implementation of the Java platform is required to support this character encoding. 
- * </p> - * <p> - * The codecs in this package use it to convert already-encoded data between strings and byte arrays. - * </p> - * - * @see <a href="http://java.sun.com/j2se/1.3/docs/api/java/lang/package-summary.html#charenc">JRE character - * encoding names</a> - */ - String US_ASCII = "US-ASCII"; - - /** - * <p> - * Eight-bit Unicode Transformation Format. - * </p> - * <p> - * Every implementation of the Java platform is required to support this character encoding. - * </p> - * <p> - * QuotedPrintableCodec and URLCodec use it as their initial default charset. - * </p> - * - * @see <a href="http://java.sun.com/j2se/1.3/docs/api/java/lang/package-summary.html#charenc">JRE character - * encoding names</a> - */ - String UTF8 = "UTF-8"; -} diff --git a/src/org/apache/commons/codec/net/URLCodec.java b/src/org/apache/commons/codec/net/URLCodec.java deleted file mode 100644 index 1bc3507..0000000 --- a/src/org/apache/commons/codec/net/URLCodec.java +++ /dev/null @@ -1,364 +0,0 @@ -/* - * Copyright 2001-2004 The Apache Software Foundation. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.commons.codec.net; - -import java.io.ByteArrayOutputStream; -import java.io.UnsupportedEncodingException; -import java.util.BitSet; - -import org.apache.commons.codec.BinaryDecoder; -import org.apache.commons.codec.BinaryEncoder; -import org.apache.commons.codec.DecoderException; -import org.apache.commons.codec.EncoderException; -import org.apache.commons.codec.StringDecoder; -import org.apache.commons.codec.StringEncoder; - -/** - * <p>Implements the 'www-form-urlencoded' encoding scheme, - * also misleadingly known as URL encoding.</p> - * - * <p>For more detailed information please refer to - * <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1"> - * Chapter 17.13.4 'Form content types'</a> of the - * <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification</a></p> - * - * <p> - * This codec is meant to be a replacement for standard Java classes - * {@link java.net.URLEncoder} and {@link java.net.URLDecoder} - * on older Java platforms, as these classes in Java versions below - * 1.4 rely on the platform's default charset encoding. - * </p> - * - * @author Apache Software Foundation - * @since 1.2 - * @version $Id: URLCodec.java,v 1.19 2004/03/29 07:59:00 ggregory Exp $ - */ -public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder { - - /** - * The default charset used for string decoding and encoding. - */ - protected String charset = StringEncodings.UTF8; - - /** - * The percent sign that introduces an escaped octet. - * NOTE(review): this field is not final, and encodeUrl/decodeUrl use the literal percent sign rather than - * reading it - confirm whether it can be made final. - */ protected static byte ESCAPE_CHAR = '%'; - /** - * BitSet of www-form-url safe characters. 
- */ - protected static final BitSet WWW_FORM_URL = new BitSet(256); - - // Static initializer for www_form_url - static { - // alpha characters - for (int i = 'a'; i <= 'z'; i++) { - WWW_FORM_URL.set(i); - } - for (int i = 'A'; i <= 'Z'; i++) { - WWW_FORM_URL.set(i); - } - // numeric characters - for (int i = '0'; i <= '9'; i++) { - WWW_FORM_URL.set(i); - } - // special chars - WWW_FORM_URL.set('-'); - WWW_FORM_URL.set('_'); - WWW_FORM_URL.set('.'); - WWW_FORM_URL.set('*'); - // blank to be replaced with + - WWW_FORM_URL.set(' '); - } - - - /** - * Default constructor. - */ - public URLCodec() { - super(); - } - - /** - * Constructor which allows for the selection of a default charset - * - * @param charset the default string charset to use. - */ - public URLCodec(String charset) { - super(); - this.charset = charset; - } - - /** - * Encodes an array of bytes into an array of URL safe 7-bit - * characters. Unsafe characters are escaped. - * - * @param urlsafe bitset of characters deemed URL safe - * @param bytes array of bytes to convert to URL safe characters - * @return array of bytes containing URL safe characters - */ - public static final byte[] encodeUrl(BitSet urlsafe, byte[] bytes) - { - if (bytes == null) { - return null; - } - if (urlsafe == null) { - urlsafe = WWW_FORM_URL; - } - - ByteArrayOutputStream buffer = new ByteArrayOutputStream(); - for (int i = 0; i < bytes.length; i++) { - int b = bytes[i]; - if (b < 0) { - b = 256 + b; - } - if (urlsafe.get(b)) { - if (b == ' ') { - b = '+'; - } - buffer.write(b); - } else { - buffer.write('%'); - char hex1 = Character.toUpperCase( - Character.forDigit((b >> 4) & 0xF, 16)); - char hex2 = Character.toUpperCase( - Character.forDigit(b & 0xF, 16)); - buffer.write(hex1); - buffer.write(hex2); - } - } - return buffer.toByteArray(); - } - - - /** - * Decodes an array of URL safe 7-bit characters into an array of - * original bytes. Escaped characters are converted back to their - * original representation. 
- * - * @param bytes array of URL safe characters - * @return array of original bytes - * @throws DecoderException Thrown if URL decoding is unsuccessful - */ - public static final byte[] decodeUrl(byte[] bytes) - throws DecoderException - { - if (bytes == null) { - return null; - } - ByteArrayOutputStream buffer = new ByteArrayOutputStream(); - for (int i = 0; i < bytes.length; i++) { - int b = bytes[i]; - if (b == '+') { - buffer.write(' '); - } else if (b == '%') { - try { - int u = Character.digit((char)bytes[++i], 16); - int l = Character.digit((char)bytes[++i], 16); - if (u == -1 || l == -1) { - throw new DecoderException("Invalid URL encoding"); - } - buffer.write((char)((u << 4) + l)); - } catch(ArrayIndexOutOfBoundsException e) { - throw new DecoderException("Invalid URL encoding"); - } - } else { - buffer.write(b); - } - } - return buffer.toByteArray(); - } - - - /** - * Encodes an array of bytes into an array of URL safe 7-bit - * characters. Unsafe characters are escaped. - * - * @param bytes array of bytes to convert to URL safe characters - * @return array of bytes containing URL safe characters - */ - public byte[] encode(byte[] bytes) { - return encodeUrl(WWW_FORM_URL, bytes); - } - - - /** - * Decodes an array of URL safe 7-bit characters into an array of - * original bytes. Escaped characters are converted back to their - * original representation. - * - * @param bytes array of URL safe characters - * @return array of original bytes - * @throws DecoderException Thrown if URL decoding is unsuccessful - */ - public byte[] decode(byte[] bytes) throws DecoderException { - return decodeUrl(bytes); - } - - - /** - * Encodes a string into its URL safe form using the specified - * string charset. Unsafe characters are escaped. 
- * - * @param pString string to convert to a URL safe form - * @param charset the charset for pString - * @return URL safe string - * @throws UnsupportedEncodingException Thrown if charset is not - * supported - */ - public String encode(String pString, String charset) - throws UnsupportedEncodingException - { - if (pString == null) { - return null; - } - return new String(encode(pString.getBytes(charset)), StringEncodings.US_ASCII); - } - - - /** - * Encodes a string into its URL safe form using the default string - * charset. Unsafe characters are escaped. - * - * @param pString string to convert to a URL safe form - * @return URL safe string - * @throws EncoderException Thrown if URL encoding is unsuccessful - * - * @see #getDefaultCharset() - */ - public String encode(String pString) throws EncoderException { - if (pString == null) { - return null; - } - try { - return encode(pString, getDefaultCharset()); - } catch(UnsupportedEncodingException e) { - throw new EncoderException(e.getMessage()); - } - } - - - /** - * Decodes a URL safe string into its original form using the - * specified encoding. Escaped characters are converted back - * to their original representation. - * - * @param pString URL safe string to convert into its original form - * @param charset the original string charset - * @return original string - * @throws DecoderException Thrown if URL decoding is unsuccessful - * @throws UnsupportedEncodingException Thrown if charset is not - * supported - */ - public String decode(String pString, String charset) - throws DecoderException, UnsupportedEncodingException - { - if (pString == null) { - return null; - } - return new String(decode(pString.getBytes(StringEncodings.US_ASCII)), charset); - } - - - /** - * Decodes a URL safe string into its original form using the default - * string charset. Escaped characters are converted back to their - * original representation. 
- * - * @param pString URL safe string to convert into its original form - * @return original string - * @throws DecoderException Thrown if URL decoding is unsuccessful - * - * @see #getDefaultCharset() - */ - public String decode(String pString) throws DecoderException { - if (pString == null) { - return null; - } - try { - return decode(pString, getDefaultCharset()); - } catch(UnsupportedEncodingException e) { - throw new DecoderException(e.getMessage()); - } - } - - /** - * Encodes an object into its URL safe form. Unsafe characters are - * escaped. - * - * @param pObject string to convert to a URL safe form - * @return URL safe object - * @throws EncoderException Thrown if URL encoding is not - * applicable to objects of this type or - * if encoding is unsuccessful - */ - public Object encode(Object pObject) throws EncoderException { - if (pObject == null) { - return null; - } else if (pObject instanceof byte[]) { - return encode((byte[])pObject); - } else if (pObject instanceof String) { - return encode((String)pObject); - } else { - throw new EncoderException("Objects of type " + - pObject.getClass().getName() + " cannot be URL encoded"); - - } - } - - /** - * Decodes a URL safe object into its original form. Escaped - * characters are converted back to their original representation. 
- * - * @param pObject URL safe object to convert into its original form - * @return original object - * @throws DecoderException Thrown if URL decoding is not - * applicable to objects of this type - * if decoding is unsuccessful - */ - public Object decode(Object pObject) throws DecoderException { - if (pObject == null) { - return null; - } else if (pObject instanceof byte[]) { - return decode((byte[])pObject); - } else if (pObject instanceof String) { - return decode((String)pObject); - } else { - throw new DecoderException("Objects of type " + - pObject.getClass().getName() + " cannot be URL decoded"); - - } - } - - /** - * The <code>String</code> encoding used for decoding and encoding. - * - * @return Returns the encoding. - * - * @deprecated use #getDefaultCharset() - */ - public String getEncoding() { - return this.charset; - } - - /** - * The default charset used for string decoding and encoding. - * - * @return the default string charset. - */ - public String getDefaultCharset() { - return this.charset; - } - -} diff --git a/src/org/apache/commons/codec/net/package.html b/src/org/apache/commons/codec/net/package.html deleted file mode 100644 index 4607c57..0000000 --- a/src/org/apache/commons/codec/net/package.html +++ /dev/null @@ -1,22 +0,0 @@ -<!-- -Copyright 2003-2004 The Apache Software Foundation. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. ---> -<html> - <body> - <p> - Network related encoding and decoding. 
- </p> - </body> -</html> diff --git a/src/org/apache/commons/codec/overview.html b/src/org/apache/commons/codec/overview.html deleted file mode 100644 index 6b6f6c9..0000000 --- a/src/org/apache/commons/codec/overview.html +++ /dev/null @@ -1,28 +0,0 @@ -<!-- -Copyright 2003-2004 The Apache Software Foundation. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. ---> -<!-- $Id: overview.html,v 1.6 2004/05/17 17:06:10 ggregory Exp $ --> -<html> -<body> -<p> -This document is the API specification for the Apache Jakarta Commons Codec Library, version 1.3. -</p> -<p> -This library requires a JRE version of 1.2.2 or greater. -The hypertext links originating from this document point to Sun's version 1.3 API as the 1.2.2 API documentation -is no longer on-line. -</p> -</body> -</html> diff --git a/src/org/apache/commons/codec/package.html b/src/org/apache/commons/codec/package.html deleted file mode 100644 index b7ccf03..0000000 --- a/src/org/apache/commons/codec/package.html +++ /dev/null @@ -1,99 +0,0 @@ -<!-- -Copyright 2003-2004 The Apache Software Foundation. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. ---> -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> -<html> - <head> - </head> - <body> - <p>A small set of interfaces used by - the various implementations in the sub-packages.</p> - - <p>Definitive implementations of commonly used encoders and decoders.</p> - - <p>Codec is currently comprised of a modest set of utilities and a - simple framework for String encoding and decoding in three categories: - Binary Encoders, Language Encoders, and Network Encoders. </p> - - <h4><a name="Common Encoders">Binary Encoders</a></h4> - - <table border="1" width="100%" cellspacing="2" cellpadding="3"> - <tbody> - <tr> - <td> - <a href="binary/Base64.html"> - org.apache.commons.codec.binary.Base64</a> - </td> - <td> - Provides Base64 content-transfer-encoding as defined in - <a href="http://www.ietf.org/rfc/rfc2045.txt"> RFC 2045</a> - </td> - <td>Production</td> - </tr> - <tr> - <td> - <a href="binary/Hex.html"> - org.apache.commons.codec.binary.Hex</a> - </td> - <td> - Converts an array of bytes into an array of characters - representing the hexadecimal values of each byte in order - </td> - <td>Production</td> - </tr> - </tbody> - </table> - <h4> - <a name="Language Encoders">Language Encoders</a> - </h4> - <p> - Codec contains a number of commonly used language and phonetic - encoders - </p> - <table border="1" width="100%" cellspacing="2" cellpadding="3"> - <tbody> - <tr> - <td> - <a href="#">org.apache.commons.codec.language.Soundex</a> - </td> - <td>Implementation of the Soundex algorithm.</td> - <td>Production</td> - </tr> - <tr> - <td> - <a href="#">org.apache.commons.codec.language.Metaphone</a> - </td> - <td>Implementation of the Metaphone algorithm.</td> - <td>Production</td> - </tr> - </tbody> - </table> - <h4><a name="Network_Encoders">Network Encoders</a></h4> - <h4> </h4> - <p> Codec contains network related encoders </p> - <table border="1" 
width="100%" cellspacing="2" cellpadding="3"> - <tbody> - <tr> - <td> - <a href="#">org.apache.commons.codec.net.URLCodec</a> - </td> - <td>Implements the 'www-form-urlencoded' encoding scheme.</td> - <td>Production</td> - </tr> - </tbody> - </table> - <br> - </body> -</html> |