1 files changed, 429 insertions, 0 deletions
diff --git a/src/org/apache/http/message/BasicTokenIterator.java b/src/org/apache/http/message/BasicTokenIterator.java
new file mode 100644
index 0000000..5fbf5ba
--- /dev/null
+++ b/src/org/apache/http/message/BasicTokenIterator.java
@@ -0,0 +1,429 @@
+/*
+ * $HeadURL: http://svn.apache.org/repos/asf/httpcomponents/httpcore/trunk/module-main/src/main/java/org/apache/http/message/BasicTokenIterator.java $
+ * $Revision: 602520 $
+ * $Date: 2007-12-08 09:42:26 -0800 (Sat, 08 Dec 2007) $
+ *
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ *
+ */
+
+package org.apache.http.message;
+
+import java.util.NoSuchElementException;
+
+import org.apache.http.HeaderIterator;
+import org.apache.http.ParseException;
+import org.apache.http.TokenIterator;
+
+/**
+ * Basic implementation of a {@link TokenIterator}.
+ * This implementation parses <tt>#token</tt> sequences as
+ * defined by RFC 2616, section 2.
+ * It extends that definition somewhat beyond US-ASCII.
+ * 
+ * @version $Revision: 602520 $
+ */
+public class BasicTokenIterator implements TokenIterator {
+
+    /** The HTTP separator characters. Defined in RFC 2616, section 2.2. */
+    // The order of the characters is chosen to put the most likely
+    // candidates first, because lookups use a linear indexOf scan.
+    public static final String HTTP_SEPARATORS = " ,;=()<>@:\\\"/[]?{}\t";
+
+
+    /** The iterator from which to obtain the next header. */
+    protected final HeaderIterator headerIt;
+
+    /**
+     * The value of the current header.
+     * This is the header value that includes {@link #currentToken}.
+     * Undefined if the iteration is over.
+     */
+    protected String currentHeader;
+
+    /**
+     * The token to be returned by the next call to {@link #nextToken}.
+     * <code>null</code> if the iteration is over.
+     */
+    protected String currentToken;
+
+    /**
+     * The position after {@link #currentToken} in {@link #currentHeader}.
+     * Undefined if the iteration is over.
+     */
+    protected int searchPos;
+
+
+    /**
+     * Creates a new instance of {@link BasicTokenIterator}.
+     * The first token, if any, is located during construction.
+     *
+     * @param headerIterator    the iterator for the headers to tokenize
+     *
+     * @throws ParseException   if an invalid header value is encountered
+     */
+    public BasicTokenIterator(final HeaderIterator headerIterator) {
+        if (headerIterator == null) {
+            throw new IllegalArgumentException("Header iterator must not be null.");
+        }
+        this.headerIt = headerIterator;
+        this.searchPos = findNext(-1);
+    }
+
+
+    // non-javadoc, see interface TokenIterator
+    public boolean hasNext() {
+        return (this.currentToken != null);
+    }
+
+
+    /**
+     * Obtains the next token from this iteration.
+     * The following token is located eagerly before this method returns.
+     *
+     * @return  the next token in this iteration
+     *
+     * @throws NoSuchElementException   if the iteration is already over
+     * @throws ParseException   if an invalid header value is encountered
+     */
+    public String nextToken()
+        throws NoSuchElementException, ParseException {
+
+        if (this.currentToken == null) {
+            throw new NoSuchElementException("Iteration already finished.");
+        }
+
+        final String result = this.currentToken;
+        // look ahead: updates currentToken, may trigger ParseException
+        this.searchPos = findNext(this.searchPos);
+        return result;
+    }
+
+
+    /**
+     * Returns the next token.
+     * Same as {@link #nextToken}, but with generic return type.
+     *
+     * @return  the next token in this iteration
+     *
+     * @throws NoSuchElementException   if there are no more tokens
+     * @throws ParseException   if an invalid header value is encountered
+     */
+    public final Object next()
+        throws NoSuchElementException, ParseException {
+        return nextToken();
+    }
+
+
+    /**
+     * Removing tokens is not supported.
+     *
+     * @throws UnsupportedOperationException    always
+     */
+    public final void remove()
+        throws UnsupportedOperationException {
+
+        throw new UnsupportedOperationException("Removing tokens is not supported.");
+    }
+
+
+    /**
+     * Determines the next token.
+     * If found, the token is stored in {@link #currentToken}.
+     * The return value indicates the position after the token
+     * in {@link #currentHeader}. If necessary, the next header
+     * will be obtained from {@link #headerIt}.
+     * If not found, {@link #currentToken} is set to <code>null</code>.
+     *
+     * @param from      the position in the current header at which to
+     *                  start the search, -1 to search in the first header
+     *
+     * @return  the position after the found token in the current header, or
+     *          negative if there was no next token
+     *
+     * @throws ParseException   if an invalid header value is encountered
+     */
+    protected int findNext(int from)
+        throws ParseException {
+
+        if (from < 0) {
+            // called from the constructor, initialize the first header
+            if (!this.headerIt.hasNext()) {
+                // no header, no token: clear it as the contract promises
+                this.currentToken = null;
+                return -1;
+            }
+            this.currentHeader = this.headerIt.nextHeader().getValue();
+            from = 0;
+        } else {
+            // called after a token, make sure there is a separator
+            from = findTokenSeparator(from);
+        }
+
+        final int start = findTokenStart(from);
+        if (start < 0) {
+            this.currentToken = null;
+            return -1; // nothing found
+        }
+
+        final int end = findTokenEnd(start);
+        this.currentToken = createToken(this.currentHeader, start, end);
+        return end;
+    }
+
+
+    /**
+     * Creates a new token to be returned.
+     * Called from {@link #findNext findNext} after the token is identified.
+     * The default implementation simply calls
+     * {@link java.lang.String#substring String.substring}.
+     * <br>
+     * If header values are significantly longer than tokens, and some
+     * tokens are permanently referenced by the application, there can
+     * be problems with garbage collection on JVMs where a substring
+     * holds a reference to the full characters of the original string
+     * and therefore occupies more memory than might be expected.
+     * To avoid this, override this method and create a new string
+     * instead of a substring.
+     *
+     * @param value     the full header value from which to create a token
+     * @param start     the index of the first token character
+     * @param end       the index after the last token character
+     *
+     * @return  a string representing the token identified by the arguments
+     */
+    protected String createToken(String value, int start, int end) {
+        return value.substring(start, end);
+    }
+
+
+    /**
+     * Determines the starting position of the next token.
+     * This method will iterate over headers if necessary.
+     *
+     * @param from      the position in the current header at which to
+     *                  start the search
+     *
+     * @return  the position of the token start in the current header,
+     *          negative if no token start could be found
+     *
+     * @throws ParseException   if an invalid character precedes the token
+     */
+    protected int findTokenStart(int from) {
+        if (from < 0) {
+            throw new IllegalArgumentException
+                ("Search position must not be negative: " + from);
+        }
+
+        while (this.currentHeader != null) {
+            final int to = this.currentHeader.length();
+            while (from < to) {
+                final char ch = this.currentHeader.charAt(from);
+                if (isTokenSeparator(ch) || isWhitespace(ch)) {
+                    // whitespace and token separators are skipped
+                    from++;
+                } else if (isTokenChar(ch)) {
+                    // found the start of a token
+                    return from;
+                } else {
+                    throw new ParseException
+                        ("Invalid character before token (pos " + from +
+                         "): " + this.currentHeader);
+                }
+            }
+
+            // current header exhausted without a token, try the next one
+            // NOTE(review): a null header value would end the iteration early
+            if (this.headerIt.hasNext()) {
+                this.currentHeader = this.headerIt.nextHeader().getValue();
+                from = 0;
+            } else {
+                this.currentHeader = null;
+            }
+        } // while headers
+
+        return -1;
+    }
+
+
+    /**
+     * Determines the position of the next token separator.
+     * Because of multi-header joining rules, the end of a
+     * header value is a token separator. This method does
+     * therefore not need to iterate over headers.
+     *
+     * @param from      the position in the current header at which to
+     *                  start the search
+     *
+     * @return  the position of a token separator in the current header,
+     *          or at the end
+     *
+     * @throws ParseException
+     *         if a new token is found before a token separator.
+     *         RFC 2616, section 2.1 explicitly requires a comma between
+     *         tokens for <tt>#</tt>.
+     */
+    protected int findTokenSeparator(int from) {
+        if (from < 0) {
+            throw new IllegalArgumentException
+                ("Search position must not be negative: " + from);
+        }
+
+        final int to = this.currentHeader.length();
+        while (from < to) {
+            final char ch = this.currentHeader.charAt(from);
+            if (isTokenSeparator(ch)) {
+                // found the separator, stop right on it
+                break;
+            } else if (isWhitespace(ch)) {
+                from++;
+            } else if (isTokenChar(ch)) {
+                throw new ParseException
+                    ("Tokens without separator (pos " + from +
+                     "): " + this.currentHeader);
+            } else {
+                throw new ParseException
+                    ("Invalid character after token (pos " + from +
+                     "): " + this.currentHeader);
+            }
+        }
+
+        return from;
+    }
+
+
+    /**
+     * Determines the ending position of the current token.
+     * This method will not leave the current header value,
+     * since the end of the header value is a token boundary.
+     *
+     * @param from      the position of the first character of the token
+     *
+     * @return  the position after the last character of the token.
+     *          The behavior is undefined if <code>from</code> does not
+     *          point to a token character in the current header value.
+     */
+    protected int findTokenEnd(int from) {
+        if (from < 0) {
+            throw new IllegalArgumentException
+                ("Token start position must not be negative: " + from);
+        }
+
+        final int to = this.currentHeader.length();
+        int end = from+1;
+        while ((end < to) && isTokenChar(this.currentHeader.charAt(end))) {
+            end++;
+        }
+        return end;
+    }
+
+
+    /**
+     * Checks whether a character is a token separator.
+     * RFC 2616, section 2.1 defines comma as the separator for
+     * <tt>#token</tt> sequences. The end of a header value will
+     * also separate tokens, but that is not a character check.
+     *
+     * @param ch        the character to check
+     *
+     * @return  <code>true</code> if the character is a token separator,
+     *          <code>false</code> otherwise
+     */
+    protected boolean isTokenSeparator(char ch) {
+        return (ch == ',');
+    }
+
+
+    /**
+     * Checks whether a character is a whitespace character.
+     * RFC 2616, section 2.2 defines space and horizontal tab as whitespace.
+     * The optional preceding line break is irrelevant, since header
+     * continuation is handled transparently when parsing messages.
+     *
+     * @param ch        the character to check
+     *
+     * @return  <code>true</code> if the character is whitespace,
+     *          <code>false</code> otherwise
+     */
+    protected boolean isWhitespace(char ch) {
+
+        // we do not use Character.isWhitespace(ch) here, since that allows
+        // many control characters which are not whitespace as per RFC 2616
+        return ((ch == '\t') || Character.isSpaceChar(ch));
+    }
+
+
+    /**
+     * Checks whether a character is a valid token character.
+     * Whitespace, control characters, and HTTP separators are not
+     * valid token characters. The HTTP specification (RFC 2616, section 2.2)
+     * defines tokens only for the US-ASCII character set, this
+     * method extends the definition to other character sets.
+     *
+     * @param ch        the character to check
+     *
+     * @return  <code>true</code> if the character is a valid token character,
+     *          <code>false</code> otherwise
+     */
+    protected boolean isTokenChar(char ch) {
+
+        // common sense extension of ALPHA + DIGIT
+        if (Character.isLetterOrDigit(ch)) {
+            return true;
+        }
+
+        // common sense extension of CTL
+        if (Character.isISOControl(ch)) {
+            return false;
+        }
+
+        // RFC 2616, section 2.2 defines a token character as
+        // "any CHAR except CTLs or separators". The controls
+        // are excluded above, the separators here; there is no
+        // common sense extension for separators.
+        // This will yield unexpected results for Unicode format characters.
+        // If that is a problem, override isHttpSeparator(char) to filter
+        // out the false positives.
+        return !isHttpSeparator(ch);
+    }
+
+
+    /**
+     * Checks whether a character is an HTTP separator.
+     * The implementation in this class checks only for the HTTP separators
+     * defined in RFC 2616, section 2.2. If you need to detect other
+     * separators beyond the US-ASCII character set, override this method.
+     *
+     * @param ch        the character to check
+     *
+     * @return  <code>true</code> if the character is an HTTP separator
+     */
+    protected boolean isHttpSeparator(char ch) {
+        return (HTTP_SEPARATORS.indexOf(ch) >= 0);
+    }
+
+
+} // class BasicTokenIterator
+