diff options
Diffstat (limited to 'src/org/apache/http/message/BasicTokenIterator.java')
-rw-r--r-- | src/org/apache/http/message/BasicTokenIterator.java | 429 |
1 files changed, 0 insertions, 429 deletions
diff --git a/src/org/apache/http/message/BasicTokenIterator.java b/src/org/apache/http/message/BasicTokenIterator.java deleted file mode 100644 index 5fbf5ba..0000000 --- a/src/org/apache/http/message/BasicTokenIterator.java +++ /dev/null @@ -1,429 +0,0 @@ -/* - * $HeadURL: http://svn.apache.org/repos/asf/httpcomponents/httpcore/trunk/module-main/src/main/java/org/apache/http/message/BasicTokenIterator.java $ - * $Revision: 602520 $ - * $Date: 2007-12-08 09:42:26 -0800 (Sat, 08 Dec 2007) $ - * - * ==================================================================== - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - * ==================================================================== - * - * This software consists of voluntary contributions made by many - * individuals on behalf of the Apache Software Foundation. For more - * information on the Apache Software Foundation, please see - * <http://www.apache.org/>. - * - */ - -package org.apache.http.message; - -import java.util.NoSuchElementException; - -import org.apache.http.HeaderIterator; -import org.apache.http.ParseException; -import org.apache.http.TokenIterator; - -/** - * Basic implementation of a {@link TokenIterator}. - * This implementation parses <tt>#token<tt> sequences as - * defined by RFC 2616, section 2. - * It extends that definition somewhat beyond US-ASCII. - * - * @version $Revision: 602520 $ - */ -public class BasicTokenIterator implements TokenIterator { - - /** The HTTP separator characters. Defined in RFC 2616, section 2.2. */ - // the order of the characters here is adjusted to put the - // most likely candidates at the beginning of the collection - public final static String HTTP_SEPARATORS = " ,;=()<>@:\\\"/[]?{}\t"; - - - /** The iterator from which to obtain the next header. */ - protected final HeaderIterator headerIt; - - /** - * The value of the current header. - * This is the header value that includes {@link #currentToken}. - * Undefined if the iteration is over. - */ - protected String currentHeader; - - /** - * The token to be returned by the next call to {@link #currentToken}. - * <code>null</code> if the iteration is over. - */ - protected String currentToken; - - /** - * The position after {@link #currentToken} in {@link #currentHeader}. - * Undefined if the iteration is over. - */ - protected int searchPos; - - - /** - * Creates a new instance of {@link BasicTokenIterator}. - * - * @param headerIterator the iterator for the headers to tokenize - */ - public BasicTokenIterator(final HeaderIterator headerIterator) { - if (headerIterator == null) { - throw new IllegalArgumentException - ("Header iterator must not be null."); - } - - this.headerIt = headerIterator; - this.searchPos = findNext(-1); - } - - - // non-javadoc, see interface TokenIterator - public boolean hasNext() { - return (this.currentToken != null); - } - - - /** - * Obtains the next token from this iteration. - * - * @return the next token in this iteration - * - * @throws NoSuchElementException if the iteration is already over - * @throws ParseException if an invalid header value is encountered - */ - public String nextToken() - throws NoSuchElementException, ParseException { - - if (this.currentToken == null) { - throw new NoSuchElementException("Iteration already finished."); - } - - final String result = this.currentToken; - // updates currentToken, may trigger ParseException: - this.searchPos = findNext(this.searchPos); - - return result; - } - - - /** - * Returns the next token. - * Same as {@link #nextToken}, but with generic return type. - * - * @return the next token in this iteration - * - * @throws NoSuchElementException if there are no more tokens - * @throws ParseException if an invalid header value is encountered - */ - public final Object next() - throws NoSuchElementException, ParseException { - return nextToken(); - } - - - /** - * Removing tokens is not supported. - * - * @throws UnsupportedOperationException always - */ - public final void remove() - throws UnsupportedOperationException { - - throw new UnsupportedOperationException - ("Removing tokens is not supported."); - } - - - /** - * Determines the next token. - * If found, the token is stored in {@link #currentToken}. - * The return value indicates the position after the token - * in {@link #currentHeader}. If necessary, the next header - * will be obtained from {@link #headerIt}. - * If not found, {@link #currentToken} is set to <code>null</code>. - * - * @param from the position in the current header at which to - * start the search, -1 to search in the first header - * - * @return the position after the found token in the current header, or - * negative if there was no next token - * - * @throws ParseException if an invalid header value is encountered - */ - protected int findNext(int from) - throws ParseException { - - if (from < 0) { - // called from the constructor, initialize the first header - if (!this.headerIt.hasNext()) { - return -1; - } - this.currentHeader = this.headerIt.nextHeader().getValue(); - from = 0; - } else { - // called after a token, make sure there is a separator - from = findTokenSeparator(from); - } - - int start = findTokenStart(from); - if (start < 0) { - this.currentToken = null; - return -1; // nothing found - } - - int end = findTokenEnd(start); - this.currentToken = createToken(this.currentHeader, start, end); - return end; - } - - - /** - * Creates a new token to be returned. - * Called from {@link #findNext findNext} after the token is identified. - * The default implementation simply calls - * {@link java.lang.String#substring String.substring}. - * <br/> - * If header values are significantly longer than tokens, and some - * tokens are permanently referenced by the application, there can - * be problems with garbage collection. A substring will hold a - * reference to the full characters of the original string and - * therefore occupies more memory than might be expected. - * To avoid this, override this method and create a new string - * instead of a substring. - * - * @param value the full header value from which to create a token - * @param start the index of the first token character - * @param end the index after the last token character - * - * @return a string representing the token identified by the arguments - */ - protected String createToken(String value, int start, int end) { - return value.substring(start, end); - } - - - /** - * Determines the starting position of the next token. - * This method will iterate over headers if necessary. - * - * @param from the position in the current header at which to - * start the search - * - * @return the position of the token start in the current header, - * negative if no token start could be found - */ - protected int findTokenStart(int from) { - if (from < 0) { - throw new IllegalArgumentException - ("Search position must not be negative: " + from); - } - - boolean found = false; - while (!found && (this.currentHeader != null)) { - - final int to = this.currentHeader.length(); - while (!found && (from < to)) { - - final char ch = this.currentHeader.charAt(from); - if (isTokenSeparator(ch) || isWhitespace(ch)) { - // whitspace and token separators are skipped - from++; - } else if (isTokenChar(this.currentHeader.charAt(from))) { - // found the start of a token - found = true; - } else { - throw new ParseException - ("Invalid character before token (pos " + from + - "): " + this.currentHeader); - } - } - if (!found) { - if (this.headerIt.hasNext()) { - this.currentHeader = this.headerIt.nextHeader().getValue(); - from = 0; - } else { - this.currentHeader = null; - } - } - } // while headers - - return found ? from : -1; - } - - - /** - * Determines the position of the next token separator. - * Because of multi-header joining rules, the end of a - * header value is a token separator. This method does - * therefore not need to iterate over headers. - * - * @param from the position in the current header at which to - * start the search - * - * @return the position of a token separator in the current header, - * or at the end - * - * @throws ParseException - * if a new token is found before a token separator. - * RFC 2616, section 2.1 explicitly requires a comma between - * tokens for <tt>#</tt>. - */ - protected int findTokenSeparator(int from) { - if (from < 0) { - throw new IllegalArgumentException - ("Search position must not be negative: " + from); - } - - boolean found = false; - final int to = this.currentHeader.length(); - while (!found && (from < to)) { - final char ch = this.currentHeader.charAt(from); - if (isTokenSeparator(ch)) { - found = true; - } else if (isWhitespace(ch)) { - from++; - } else if (isTokenChar(ch)) { - throw new ParseException - ("Tokens without separator (pos " + from + - "): " + this.currentHeader); - } else { - throw new ParseException - ("Invalid character after token (pos " + from + - "): " + this.currentHeader); - } - } - - return from; - } - - - /** - * Determines the ending position of the current token. - * This method will not leave the current header value, - * since the end of the header value is a token boundary. - * - * @param from the position of the first character of the token - * - * @return the position after the last character of the token. - * The behavior is undefined if <code>from</code> does not - * point to a token character in the current header value. - */ - protected int findTokenEnd(int from) { - if (from < 0) { - throw new IllegalArgumentException - ("Token start position must not be negative: " + from); - } - - final int to = this.currentHeader.length(); - int end = from+1; - while ((end < to) && isTokenChar(this.currentHeader.charAt(end))) { - end++; - } - - return end; - } - - - /** - * Checks whether a character is a token separator. - * RFC 2616, section 2.1 defines comma as the separator for - * <tt>#token</tt> sequences. The end of a header value will - * also separate tokens, but that is not a character check. - * - * @param ch the character to check - * - * @return <code>true</code> if the character is a token separator, - * <code>false</code> otherwise - */ - protected boolean isTokenSeparator(char ch) { - return (ch == ','); - } - - - /** - * Checks whether a character is a whitespace character. - * RFC 2616, section 2.2 defines space and horizontal tab as whitespace. - * The optional preceeding line break is irrelevant, since header - * continuation is handled transparently when parsing messages. - * - * @param ch the character to check - * - * @return <code>true</code> if the character is whitespace, - * <code>false</code> otherwise - */ - protected boolean isWhitespace(char ch) { - - // we do not use Character.isWhitspace(ch) here, since that allows - // many control characters which are not whitespace as per RFC 2616 - return ((ch == '\t') || Character.isSpaceChar(ch)); - } - - - /** - * Checks whether a character is a valid token character. - * Whitespace, control characters, and HTTP separators are not - * valid token characters. The HTTP specification (RFC 2616, section 2.2) - * defines tokens only for the US-ASCII character set, this - * method extends the definition to other character sets. - * - * @param ch the character to check - * - * @return <code>true</code> if the character is a valid token start, - * <code>false</code> otherwise - */ - protected boolean isTokenChar(char ch) { - - // common sense extension of ALPHA + DIGIT - if (Character.isLetterOrDigit(ch)) - return true; - - // common sense extension of CTL - if (Character.isISOControl(ch)) - return false; - - // no common sense extension for this - if (isHttpSeparator(ch)) - return false; - - // RFC 2616, section 2.2 defines a token character as - // "any CHAR except CTLs or separators". The controls - // and separators are included in the checks above. - // This will yield unexpected results for Unicode format characters. - // If that is a problem, overwrite isHttpSeparator(char) to filter - // out the false positives. - return true; - } - - - /** - * Checks whether a character is an HTTP separator. - * The implementation in this class checks only for the HTTP separators - * defined in RFC 2616, section 2.2. If you need to detect other - * separators beyond the US-ASCII character set, override this method. - * - * @param ch the character to check - * - * @return <code>true</code> if the character is an HTTP separator - */ - protected boolean isHttpSeparator(char ch) { - return (HTTP_SEPARATORS.indexOf(ch) >= 0); - } - - -} // class BasicTokenIterator - |