diff options
Diffstat (limited to 'simple/simple-http/src/main/java/org/simpleframework/http/parse/ContentTypeParser.java')
-rw-r--r-- | simple/simple-http/src/main/java/org/simpleframework/http/parse/ContentTypeParser.java | 556 |
1 files changed, 556 insertions, 0 deletions
diff --git a/simple/simple-http/src/main/java/org/simpleframework/http/parse/ContentTypeParser.java b/simple/simple-http/src/main/java/org/simpleframework/http/parse/ContentTypeParser.java new file mode 100644 index 0000000..f42c073 --- /dev/null +++ b/simple/simple-http/src/main/java/org/simpleframework/http/parse/ContentTypeParser.java @@ -0,0 +1,556 @@ +/* + * ContentTypeParser.java February 2001 + * + * Copyright (C) 2001, Niall Gallagher <niallg@users.sf.net> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package org.simpleframework.http.parse; + +import org.simpleframework.common.KeyMap; +import org.simpleframework.common.parse.ParseBuffer; +import org.simpleframework.common.parse.Parser; +import org.simpleframework.http.ContentType; + +/** + * This provides access to the MIME type parts, that is the primary + * type, the secondary type and an optional character set parameter. + * The <code>charset</code> parameter is one of many parameters that + * can be associated with a MIME type. This however this exposes this + * parameter with a typed method. + * <p> + * The <code>getCharset</code> will return the character encoding the + * content type is encoded within. This allows the user of the content + * to decode it correctly. Other parameters can be acquired from this + * by simply providing the name of the parameter. + * + * @author Niall Gallagher + */ +public class ContentTypeParser extends Parser implements ContentType { + + /** + * Used to store the characters consumed for the secondary type. + */ + private ParseBuffer secondary; + + /** + * Used to store the characters consumed for the primary type. + */ + private ParseBuffer primary; + + /** + * Used to store the characters for the charset parameter. + */ + private ParseBuffer charset; + + /** + * Used to store the characters consumed for the type. + */ + private ParseBuffer type; + + /** + * Used to collect the name of a content type parameter. + */ + private ParseBuffer name; + + /** + * Used to collect the value of the content type parameter. + */ + private ParseBuffer value; + + /** + * Used to store the name value pairs of the parameters. + */ + private KeyMap<String> map; + + /** + * The default constructor will create a <code>ContentParser</code> + * that contains no charset, primary or secondary. This can be used + * to extract the primary, secondary and the optional charset + * parameter by using the parser's <code>parse(String)</code> + * method. + */ + public ContentTypeParser(){ + this.secondary = new ParseBuffer(); + this.primary = new ParseBuffer(); + this.charset = new ParseBuffer(); + this.value = new ParseBuffer(); + this.type = new ParseBuffer(); + this.name = new ParseBuffer(); + this.map = new KeyMap<String>(); + } + + /** + * This is primarily a convenience constructor. This will parse + * the <code>String</code> given to extract the MIME type. This + * could be achieved by calling the default no-arg constructor + * and then using the instance to invoke the <code>parse</code> + * method on that <code>String</code>. + * + * @param header <code>String</code> containing a MIME type value + */ + public ContentTypeParser(String header){ + this(); + parse(header); + } + + /** + * This method is used to get the primary and secondary parts + * joined together with a "/". This is typically how a content + * type is examined. Here convenience is most important, we can + * easily compare content types without any parameters. + * + * @return this returns the primary and secondary types + */ + public String getType() { + return type.toString(); + } + + /** + * This sets the primary type to whatever value is in the string + * provided is. If the string is null then this will contain a + * null string for the primary type of the parameter, which is + * likely invalid in most cases. + * + * @param value the type to set for the primary type of this + */ + public void setPrimary(String value) { + type.reset(value); + type.append('/'); + type.append(secondary); + primary.reset(value); + } + + /** + * This is used to retrieve the primary type of this MIME type. The + * primary type part within the MIME type defines the generic type. + * For example <code>text/plain; charset=UTF-8</code>. This will + * return the text value. If there is no primary type then this + * will return <code>null</code> otherwise the string value. + * + * @return the primary type part of this MIME type + */ + public String getPrimary() { + return primary.toString(); + } + + /** + * This sets the secondary type to whatever value is in the string + * provided is. If the string is null then this will contain a + * null string for the secondary type of the parameter, which is + * likely invalid in most cases. + * + * @param value the type to set for the primary type of this + */ + public void setSecondary(String value) { + type.reset(primary); + type.append('/'); + type.append(value); + secondary.reset(value); + } + + /** + * This is used to retrieve the secondary type of this MIME type. + * The secondary type part within the MIME type defines the generic + * type. For example <code>text/html; charset=UTF-8</code>. This + * will return the HTML value. If there is no secondary type then + * this will return <code>null</code> otherwise the string value. + * + * @return the primary type part of this MIME type + */ + public String getSecondary(){ + return secondary.toString(); + } + + /** + * This will set the <code>charset</code> to whatever value the + * string contains. If the string is null then this will not set + * the parameter to any value and the <code>toString</code> method + * will not contain any details of the parameter. + * + * @param enc parameter value to add to the MIME type + */ + public void setCharset(String enc) { + charset.reset(enc); + } + + /** + * This is used to retrieve the <code>charset</code> of this MIME + * type. This is a special parameter associated with the type, if + * the parameter is not contained within the type then this will + * return null, which typically means the default of ISO-8859-1. + * + * @return the value that this parameter contains + */ + public String getCharset() { + return charset.toString(); + } + + /** + * This is used to retrieve an arbitrary parameter from the MIME + * type header. This ensures that values for <code>boundary</code> + * or other such parameters are not lost when the header is parsed. + * This will return the value, unquoted if required, as a string. + * + * @param name this is the name of the parameter to be retrieved + * + * @return this is the value for the parameter, or null if empty + */ + public String getParameter(String name) { + return map.get(name); + } + + /** + * This will add a named parameter to the content type header. If + * a parameter of the specified name has already been added to the + * header then that value will be replaced by the new value given. + * Parameters such as the <code>boundary</code> as well as other + * common parameters can be set with this method. + * + * @param name this is the name of the parameter to be added + * @param value this is the value to associate with the name + */ + public void setParameter(String name, String value) { + map.put(name, value); + } + + /** + * This will initialize the parser when it is ready to parse + * a new <code>String</code>. This will reset the parser to a + * ready state. The init method is invoked by the parser when + * the <code>Parser.parse</code> method is invoked. + */ + protected void init(){ + if(count > 0) { + pack(); + } + clear(); + } + + /** + * This is used to clear all previously collected tokens. This + * allows the parser to be reused when there are multiple source + * strings to be parsed. Clearing of the tokens is performed + * when the parser is initialized. + */ + private void clear() { + primary.clear(); + secondary.clear(); + charset.clear(); + name.clear(); + value.clear(); + type.clear(); + map.clear(); + off = 0; + } + + /** + * Reads and parses the MIME type from the given <code>String</code> + * object. This uses the syntax defined by RFC 2616 for the media-type + * syntax. This parser is only concerned with one parameter, the + * <code>charset</code> parameter. The syntax for the media type is + * <pre> + * media-type = token "/" token *( ";" parameter ) + * parameter = token | literal + * </pre> + */ + protected void parse(){ + primary(); + off++; + secondary(); + parameters(); + } + + /** + * This is used to remove all whitespace characters from the + * <code>String</code> excluding the whitespace within literals. + * The definition of a literal can be found in RFC 2616. + * <p> + * The definition of a literal for RFC 2616 is anything between 2 + * quotes but excluding quotes that are prefixed with the backward + * slash character. + */ + private void pack() { + char old = buf[0]; + int len = count; + int seek = 0; + int pos = 0; + + while(seek < len){ + char ch = buf[seek++]; + + if(ch == '"' && old != '\\'){ /* qd-text*/ + buf[pos++] = ch; + + while(seek < len){ + old = buf[seek-1]; + ch = buf[seek++]; + buf[pos++] = ch; + + if(ch =='"'&& old!='\\'){ /*qd-text*/ + break; + } + } + }else if(!space(ch)){ + old = buf[seek - 1]; + buf[pos++] = old; + } + } + count = pos; + } + + /** + * This reads the type from the MIME type. This will fill the + * type <code>ParseBuffer</code>. This will read all chars + * upto but not including the first instance of a '/'. The type + * of a media-type as defined by RFC 2616 is + * <code>type/subtype;param=val;param2=val</code>. + */ + private void primary(){ + while(off < count){ + if(buf[off] =='/'){ + type.append('/'); + break; + } + type.append(buf[off]); + primary.append(buf[off]); + off++; + } + } + + /** + * This reads the subtype from the MIME type. This will fill the + * subtype <code>ParseBuffer</code>. This will read all chars + * upto but not including the first instance of a ';'. The subtype + * of a media-type as defined by RFC 2616 is + * <code>type/subtype;param=val;param2=val</code>. + */ + private void secondary(){ + while(off < count){ + if(buf[off] ==';'){ + break; + } + type.append(buf[off]); + secondary.append(buf[off]); + off++; + } + } + + /** + * This will read the parameters from the MIME type. This will search + * for the <code>charset</code> parameter within the set of parameters + * which are given to the type. The <code>charset</code> param is the + * only parameter that this parser will tokenize. + * <p> + * This will remove any parameters that preceed the charset parameter. + * Once the <code>charset</code> is retrived the MIME type is considered + * to be parsed. + */ + private void parameters(){ + while(skip(";")){ + if(skip("charset=")){ + charset(); + break; + }else{ + parameter(); + insert(); + } + } + } + + /** + * This will add the name and value tokens to the parameters map. + * If any previous value of the given name has been inserted + * into the map then this will overwrite that value. This is + * used to ensure that the string value is inserted to the map. + */ + private void insert() { + insert(name, value); + name.clear(); + value.clear(); + } + + /** + * This will add the given name and value to the parameters map. + * If any previous value of the given name has been inserted + * into the map then this will overwrite that value. This is + * used to ensure that the string value is inserted to the map. + * + * @param name this is the name of the value to be inserted + * @param value this is the value of a that is to be inserted + */ + private void insert(ParseBuffer name, ParseBuffer value) { + map.put(name.toString(), value.toString()); + } + + /** + * This is a parameter as defined by RFC 2616. The parameter is added to a + * MIME type e.g. <code>type/subtype;param=val</code> etc. The parameter + * name and value are not stored. This is used to simply update the read + * offset past the parameter. The reason for reading the parameters is to + * search for the <code>charset</code> parameter which will indicate the + * encoding. + */ + private void parameter(){ + name(); + off++; /* = */ + value(); + } + + /** + * This will simply read all characters from the buffer before the first '=' + * character. This represents a parameter name (see RFC 2616 for token). The + * parameter name is not buffered it is simply read from the buffer. This will + * not cause an <code>IndexOutOfBoundsException</code> as each offset + * is checked before it is acccessed. + */ + private void name(){ + while(off < count){ + if(buf[off] =='='){ + break; + } + name.append(buf[off]); + off++; + } + } + + /** + * This is used to read a parameters value from the buf. This will read all + * <code>char</code>'s upto but excluding the first terminal <code>char</code> + * encountered from the off within the buf, or if the value is a literal + * it will read a literal from the buffer (literal is any data between + * quotes except if the quote is prefixed with a backward slash character). + */ + private void value(){ + if(quote(buf[off])){ + for(off++; off < count;){ + if(quote(buf[off])){ + if(buf[++off-2]!='\\'){ + break; + } + } + value.append(buf[off++]); + } + }else{ + while(off < count){ + if(buf[off] ==';') { + break; + } + value.append(buf[off]); + off++; + } + } + } + + /** + * This method is used to determine if the specified character is a quote + * character. The quote character is typically used as a boundary for the + * values within the header. This accepts a single or double quote. + * + * @param ch the character to determine if it is a quotation + * + * @return true if the character provided is a quotation character + */ + private boolean quote(char ch) { + return ch == '\'' || ch == '"'; + } + + /** + * This is used to read the value from the <code>charset</code> param. + * This will fill the <code>charset</code> <code>ParseBuffer</code> and with + * the <code>charset</code> value. This will read a literal or a token as + * the <code>charset</code> value. If the <code>charset</code> is a literal + * then the quotes will be read as part of the charset. + */ + private void charset(){ + if(buf[off] == '"'){ + charset.append('"'); + for(off++; off < count;){ + charset.append(buf[off]); + if(buf[off++]=='"') + if(buf[off-2]!='\\'){ + break; + } + } + }else{ + while(off < count){ + if(buf[off]==';') { + break; + } + charset.append(buf[off]); + off++; + } + } + } + + /** + * This will return the value of the MIME type as a string. This + * will concatenate the primary and secondary type values and + * add the <code>charset</code> parameter to the type which will + * recreate the content type. + * + * @return this returns the string representation of the type + */ + private String encode() { + StringBuilder text = new StringBuilder(); + + if(primary != null) { + text.append(primary); + text.append("/"); + text.append(secondary); + } + if(charset.length() > 0) { + text.append("; charset="); + text.append(charset); + } + return encode(text); + } + + /** + * This will return the value of the MIME type as a string. This + * will concatenate the primary and secondary type values and + * add the <code>charset</code> parameter to the type which will + * recreate the content type. + * + * @param text this is the buffer to encode the parameters to + * + * @return this returns the string representation of the type + */ + private String encode(StringBuilder text) { + for(String name : map) { + String value = map.get(name); + + text.append("; "); + text.append(name); + + if(value != null) { + text.append("="); + text.append(value);; + } + } + return text.toString(); + } + + /** + * This will return the value of the MIME type as a string. This + * will concatenate the primary and secondary type values and + * add the <code>charset</code> parameter to the type which will + * recreate the content type. + * + * @return this returns the string representation of the type + */ + public String toString() { + return encode(); + } +} |