summaryrefslogtreecommitdiffstats
path: root/simple/simple-http/src/main/java/org/simpleframework/http/parse/ContentTypeParser.java
diff options
context:
space:
mode:
Diffstat (limited to 'simple/simple-http/src/main/java/org/simpleframework/http/parse/ContentTypeParser.java')
-rw-r--r--simple/simple-http/src/main/java/org/simpleframework/http/parse/ContentTypeParser.java556
1 files changed, 556 insertions, 0 deletions
diff --git a/simple/simple-http/src/main/java/org/simpleframework/http/parse/ContentTypeParser.java b/simple/simple-http/src/main/java/org/simpleframework/http/parse/ContentTypeParser.java
new file mode 100644
index 0000000..f42c073
--- /dev/null
+++ b/simple/simple-http/src/main/java/org/simpleframework/http/parse/ContentTypeParser.java
@@ -0,0 +1,556 @@
+/*
+ * ContentTypeParser.java February 2001
+ *
+ * Copyright (C) 2001, Niall Gallagher <niallg@users.sf.net>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+package org.simpleframework.http.parse;
+
+import org.simpleframework.common.KeyMap;
+import org.simpleframework.common.parse.ParseBuffer;
+import org.simpleframework.common.parse.Parser;
+import org.simpleframework.http.ContentType;
+
+/**
+ * This provides access to the MIME type parts, that is the primary
+ * type, the secondary type and an optional character set parameter.
+ * The <code>charset</code> parameter is one of many parameters that
+ * can be associated with a MIME type. This class, however, exposes
+ * that parameter through a dedicated typed method.
+ * <p>
+ * The <code>getCharset</code> will return the character encoding the
+ * content type is encoded within. This allows the user of the content
+ * to decode it correctly. Other parameters can be acquired from this
+ * by simply providing the name of the parameter.
+ *
+ * @author Niall Gallagher
+ */
+public class ContentTypeParser extends Parser implements ContentType {
+
+ /**
+ * Used to store the characters consumed for the secondary type,
+ * that is the part of the type following the '/' character.
+ */
+ private ParseBuffer secondary;
+
+ /**
+ * Used to store the characters consumed for the primary type,
+ * that is the part of the type preceding the '/' character.
+ */
+ private ParseBuffer primary;
+
+ /**
+ * Used to store the characters for the charset parameter. If the
+ * parsed value was quoted the quotes are kept in this buffer.
+ */
+ private ParseBuffer charset;
+
+ /**
+ * Used to store the characters consumed for the type. This holds
+ * the primary type, a '/' and the secondary type joined together.
+ */
+ private ParseBuffer type;
+
+ /**
+ * Used to collect the name of a content type parameter. This is
+ * a scratch buffer, it is cleared after each parameter is stored.
+ */
+ private ParseBuffer name;
+
+ /**
+ * Used to collect the value of the content type parameter. This is
+ * a scratch buffer, it is cleared after each parameter is stored.
+ */
+ private ParseBuffer value;
+
+ /**
+ * Used to store the name value pairs of the parameters. The
+ * <code>charset</code> parameter is kept in its own buffer and
+ * is not placed in this map by the parser.
+ */
+ private KeyMap<String> map;
+
+ /**
+ * The default constructor will create a <code>ContentTypeParser</code>
+ * that contains no charset, primary or secondary. This can be used
+ * to extract the primary, secondary and the optional charset
+ * parameter by using the parser's <code>parse(String)</code>
+ * method.
+ */
+ public ContentTypeParser(){
+    /* parameter storage */
+    this.map = new KeyMap<String>();
+    /* buffers for the joined type and its two halves */
+    this.type = new ParseBuffer();
+    this.primary = new ParseBuffer();
+    this.secondary = new ParseBuffer();
+    /* buffer for the typed charset parameter */
+    this.charset = new ParseBuffer();
+    /* scratch buffers reused for every other parameter */
+    this.name = new ParseBuffer();
+    this.value = new ParseBuffer();
+ }
+
+ /**
+ * This is primarily a convenience constructor. This will parse
+ * the <code>String</code> given to extract the MIME type. This
+ * could be achieved by calling the default no-arg constructor
+ * and then using the instance to invoke the <code>parse</code>
+ * method on that <code>String</code>.
+ *
+ * @param header <code>String</code> containing a MIME type value
+ */
+ public ContentTypeParser(String header){
+ /* allocate the buffers and the parameter map before anything is parsed */
+ this();
+ /* hand the header text to the parse(String) entry point, which is not */
+ /* declared in this class - presumably inherited from Parser */
+ parse(header);
+ }
+
+ /**
+ * This method is used to get the primary and secondary parts
+ * joined together with a "/". This is typically how a content
+ * type is examined. Here convenience is most important, we can
+ * easily compare content types without any parameters.
+ *
+ * @return this returns the primary and secondary types
+ */
+ public String getType() {
+    /* the type buffer holds the primary type, a '/' and the secondary type */
+    final String joined = type.toString();
+
+    return joined;
+ }
+
+ /**
+ * This sets the primary type to whatever value is in the string
+ * provided is. If the string is null then this will contain a
+ * null string for the primary type of the parameter, which is
+ * likely invalid in most cases.
+ *
+ * @param value the type to set for the primary type of this
+ */
+ public void setPrimary(String value) {
+    /* replace the stand-alone primary type */
+    primary.reset(value);
+
+    /* rebuild the joined type from the new primary and the current secondary */
+    type.reset(value);
+    type.append('/');
+    type.append(secondary);
+ }
+
+ /**
+ * This is used to retrieve the primary type of this MIME type. The
+ * primary type part within the MIME type defines the generic type.
+ * For example <code>text/plain; charset=UTF-8</code>. This will
+ * return the text value. If there is no primary type then this
+ * will return <code>null</code> otherwise the string value.
+ *
+ * @return the primary type part of this MIME type
+ */
+ public String getPrimary() {
+    /* text collected before the '/' or assigned through setPrimary */
+    final String text = primary.toString();
+
+    return text;
+ }
+
+ /**
+ * This sets the secondary type to whatever value is in the string
+ * provided is. If the string is null then this will contain a
+ * null string for the secondary type of the parameter, which is
+ * likely invalid in most cases.
+ *
+ * @param value the type to set for the secondary type of this
+ */
+ public void setSecondary(String value) {
+    /* replace the stand-alone secondary type */
+    secondary.reset(value);
+
+    /* rebuild the joined type from the current primary and the new secondary */
+    type.reset(primary);
+    type.append('/');
+    type.append(value);
+ }
+
+ /**
+ * This is used to retrieve the secondary type of this MIME type.
+ * The secondary type part within the MIME type defines the generic
+ * type. For example <code>text/html; charset=UTF-8</code>. This
+ * will return the HTML value. If there is no secondary type then
+ * this will return <code>null</code> otherwise the string value.
+ *
+ * @return the secondary type part of this MIME type
+ */
+ public String getSecondary(){
+    /* text collected after the '/' or assigned through setSecondary */
+    final String text = secondary.toString();
+
+    return text;
+ }
+
+ /**
+ * This will set the <code>charset</code> to whatever value the
+ * string contains. If the string is null then this will not set
+ * the parameter to any value and the <code>toString</code> method
+ * will not contain any details of the parameter.
+ *
+ * @param enc parameter value to add to the MIME type
+ */
+ public void setCharset(String enc) {
+ /* the charset lives in its own buffer, it is not stored in the parameter map */
+ charset.reset(enc);
+ }
+
+ /**
+ * This is used to retrieve the <code>charset</code> of this MIME
+ * type. This is a special parameter associated with the type, if
+ * the parameter is not contained within the type then this will
+ * return null, which typically means the default of ISO-8859-1.
+ *
+ * @return the value that this parameter contains
+ */
+ public String getCharset() {
+    /* value collected by the parser or assigned through setCharset */
+    final String encoding = charset.toString();
+
+    return encoding;
+ }
+
+ /**
+ * This is used to retrieve an arbitrary parameter from the MIME
+ * type header. This ensures that values for <code>boundary</code>
+ * or other such parameters are not lost when the header is parsed.
+ * This will return the value, unquoted if required, as a string.
+ *
+ * @param name this is the name of the parameter to be retrieved
+ *
+ * @return this is the value for the parameter, or null if empty
+ */
+ public String getParameter(String name) {
+    /* plain lookup in the parameter map, the charset is not held there */
+    final String found = map.get(name);
+
+    return found;
+ }
+
+ /**
+ * This will add a named parameter to the content type header. If
+ * a parameter of the specified name has already been added to the
+ * header then that value will be replaced by the new value given.
+ * Parameters such as the <code>boundary</code> as well as other
+ * common parameters can be set with this method.
+ *
+ * @param name this is the name of the parameter to be added
+ * @param value this is the value to associate with the name
+ */
+ public void setParameter(String name, String value) {
+ /* stored in the parameter map only; the charset buffer is not touched here */
+ map.put(name, value);
+ }
+
+ /**
+ * This will initialize the parser when it is ready to parse
+ * a new <code>String</code>. This will reset the parser to a
+ * ready state. The init method is invoked by the parser when
+ * the <code>Parser.parse</code> method is invoked.
+ */
+ protected void init(){
+    /* pack reads buf[0], so it is only safe when there is input */
+    boolean pending = count > 0;
+
+    if(pending) {
+       pack();
+    }
+    clear();
+ }
+
+ /**
+ * This is used to clear all previously collected tokens. This
+ * allows the parser to be reused when there are multiple source
+ * strings to be parsed. Clearing of the tokens is performed
+ * when the parser is initialized.
+ */
+ private void clear() {
+    /* rewind the read offset to the start of the buffer */
+    off = 0;
+
+    /* forget every parameter and token from the previous parse */
+    map.clear();
+    type.clear();
+    primary.clear();
+    secondary.clear();
+    charset.clear();
+    name.clear();
+    value.clear();
+ }
+
+ /**
+ * Reads and parses the MIME type from the given <code>String</code>
+ * object. This uses the syntax defined by RFC 2616 for the media-type
+ * syntax. This parser is only concerned with one parameter, the
+ * <code>charset</code> parameter. The syntax for the media type is
+ * <pre>
+ * media-type = token "/" token *( ";" parameter )
+ * parameter = token "=" ( token | literal )
+ * </pre>
+ */
+ protected void parse(){
+    primary();    /* everything before the '/' */
+    off += 1;     /* step over the '/' separator */
+    secondary();  /* everything up to the first ';' */
+    parameters(); /* the optional ";name=value" pairs */
+ }
+
+ /**
+ * This is used to remove all whitespace characters from the
+ * <code>String</code> excluding the whitespace within literals.
+ * The definition of a literal can be found in RFC 2616.
+ * <p>
+ * The definition of a literal for RFC 2616 is anything between 2
+ * quotes but excluding quotes that are prefixed with the backward
+ * slash character.
+ */
+ private void pack() {
+ /* old tracks the character seen before the current one so that an */
+ /* escaped quote can be told apart from a real one; reading buf[0] */
+ /* relies on init() only calling this when count > 0 */
+ char old = buf[0];
+ int len = count;
+ /* seek is the read position, pos the write position; pos never passes */
+ /* seek so the buffer can be compacted in place */
+ int seek = 0;
+ int pos = 0;
+
+ while(seek < len){
+ char ch = buf[seek++];
+
+ /* an unescaped double quote opens a literal that is copied verbatim */
+ if(ch == '"' && old != '\\'){ /* qd-text*/
+ buf[pos++] = ch;
+
+ while(seek < len){
+ /* remember the character before the one about to be read */
+ old = buf[seek-1];
+ ch = buf[seek++];
+ buf[pos++] = ch;
+
+ /* an unescaped double quote closes the literal */
+ if(ch =='"'&& old!='\\'){ /*qd-text*/
+ break;
+ }
+ }
+ /* outside a literal only characters that space() rejects are kept; */
+ /* space() is not declared in this class - presumably a whitespace */
+ /* test inherited from Parser */
+ }else if(!space(ch)){
+ old = buf[seek - 1];
+ buf[pos++] = old;
+ }
+ }
+ /* the packed text is never longer than the original */
+ count = pos;
+ }
+
+ /**
+ * This reads the type from the MIME type. This will fill the
+ * type <code>ParseBuffer</code>. This will read all chars
+ * upto but not including the first instance of a '/'. The type
+ * of a media-type as defined by RFC 2616 is
+ * <code>type/subtype;param=val;param2=val</code>.
+ */
+ private void primary(){
+    for(; off < count; off++){
+       char next = buf[off];
+
+       if(next == '/'){
+          /* the separator belongs to the joined type only; off stays on it */
+          type.append(next);
+          break;
+       }
+       type.append(next);
+       primary.append(next);
+    }
+ }
+
+ /**
+ * This reads the subtype from the MIME type. This will fill the
+ * subtype <code>ParseBuffer</code>. This will read all chars
+ * upto but not including the first instance of a ';'. The subtype
+ * of a media-type as defined by RFC 2616 is
+ * <code>type/subtype;param=val;param2=val</code>.
+ */
+ private void secondary(){
+    /* stop on the ';' that starts the parameters, or at the end of input */
+    while(off < count && buf[off] != ';'){
+       char next = buf[off++];
+
+       type.append(next);
+       secondary.append(next);
+    }
+ }
+
+ /**
+ * This will read the parameters from the MIME type. The
+ * <code>charset</code> parameter is tokenized into its own buffer,
+ * every other parameter is stored by name in the parameter map.
+ * <p>
+ * Parsing continues after the <code>charset</code> parameter so that
+ * parameters which follow it, such as <code>boundary</code>, are not
+ * lost. If the <code>charset</code> parameter appears more than once
+ * then the last value is the one that is kept.
+ */
+ private void parameters(){
+    while(skip(";")){
+       if(skip("charset=")){
+          /* a repeated charset replaces the earlier value rather than */
+          /* being appended to it */
+          charset.clear();
+          charset();
+       }else{
+          parameter();
+          insert();
+       }
+    }
+ }
+
+ /**
+ * This will add the name and value tokens to the parameters map.
+ * If any previous value of the given name has been inserted
+ * into the map then this will overwrite that value. This is
+ * used to ensure that the string value is inserted to the map.
+ */
+ private void insert() {
+    /* snapshot the collected tokens as strings and store the pair */
+    map.put(name.toString(), value.toString());
+
+    /* empty the scratch buffers ready for the next parameter */
+    name.clear();
+    value.clear();
+ }
+
+ /**
+ * This will add the given name and value to the parameters map.
+ * If any previous value of the given name has been inserted
+ * into the map then this will overwrite that value. This is
+ * used to ensure that the string value is inserted to the map.
+ *
+ * @param name this is the name of the value to be inserted
+ * @param value this is the value of a that is to be inserted
+ */
+ private void insert(ParseBuffer name, ParseBuffer value) {
+    String key = name.toString();
+    String text = value.toString();
+
+    /* any value already stored under the key is replaced */
+    map.put(key, text);
+ }
+
+ /**
+ * This is a parameter as defined by RFC 2616. The parameter is added to a
+ * MIME type e.g. <code>type/subtype;param=val</code> etc. The parameter
+ * name and value are collected into the name and value buffers so that
+ * the caller can store them, and the read offset is moved past the
+ * parameter. Reading each parameter in turn is also how the parser
+ * reaches the <code>charset</code> parameter which will indicate the
+ * encoding.
+ */
+ private void parameter(){
+    name();   /* collect everything before the '=' */
+    off += 1; /* step over the '=' */
+    value();  /* collect the token or quoted literal */
+ }
+
+ /**
+ * This will read all characters from the buffer before the first '='
+ * character. This represents a parameter name (see RFC 2616 for token). The
+ * characters read are appended to the name buffer. This will
+ * not cause an <code>IndexOutOfBoundsException</code> as each offset
+ * is checked before it is accessed.
+ */
+ private void name(){
+    /* stop on the '=' that separates name from value, or at the end of input */
+    while(off < count && buf[off] != '='){
+       name.append(buf[off++]);
+    }
+ }
+
+ /**
+ * This is used to read a parameters value from the buf. This will read all
+ * <code>char</code>'s upto but excluding the first terminal <code>char</code>
+ * encountered from the off within the buf, or if the value is a literal
+ * it will read a literal from the buffer (literal is any data between
+ * quotes except if the quote is prefixed with a backward slash character).
+ */
+ private void value(){
+    /* a parameter name with no '=' leaves the offset at or beyond count, */
+    /* in which case there is no value and the buffer must not be read */
+    if(off >= count){
+       return;
+    }
+    if(quote(buf[off])){
+       /* remember which quote opened the literal, only that same */
+       /* character may close it so "O'Reilly" is read in full */
+       char open = buf[off++];
+
+       while(off < count){
+          char ch = buf[off++];
+
+          /* buf[off - 2] is the character before ch, a backslash */
+          /* there means the quote is escaped and is part of the value */
+          if(ch == open && buf[off - 2] != '\\'){
+             break; /* the closing quote has been consumed */
+          }
+          value.append(ch);
+       }
+    }else{
+       /* a plain token runs up to the next ';' or the end of input */
+       while(off < count && buf[off] != ';'){
+          value.append(buf[off++]);
+       }
+    }
+ }
+
+ /**
+ * This method is used to determine if the specified character is a quote
+ * character. The quote character is typically used as a boundary for the
+ * values within the header. This accepts a single or double quote.
+ *
+ * @param ch the character to determine if it is a quotation
+ *
+ * @return true if the character provided is a quotation character
+ */
+ private boolean quote(char ch) {
+    switch(ch) {
+    case '\'': /* single quote */
+    case '"':  /* double quote */
+       return true;
+    default:
+       return false;
+    }
+ }
+
+ /**
+ * This is used to read the value from the <code>charset</code> param.
+ * This will fill the <code>charset</code> <code>ParseBuffer</code> and with
+ * the <code>charset</code> value. This will read a literal or a token as
+ * the <code>charset</code> value. If the <code>charset</code> is a literal
+ * then the quotes will be read as part of the charset.
+ */
+ private void charset(){
+    /* a header ending in "charset=" has no value, and the buffer must */
+    /* not be read at or beyond count */
+    if(off >= count){
+       return;
+    }
+    if(buf[off] == '"'){
+       /* a literal is stored with its surrounding quotes */
+       charset.append('"');
+       off++;
+
+       while(off < count){
+          char ch = buf[off++];
+
+          charset.append(ch);
+
+          /* buf[off - 2] is the character before ch, a backslash */
+          /* there means the quote is escaped and the literal goes on */
+          if(ch == '"' && buf[off - 2] != '\\'){
+             break;
+          }
+       }
+    }else{
+       /* a plain token runs up to the next ';' or the end of input */
+       while(off < count && buf[off] != ';'){
+          charset.append(buf[off++]);
+       }
+    }
+ }
+
+ /**
+ * This will return the value of the MIME type as a string. This
+ * will concatenate the primary and secondary type values and
+ * add the <code>charset</code> parameter to the type which will
+ * recreate the content type.
+ *
+ * @return this returns the string representation of the type
+ */
+ private String encode() {
+    StringBuilder text = new StringBuilder();
+
+    /* the primary buffer is created in the constructor and is never */
+    /* null, so emptiness is what decides whether there is a type to */
+    /* write; appending an empty buffer could otherwise emit the text */
+    /* of a null string (NOTE(review): getPrimary documents a null */
+    /* result for an empty buffer - confirm against ParseBuffer) */
+    if(primary.length() > 0) {
+       text.append(primary);
+       text.append("/");
+
+       if(secondary.length() > 0) {
+          text.append(secondary);
+       }
+    }
+    if(charset.length() > 0) {
+       text.append("; charset=");
+       text.append(charset);
+    }
+    /* the remaining parameters are appended from the parameter map */
+    return encode(text);
+ }
+
+ /**
+ * This will return the value of the MIME type as a string. This
+ * will concatenate the primary and secondary type values and
+ * add the <code>charset</code> parameter to the type which will
+ * recreate the content type.
+ *
+ * @param text this is the buffer to encode the parameters to
+ *
+ * @return this returns the string representation of the type
+ */
+ private String encode(StringBuilder text) {
+    for(String key : map) {
+       /* every parameter is introduced by "; " followed by its name */
+       text.append("; ").append(key);
+
+       String param = map.get(key);
+
+       /* a parameter without a value is written as a bare name */
+       if(param != null) {
+          text.append("=").append(param);
+       }
+    }
+    return text.toString();
+ }
+
+ /**
+ * This will return the value of the MIME type as a string. This
+ * will concatenate the primary and secondary type values and
+ * add the <code>charset</code> parameter to the type which will
+ * recreate the content type.
+ *
+ * @return this returns the string representation of the type
+ */
+ public String toString() {
+    /* type, then the charset if present, then the mapped parameters */
+    final String text = encode();
+
+    return text;
+ }
+}