1 files changed, 556 insertions, 0 deletions
diff --git a/simple/simple-http/src/main/java/org/simpleframework/http/parse/ContentTypeParser.java b/simple/simple-http/src/main/java/org/simpleframework/http/parse/ContentTypeParser.java
new file mode 100644
index 0000000..f42c073
--- /dev/null
+++ b/simple/simple-http/src/main/java/org/simpleframework/http/parse/ContentTypeParser.java
@@ -0,0 +1,556 @@
+/*
+ * ContentTypeParser.java February 2001
+ *
+ * Copyright (C) 2001, Niall Gallagher <niallg@users.sf.net>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
+ * implied. See the License for the specific language governing 
+ * permissions and limitations under the License.
+ */
+ 
+package org.simpleframework.http.parse;
+
+import org.simpleframework.common.KeyMap;
+import org.simpleframework.common.parse.ParseBuffer;
+import org.simpleframework.common.parse.Parser;
+import org.simpleframework.http.ContentType;
+
+/** 
+ * This provides access to the MIME type parts, that is the primary 
+ * type, the secondary type and an optional character set parameter.
+ * The <code>charset</code> parameter is one of many parameters that 
+ * can be associated with a MIME type. This class, however, exposes
+ * that parameter through a dedicated typed method.
+ * <p>
+ * The <code>getCharset</code> will return the character encoding the
+ * content type is encoded within. This allows the user of the content
+ * to decode it correctly. Other parameters can be acquired from this
+ * by simply providing the name of the parameter. 
+ *
+ * @author Niall Gallagher
+ */
+public class ContentTypeParser extends Parser implements ContentType {
+
+   /** 
+    * Collects the secondary type, the text read after the '/'.
+    */
+   private ParseBuffer secondary;   
+
+   /** 
+    * Collects the primary type, the text read before the '/'.
+    */
+   private ParseBuffer primary;
+
+   /** 
+    * Collects the value read for the charset parameter, if any.
+    */
+   private ParseBuffer charset;
+   
+   /** 
+    * Collects the primary and secondary types joined by a '/'.
+    */
+   private ParseBuffer type;
+   
+   /**
+    * Collects the name of the parameter currently being read.
+    */
+   private ParseBuffer name;
+   
+   /**
+    * Collects the value of the parameter currently being read.
+    */
+   private ParseBuffer value;
+   
+   /**
+    * Stores the name value pairs of the parameters collected.
+    */
+   private KeyMap<String> map;
+   
+   /** 
+    * The default constructor creates a <code>ContentTypeParser</code>
+    * with an empty primary type, secondary type and charset. Use the
+    * parser's <code>parse(String)</code> method on the instance to
+    * extract the primary type, the secondary type, the optional 
+    * charset and any other parameters from a header value.
+    */   
+   public ContentTypeParser(){
+      this.map = new KeyMap<String>();
+      this.type = new ParseBuffer();
+      this.primary = new ParseBuffer();
+      this.secondary = new ParseBuffer();
+      this.charset = new ParseBuffer();
+      this.name = new ParseBuffer();
+      this.value = new ParseBuffer();
+   }
+
+   /** 
+    * This is a convenience constructor that parses the given 
+    * <code>String</code> straight away to extract the MIME type.
+    * It is equivalent to creating the parser with the default 
+    * no-arg constructor and then invoking <code>parse</code>
+    * with the same <code>String</code>.
+    *
+    * @param header <code>String</code> containing a MIME type value
+    */
+   public ContentTypeParser(String header){
+      this();
+      this.parse(header);
+   }     
+   
+   /**
+    * This provides the primary and secondary parts of the type
+    * joined with a "/", without any of the parameters. It is the
+    * most convenient form to use when comparing content types.
+    * 
+    * @return this returns the primary and secondary types
+    */
+   public String getType() {
+      final String joined = type.toString();
+      return joined;
+   }
+
+   /** 
+    * This sets the primary type to the value of the string given 
+    * and rebuilds the joined type from it and the secondary type. 
+    * If the string is null the primary type is left holding a null
+    * string, which is likely invalid in most cases.
+    * 
+    * @param value the type to set for the primary type of this
+    */ 
+   public void setPrimary(String value) {
+      primary.reset(value);
+      type.reset(value);
+      type.append('/');
+      type.append(secondary);
+   }
+   
+   /** 
+    * This is used to retrieve the primary type of this MIME type,
+    * which is the part that defines the generic type. For example
+    * <code>text/plain; charset=UTF-8</code> has the primary type
+    * text. If there is no primary type this returns null.
+    *
+    * @return the primary type part of this MIME type
+    */  
+   public String getPrimary() {
+      final String text = primary.toString();
+      return text;
+   }   
+   
+   /** 
+    * This sets the secondary type to the value of the string given 
+    * and rebuilds the joined type from the primary type and it. If 
+    * the string is null the secondary type is left holding a null
+    * string, which is likely invalid in most cases.
+    * 
+    * @param value the type to set for the secondary type of this
+    */ 
+   public void setSecondary(String value) {
+      secondary.reset(value);
+      type.reset(primary);
+      type.append('/');
+      type.append(value);
+   }   
+
+   /** 
+    * This is used to retrieve the secondary type of this MIME type,
+    * which is the part that defines the specific type. For example
+    * <code>text/html; charset=UTF-8</code> has the secondary type
+    * html. If there is no secondary type this returns null.
+    *
+    * @return the secondary type part of this MIME type
+    */ 
+   public String getSecondary(){
+      final String text = secondary.toString();
+      return text;
+   }   
+
+   /** 
+    * This sets the <code>charset</code> to the value of the string
+    * given. If the string is null then the parameter is not set to
+    * any value, and the <code>toString</code> method will not 
+    * contain any details of the parameter.
+    *
+    * @param enc parameter value to add to the MIME type
+    */ 
+   public void setCharset(String enc) {
+      this.charset.reset(enc);  
+   }   
+
+   /** 
+    * This is used to retrieve the <code>charset</code> of this MIME
+    * type, a special parameter associated with the type. If the type
+    * does not contain the parameter this returns null, which 
+    * typically means the default of ISO-8859-1.
+    *
+    * @return the value that this parameter contains
+    */  
+   public String getCharset() {
+      return this.charset.toString();   
+   }  
+   
+   /**
+    * This is used to retrieve an arbitrary parameter, for example
+    * the <code>boundary</code>, from the MIME type header so that
+    * such values are not lost when the header is parsed. The value
+    * is returned as a string, unquoted if required. 
+    * 
+    * @param name this is the name of the parameter to be retrieved
+    * 
+    * @return this is the value for the parameter, or null if empty
+    */
+   public String getParameter(String name) {
+      return this.map.get(name);
+   }
+   
+   /**
+    * This adds a named parameter to the content type header. Should
+    * a parameter of the specified name have been added already, its
+    * value is replaced by the new value given. The 
+    * <code>boundary</code> as well as other common parameters can 
+    * be set with this method.
+    * 
+    * @param name this is the name of the parameter to be added     
+    * @param value this is the value to associate with the name
+    */
+   public void setParameter(String name, String value) {
+      this.map.put(name, value);
+   }
+
+   /** 
+    * This is invoked by the parser to reach a ready state before a
+    * new <code>String</code> is parsed. Any characters buffered are
+    * packed and all previously collected tokens are cleared.
+    */
+   protected void init(){
+      boolean buffered = count > 0;
+      if(buffered) { 
+         pack();
+      }
+      clear();
+   }
+   
+   /**
+    * This clears all previously collected tokens along with the 
+    * parameters and returns the read offset to the start. It is
+    * what allows the parser to be reused for several source 
+    * strings, and is performed when the parser is initialized.
+    */
+   private void clear() {
+      off = 0;
+      map.clear();
+      type.clear();
+      primary.clear();
+      secondary.clear();
+      charset.clear();
+      name.clear();
+      value.clear();
+   }
+   
+   /** 
+    * This reads the MIME type from the buffered characters, taking 
+    * the primary type, the separator, the secondary type and finally
+    * the parameters in that order. The syntax followed is that of 
+    * the media-type as defined by RFC 2616, which is
+    * <pre>
+    * media-type = token "/" token *( ";" parameter )
+    * parameter = token "=" ( token | literal )
+    * </pre>
+    */
+   protected void parse(){
+      primary();
+      off += 1; /* step over the '/' */
+      secondary();
+      parameters();
+   }
+   
+   /** 
+    * This removes all whitespace characters from the buffer, leaving
+    * the whitespace within literals intact. The buffer is compacted 
+    * in place and <code>count</code> is set to the packed length.
+    * <p>
+    * A literal here is anything between two double quotes, where a
+    * quote prefixed with the backward slash character neither opens 
+    * nor closes it. The first character is read unconditionally.
+    */
+   private void pack() {
+      char old = buf[0];
+      int len = count;
+      int seek = 0; /* read offset */
+      int pos = 0; /* write offset, never ahead of the read offset */
+
+      while(seek < len){
+         char ch = buf[seek++];
+         
+         if(ch == '"' && old != '\\'){  /* unescaped quote opens a literal */
+            buf[pos++] = ch;
+            
+            while(seek < len){
+               old = buf[seek-1];
+               ch = buf[seek++];  
+               buf[pos++] = ch;  
+               
+               if(ch =='"'&& old!='\\'){  /* unescaped quote closes the literal */
+                  break;
+               }
+            }
+         }else if(!space(ch)){            
+            old = buf[seek - 1];  
+            buf[pos++] = old;                   
+         }         
+      }
+      count = pos;
+   }
+   
+   /** 
+    * This reads the primary type from the MIME type. Every character 
+    * up to but not including the first '/' is added to the primary 
+    * and type buffers, and the '/' itself is added to the type buffer
+    * only. The media-type as defined by RFC 2616 is of the form
+    * <code>type/subtype;param=val;param2=val</code>.
+    */  
+   private void primary(){
+      for(; off < count; off++){
+         char next = buf[off];
+         if(next == '/'){
+            type.append('/');
+            break;
+         }
+         type.append(next);
+         primary.append(next);
+      }
+   }
+  
+   /** 
+    * This reads the subtype from the MIME type. Every character up 
+    * to but not including the first ';' is added to the secondary
+    * and type buffers, so that the type buffer ends up holding the
+    * primary and secondary types joined together. The read offset
+    * is left on the ';' when one is found. The media-type as 
+    * defined by RFC 2616 is of the form
+    * <code>type/subtype;param=val;param2=val</code>.
+    */
+   private void secondary(){
+      while(off < count && buf[off] != ';'){
+         char next = buf[off++];
+         
+         type.append(next);
+         secondary.append(next);
+      }      
+   }
+   
+   /** 
+    * This reads the parameters from the MIME type, each of which is 
+    * introduced by a ';'. The <code>charset</code> parameter is kept
+    * in its own buffer, all other parameters are inserted into the 
+    * map under their names. 
+    * <p>
+    * Reading continues after the <code>charset</code> so that any 
+    * parameters following it, such as a <code>boundary</code>, are 
+    * not lost. If the charset is repeated the last value is used.
+    */
+   private void parameters(){   
+      while(skip(";")){
+         if(skip("charset=")){
+            charset.clear();
+            charset();
+         }else{
+            parameter();
+            insert();
+         }
+      }
+   }   
+   
+   /**
+    * Adds the collected name and value to the map, unless the name is
+    * empty, as with a stray ";", then clears both for the next parameter.
+    */
+   private void insert() {
+      if(name.length() > 0) {
+         insert(name, value);
+      }
+      name.clear();
+      value.clear();
+   }
+   
+   /**
+    * This adds the given name and value to the parameters map as
+    * strings, overwriting any value already held for that name.
+    *
+    * @param name this is the name of the value to be inserted
+    * @param value this is the value that is to be inserted
+    */
+   private void insert(ParseBuffer name, ParseBuffer value) {
+      String key = name.toString();
+      String text = value.toString();
+      map.put(key, text);
+   }
+   
+   /** 
+    * This reads a parameter as defined by RFC 2616, which is added to 
+    * a MIME type e.g. <code>type/subtype;param=val</code>. The name is
+    * collected first, the '=' separator is then stepped over, and the 
+    * value is collected last. The tokens collected remain in the name
+    * and value buffers so that they can be inserted into the map once
+    * the parameter has been read.
+    */
+   private void parameter(){
+      name();
+      off += 1; /* = */
+      value();
+   }
+   
+   /** 
+    * This reads all characters from the buffer before the first '=' 
+    * character and collects them in the name buffer. This represents 
+    * a parameter name (see RFC 2616 for token). This will not cause an
+    * <code>IndexOutOfBoundsException</code> as each offset is checked 
+    * before it is accessed.
+    */
+   private void name(){
+      for(; off < count; off++){
+         char next = buf[off];
+         if(next == '='){
+            break;
+         }
+         name.append(next);
+      }   
+   }
+   
+   /** 
+    * This reads a parameter value from the buf. This will read all 
+    * <code>char</code>'s up to but excluding the first ';' from the 
+    * off within the buf, or if the value is a literal it will read 
+    * the data between the quotes. A quote prefixed with a backward
+    * slash does not end the literal, it is kept in the value along
+    * with the slash. Nothing is read if the off has already reached 
+    * the end of the buf, as with a parameter that has no value.
+    */
+   private void value(){
+      if(off >= count){ /* nothing follows the '=' */
+         return;
+      }
+      if(quote(buf[off])){         
+         for(off++; off < count; off++){
+            if(quote(buf[off]) && buf[off-1] != '\\'){
+               off++; /* step over the closing quote */
+               break;
+            }
+            value.append(buf[off]);
+         }
+      }else{   
+         while(off < count && buf[off] != ';'){
+            value.append(buf[off++]);
+         }
+      }
+   }
+   
+   /**
+    * This determines if the character is a single or double quote.
+    * 
+    * @param ch the character to determine if it is a quotation
+    * @return true if the character provided is a quotation character
+    */
+   private boolean quote(char ch) {
+      if(ch == '"') {
+         return true;
+      }
+      return ch == '\'';
+   }
+   
+   /** 
+    * This reads the value of the <code>charset</code> param into the 
+    * <code>charset</code> <code>ParseBuffer</code>, as either a literal 
+    * or a token. If the <code>charset</code> is a literal then the 
+    * quotes are read as part of the charset. Nothing is read if the
+    * off has already reached the end of the buf.
+    */ 
+   private void charset(){
+      if(off >= count){ /* nothing follows "charset=" */
+         return;
+      }
+      if(buf[off] == '"'){         
+         charset.append('"');
+         for(off++; off < count;){
+            char next = buf[off++];
+            
+            charset.append(next);
+            if(next == '"' && buf[off-2] != '\\'){
+               break;
+            }            
+         }
+      }else{   
+         while(off < count && buf[off] != ';'){
+            charset.append(buf[off++]);
+         }
+      }   
+   }
+   
+   /** 
+    * This will return the value of the MIME type as a string. The
+    * primary and secondary types are written only when a primary 
+    * type is held, the <code>charset</code> only when one is held,
+    * and the remaining parameters are then added after these.
+    * 
+    * @return this returns the string representation of the type
+    */
+   private String encode() {
+      StringBuilder text = new StringBuilder();
+      
+      if(primary.length() > 0) { /* buffer itself is never null */
+         text.append(primary);
+         text.append("/");
+         text.append(secondary);
+      }
+      if(charset.length() > 0) {
+         text.append("; charset=");
+         text.append(charset);
+      }
+      return encode(text);
+   }
+   
+   /** 
+    * This completes the string value of the MIME type by adding the
+    * parameters held in the map to the text given, which is expected 
+    * to hold the type and <code>charset</code> already. Each of the 
+    * parameters is preceded by "; " and is written as the name, an 
+    * "=" and the value. The "=" and the value are left out when the
+    * parameter has no value.
+    * 
+    * @param text this is the buffer to encode the parameters to
+    * 
+    * @return this returns the string representation of the type
+    */
+   private String encode(StringBuilder text) {
+      for(String key : map) {
+         String token = map.get(key);
+         
+         text.append("; ").append(key);
+         
+         if(token != null) {
+            text.append("=").append(token);
+         }
+      }
+      return text.toString();
+   }
+
+   /** 
+    * This will return the value of the MIME type as a string. This
+    * will concatenate the primary and secondary type values and 
+    * add the <code>charset</code> parameter to the type which will
+    * recreate the content type.
+    * 
+    * @return this returns the string representation of the type
+    */
+   public String toString() {
+      return encode();
+   }
+}