simple/simple-http/src/main/java/org/simpleframework/http/message/ChunkedConsumer.java


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258

/*
 * ChunkedConsumer.java February 2007
 *
 * Copyright (C) 2007, Niall Gallagher <niallg@users.sf.net>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
 * implied. See the License for the specific language governing 
 * permissions and limitations under the License.
 */

package org.simpleframework.http.message;

import java.io.IOException;

import org.simpleframework.common.buffer.Allocator;
import org.simpleframework.common.buffer.Buffer;

/**
 * The <code>ChunkedConsumer</code> is reads an decodes a stream
 * using the chunked transfer coding. This is used so that any data
 * sent in the chunked transfer coding can be decoded. All bytes are
 * appended to an internal buffer so that they can be read without
 * having to parse the encoding. 
 * <pre>
 *
 *    length := 0
 *    read chunk-size, chunk-extension (if any) and CRLF
 *    while (chunk-size &gt; 0) {
 *       read chunk-data and CRLF
 *       append chunk-data to entity-body
 *       length := length + chunk-size
 *       read chunk-size and CRLF
 *    }
 *    read entity-header
 *    while (entity-header not empty) {
 *       append entity-header to existing header fields
 *       read entity-header
 *    }
 *
 * </pre>
 * The above algorithm is taken from RFC 2616 section 19.4.6. This
 * coding scheme is used in HTTP pipelines so that dynamic content,
 * that is, content with which a length cannot be determined does
 * not require a connection close to delimit the message body. 
 *
 * @author Niall Gallagher
 */
public class ChunkedConsumer extends UpdateConsumer {
  
   /**
    * This is used to create the internal buffer for the body.
    */         
   private Allocator allocator;
   
   /**
    * This is the internal buffer used to capture the body read.
    */  
   private Buffer buffer;
   
   /**
    * This is used to determine whether a full chunk has been read.
    */  
   private boolean terminal;
   
   /**
    * This is used to determine if the zero length chunk was read.
    */  
   private boolean last;
 
   /**
    * This is used to accumulate the bytes of the chunk size line.
    */   
   private byte line[];
   
   /**
    * This is the number of bytes appended to the line buffer.
    */ 
   private int count;
   
   /**
    * This is the number of bytes left in the current chunk.
    */ 
   private int chunk;
   
   /**
    * Constructor for the <code>ChunkedConsumer</code> object. This 
    * is used to create a consumer that reads chunked encoded data and
    * appended that data in decoded form to an internal buffer so that
    * it can be read in a clean decoded fromat.
    *
    * @param allocator this is used to allocate the internal buffer
    */
   public ChunkedConsumer(Allocator allocator) {
      this(allocator, 1024);
   }
   
   /**
    * Constructor for the <code>ChunkedConsumer</code> object. This 
    * is used to create a consumer that reads chunked encoded data and
    * appended that data in decoded form to an internal buffer so that
    * it can be read in a clean decoded fromat.
    *
    * @param allocator this is used to allocate the internal buffer
    * @param chunk this is the maximum size line allowed
    */   
   private ChunkedConsumer(Allocator allocator, int chunk) {    
      this.line = new byte[chunk];
      this.allocator = allocator;  
   }   
   
   /**
    * This is used to acquire the body that has been consumed. This
    * will return a body which can be used to read the content of
    * the message, also if the request is multipart upload then all
    * of the parts are provided as <code>Attachment</code> objects. 
    * Each part can then be read as an individual message.
    *  
    * @return the body that has been consumed by this instance
    */
   public Body getBody() {
      return new BufferBody(buffer);
   }
   
   /** 
    * This method is used to append the contents of the array to the
    * internal buffer. The appended bytes can be acquired from the
    * internal buffer using an <code>InputStream</code>, or the text
    * of the appended bytes can be acquired by encoding the bytes.   
    *
    * @param array this is the array of bytes to be appended
    * @param off this is the start offset in the array to read from
    * @param len this is the number of bytes to write to the buffer  
    */
   private void append(byte[] array, int off, int len) throws IOException {
      if(buffer == null) {
         buffer = allocator.allocate();
      }
      buffer.append(array, off, len);
   }

   /**
    * This is used to process the bytes that have been read from the
    * cursor. This will keep reading bytes from the stream until such
    * time as the zero length chunk has been read from the stream. If
    * the zero length chunk is encountered then the overflow count is
    * returned so it can be used to reset the cursor.
    *
    * @param array this is a chunk read from the cursor
    * @param off this is the offset within the array the chunk starts
    * @param size this is the number of bytes within the array
    *
    * @return this returns the number of bytes overflow that is read
    */          
   @Override
   protected int update(byte[] array, int off, int size) throws IOException {
	  int mark = off + size;
      
      while(off < mark){
         if(terminal || last) {
            while(off < mark) {
               if(array[off++] == '\n') { // CR[LF]
                  if(last) { // 0; CRLFCR[LF]
                     finished = true;
                     return mark - off;
                  }
                  terminal = false;
                  break;
               }
            }
         } else if(chunk == 0) {
            while(chunk == 0) {
               if(off >= mark) {
                  break;
               } else if(array[off++] == '\n') { // CR[LF] 
                  parse();
                  
                  if(chunk == 0) { // 0; CR[LF]CRLF                	 
                     last = true;
                     break;
                  }
               } else {
                  line[count++] = array[off-1];
               }                      
            }            
         } else {
            int write = Math.min(mark - off, chunk);
            
            append(array, off, write); 
            chunk -= write;
            off += write;
            
            if(chunk == 0) { // []CRLF
               terminal = true;
            }
         }
      }
      return 0;
   }   
   
   /**
    * This method is used to convert the size in hexidecimal to a 
    * decimal <code>int</code>. This will use the specified number 
    * of bytes from the internal buffer and parse each character 
    * read as a hexidecimal character. This stops interpreting the
    * size line when a non-hexidecimal character is encountered.
    */
   private void parse() throws IOException {
      int off = 0;
      
      while(off < count) {
         int octet = toDecimal(line[off]);
            
         if(octet < 0){
            if(off < 1) {
               throw new IOException("Invalid chunk size line");
            }
            break;
         }
         chunk <<= 4; 
         chunk ^= octet;
         off++;
      }
      count = 0;
   }

   /**
    * This performs a conversion from a character to an integer. If
    * the character given, as a <code>byte</code>, is a hexidecimal
    * char this will convert it into its integer equivelant. So a 
    * char of <code>A</code> is converted into <code>10</code>.
    *
    * @param octet this is an ISO 8869-1 hexidecimal character
    *
    * @return returns the hex character into its decinal value
    */   
   private int toDecimal(byte octet){
      if(octet >= 'A' && octet <= 'Z') {
         return (octet - 'A') + 10;
      }
      if(octet >= '0' && octet <= '9') {
         return octet - '0';   
      } 
      if(octet >= 'a' && octet <= 'f') {
         return (octet - 'a') + 10;
      }
      return -1;
   }
}