simple/simple-http/src/main/java/org/simpleframework/http/parse/CookieParser.java


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589

/*
 * CookieParser.java February 2001
 *
 * Copyright (C) 2001, Niall Gallagher <niallg@users.sf.net>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
 * implied. See the License for the specific language governing 
 * permissions and limitations under the License.
 */

package org.simpleframework.http.parse;

import org.simpleframework.common.parse.Parser;
import org.simpleframework.http.Cookie;

import java.util.Iterator;

/**
 * CookieParser is used to parse the cookie header. The cookie header is
 * one of the headers that is used by the HTTP state management mechanism.
 * The Cookie header is the header that is sent from the client to the
 * server in response to a Set-Cookie header. The syntax of the Cookie
 * header as taken from RFC 2109, HTTP State Management Mechanism.
 * <pre>
 *
 *  cookie          =       "Cookie:" cookie-version
 *                          1*((";" | ",") cookie-value)
 *  cookie-value    =       NAME "=" VALUE [";" path] [";" domain]
 *  cookie-version  =       "$Version" "=" value
 *  NAME            =       attr
 *  VALUE           =       value
 *  path            =       "$Path" "=" value
 *  domain          =       "$Domain" "=" value
 *
 * </pre>
 * The cookie header may consist of several cookies. Each cookie can be
 * extracted from the header by examining the syntax of the cookie
 * header. The syntax of the cookie header is defined in RFC 2109.
 * <p>
 * The header has a <code>$Version</code> attribute followed by one or
 * more cookies. Each contains a name and a value, followed by an optional
 * <code>$Path</code> and <code>$Domain</code> attribute. This will parse
 * a given cookie header and return each cookie extracted as a
 * <code>Cookie</code> object.
 *
 * @author Niall Gallagher
 */
public class CookieParser extends Parser implements Iterable<Cookie> {

   /**
    * Set once no further cookie can be extracted from the buffer.
    */
   private boolean finished;

   /**
    * Set when a cookie has been parsed but not yet handed out, so
    * that the same region of the buffer is not parsed twice.
    */
   private boolean parsed;

   /**
    * Version of the <code>Cookie</code> being parsed.
    */
   private int version;

   /**
    * Region of the buffer holding the name of the current cookie.
    */
   private final Token name;

   /**
    * Region of the buffer holding the value of the current cookie.
    */
   private final Token value;

   /**
    * Region of the buffer holding the <code>$Path</code> value.
    */
   private final Token path;

   /**
    * Region of the buffer holding the <code>$Domain</code> value.
    */
   private final Token domain;

   /**
    * Create a <code>CookieParser</code> that contains no cookies.
    * The parser starts out finished, so an iterator created from
    * it returns <code>false</code> for <code>hasNext</code> until
    * some text has been parsed with the <code>parse</code> method.
    */
   public CookieParser(){
      this.path = new Token();
      this.domain = new Token();
      this.name = new Token();
      this.value = new Token();
      this.finished = true;
   }

   /**
    * This is primarily a convenience constructor. It creates an
    * empty parser with the no-arg constructor and then passes the
    * given <code>String</code> to the <code>parse</code> method.
    *
    * @param header a <code>String</code> containing a cookie value
    */
   public CookieParser(String header){
      this();
      parse(header);
   }

   /**
    * Resets the state of this <code>CookieParser</code> so that the
    * buffer is read from the start again. The flags, the version
    * and the offset are cleared and then the leading
    * <code>$Version</code> attribute is consumed.
    */
   protected void init() {
      finished = false;
      parsed = false;
      version = 0;
      off = 0;
      version();
   }

   /**
    * This will extract the next <code>Cookie</code> from the
    * buffer. If the parser has already finished this does nothing.
    * Otherwise the next name, value, path and domain are read in to
    * their tokens and the parser is flagged as holding a cookie
    * that has not yet been handed out.
    */
   protected void parse() {
      if(!finished){
         cookie();
         parsed = true;
      }
   }

   /**
    * This is used to skip an arbitrary <code>String</code> within the
    * <code>char</code> buf. The text is compared to the buffer from
    * the current offset, each pair of characters being compared
    * through <code>toLower</code>. Whitespace in either the buffer
    * or the text is stepped over without being compared. If the text
    * matches then the offset is moved past the matched region and
    * past any whitespace that follows it, otherwise the offset is
    * left where it was.
    * <p>
    * For example if the input was the string "s omete xt" and the
    * source was "some text to skip" then the result of a skip would
    * be "to skip" in the source string, as the trailing spaces are
    * also eaten by this.
    *
    * @param text this is the <code>String</code> value to be skipped
    *
    * @return true if the <code>String</code> was skipped
    */
   protected boolean skip(String text){
      int size = text.length();
      int seek = off;
      int read = 0;

      if(off + size > count){
         return false;
      }
      while(read < size) {
         /* whitespace skipped in the buffer can exhaust it before the
            text is matched, never index beyond the parsed region */
         if(seek >= count) {
            return false;
         }
         char a = text.charAt(read);
         char b = buf[seek];

         if(space(b)){
            seek++;
         }else if(space(a)){
            read++;
         }else {
            if(toLower(a) != toLower(b)){
               return false;
            }
            read++;
            seek++;
         }
      }
      for(off = seek; off < count; off++){ /* eat trailing spaces */
         if(!space(buf[off])) {
            break;
         }
      }
      return true;
   }

   /**
    * This is used to acquire the cookies from the parsed source
    * text. This allows the cookie parser to be used within a for
    * each loop. Every iterator created shares the parsing state of
    * this parser rather than keeping a position of its own.
    *
    * @return this returns an iterator for extracting cookie values
    */
   public Iterator<Cookie> iterator() {
      return new Sequence();
   }

   /**
    * This is used so that the collection of <code>Cookies</code>
    * can be reiterated. It invokes the <code>init</code> method to
    * move back to the start of the buffer and then the
    * <code>parse</code> method to extract the first cookie again.
    */
   public void reset() {
      init();
      parse();
   }

   /**
    * Creates the <code>Cookie</code> from the name and value tokens.
    * This should only be used after the <code>parse</code> method
    * has been called, so that the tokens describe a parsed cookie.
    *
    * @return the <code>Cookie</code> that was just parsed
    */
   private Cookie getCookie() {
      return getCookie(name.toString(),
         value.toString());
   }

   /**
    * Creates the <code>Cookie</code> from the given name and value
    * and from the path and domain tokens. The domain and the path
    * are only set on the <code>Cookie</code> if their tokens are
    * not empty. The version is always set.
    *
    * @param name the name that the <code>Cookie</code> contains
    * @param value the value that the <code>Cookie</code> contains
    *
    * @return the <code>Cookie</code> that was just parsed
    */
   private Cookie getCookie(String name, String value) {
      Cookie cookie = new Cookie(name, value, false);

      if(domain.len > 0) {
         cookie.setDomain(domain.toString());
      }
      if(path.len > 0) {
         cookie.setPath(path.toString());
      }
      cookie.setVersion(version);
      return cookie;
   }

   /**
    * This is used to parse a <code>Cookie</code> from the buffer.
    * It first skips a single separator, either ',' or ';', left
    * over from the version or the previous <code>Cookie</code>. It
    * then reads the name, skips the '=' and reads the value along
    * with the optional path and domain.
    */
   private void cookie(){
      if(!skip(",")){ /* ,|; */
         skip(";");
      }
      name();
      skip("="); /* = */
      value();
   }

   /**
    * This initializes the name token and extracts the name of this
    * <code>Cookie</code>. This will read all <code>char</code>'s
    * up to but excluding the first '=' <code>char</code> found from
    * the <code>off</code> within the buffer, or up to the end of
    * the buffer if there is no '=' character.
    */
   private void name() {
      name.off = off;
      name.len = 0;

      while(off < count && buf[off] != '='){
         name.len++;
         off++;
      }
   }

   /**
    * Used to extract everything found after the <code>NAME '='</code>
    * within a <code>Cookie</code>. This extracts the value followed
    * by the <code>$Path</code> and then the <code>$Domain</code>
    * attribute, if they exist, in that order.
    */
   private void value() {
      data();
      path();
      domain();
    }

   /**
    * This initializes the value token and extracts the value of this
    * <code>Cookie</code>. The value is either a quoted literal or
    * everything up to the next terminal character.
    */
   private void data() {
      read(value);
   }

   /**
    * This resets the path token and extracts the <code>$Path</code>
    * of this <code>Cookie</code> if there is one. The separator is
    * skipped together with the attribute name, as ";$Path=", so that
    * nothing is consumed from the buffer when there is no path.
    */
   private void path() {
      path.len = 0; /* reset */

      if(skip(";$Path=")){
         read(path);
      }
   }

   /**
    * This resets the domain token and extracts the
    * <code>$Domain</code> of this <code>Cookie</code> if there is
    * one. The separator is skipped together with the attribute name,
    * as ";$Domain=", so that nothing is consumed from the buffer
    * when there is no domain.
    */
   private void domain(){
      domain.len = 0; /* reset */

      if(skip(";$Domain=")) {
         read(domain);
      }
   }

   /**
    * This reads an attribute value starting at the current offset
    * and saves the region it occupies in the token provided. If the
    * value starts with a quote it is read as a literal, otherwise
    * all characters up to but excluding the next terminal character
    * are read. If the buffer is exhausted the token is left empty.
    *
    * @param token this is the token that is to describe the value
    */
   private void read(Token token) {
      token.off = off;
      token.len = 0;

      if(off < count && buf[off] == '"'){
         quoted(token);
      }else {
         while(off < count && !terminal(buf[off])){
            token.len++;
            off++;
         }
      }
   }

   /**
    * This reads a literal, which is any data between quotes. A
    * quote prefixed with a backward slash character, that is '\',
    * does not close the literal. The token describes the text
    * between the quotes and the offset is left after the closing
    * quote. If the closing quote is missing then the token covers
    * everything that remains in the buffer.
    *
    * @param token this is the token that is to describe the literal
    */
   private void quoted(Token token) {
      int start = ++off; /* first character after the opening quote */
      boolean closed = false;

      while(off < count){
         char next = buf[off++];

         /* off - 2 is never below the opening quote at this point */
         if(next == '"' && buf[off - 2] != '\\'){
            closed = true;
            break;
         }
      }
      token.off = start;
      token.len = off - start;

      if(closed) {
         token.len--; /* remove " */
      }
   }

   /**
    * This extracts the <code>$Version</code> of this <code>Cookie</code>.
    * The version is parsed and converted into a decimal int from the digit
    * characters that make up a version.
    * <p>
    * This will read all digit <code>char</code>'s up to but excluding the
    * first non digit <code>char</code> that it encounters from the offset
    * within the buffer. A single quote character before and after the
    * digits is stepped over. If the buffer does not start with the
    * <code>$Version</code> attribute then the version is set to 1.
    */
   private void version(){
      if(skip("$Version=")) {
         if(off < count && buf[off] == '"'){
            off++;
         }
         while(off < count && digit(buf[off])){
            version = version * 10 + (buf[off] - '0');
            off++;
         }
         if(off < count && buf[off] == '"'){
            off++;
         }
      }else{
         version = 1;
      }
   }

   /**
    * This is used to determine if a given character is a terminal
    * character. Only the ';' character is treated as a terminal.
    * Although RFC 2109 allows a comma as a separator it is not
    * used by any browsers, so a comma is read as part of a value.
    *
    * @param ch the character that is to be compared
    *
    * @return true if this is a semicolon character
    */
   private boolean terminal(char ch) {
      return ch == ';';
   }

   /**
    * This is used to represent an <code>Iterator</code> that will
    * iterate over the available cookies within the provided source
    * text. This allows the cookie parser to be used as an iterable
    * with for each loops. Cookies can not be removed with this.
    */
   private class Sequence implements Iterator<Cookie> {

      /**
       * Extracts the next <code>Cookie</code> object from the string
       * given. This will return <code>null</code> when there are no
       * more cookies left in the <code>String</code> being parsed.
       * <p>
       * To find out when there are no more cookies left use the
       * <code>hasNext</code> method. This will only set the name,
       * value, path, domain and version of the <code>Cookie</code>,
       * of which the path and domain are optional.
       *
       * @return an initialized <code>Cookie</code> object, or null
       */
      public Cookie next(){
         if(!hasNext()) {
            return null;
         }
         parsed = false; /* the cached cookie is handed out */
         return getCookie();
      }

      /**
       * Determine whether or not there are any <code>Cookie</code>s
       * left in the <code>String</code>. If no parsed cookie is
       * waiting to be handed out this will attempt to extract another
       * from the buffer, so that the <code>next</code> method will
       * produce it. A cookie with an empty name, whether it was just
       * extracted or was extracted earlier, marks the end of the
       * cookies and finishes the parser.
       *
       * @return true if there are more cookies false otherwise
       */
      public boolean hasNext(){
         if(finished) {
            return false;
         }
         if(!parsed) {
            parse();
         }
         if(name.len <= 0){
            finished = true;
            return false;
         }
         return true;
      }

      /**
       * This method is used to remove items from the iterator. This
       * however performs no action as the act of parsing should not
       * modify the underlying source text value so that it can be
       * reset with the <code>reset</code> method and used again.
       */
      public void remove() {
      }
   }

   /**
    * This is a token object that is used to store the offset and
    * length of a region of chars in the <code>CookieParser.buf</code>
    * array. The <code>toString</code> method of this token will
    * produce the <code>String</code> value of the region it
    * represents.
    */
   private class Token {

      /**
       * The number of characters that were consumed by this token.
       */
      public int len;

      /**
       * The offset within the buffer that this token starts from.
       */
      public int off;

      /**
       * This converts the region within the buffer that this token
       * describes to a <code>String</code>.
       *
       * @return the <code>String</code> value of the region
       */
      @Override
      public String toString(){
         return new String(buf,off,len);
      }
   }
}