aboutsummaryrefslogtreecommitdiffstats
path: root/include/llvm/Support/JSONParser.h
blob: 11149f1e47baf1320252d22321c69b373cec419f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
//===--- JSONParser.h - Simple JSON parser ----------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file implements a JSON parser.
//
//  See http://www.json.org/ for an overview.
//  See http://www.ietf.org/rfc/rfc4627.txt for the full standard.
//
//  FIXME: Currently this supports a subset of JSON. Specifically, support
//  for numbers, booleans and null for values is missing.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_JSON_PARSER_H
#define LLVM_SUPPORT_JSON_PARSER_H

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SourceMgr.h"
#include <cstddef>
#include <iterator>

namespace llvm {

class JSONContainer;
class JSONString;
class JSONValue;
class JSONKeyValuePair;

/// \brief Base class for a parsable JSON atom.
///
/// This class has no semantics other than being a unit of JSON data which can
/// be parsed out of a JSON document. It stores the kind tag that the classof()
/// helpers of the derived node classes inspect.
class JSONAtom {
public:
  /// \brief Possible types of JSON objects.
  enum Kind { JK_KeyValuePair, JK_Array, JK_Object, JK_String };

  /// \brief Returns the type of this value.
  Kind getKind() const { return MyKind; }

  /// \brief dyn_cast helper; every atom is a JSONAtom, so this always returns
  /// true.
  static bool classof(const JSONAtom *Atom) { return true; }

protected:
  /// \brief Tags the new atom with its kind.
  ///
  /// Protected so that only derived classes can create atoms, and explicit so
  /// that a bare Kind is never implicitly converted into a JSONAtom.
  explicit JSONAtom(Kind MyKind) : MyKind(MyKind) {}

private:
  /// The kind this atom was constructed with; never changes afterwards.
  Kind MyKind;
};

/// \brief A parser for JSON text.
///
/// Use an object of JSONParser to iterate over the values of a JSON text.
/// All objects are parsed during the iteration, so you can only iterate once
/// over the JSON text, but the cost of partial iteration is minimized.
/// Create a new JSONParser if you want to iterate multiple times.
class JSONParser {
public:
  /// \brief Create a JSONParser for the given input.
  ///
  /// Parsing is started via parseRoot(). Access to the object returned from
  /// parseRoot() will parse the input lazily.
  ///
  /// \param Input The JSON text to parse.
  /// \param SM The source manager used for diagnostics and buffer management.
  JSONParser(StringRef Input, SourceMgr *SM);

  /// \brief Returns the outermost JSON value (either an array or an object).
  ///
  /// Can return NULL if the input does not start with an array or an object.
  /// The object is not parsed yet - the caller must iterate over the
  /// returned object to trigger parsing.
  ///
  /// A JSONValue can be either a JSONString, JSONObject or JSONArray.
  JSONValue *parseRoot();

  /// \brief Parses the JSON text and returns whether it is valid JSON.
  ///
  /// If validate() returns false, failed() will return true.
  /// NOTE(review): this class declares no accessor for the error text; the
  /// message is presumably reported through the SourceMgr passed to the
  /// constructor - confirm against the implementation.
  bool validate();

  /// \brief Returns true if an error occurs during parsing.
  ///
  /// If there was an error while parsing an object that was created by
  /// iterating over the result of 'parseRoot', 'failed' will return true.
  bool failed() const;

private:
  /// \brief These methods manage the implementation details of parsing new JSON
  /// atoms.
  /// @{
  JSONString *parseString();
  JSONValue *parseValue();
  JSONKeyValuePair *parseKeyValuePair();
  /// @}

  /// \brief Helpers to parse the elements out of both forms of containers.
  ///
  /// parseFirstElement() and parseNextElement() are called by JSONContainer
  /// while it is being iterated: the returned iterator is stored as the
  /// container's position and \p Element receives the container's current
  /// atom.
  /// @{
  const JSONAtom *parseElement(JSONAtom::Kind ContainerKind);
  StringRef::iterator parseFirstElement(JSONAtom::Kind ContainerKind,
                                        char StartChar, char EndChar,
                                        const JSONAtom *&Element);
  StringRef::iterator parseNextElement(JSONAtom::Kind ContainerKind,
                                       char EndChar,
                                       const JSONAtom *&Element);
  /// @}

  /// \brief Whitespace parsing.
  /// @{
  void nextNonWhitespace();
  bool isWhitespace();
  /// @}

  /// \brief These methods are used for error handling.
  /// @{
  void setExpectedError(StringRef Expected, StringRef Found);
  void setExpectedError(StringRef Expected, char Found);
  bool errorIfAtEndOfFile(StringRef Message);
  bool errorIfNotAt(char C, StringRef Message);
  /// @}

  /// \brief Skips all elements in the given container.
  ///
  /// NOTE(review): the meaning of the returned bool is not visible in this
  /// header - confirm against the implementation.
  bool skipContainer(const JSONContainer &Container);

  /// \brief Skips to the next position behind the given JSON atom.
  ///
  /// Called by JSONContainer on its current element before the next element
  /// is parsed.
  bool skip(const JSONAtom &Atom);

  /// All nodes are allocated by the parser and will be deallocated when the
  /// parser is destroyed.
  BumpPtrAllocator ValueAllocator;

  /// \brief The original input to the parser.
  MemoryBuffer *InputBuffer;

  /// \brief The source manager used for diagnostics and buffer management.
  SourceMgr *SM;

  /// \brief The current position in the parse stream.
  StringRef::iterator Position;

  /// \brief The end position for fast EOF checks without introducing
  /// unnecessary dereferences.
  StringRef::iterator End;

  /// \brief If true, an error has occurred.
  bool Failed;

  // JSONContainer calls back into the private parsing helpers above while it
  // is being iterated.
  friend class JSONContainer;
};


/// \brief Base class for JSON value objects.
///
/// This object represents an abstract JSON value. It is the root node behind
/// the group of JSON entities that can represent top-level values in a JSON
/// document. It has no API, and is just a placeholder in the type hierarchy of
/// nodes.
class JSONValue : public JSONAtom {
protected:
  /// \brief Forwards the concrete kind of the derived value to JSONAtom.
  ///
  /// Explicit so that a bare Kind is never implicitly converted into a
  /// JSONValue.
  explicit JSONValue(Kind MyKind) : JSONAtom(MyKind) {}

public:
  /// \brief dyn_cast helpers
  ///@{
  /// Arrays, objects and strings are values; key-value pairs are not.
  static bool classof(const JSONAtom *Atom) {
    switch (Atom->getKind()) {
      case JK_Array:
      case JK_Object:
      case JK_String:
        return true;
      case JK_KeyValuePair:
        return false;
    }
    // Every enumerator is handled above, so this is only reached for a
    // corrupted kind tag.
    llvm_unreachable("Invalid JSONAtom kind");
  }
  static bool classof(const JSONValue *Value) { return true; }
  ///@}
};

/// \brief A JSON string value, giving access to its text.
///
/// FIXME: Implement a method to return the unescaped text.
class JSONString : public JSONValue {
public:
  /// \brief Returns the underlying parsed text of the string.
  ///
  /// This is the raw text as stored by the parser; as the FIXME on this class
  /// notes, an accessor returning the unescaped text does not exist yet.
  /// See http://www.ietf.org/rfc/rfc4627.txt for details.
  StringRef getRawText() const { return RawText; }

  /// \brief dyn_cast helpers
  ///@{
  static bool classof(const JSONString *String) { return true; }
  static bool classof(const JSONAtom *Atom) {
    return Atom->getKind() == JK_String;
  }
  ///@}

private:
  // The constructor is private; JSONParser is a friend and creates the nodes.
  JSONString(StringRef RawText) : JSONValue(JK_String), RawText(RawText) {}

  friend class JSONParser;

  /// The text handed to the constructor.
  StringRef RawText;
};

/// \brief A (key, value) tuple of type (JSONString *, JSONValue *).
///
/// A JSONKeyValuePair derives directly from JSONAtom and is therefore not a
/// JSONValue. JSONKeyValuePairs can be elements of a JSONObject, but not of a
/// JSONArray. They are not viable as top-level values either.
class JSONKeyValuePair : public JSONAtom {
public:
  /// The two halves of the pair; both are fixed at construction.
  const JSONString * const Key;
  const JSONValue * const Value;

  /// \brief dyn_cast helpers
  ///@{
  static bool classof(const JSONKeyValuePair *KeyValuePair) { return true; }
  static bool classof(const JSONAtom *Atom) {
    return Atom->getKind() == JK_KeyValuePair;
  }
  ///@}

private:
  // The constructor is private; JSONParser is a friend and creates the nodes.
  JSONKeyValuePair(const JSONString *Key, const JSONValue *Value)
      : JSONAtom(JK_KeyValuePair), Key(Key), Value(Value) {}

  friend class JSONParser;
};

/// \brief Implementation of JSON containers (arrays and objects).
///
/// JSONContainers drive the lazy parsing of JSON arrays and objects via
/// forward iterators.
class JSONContainer : public JSONValue {
private:
  /// \brief An iterator that parses the underlying container during iteration.
  ///
  /// Iterators on the same collection use shared state, so when multiple copies
  /// of an iterator exist, only one is allowed to be used for iteration;
  /// iterating multiple copies of an iterator of the same collection will lead
  /// to undefined behavior.
  class AtomIterator {
  public:
    AtomIterator(const AtomIterator &I) : Container(I.Container) {}

    /// \brief Iterator interface.
    ///@{
    bool operator==(const AtomIterator &I) const {
      if (isEnd() || I.isEnd())
        return isEnd() == I.isEnd();
      return Container->Position == I.Container->Position;
    }
    bool operator!=(const AtomIterator &I) const {
      return !(*this == I);
    }
    AtomIterator &operator++() {
      Container->parseNextElement();
      return *this;
    }
    const JSONAtom *operator*() {
      return Container->Current;
    }
    ///@}

  private:
    /// \brief Create an iterator for which 'isEnd' returns true.
    AtomIterator() : Container(0) {}

    /// \brief Create an iterator for the given container.
    AtomIterator(const JSONContainer *Container) : Container(Container) {}

    bool isEnd() const {
      return Container == 0 || Container->Position == StringRef::iterator();
    }

    const JSONContainer * const Container;

    friend class JSONContainer;
  };

protected:
  /// \brief An iterator for the specified AtomT.
  ///
  /// Used for the implementation of iterators for JSONArray and JSONObject.
  template <typename AtomT>
  class IteratorTemplate : public std::iterator<std::forward_iterator_tag,
                                                const AtomT*> {
  public:
    explicit IteratorTemplate(const AtomIterator& AtomI)
      : AtomI(AtomI) {}

    bool operator==(const IteratorTemplate &I) const {
      return AtomI == I.AtomI;
    }
    bool operator!=(const IteratorTemplate &I) const { return !(*this == I); }

    IteratorTemplate &operator++() {
      ++AtomI;
      return *this;
    }

    const AtomT *operator*() { return dyn_cast<AtomT>(*AtomI); }

  private:
    AtomIterator AtomI;
  };

  JSONContainer(JSONParser *Parser, char StartChar, char EndChar,
                JSONAtom::Kind ContainerKind)
    : JSONValue(ContainerKind), Parser(Parser),
      Position(), Current(0), Started(false),
      StartChar(StartChar), EndChar(EndChar) {}

  /// \brief Returns a lazy parsing iterator over the container.
  ///
  /// As the iterator drives the parse stream, begin() must only be called
  /// once per container.
  AtomIterator atom_begin() const {
    if (Started)
      report_fatal_error("Cannot parse container twice.");
    Started = true;
    // Set up the position and current element when we begin iterating over the
    // container.
    Position = Parser->parseFirstElement(getKind(), StartChar, EndChar, Current);
    return AtomIterator(this);
  }
  AtomIterator atom_end() const {
    return AtomIterator();
  }

private:
  AtomIterator atom_current() const {
    if (!Started)
      return atom_begin();

    return AtomIterator(this);
  }

  /// \brief Parse the next element in the container into the Current element.
  ///
  /// This routine is called as an iterator into this container walks through
  /// its elements. It mutates the container's internal current node to point to
  /// the next atom of the container.
  void parseNextElement() const {
    Parser->skip(*Current);
    Position = Parser->parseNextElement(getKind(), EndChar, Current);
  }

  // For parsing, JSONContainers call back into the JSONParser.
  JSONParser * const Parser;

  // 'Position', 'Current' and 'Started' store the state of the parse stream
  // for iterators on the container, they don't change the container's elements
  // and are thus marked as mutable.
  mutable StringRef::iterator Position;
  mutable const JSONAtom *Current;
  mutable bool Started;

  const char StartChar;
  const char EndChar;

  friend class JSONParser;

public:
  /// \brief dyn_cast helpers
  ///@{
  static bool classof(const JSONAtom *Atom) {
    switch (Atom->getKind()) {
      case JK_Array:
      case JK_Object:
        return true;
      case JK_KeyValuePair:
      case JK_String:
        return false;
    }
    llvm_unreachable("Invalid JSONAtom kind");
  }
  static bool classof(const JSONContainer *Container) { return true; }
  ///@}
};

/// \brief A JSON array: a container delimited by '[' and ']' that is iterated
/// as JSON values.
class JSONArray : public JSONContainer {
public:
  /// Dereferencing yields a 'const JSONValue *'.
  typedef IteratorTemplate<JSONValue> const_iterator;

  /// \brief Returns a lazy parsing iterator over the container.
  ///
  /// As the iterator drives the parse stream, begin() must only be called
  /// once per container.
  const_iterator begin() const {
    return const_iterator(atom_begin());
  }

  /// \brief Returns the iterator marking the end of the array.
  const_iterator end() const {
    return const_iterator(atom_end());
  }

  /// \brief dyn_cast helpers
  ///@{
  static bool classof(const JSONArray *Array) { return true; }
  static bool classof(const JSONAtom *Atom) {
    return Atom->getKind() == JSONAtom::JK_Array;
  }
  ///@}

private:
  // The constructor is private; JSONParser is a friend and creates the nodes.
  JSONArray(JSONParser *Parser)
    : JSONContainer(Parser, '[', ']', JSONAtom::JK_Array) {}

  friend class JSONParser;
};

/// \brief A JSON object: a container delimited by '{' and '}' that is iterated
/// as JSON key-value pairs.
class JSONObject : public JSONContainer {
public:
  /// Dereferencing yields a 'const JSONKeyValuePair *'.
  typedef IteratorTemplate<JSONKeyValuePair> const_iterator;

  /// \brief Returns a lazy parsing iterator over the container.
  ///
  /// As the iterator drives the parse stream, begin() must only be called
  /// once per container.
  const_iterator begin() const {
    return const_iterator(atom_begin());
  }

  /// \brief Returns the iterator marking the end of the object.
  const_iterator end() const {
    return const_iterator(atom_end());
  }

  /// \brief dyn_cast helpers
  ///@{
  static bool classof(const JSONObject *Object) { return true; }
  static bool classof(const JSONAtom *Atom) {
    return Atom->getKind() == JSONAtom::JK_Object;
  }
  ///@}

private:
  // The constructor is private; JSONParser is a friend and creates the nodes.
  JSONObject(JSONParser *Parser)
    : JSONContainer(Parser, '{', '}', JSONAtom::JK_Object) {}

  friend class JSONParser;
};

} // end namespace llvm

#endif // LLVM_SUPPORT_JSON_PARSER_H