1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
|
//===--- JSONParser.h - Simple JSON parser ----------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements a JSON parser.
//
// See http://www.json.org/ for an overview.
// See http://www.ietf.org/rfc/rfc4627.txt for the full standard.
//
// FIXME: Currently this supports a subset of JSON. Specifically, support
// for numbers, booleans and null for values is missing.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_JSON_PARSER_H
#define LLVM_SUPPORT_JSON_PARSER_H
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
class JSONString;
class JSONValue;
class JSONKeyValuePair;
/// \brief Common base of every node that can be parsed out of a JSON document.
///
/// A JSONAtom carries no semantics of its own beyond its kind tag; it merely
/// represents one unit of JSON data.
class JSONAtom {
public:
  /// \brief The concrete node types an atom can have.
  enum Kind { JK_KeyValuePair, JK_Array, JK_Object, JK_String };

  /// \brief Returns the kind this atom was constructed with.
  Kind getKind() const { return MyKind; }

  /// \brief dyn_cast helper: every JSONAtom trivially is a JSONAtom.
  static bool classof(const JSONAtom *) { return true; }

protected:
  /// \brief Creates an atom tagged with kind \p K; only reachable from
  /// derived classes.
  JSONAtom(Kind K) : MyKind(K) {}

private:
  // The parser and the node classes below need access to skip().
  template <typename, char, char, JSONAtom::Kind> friend class JSONContainer;
  friend class JSONKeyValuePair;
  friend class JSONParser;

  /// \brief Parses to the end of the object and returns whether parsing
  /// was successful.
  bool skip() const;

  /// \brief The kind tag reported by getKind().
  Kind MyKind;
};
/// \brief A parser for JSON text.
///
/// Use an object of JSONParser to iterate over the values of a JSON text.
/// All objects are parsed during the iteration, so you can only iterate once
/// over the JSON text, but the cost of partial iteration is minimized.
/// Create a new JSONParser if you want to iterate multiple times.
class JSONParser {
public:
/// \brief Create a JSONParser for the given input.
///
/// Parsing is started via parseRoot(). Access to the object returned from
/// parseRoot() will parse the input lazily.
///
/// \param Input The JSON text to parse. The parser keeps it as a StringRef
/// member, so presumably the underlying buffer has to outlive the parser -
/// confirm against the callers.
JSONParser(StringRef Input);
/// \brief Returns the outermost JSON value (either an array or an object).
///
/// Can return NULL if the input does not start with an array or an object.
/// The object is not parsed yet - the caller must either iterate over the
/// returned object or call 'skip' to trigger parsing.
///
/// A JSONValue can be either a JSONString, JSONObject or JSONArray.
JSONValue *parseRoot();
/// \brief Parses the JSON text and returns whether it is valid JSON.
///
/// In case validate() returns false, failed() will return true and
/// getErrorMessage() will return the parsing error.
bool validate();
/// \brief Returns true if an error occurred during parsing.
///
/// If there was an error while parsing an object that was created by
/// iterating over the result of 'parseRoot', 'failed' will return true.
bool failed() const;
/// \brief Returns an error message when 'failed' returns true.
std::string getErrorMessage() const;
private:
/// \brief These methods manage the implementation details of parsing new JSON
/// atoms.
/// @{
JSONString *parseString();
JSONValue *parseValue();
JSONKeyValuePair *parseKeyValuePair();
/// @}
/// \brief Templated helpers to parse the elements out of both forms of JSON
/// containers.
///
/// parseFirstElement() and parseNextElement() hand the parsed element back
/// through \p Element and return the resulting parse position, or a default
/// constructed StringRef::iterator when there is no further element.
/// @{
template <typename AtomT> AtomT *parseElement();
template <typename AtomT, char StartChar, char EndChar>
StringRef::iterator parseFirstElement(const AtomT *&Element);
template <typename AtomT, char EndChar>
StringRef::iterator parseNextElement(const AtomT *&Element);
/// @}
/// \brief Whitespace parsing.
/// @{
void nextNonWhitespace();
bool isWhitespace();
/// @}
/// \brief These methods are used for error handling.
///
/// NOTE(review): judging by the call sites in parseFirstElement() and
/// parseNextElement(), a true result from errorIfAtEndOfFile() means parsing
/// of the current element has to stop - confirm against the definitions.
/// @{
void setExpectedError(StringRef Expected, StringRef Found);
void setExpectedError(StringRef Expected, char Found);
bool errorIfAtEndOfFile(StringRef Message);
bool errorIfNotAt(char C, StringRef Message);
/// @}
/// All nodes are allocated by the parser and will be deallocated when the
/// parser is destroyed.
BumpPtrAllocator ValueAllocator;
/// \brief The original input to the parser.
const StringRef Input;
/// \brief The current position in the parse stream.
StringRef::iterator Position;
/// \brief If non-empty, an error has occurred.
std::string ErrorMessage;
// JSONContainer drives the lazy parsing through the private
// parseFirstElement() and parseNextElement() helpers above.
template <typename AtomT, char StartChar, char EndChar,
JSONAtom::Kind ContainerKind>
friend class JSONContainer;
};
/// \brief Base class for JSON value objects.
///
/// This object represents an abstract JSON value. It is the root node behind
/// the group of JSON entities that can represent top-level values in a JSON
/// document. It has no API, and is just a placeholder in the type hierarchy of
/// nodes.
class JSONValue : public JSONAtom {
protected:
JSONValue(Kind MyKind) : JSONAtom(MyKind) {}
public:
/// \brief dyn_cast helpers
///@{
static bool classof(const JSONAtom *Atom) {
switch (Atom->getKind()) {
case JK_Array:
case JK_Object:
case JK_String:
return true;
case JK_KeyValuePair:
return false;
};
llvm_unreachable("Invalid JSONAtom kind");
}
static bool classof(const JSONValue *Value) { return true; }
///@}
};
/// \brief Gives access to the text of a JSON string.
///
/// FIXME: Implement a method to return the unescaped text.
class JSONString : public JSONValue {
public:
/// \brief Returns the underlying parsed text of the string.
///
/// This is the unescaped content of the JSON text.
/// See http://www.ietf.org/rfc/rfc4627.txt for details.
StringRef getRawText() const { return RawText; };
private:
JSONString(StringRef RawText) : JSONValue(JK_String), RawText(RawText) {}
/// \brief Skips to the next position in the parse stream.
bool skip() const { return true; };
StringRef RawText;
friend class JSONAtom;
friend class JSONParser;
public:
/// \brief dyn_cast helpers
///@{
static bool classof(const JSONAtom *Atom) {
return Atom->getKind() == JK_String;
}
static bool classof(const JSONString *String) { return true; }
///@}
};
/// \brief A (key, value) tuple of type (JSONString *, JSONValue *).
///
/// A JSONKeyValuePair is a bare JSONAtom rather than a JSONValue: it can be
/// an element of a JSONObject, but neither an element of a JSONArray nor a
/// top-level value.
class JSONKeyValuePair : public JSONAtom {
public:
  /// \brief The key of the pair.
  const JSONString * const Key;
  /// \brief The value associated with Key.
  const JSONValue * const Value;

  /// \brief dyn_cast helpers
  ///@{
  static bool classof(const JSONAtom *Atom) {
    return JK_KeyValuePair == Atom->getKind();
  }
  static bool classof(const JSONKeyValuePair *) { return true; }
  ///@}

private:
  template <typename, char, char, JSONAtom::Kind> friend class JSONContainer;
  friend class JSONAtom;
  friend class JSONParser;

  /// \brief Binds key \p K to value \p V.
  JSONKeyValuePair(const JSONString *K, const JSONValue *V)
    : JSONAtom(JK_KeyValuePair), Key(K), Value(V) {}

  /// \brief Skips to the next position in the parse stream.
  ///
  /// Skipping a pair amounts to skipping its value.
  bool skip() const { return Value->skip(); }
};
/// \brief Implementation of JSON containers (arrays and objects).
///
/// JSONContainers drive the lazy parsing of JSON arrays and objects via
/// forward iterators. Call 'skip' to validate parsing of all elements of the
/// container and to position the parse stream behind the container.
///
/// \tparam AtomT The element type handed out by the iterators.
/// \tparam StartChar The character that opens the container.
/// \tparam EndChar The character that closes the container.
/// \tparam ContainerKind The JSONAtom kind reported for this container.
template <typename AtomT, char StartChar, char EndChar,
          JSONAtom::Kind ContainerKind>
class JSONContainer : public JSONValue {
public:
  /// \brief An iterator that parses the underlying container during iteration.
  ///
  /// Iterators on the same collection use shared state, so when multiple copies
  /// of an iterator exist, only one is allowed to be used for iteration;
  /// iterating multiple copies of an iterator of the same collection will lead
  /// to undefined behavior.
  class const_iterator : public std::iterator<std::forward_iterator_tag,
                                              const AtomT*> {
  public:
    const_iterator(const const_iterator &I) : Container(I.Container) {}

    /// \brief Two iterators are equal when both are at the end, or when
    /// neither is and their containers are at the same parse position.
    bool operator==(const const_iterator &I) const {
      if (isEnd() || I.isEnd())
        return isEnd() == I.isEnd();
      return Container->Position == I.Container->Position;
    }
    bool operator!=(const const_iterator &I) const { return !(*this == I); }

    /// \brief Advances the shared parse state of the container by one element.
    const_iterator &operator++() {
      // An end iterator has no current element that could be skipped.
      assert(!isEnd() && "Cannot increment an end iterator");
      Container->parseNextElement();
      return *this;
    }

    /// \brief Returns the container's current element.
    const AtomT *operator*() const { return Container->Current; }

  private:
    /// \brief Create an iterator for which 'isEnd' returns true.
    const_iterator() : Container(0) {}

    /// \brief Create an iterator for the given container.
    const_iterator(const JSONContainer *Container) : Container(Container) {}

    /// \brief True for the detached end iterator and for iterators whose
    /// container holds a default constructed position.
    bool isEnd() const {
      return Container == 0 || Container->Position == StringRef::iterator();
    }

    const JSONContainer * const Container;

    friend class JSONContainer;
  };

  /// \brief Returns a lazy parsing iterator over the container.
  ///
  /// As the iterator drives the parse stream, begin() must only be called
  /// once per container; a second call reports a fatal error.
  const_iterator begin() const {
    if (Started)
      report_fatal_error("Cannot parse container twice.");
    Started = true;
    // Set up the position and current element when we begin iterating over the
    // container.
    Position = Parser->parseFirstElement<AtomT, StartChar, EndChar>(Current);
    return const_iterator(this);
  }

  /// \brief Returns the end iterator, which is not bound to any container.
  const_iterator end() const {
    return const_iterator();
  }

private:
  JSONContainer(JSONParser *Parser)
    : JSONValue(ContainerKind), Parser(Parser),
      Position(), Current(0), Started(false) {}

  /// \brief Returns an iterator at the current parse state, starting the
  /// iteration first if that has not happened yet.
  const_iterator current() const {
    if (!Started)
      return begin();
    return const_iterator(this);
  }

  /// \brief Skips to the next position in the parse stream.
  ///
  /// \returns false as soon as an element fails to skip; otherwise whether
  /// the parser is free of errors once all remaining elements were visited.
  bool skip() const {
    for (const_iterator I = current(), E = end(); I != E; ++I) {
      assert(*I != 0);
      if (!(*I)->skip())
        return false;
    }
    return !Parser->failed();
  }

  /// \brief Parse the next element in the container into the Current element.
  ///
  /// This routine is called as an iterator into this container walks through
  /// its elements. It mutates the container's internal current node to point to
  /// the next atom of the container.
  ///
  /// If the current element cannot be parsed to its end, the parse stream is
  /// not at a usable position, so the iteration is ended instead of parsing
  /// on from there; the error stays available from the parser.
  void parseNextElement() const {
    if (!Current->skip()) {
      Current = 0;
      Position = StringRef::iterator();
      return;
    }
    Position = Parser->parseNextElement<AtomT, EndChar>(Current);
  }

  // For parsing, JSONContainers call back into the JSONParser.
  JSONParser * const Parser;

  // 'Position', 'Current' and 'Started' store the state of the parse stream
  // for iterators on the container, they don't change the container's elements
  // and are thus marked as mutable.
  mutable StringRef::iterator Position;
  mutable const AtomT *Current;
  mutable bool Started;

  friend class JSONAtom;
  friend class JSONParser;
  friend class const_iterator;

public:
  /// \brief dyn_cast helpers
  ///@{
  static bool classof(const JSONAtom *Atom) {
    return Atom->getKind() == ContainerKind;
  }
  static bool classof(const JSONContainer *Container) { return true; }
  ///@}
};
/// \brief A simple JSON array: a container of JSONValue elements delimited by
/// '[' and ']'.
typedef JSONContainer<JSONValue, '[', ']', JSONAtom::JK_Array> JSONArray;
/// \brief A JSON object: an iterable list of JSON key-value pairs delimited
/// by '{' and '}'.
typedef JSONContainer<JSONKeyValuePair, '{', '}', JSONAtom::JK_Object>
JSONObject;
/// \brief Declares the parseElement() specialization used for container
/// elements that are JSON values.
template <> JSONValue *JSONParser::parseElement<JSONValue>();
/// \brief Declares the parseElement() specialization used for container
/// elements that are key-value pairs.
template <> JSONKeyValuePair *JSONParser::parseElement<JSONKeyValuePair>();
/// \brief Parses the first element of a JSON array or object, or detects that
/// the container is empty.
///
/// On entry the current position must be at the container's start character
/// (this is asserted). \p Element receives the parsed element, or 0 when
/// there is none.
///
/// \returns The parser's position after the element was parsed. A default
/// constructed StringRef::iterator is returned when the input ends, when the
/// container is closed right away, or when the element could not be parsed.
template <typename AtomT, char StartChar, char EndChar>
StringRef::iterator JSONParser::parseFirstElement(const AtomT *&Element) {
  assert(*Position == StartChar);
  const StringRef::iterator NoElement = StringRef::iterator();
  Element = 0;
  nextNonWhitespace();
  // The end-of-file check has to come first; only then may Position be read.
  if (errorIfAtEndOfFile("value or end of container at start of container") ||
      *Position == EndChar)
    return NoElement;
  Element = parseElement<AtomT>();
  return Element != 0 ? Position : NoElement;
}
/// \brief Parses the next element of a JSON array or object, or detects the
/// end of the container.
///
/// After advancing to the next non-whitespace character, the method expects
/// either the ',' that separates the next element from the current one, or
/// the container's end character. \p Element receives the parsed element, or
/// 0 when there is none.
///
/// \returns The parser's position after the element was parsed. A default
/// constructed StringRef::iterator is returned when the input ends, when the
/// container is closed, when an unexpected character is found (an error is
/// recorded in that case), or when the element could not be parsed.
template <typename AtomT, char EndChar>
StringRef::iterator JSONParser::parseNextElement(const AtomT *&Element) {
  Element = 0;
  nextNonWhitespace();
  // Report "no element" the same way on every path instead of relying on the
  // literal 0 converting to the iterator type.
  if (errorIfAtEndOfFile("',' or end of container for next element"))
    return StringRef::iterator();
  switch (*Position) {
    case ',':
      nextNonWhitespace();
      if (errorIfAtEndOfFile("element in container"))
        return StringRef::iterator();
      Element = parseElement<AtomT>();
      if (Element == 0)
        return StringRef::iterator();
      return Position;

    case EndChar:
      return StringRef::iterator();

    default:
      setExpectedError("',' or end of container for next element", *Position);
      return StringRef::iterator();
  }
}
} // end namespace llvm
#endif // LLVM_SUPPORT_JSON_PARSER_H
|