aboutsummaryrefslogtreecommitdiffstats
path: root/src/google/protobuf/stubs/structurally_valid.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/google/protobuf/stubs/structurally_valid.cc')
-rw-r--r--src/google/protobuf/stubs/structurally_valid.cc521
1 files changed, 521 insertions, 0 deletions
diff --git a/src/google/protobuf/stubs/structurally_valid.cc b/src/google/protobuf/stubs/structurally_valid.cc
new file mode 100644
index 0000000..e385a81
--- /dev/null
+++ b/src/google/protobuf/stubs/structurally_valid.cc
@@ -0,0 +1,521 @@
+// Copyright 2005-2008 Google Inc. All Rights Reserved.
+// Author: jrm@google.com (Jim Meehan)
+
+#include <google/protobuf/stubs/common.h>
+
+namespace google {
+namespace protobuf {
+namespace internal {
+
+// These four-byte entries compactly encode how many bytes 0..255 to delete
+// in making a string replacement, how many bytes to add 0..255, and the offset
+// 0..64k-1 of the replacement string in remap_string.
+struct RemapEntry {
+ uint8 delete_bytes;
+ uint8 add_bytes;
+ uint16 bytes_offset;
+};
+
+// Exit type codes for state tables. All but the first get stuffed into
+// signed one-byte entries. The first is only generated by executable code.
+// To distinguish from next-state entries, these must be contiguous and
+// all <= kExitNone
+typedef enum {
+ kExitDstSpaceFull = 239,
+ kExitIllegalStructure, // 240
+ kExitOK, // 241
+ kExitReject, // ...
+ kExitReplace1,
+ kExitReplace2,
+ kExitReplace3,
+ kExitReplace21,
+ kExitReplace31,
+ kExitReplace32,
+ kExitReplaceOffset1,
+ kExitReplaceOffset2,
+ kExitReplace1S0,
+ kExitSpecial,
+ kExitDoAgain,
+ kExitRejectAlt,
+ kExitNone // 255
+} ExitReason;
+
+
+// This struct represents one entire state table. The three initialized byte
+// areas are state_table, remap_base, and remap_string. state0 and state0_size
+// give the byte offset and length within state_table of the initial state --
+// table lookups are expected to start and end in this state, but for
+// truncated UTF-8 strings, may end in a different state. These allow a quick
+// test for that condition. entry_shift is 8 for tables subscripted by a full
+// byte value and 6 for space-optimized tables subscripted by only six
+// significant bits in UTF-8 continuation bytes.
+typedef struct {
+ const uint32 state0;
+ const uint32 state0_size;
+ const uint32 total_size;
+ const int max_expand;
+ const int entry_shift;
+ const int bytes_per_entry;
+ const uint32 losub;
+ const uint32 hiadd;
+ const uint8* state_table;
+ const RemapEntry* remap_base;
+ const uint8* remap_string;
+ const uint8* fast_state;
+} UTF8StateMachineObj;
+
+typedef UTF8StateMachineObj UTF8ScanObj;
+
+#define X__ (kExitIllegalStructure)
+#define RJ_ (kExitReject)
+#define S1_ (kExitReplace1)
+#define S2_ (kExitReplace2)
+#define S3_ (kExitReplace3)
+#define S21 (kExitReplace21)
+#define S31 (kExitReplace31)
+#define S32 (kExitReplace32)
+#define T1_ (kExitReplaceOffset1)
+#define T2_ (kExitReplaceOffset2)
+#define S11 (kExitReplace1S0)
+#define SP_ (kExitSpecial)
+#define D__ (kExitDoAgain)
+#define RJA (kExitRejectAlt)
+
+// Entire table has 9 state blocks of 256 entries each
+static const unsigned int utf8acceptnonsurrogates_STATE0 = 0; // state[0]
+static const unsigned int utf8acceptnonsurrogates_STATE0_SIZE = 256; // =[1]
+static const unsigned int utf8acceptnonsurrogates_TOTAL_SIZE = 2304;
+static const unsigned int utf8acceptnonsurrogates_MAX_EXPAND_X4 = 0;
+static const unsigned int utf8acceptnonsurrogates_SHIFT = 8;
+static const unsigned int utf8acceptnonsurrogates_BYTES = 1;
+static const unsigned int utf8acceptnonsurrogates_LOSUB = 0x20202020;
+static const unsigned int utf8acceptnonsurrogates_HIADD = 0x00000000;
+
+static const uint8 utf8acceptnonsurrogates[] = {
+// state[0] 0x000000 Byte 1
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 3,
+ 4, 5, 5, 5, 6, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[1] 0x000080 Byte 2 of 2
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[2] 0x000000 Byte 2 of 3
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[3] 0x001000 Byte 2 of 3
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[4] 0x000000 Byte 2 of 4
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[5] 0x040000 Byte 2 of 4
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[6] 0x100000 Byte 2 of 4
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[7] 0x00d000 Byte 2 of 3
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[8] 0x00d800 Byte 3 of 3
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+
+RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
+RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
+RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
+RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
+
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
+};
+
+// Remap base[0] = (del, add, string_offset)
+static const RemapEntry utf8acceptnonsurrogates_remap_base[] = {
+{0, 0, 0} };
+
+// Remap string[0]
+static const unsigned char utf8acceptnonsurrogates_remap_string[] = {
+0 };
+
+static const unsigned char utf8acceptnonsurrogates_fast[256] = {
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+};
+
+static const UTF8ScanObj utf8acceptnonsurrogates_obj = {
+ utf8acceptnonsurrogates_STATE0,
+ utf8acceptnonsurrogates_STATE0_SIZE,
+ utf8acceptnonsurrogates_TOTAL_SIZE,
+ utf8acceptnonsurrogates_MAX_EXPAND_X4,
+ utf8acceptnonsurrogates_SHIFT,
+ utf8acceptnonsurrogates_BYTES,
+ utf8acceptnonsurrogates_LOSUB,
+ utf8acceptnonsurrogates_HIADD,
+ utf8acceptnonsurrogates,
+ utf8acceptnonsurrogates_remap_base,
+ utf8acceptnonsurrogates_remap_string,
+ utf8acceptnonsurrogates_fast
+};
+
+
+#undef X__
+#undef RJ_
+#undef S1_
+#undef S2_
+#undef S3_
+#undef S21
+#undef S31
+#undef S32
+#undef T1_
+#undef T2_
+#undef S11
+#undef SP_
+#undef D__
+#undef RJA
+
+// Return true if current Tbl pointer is within state0 range
+// Note that unsigned compare checks both ends of range simultaneously
+static inline bool InStateZero(const UTF8ScanObj* st, const uint8* Tbl) {
+ const uint8* Tbl0 = &st->state_table[st->state0];
+ return (static_cast<uint32>(Tbl - Tbl0) < st->state0_size);
+}
+
+// Scan a UTF-8 string based on state table.
+// Always scan complete UTF-8 characters
+// Set number of bytes scanned. Return reason for exiting
+int UTF8GenericScan(const UTF8ScanObj* st,
+ const char * str,
+ int str_length,
+ int* bytes_consumed) {
+ *bytes_consumed = 0;
+ if (str_length == 0) return kExitOK;
+
+ int eshift = st->entry_shift;
+ const uint8* isrc = reinterpret_cast<const uint8*>(str);
+ const uint8* src = isrc;
+ const uint8* srclimit = isrc + str_length;
+ const uint8* srclimit8 = srclimit - 7;
+ const uint8* Tbl_0 = &st->state_table[st->state0];
+
+ DoAgain:
+ // Do state-table scan
+ int e = 0;
+ uint8 c;
+
+ // Do fast for groups of 8 identity bytes.
+ // This covers a lot of 7-bit ASCII ~8x faster then the 1-byte loop,
+ // including slowing slightly on cr/lf/ht
+ //----------------------------
+ const uint8* Tbl2 = &st->fast_state[0];
+ uint32 losub = st->losub;
+ uint32 hiadd = st->hiadd;
+ while (src < srclimit8) {
+ uint32 s0123 = (reinterpret_cast<const uint32 *>(src))[0];
+ uint32 s4567 = (reinterpret_cast<const uint32 *>(src))[1];
+ src += 8;
+ // This is a fast range check for all bytes in [lowsub..0x80-hiadd)
+ uint32 temp = (s0123 - losub) | (s0123 + hiadd) |
+ (s4567 - losub) | (s4567 + hiadd);
+ if ((temp & 0x80808080) != 0) {
+ // We typically end up here on cr/lf/ht; src was incremented
+ int e0123 = (Tbl2[src[-8]] | Tbl2[src[-7]]) |
+ (Tbl2[src[-6]] | Tbl2[src[-5]]);
+ if (e0123 != 0) {
+ src -= 8;
+ break;
+ } // Exit on Non-interchange
+ e0123 = (Tbl2[src[-4]] | Tbl2[src[-3]]) |
+ (Tbl2[src[-2]] | Tbl2[src[-1]]);
+ if (e0123 != 0) {
+ src -= 4;
+ break;
+ } // Exit on Non-interchange
+ // Else OK, go around again
+ }
+ }
+ //----------------------------
+
+ // Byte-at-a-time scan
+ //----------------------------
+ const uint8* Tbl = Tbl_0;
+ while (src < srclimit) {
+ c = *src;
+ e = Tbl[c];
+ src++;
+ if (e >= kExitIllegalStructure) {break;}
+ Tbl = &Tbl_0[e << eshift];
+ }
+ //----------------------------
+
+
+ // Exit posibilities:
+ // Some exit code, !state0, back up over last char
+ // Some exit code, state0, back up one byte exactly
+ // source consumed, !state0, back up over partial char
+ // source consumed, state0, exit OK
+ // For illegal byte in state0, avoid backup up over PREVIOUS char
+ // For truncated last char, back up to beginning of it
+
+ if (e >= kExitIllegalStructure) {
+ // Back up over exactly one byte of rejected/illegal UTF-8 character
+ src--;
+ // Back up more if needed
+ if (!InStateZero(st, Tbl)) {
+ do {
+ src--;
+ } while ((src > isrc) && ((src[0] & 0xc0) == 0x80));
+ }
+ } else if (!InStateZero(st, Tbl)) {
+ // Back up over truncated UTF-8 character
+ e = kExitIllegalStructure;
+ do {
+ src--;
+ } while ((src > isrc) && ((src[0] & 0xc0) == 0x80));
+ } else {
+ // Normal termination, source fully consumed
+ e = kExitOK;
+ }
+
+ if (e == kExitDoAgain) {
+ // Loop back up to the fast scan
+ goto DoAgain;
+ }
+
+ *bytes_consumed = src - isrc;
+ return e;
+}
+
+int UTF8GenericScanFastAscii(const UTF8ScanObj* st,
+ const char * str,
+ int str_length,
+ int* bytes_consumed) {
+ *bytes_consumed = 0;
+ if (str_length == 0) return kExitOK;
+
+ const uint8* isrc = reinterpret_cast<const uint8*>(str);
+ const uint8* src = isrc;
+ const uint8* srclimit = isrc + str_length;
+ const uint8* srclimit8 = srclimit - 7;
+ int n;
+ int rest_consumed;
+ int exit_reason;
+ do {
+ while ((src < srclimit8) &&
+ (((reinterpret_cast<const uint32*>(src)[0] |
+ reinterpret_cast<const uint32*>(src)[1]) & 0x80808080) == 0)) {
+ src += 8;
+ }
+ while ((src < srclimit) && (src[0] < 0x80)) {
+ src++;
+ }
+ // Run state table on the rest
+ n = src - isrc;
+ exit_reason = UTF8GenericScan(st, str + n, str_length - n, &rest_consumed);
+ src += rest_consumed;
+ } while ( exit_reason == kExitDoAgain );
+
+ *bytes_consumed = src - isrc;
+ return exit_reason;
+}
+
+// Hack: On some compilers the static tables are initialized at startup.
+// We can't use them until they are initialized. However, some Protocol
+// Buffer parsing happens at static init time and may try to validate
+// UTF-8 strings. Since UTF-8 validation is only used for debugging
+// anyway, we simply always return success if initialization hasn't
+// occurred yet.
+namespace {
+
+bool module_initialized_ = false;
+
+struct InitDetector {
+ InitDetector() {
+ module_initialized_ = true;
+ }
+};
+InitDetector init_detector;
+
+} // namespace
+
+bool IsStructurallyValidUTF8(const char* buf, int len) {
+ if (!module_initialized_) return true;
+
+ int bytes_consumed = 0;
+ UTF8GenericScanFastAscii(&utf8acceptnonsurrogates_obj,
+ buf, len, &bytes_consumed);
+ return (bytes_consumed == len);
+}
+
+} // namespace internal
+} // namespace protobuf
+} // namespace google