summary refs log tree commit diff stats
path: root/V8Binding/v8/src/unicode.h
diff options
context:
space:
mode:
Diffstat (limited to 'V8Binding/v8/src/unicode.h')
-rw-r--r-- V8Binding/v8/src/unicode.h 279
1 files changed, 0 insertions, 279 deletions
diff --git a/V8Binding/v8/src/unicode.h b/V8Binding/v8/src/unicode.h
deleted file mode 100644
index f5e4210..0000000
--- a/V8Binding/v8/src/unicode.h
+++ /dev/null
@@ -1,279 +0,0 @@
-// Copyright 2007-2008 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef V8_UNICODE_H_
-#define V8_UNICODE_H_
-
-#include <sys/types.h>
-
-/**
- * \file
- * Definitions and convenience functions for working with unicode.
- */
-
-namespace unibrow {
-
-typedef unsigned int uchar;
-typedef unsigned char byte;
-
-/**
- * The max length of the result of converting the case of a single
- * character.
- */
-static const int kMaxMappingSize = 4;
-
-template <class T, int size = 256>
-class Predicate {
- public:
- inline Predicate() { }
- inline bool get(uchar c);
- private:
- friend class Test;
- bool CalculateValue(uchar c);
- struct CacheEntry {
- inline CacheEntry() : code_point_(0), value_(0) { }
- inline CacheEntry(uchar code_point, bool value)
- : code_point_(code_point),
- value_(value) { }
- uchar code_point_ : 21;
- bool value_ : 1;
- };
- static const int kSize = size;
- static const int kMask = kSize - 1;
- CacheEntry entries_[kSize];
-};
-
-// A cache used in case conversion. It caches the value for characters
-// that either have no mapping or map to a single character independent
-// of context. Characters that map to more than one character or that
-// map differently depending on context are always looked up.
-template <class T, int size = 256>
-class Mapping {
- public:
- inline Mapping() { }
- inline int get(uchar c, uchar n, uchar* result);
- private:
- friend class Test;
- int CalculateValue(uchar c, uchar n, uchar* result);
- struct CacheEntry {
- inline CacheEntry() : code_point_(kNoChar), offset_(0) { }
- inline CacheEntry(uchar code_point, signed offset)
- : code_point_(code_point),
- offset_(offset) { }
- uchar code_point_;
- signed offset_;
- static const int kNoChar = (1 << 21) - 1;
- };
- static const int kSize = size;
- static const int kMask = kSize - 1;
- CacheEntry entries_[kSize];
-};
-
-class UnicodeData {
- private:
- friend class Test;
- static int GetByteCount();
- static uchar kMaxCodePoint;
-};
-
-// --- U t f 8 ---
-
-template <typename Data>
-class Buffer {
- public:
- inline Buffer(Data data, unsigned length) : data_(data), length_(length) { }
- inline Buffer() : data_(0), length_(0) { }
- Data data() { return data_; }
- unsigned length() { return length_; }
- private:
- Data data_;
- unsigned length_;
-};
-
-class Utf8 {
- public:
- static inline uchar Length(uchar chr);
- static inline unsigned Encode(char* out, uchar c);
- static const byte* ReadBlock(Buffer<const char*> str, byte* buffer,
- unsigned capacity, unsigned* chars_read, unsigned* offset);
- static const uchar kBadChar = 0xFFFD;
- static const unsigned kMaxEncodedSize = 4;
- static const unsigned kMaxOneByteChar = 0x7f;
- static const unsigned kMaxTwoByteChar = 0x7ff;
- static const unsigned kMaxThreeByteChar = 0xffff;
- static const unsigned kMaxFourByteChar = 0x1fffff;
-
- private:
- template <unsigned s> friend class Utf8InputBuffer;
- friend class Test;
- static inline uchar ValueOf(const byte* str,
- unsigned length,
- unsigned* cursor);
- static uchar CalculateValue(const byte* str,
- unsigned length,
- unsigned* cursor);
-};
-
-// --- C h a r a c t e r S t r e a m ---
-
-class CharacterStream {
- public:
- inline uchar GetNext();
- inline bool has_more() { return remaining_ != 0; }
- // Note that default implementation is not efficient.
- virtual void Seek(unsigned);
- unsigned Length();
- virtual ~CharacterStream() { }
- static inline bool EncodeCharacter(uchar c, byte* buffer, unsigned capacity,
- unsigned& offset);
- static inline bool EncodeAsciiCharacter(uchar c, byte* buffer,
- unsigned capacity, unsigned& offset);
- static inline bool EncodeNonAsciiCharacter(uchar c, byte* buffer,
- unsigned capacity, unsigned& offset);
- static inline uchar DecodeCharacter(const byte* buffer, unsigned* offset);
- virtual void Rewind() = 0;
- protected:
- virtual void FillBuffer() = 0;
- // The number of characters left in the current buffer
- unsigned remaining_;
- // The current offset within the buffer
- unsigned cursor_;
- // The buffer containing the decoded characters.
- const byte* buffer_;
-};
-
-// --- I n p u t B u f f e r ---
-
-/**
- * Provides efficient access to encoded characters in strings. It
- * does so by reading characters one block at a time, rather than one
- * character at a time, which gives string implementations an
- * opportunity to optimize the decoding.
- */
-template <class Reader, class Input = Reader*, unsigned kSize = 256>
-class InputBuffer : public CharacterStream {
- public:
- virtual void Rewind();
- inline void Reset(Input input);
- void Seek(unsigned position);
- inline void Reset(unsigned position, Input input);
- protected:
- InputBuffer() { }
- explicit InputBuffer(Input input) { Reset(input); }
- virtual void FillBuffer();
-
- // A custom offset that can be used by the string implementation to
- // mark progress within the encoded string.
- unsigned offset_;
- // The input string
- Input input_;
- // To avoid heap allocation, we keep an internal buffer to which
- // the encoded string can write its characters. The string
- // implementation is free to decide whether it wants to use this
- // buffer or not.
- byte util_buffer_[kSize];
-};
-
-// --- U t f 8 I n p u t B u f f e r ---
-
-template <unsigned s = 256>
-class Utf8InputBuffer : public InputBuffer<Utf8, Buffer<const char*>, s> {
- public:
- inline Utf8InputBuffer() { }
- inline Utf8InputBuffer(const char* data, unsigned length);
- inline void Reset(const char* data, unsigned length) {
- InputBuffer<Utf8, Buffer<const char*>, s>::Reset(
- Buffer<const char*>(data, length));
- }
-};
-
-struct Uppercase {
- static bool Is(uchar c);
-};
-struct Lowercase {
- static bool Is(uchar c);
-};
-struct Letter {
- static bool Is(uchar c);
-};
-struct Space {
- static bool Is(uchar c);
-};
-struct Number {
- static bool Is(uchar c);
-};
-struct WhiteSpace {
- static bool Is(uchar c);
-};
-struct LineTerminator {
- static bool Is(uchar c);
-};
-struct CombiningMark {
- static bool Is(uchar c);
-};
-struct ConnectorPunctuation {
- static bool Is(uchar c);
-};
-struct ToLowercase {
- static const int kMaxWidth = 3;
- static int Convert(uchar c,
- uchar n,
- uchar* result,
- bool* allow_caching_ptr);
-};
-struct ToUppercase {
- static const int kMaxWidth = 3;
- static int Convert(uchar c,
- uchar n,
- uchar* result,
- bool* allow_caching_ptr);
-};
-struct Ecma262Canonicalize {
- static const int kMaxWidth = 1;
- static int Convert(uchar c,
- uchar n,
- uchar* result,
- bool* allow_caching_ptr);
-};
-struct Ecma262UnCanonicalize {
- static const int kMaxWidth = 4;
- static int Convert(uchar c,
- uchar n,
- uchar* result,
- bool* allow_caching_ptr);
-};
-struct CanonicalizationRange {
- static const int kMaxWidth = 1;
- static int Convert(uchar c,
- uchar n,
- uchar* result,
- bool* allow_caching_ptr);
-};
-
-} // namespace unibrow
-
-#endif // V8_UNICODE_H_