diff options
Diffstat (limited to 'JavaScriptCore/wtf')
22 files changed, 1085 insertions, 177 deletions
diff --git a/JavaScriptCore/wtf/CMakeLists.txt b/JavaScriptCore/wtf/CMakeLists.txt new file mode 100644 index 0000000..f4d90ce --- /dev/null +++ b/JavaScriptCore/wtf/CMakeLists.txt @@ -0,0 +1,41 @@ +SET(WTF_SOURCES + Assertions.cpp + ByteArray.cpp + CurrentTime.cpp + FastMalloc.cpp + HashTable.cpp + MainThread.cpp + RandomNumber.cpp + RefCountedLeakCounter.cpp + StringExtras.cpp + Threading.cpp + TypeTraits.cpp + WTFThreadData.cpp + dtoa.cpp + + text/AtomicString.cpp + text/CString.cpp + text/StringImpl.cpp + text/StringStatics.cpp + text/WTFString.cpp + + unicode/UTF8.cpp +) + +SET(WTF_LIBRARIES +) + +SET(WTF_PORT_FLAGS ) +INCLUDE_IF_EXISTS(${JAVASCRIPTCORE_DIR}/wtf/CMakeLists${PORT}.txt) + +LIST(APPEND WTF_INCLUDE_DIRECTORIES + "${CMAKE_BINARY_DIR}" +) + +WEBKIT_WRAP_SOURCELIST(${WTF_SOURCES}) +INCLUDE_DIRECTORIES(${WTF_INCLUDE_DIRECTORIES}) +ADD_DEFINITIONS(-DBUILDING_WTF) +ADD_LIBRARY(${WTF_LIBRARY_NAME} ${WTF_LIBRARY_TYPE} ${WTF_SOURCES}) +TARGET_LINK_LIBRARIES(${WTF_LIBRARY_NAME} ${WTF_LIBRARIES}) + +ADD_TARGET_PROPERTIES(${WTF_LIBRARY_NAME} LINK_FLAGS ${WTF_LINK_FLAGS}) diff --git a/JavaScriptCore/wtf/CMakeListsEfl.txt b/JavaScriptCore/wtf/CMakeListsEfl.txt new file mode 100644 index 0000000..3e128be --- /dev/null +++ b/JavaScriptCore/wtf/CMakeListsEfl.txt @@ -0,0 +1,27 @@ +ADD_DEFINITIONS(-DUSE_SYSTEM_MALLOC=1) + +LIST(APPEND WTF_SOURCES + efl/MainThreadEfl.cpp + + gobject/GOwnPtr.cpp + gobject/GRefPtr.cpp + + ThreadIdentifierDataPthreads.cpp + ThreadingPthreads.cpp + + unicode/icu/CollatorICU.cpp +) + +LIST(APPEND WTF_LIBRARIES + pthread + ${ICU_LIBRARIES} +) + +LIST(APPEND WTF_LINK_FLAGS + ${ECORE_LDFLAGS} +) + +LIST(APPEND WTF_INCLUDE_DIRECTORIES + ${ECORE_INCLUDE_DIRS} + ${JAVASCRIPTCORE_DIR}/wtf/unicode/ +) diff --git a/JavaScriptCore/wtf/MD5.cpp b/JavaScriptCore/wtf/MD5.cpp index cd1837a..e995102 100644 --- a/JavaScriptCore/wtf/MD5.cpp +++ b/JavaScriptCore/wtf/MD5.cpp @@ -67,7 +67,8 @@ static void expectMD5(CString input, CString expected) { MD5 md5; 
md5.addBytes(reinterpret_cast<const uint8_t*>(input.data()), input.length()); - Vector<uint8_t, 16> digest = md5.checksum(); + Vector<uint8_t, 16> digest; + md5.checksum(digest); char* buf = 0; CString actual = CString::newUninitialized(32, buf); for (size_t i = 0; i < 16; i++) { @@ -256,7 +257,7 @@ void MD5::addBytes(const uint8_t* input, size_t length) memcpy(m_in, buf, length); } -Vector<uint8_t, 16> MD5::checksum() +void MD5::checksum(Vector<uint8_t, 16>& digest) { // Compute number of bytes mod 64 unsigned count = (m_bits[0] >> 3) & 0x3F; @@ -291,14 +292,16 @@ Vector<uint8_t, 16> MD5::checksum() MD5Transform(m_buf, reinterpret_cast<uint32_t*>(m_in)); reverseBytes(reinterpret_cast<uint8_t*>(m_buf), 4); - Vector<uint8_t, 16> digest; + + // Now, m_buf contains checksum result. + if (!digest.isEmpty()) + digest.clear(); digest.append(reinterpret_cast<uint8_t*>(m_buf), 16); // In case it's sensitive memset(m_buf, 0, sizeof(m_buf)); memset(m_bits, 0, sizeof(m_bits)); memset(m_in, 0, sizeof(m_in)); - return digest; } } // namespace WTF diff --git a/JavaScriptCore/wtf/MD5.h b/JavaScriptCore/wtf/MD5.h index 8ebfc45..3caa810 100644 --- a/JavaScriptCore/wtf/MD5.h +++ b/JavaScriptCore/wtf/MD5.h @@ -46,7 +46,7 @@ public: void addBytes(const uint8_t* input, size_t length); // checksum has a side effect of resetting the state of the object. 
- Vector<uint8_t, 16> checksum(); + void checksum(Vector<uint8_t, 16>&); private: uint32_t m_buf[4]; diff --git a/JavaScriptCore/wtf/MainThread.cpp b/JavaScriptCore/wtf/MainThread.cpp index a041bb2..1a0682b 100644 --- a/JavaScriptCore/wtf/MainThread.cpp +++ b/JavaScriptCore/wtf/MainThread.cpp @@ -51,8 +51,22 @@ struct FunctionWithContext { , syncFlag(syncFlag) { } + bool operator == (const FunctionWithContext& o) + { + return function == o.function + && context == o.context + && syncFlag == o.syncFlag; + } +}; + +class FunctionWithContextFinder { +public: + FunctionWithContextFinder(const FunctionWithContext& m) : m(m) {} + bool operator() (FunctionWithContext& o) { return o == m; } + FunctionWithContext m; }; + typedef Deque<FunctionWithContext> FunctionQueue; static bool callbacksPaused; // This global variable is only accessed from main thread. @@ -185,6 +199,24 @@ void callOnMainThreadAndWait(MainThreadFunction* function, void* context) syncFlag.wait(functionQueueMutex); } +void cancelCallOnMainThread(MainThreadFunction* function, void* context) +{ + ASSERT(function); + + MutexLocker locker(mainThreadFunctionQueueMutex()); + + FunctionWithContextFinder pred(FunctionWithContext(function, context)); + + while (true) { + // We must redefine 'i' each pass, because the iterator's operator= + // requires 'this' to be valid, and remove() invalidates all iterators. + FunctionQueue::iterator i(functionQueue().findIf(pred)); + if (i == functionQueue().end()) + break; + functionQueue().remove(i); + } +} + void setMainThreadCallbacksPaused(bool paused) { ASSERT(isMainThread()); diff --git a/JavaScriptCore/wtf/MainThread.h b/JavaScriptCore/wtf/MainThread.h index d037d0b..7703f3e 100644 --- a/JavaScriptCore/wtf/MainThread.h +++ b/JavaScriptCore/wtf/MainThread.h @@ -42,6 +42,8 @@ void initializeMainThread(); void callOnMainThread(MainThreadFunction*, void* context); void callOnMainThreadAndWait(MainThreadFunction*, void* context); +void 
cancelCallOnMainThread(MainThreadFunction*, void* context); + void setMainThreadCallbacksPaused(bool paused); bool isMainThread(); @@ -63,7 +65,7 @@ void initializeMainThreadToProcessMainThreadPlatform(); using WTF::callOnMainThread; using WTF::callOnMainThreadAndWait; +using WTF::cancelCallOnMainThread; using WTF::setMainThreadCallbacksPaused; using WTF::isMainThread; - #endif // MainThread_h diff --git a/JavaScriptCore/wtf/Platform.h b/JavaScriptCore/wtf/Platform.h index f667b9a..310eb04 100644 --- a/JavaScriptCore/wtf/Platform.h +++ b/JavaScriptCore/wtf/Platform.h @@ -1,6 +1,7 @@ /* * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. * Copyright (C) 2007-2009 Torch Mobile, Inc. + * Copyright (C) Research In Motion Limited 2010. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -222,12 +223,16 @@ #elif defined(__ARM_ARCH_5__) \ || defined(__ARM_ARCH_5T__) \ - || defined(__ARM_ARCH_5E__) \ - || defined(__ARM_ARCH_5TE__) \ - || defined(__ARM_ARCH_5TEJ__) \ || defined(__MARM_ARMV5__) #define WTF_ARM_ARCH_VERSION 5 +#elif defined(__ARM_ARCH_5E__) \ + || defined(__ARM_ARCH_5TE__) \ + || defined(__ARM_ARCH_5TEJ__) +#define WTF_ARM_ARCH_VERSION 5 +/*ARMv5TE requires allocators to use aligned memory*/ +#define WTF_USE_ARENA_ALLOC_ALIGNMENT_INTEGER 1 + #elif defined(__ARM_ARCH_6__) \ || defined(__ARM_ARCH_6J__) \ || defined(__ARM_ARCH_6K__) \ @@ -245,6 +250,13 @@ #elif defined(__TARGET_ARCH_ARM) #define WTF_ARM_ARCH_VERSION __TARGET_ARCH_ARM +#if defined(__TARGET_ARCH_5E) \ + || defined(__TARGET_ARCH_5TE) \ + || defined(__TARGET_ARCH_5TEJ) +/*ARMv5TE requires allocators to use aligned memory*/ +#define WTF_USE_ARENA_ALLOC_ALIGNMENT_INTEGER 1 +#endif + #else #define WTF_ARM_ARCH_VERSION 0 @@ -515,20 +527,8 @@ #endif -/* OS(WINCE) && PLATFORM(QT) - We can not determine the endianess at compile time. 
For - Qt for Windows CE the endianess is specified in the - device specific makespec -*/ #if OS(WINCE) && PLATFORM(QT) -# include <QtGlobal> -# undef WTF_CPU_BIG_ENDIAN -# undef WTF_CPU_MIDDLE_ENDIAN -# if Q_BYTE_ORDER == Q_BIG_ENDIAN -# define WTF_CPU_BIG_ENDIAN 1 -# endif - -# include <ce_time.h> +#include <ce_time.h> #endif #if (PLATFORM(IPHONE) || PLATFORM(MAC) || PLATFORM(WIN) || (PLATFORM(QT) && OS(DARWIN) && !ENABLE(SINGLE_THREADED))) && !defined(ENABLE_JSC_MULTIPLE_THREADS) @@ -790,7 +790,7 @@ /* ENABLE macro defaults */ #if PLATFORM(QT) -// We musn't customize the global operator new and delete for the Qt port. +// We must not customize the global operator new and delete for the Qt port. #define ENABLE_GLOBAL_FASTMALLOC_NEW 0 #endif @@ -952,8 +952,6 @@ on MinGW. See https://bugs.webkit.org/show_bug.cgi?id=29268 */ #elif CPU(X86) && OS(WINDOWS) && COMPILER(MINGW) && GCC_VERSION >= 40100 #define ENABLE_JIT 1 #define WTF_USE_JIT_STUB_ARGUMENT_VA_LIST 1 -#elif CPU(X86_64) && OS(WINDOWS) && COMPILER(MINGW64) && GCC_VERSION >= 40100 - #define ENABLE_JIT 1 #elif CPU(X86) && OS(WINDOWS) && COMPILER(MSVC) #define ENABLE_JIT 1 #define WTF_USE_JIT_STUB_ARGUMENT_REGISTER 1 @@ -1020,32 +1018,11 @@ on MinGW. See https://bugs.webkit.org/show_bug.cgi?id=29268 */ /* Yet Another Regex Runtime. */ #if !defined(ENABLE_YARR_JIT) -/* YARR supports x86 & x86-64, and has been tested on Mac and Windows. 
*/ -#if (CPU(X86) && PLATFORM(MAC)) \ - || (CPU(X86_64) && PLATFORM(MAC)) \ - || (CPU(ARM_THUMB2) && PLATFORM(IPHONE)) \ - || (CPU(ARM_THUMB2) && PLATFORM(ANDROID) && ENABLE(ANDROID_JSC_JIT)) \ - || (CPU(X86) && PLATFORM(WIN)) \ - || (CPU(X86) && PLATFORM(WX)) -#define ENABLE_YARR 1 -#define ENABLE_YARR_JIT 1 -#endif - -#if PLATFORM(QT) -#if (CPU(X86) && OS(WINDOWS) && COMPILER(MINGW) && GCC_VERSION >= 40100) \ - || (CPU(X86_64) && OS(WINDOWS) && COMPILER(MINGW64) && GCC_VERSION >= 40100) \ - || (CPU(X86) && OS(WINDOWS) && COMPILER(MSVC)) \ - || (CPU(X86) && OS(LINUX) && GCC_VERSION >= 40100) \ - || (CPU(X86_64) && OS(LINUX) && GCC_VERSION >= 40100) \ - || (CPU(ARM_TRADITIONAL) && OS(LINUX)) \ - || (CPU(ARM_TRADITIONAL) && OS(SYMBIAN) && COMPILER(RVCT)) \ - || (CPU(MIPS) && OS(LINUX)) \ - || (CPU(X86) && OS(DARWIN)) \ - || (CPU(X86_64) && OS(DARWIN)) +/* YARR and YARR_JIT is usually turned on for JIT enabled ports */ +#if ENABLE(JIT) #define ENABLE_YARR 1 #define ENABLE_YARR_JIT 1 #endif -#endif #endif /* !defined(ENABLE_YARR_JIT) */ diff --git a/JavaScriptCore/wtf/RefPtr.h b/JavaScriptCore/wtf/RefPtr.h index eed7933..86e4323 100644 --- a/JavaScriptCore/wtf/RefPtr.h +++ b/JavaScriptCore/wtf/RefPtr.h @@ -39,21 +39,21 @@ namespace WTF { template <typename T> class RefPtr : public FastAllocBase { public: - RefPtr() : m_ptr(0) { } - RefPtr(T* ptr) : m_ptr(ptr) { refIfNotNull(ptr); } - RefPtr(const RefPtr& o) : m_ptr(o.m_ptr) { T* ptr = m_ptr; refIfNotNull(ptr); } + ALWAYS_INLINE RefPtr() : m_ptr(0) { } + ALWAYS_INLINE RefPtr(T* ptr) : m_ptr(ptr) { refIfNotNull(ptr); } + ALWAYS_INLINE RefPtr(const RefPtr& o) : m_ptr(o.m_ptr) { T* ptr = m_ptr; refIfNotNull(ptr); } // see comment in PassRefPtr.h for why this takes const reference template <typename U> RefPtr(const PassRefPtr<U>&); template <typename U> RefPtr(const NonNullPassRefPtr<U>&); // Special constructor for cases where we overwrite an object in place. 
- RefPtr(PlacementNewAdoptType) { } + ALWAYS_INLINE RefPtr(PlacementNewAdoptType) { } // Hash table deleted values, which are only constructed and never copied or destroyed. RefPtr(HashTableDeletedValueType) : m_ptr(hashTableDeletedValue()) { } bool isHashTableDeletedValue() const { return m_ptr == hashTableDeletedValue(); } - ~RefPtr() { derefIfNotNull(m_ptr); } + ALWAYS_INLINE ~RefPtr() { derefIfNotNull(m_ptr); } template <typename U> RefPtr(const RefPtr<U>& o) : m_ptr(o.get()) { T* ptr = m_ptr; refIfNotNull(ptr); } diff --git a/JavaScriptCore/wtf/Vector.h b/JavaScriptCore/wtf/Vector.h index e495067..4d9ea61 100644 --- a/JavaScriptCore/wtf/Vector.h +++ b/JavaScriptCore/wtf/Vector.h @@ -686,6 +686,12 @@ namespace WTF { return *this; } +// Works around an assert in VS2010. See https://connect.microsoft.com/VisualStudio/feedback/details/558044/std-copy-should-not-check-dest-when-first-last +#if COMPILER(MSVC) && defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL + if (!begin()) + return *this; +#endif + std::copy(other.begin(), other.begin() + size(), begin()); TypeOperations::uninitializedCopy(other.begin() + size(), other.end(), end()); m_size = other.size(); @@ -709,6 +715,12 @@ namespace WTF { return *this; } +// Works around an assert in VS2010. 
See https://connect.microsoft.com/VisualStudio/feedback/details/558044/std-copy-should-not-check-dest-when-first-last +#if COMPILER(MSVC) && defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL + if (!begin()) + return *this; +#endif + std::copy(other.begin(), other.begin() + size(), begin()); TypeOperations::uninitializedCopy(other.begin() + size(), other.end(), end()); m_size = other.size(); diff --git a/JavaScriptCore/wtf/text/AtomicString.cpp b/JavaScriptCore/wtf/text/AtomicString.cpp index ab52488..0547b8c 100644 --- a/JavaScriptCore/wtf/text/AtomicString.cpp +++ b/JavaScriptCore/wtf/text/AtomicString.cpp @@ -20,13 +20,8 @@ #include "config.h" -#ifdef SKIP_STATIC_CONSTRUCTORS_ON_GCC -#define ATOMICSTRING_HIDE_GLOBALS 1 -#endif - #include "AtomicString.h" -#include "StaticConstructors.h" #include "StringHash.h" #include <wtf/HashSet.h> #include <wtf/Threading.h> @@ -34,6 +29,8 @@ namespace WebCore { +COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size); + class AtomicStringTable { public: static AtomicStringTable* create() @@ -255,14 +252,14 @@ PassRefPtr<StringImpl> AtomicString::add(const UChar* s) return addResult.second ? 
adoptRef(*addResult.first) : *addResult.first; } -PassRefPtr<StringImpl> AtomicString::add(StringImpl* r) +PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* r) { if (!r || r->isAtomic()) return r; if (r->length() == 0) return StringImpl::empty(); - + StringImpl* result = *stringTable().add(r).first; if (result == r) r->setIsAtomic(true); @@ -299,32 +296,4 @@ AtomicString AtomicString::lower() const return AtomicString(newImpl); } -JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, nullAtom) -JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, emptyAtom, "") -JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, textAtom, "#text") -JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, commentAtom, "#comment") -JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, starAtom, "*") -JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, xmlAtom, "xml") -JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, xmlnsAtom, "xmlns") - -void AtomicString::init() -{ - static bool initialized; - if (!initialized) { - // Initialization is not thread safe, so this function must be called from the main thread first. - ASSERT(isMainThread()); - - // Use placement new to initialize the globals. 
- new ((void*)&nullAtom) AtomicString; - new ((void*)&emptyAtom) AtomicString(""); - new ((void*)&textAtom) AtomicString("#text"); - new ((void*)&commentAtom) AtomicString("#comment"); - new ((void*)&starAtom) AtomicString("*"); - new ((void*)&xmlAtom) AtomicString("xml"); - new ((void*)&xmlnsAtom) AtomicString("xmlns"); - - initialized = true; - } -} - } diff --git a/JavaScriptCore/wtf/text/AtomicString.h b/JavaScriptCore/wtf/text/AtomicString.h index 9db70f4..5bb2cf9 100644 --- a/JavaScriptCore/wtf/text/AtomicString.h +++ b/JavaScriptCore/wtf/text/AtomicString.h @@ -38,7 +38,7 @@ namespace WebCore { struct AtomicStringHash; -class AtomicString : public FastAllocBase { +class AtomicString { public: static void init(); @@ -117,7 +117,13 @@ private: static PassRefPtr<StringImpl> add(const UChar*, unsigned length); static PassRefPtr<StringImpl> add(const UChar*, unsigned length, unsigned existingHash); static PassRefPtr<StringImpl> add(const UChar*); - static PassRefPtr<StringImpl> add(StringImpl*); + static ALWAYS_INLINE PassRefPtr<StringImpl> add(StringImpl* r) + { + if (!r || r->isAtomic()) + return r; + return addSlowCase(r); + } + static PassRefPtr<StringImpl> addSlowCase(StringImpl*); }; inline bool operator==(const AtomicString& a, const AtomicString& b) { return a.impl() == b.impl(); } diff --git a/JavaScriptCore/wtf/text/CString.cpp b/JavaScriptCore/wtf/text/CString.cpp index d93a5a3..7d09f12 100644 --- a/JavaScriptCore/wtf/text/CString.cpp +++ b/JavaScriptCore/wtf/text/CString.cpp @@ -51,11 +51,6 @@ void CString::init(const char* str, unsigned length) m_buffer->mutableData()[length] = '\0'; } -const char* CString::data() const -{ - return m_buffer ? m_buffer->data() : 0; -} - char* CString::mutableData() { copyBufferIfNeeded(); @@ -64,11 +59,6 @@ char* CString::mutableData() return m_buffer->mutableData(); } -unsigned CString::length() const -{ - return m_buffer ? 
m_buffer->length() - 1 : 0; -} - CString CString::newUninitialized(size_t length, char*& characterBuffer) { CString result; diff --git a/JavaScriptCore/wtf/text/CString.h b/JavaScriptCore/wtf/text/CString.h index 47f7675..d8250c5 100644 --- a/JavaScriptCore/wtf/text/CString.h +++ b/JavaScriptCore/wtf/text/CString.h @@ -57,9 +57,15 @@ public: CString(CStringBuffer* buffer) : m_buffer(buffer) { } static CString newUninitialized(size_t length, char*& characterBuffer); - const char* data() const; + const char* data() const + { + return m_buffer ? m_buffer->data() : 0; + } char* mutableData(); - unsigned length() const; + unsigned length() const + { + return m_buffer ? m_buffer->length() - 1 : 0; + } bool isNull() const { return !m_buffer; } diff --git a/JavaScriptCore/wtf/text/StringImpl.cpp b/JavaScriptCore/wtf/text/StringImpl.cpp index ff69737..68ff456 100644 --- a/JavaScriptCore/wtf/text/StringImpl.cpp +++ b/JavaScriptCore/wtf/text/StringImpl.cpp @@ -66,20 +66,6 @@ StringImpl::~StringImpl() } } -StringImpl* StringImpl::empty() -{ - // FIXME: This works around a bug in our port of PCRE, that a regular expression - // run on the empty string may still perform a read from the first element, and - // as such we need this to be a valid pointer. No code should ever be reading - // from a zero length string, so this should be able to be a non-null pointer - // into the zero-page. - // Replace this with 'reinterpret_cast<UChar*>(static_cast<intptr_t>(1))' once - // PCRE goes away. 
- static UChar emptyUCharData = 0; - DEFINE_STATIC_LOCAL(StringImpl, emptyString, (&emptyUCharData, 0, ConstructStaticString)); - return &emptyString; -} - PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data) { if (!length) { diff --git a/JavaScriptCore/wtf/text/StringImpl.h b/JavaScriptCore/wtf/text/StringImpl.h index dbf51e3..f4b2970 100644 --- a/JavaScriptCore/wtf/text/StringImpl.h +++ b/JavaScriptCore/wtf/text/StringImpl.h @@ -151,7 +151,7 @@ public: static PassRefPtr<StringImpl> create(const char*, unsigned length); static PassRefPtr<StringImpl> create(const char*); static PassRefPtr<StringImpl> create(const UChar*, unsigned length, PassRefPtr<SharedUChar> sharedBuffer); - static PassRefPtr<StringImpl> create(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length) + static ALWAYS_INLINE PassRefPtr<StringImpl> create(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length) { ASSERT(rep); ASSERT(length <= rep->length()); @@ -164,18 +164,22 @@ public: } static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data); - static PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, UChar*& output) + static ALWAYS_INLINE PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, UChar*& output) { if (!length) { output = 0; return empty(); } - if (length > ((std::numeric_limits<size_t>::max() - sizeof(StringImpl)) / sizeof(UChar))) + if (length > ((std::numeric_limits<size_t>::max() - sizeof(StringImpl)) / sizeof(UChar))) { + output = 0; return 0; + } StringImpl* resultImpl; - if (!tryFastMalloc(sizeof(UChar) * length + sizeof(StringImpl)).getValue(resultImpl)) + if (!tryFastMalloc(sizeof(UChar) * length + sizeof(StringImpl)).getValue(resultImpl)) { + output = 0; return 0; + } output = reinterpret_cast<UChar*>(resultImpl + 1); return adoptRef(new(resultImpl) StringImpl(length)); } diff --git a/JavaScriptCore/wtf/text/StringStatics.cpp b/JavaScriptCore/wtf/text/StringStatics.cpp new file mode 
100644 index 0000000..4a23a16 --- /dev/null +++ b/JavaScriptCore/wtf/text/StringStatics.cpp @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2010 Apple Inc. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#ifdef SKIP_STATIC_CONSTRUCTORS_ON_GCC +#define ATOMICSTRING_HIDE_GLOBALS 1 +#endif + +#include "AtomicString.h" +#include "StaticConstructors.h" +#include "StringImpl.h" + +namespace WebCore { + +StringImpl* StringImpl::empty() +{ + // FIXME: This works around a bug in our port of PCRE, that a regular expression + // run on the empty string may still perform a read from the first element, and + // as such we need this to be a valid pointer. 
No code should ever be reading + // from a zero length string, so this should be able to be a non-null pointer + // into the zero-page. + // Replace this with 'reinterpret_cast<UChar*>(static_cast<intptr_t>(1))' once + // PCRE goes away. + static UChar emptyUCharData = 0; + DEFINE_STATIC_LOCAL(StringImpl, emptyString, (&emptyUCharData, 0, ConstructStaticString)); + return &emptyString; +} + +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, nullAtom) +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, emptyAtom, "") +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, textAtom, "#text") +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, commentAtom, "#comment") +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, starAtom, "*") +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, xmlAtom, "xml") +JS_EXPORTDATA DEFINE_GLOBAL(AtomicString, xmlnsAtom, "xmlns") + +void AtomicString::init() +{ + static bool initialized; + if (!initialized) { + // Initialization is not thread safe, so this function must be called from the main thread first. + ASSERT(isMainThread()); + + // Use placement new to initialize the globals. 
+ new ((void*)&nullAtom) AtomicString; + new ((void*)&emptyAtom) AtomicString(""); + new ((void*)&textAtom) AtomicString("#text"); + new ((void*)&commentAtom) AtomicString("#comment"); + new ((void*)&starAtom) AtomicString("*"); + new ((void*)&xmlAtom) AtomicString("xml"); + new ((void*)&xmlnsAtom) AtomicString("xmlns"); + + initialized = true; + } +} + +} diff --git a/JavaScriptCore/wtf/text/WTFString.cpp b/JavaScriptCore/wtf/text/WTFString.cpp index a683e3d..842d755 100644 --- a/JavaScriptCore/wtf/text/WTFString.cpp +++ b/JavaScriptCore/wtf/text/WTFString.cpp @@ -37,13 +37,6 @@ using namespace WTF::Unicode; namespace WebCore { -String::String(const UChar* str, unsigned len) -{ - if (!str) - return; - m_impl = StringImpl::create(str, len); -} - String::String(const UChar* str) { if (!str) @@ -56,20 +49,6 @@ String::String(const UChar* str) m_impl = StringImpl::create(str, len); } -String::String(const char* str) -{ - if (!str) - return; - m_impl = StringImpl::create(str); -} - -String::String(const char* str, unsigned length) -{ - if (!str) - return; - m_impl = StringImpl::create(str, length); -} - void String::append(const String& str) { if (str.isEmpty()) @@ -202,13 +181,6 @@ void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, un m_impl = newImpl.release(); } -UChar String::operator[](unsigned i) const -{ - if (!m_impl || i >= m_impl->length()) - return 0; - return m_impl->characters()[i]; -} - UChar32 String::characterStartingAt(unsigned i) const { if (!m_impl || i >= m_impl->length()) @@ -216,13 +188,6 @@ UChar32 String::characterStartingAt(unsigned i) const return m_impl->characterStartingAt(i); } -unsigned String::length() const -{ - if (!m_impl) - return 0; - return m_impl->length(); -} - void String::truncate(unsigned position) { if (position >= length()) @@ -311,13 +276,6 @@ bool String::percentage(int& result) const return true; } -const UChar* String::characters() const -{ - if (!m_impl) - return 0; - return 
m_impl->characters(); -} - const UChar* String::charactersWithNullTermination() { if (!m_impl) @@ -591,11 +549,6 @@ String String::crossThreadString() const return m_impl->crossThreadString(); } -bool String::isEmpty() const -{ - return !m_impl || !m_impl->length(); -} - void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const { result.clear(); diff --git a/JavaScriptCore/wtf/text/WTFString.h b/JavaScriptCore/wtf/text/WTFString.h index 7c3c2dd..d98621c 100644 --- a/JavaScriptCore/wtf/text/WTFString.h +++ b/JavaScriptCore/wtf/text/WTFString.h @@ -86,10 +86,25 @@ int reverseFind(const UChar*, size_t, UChar, int startPosition = -1); class String { public: String() { } // gives null string, distinguishable from an empty string - String(const UChar*, unsigned length); + String(const UChar* str, unsigned len) + { + if (!str) + return; + m_impl = StringImpl::create(str, len); + } + String(const char* str) + { + if (!str) + return; + m_impl = StringImpl::create(str); + } + String(const char* str, unsigned length) + { + if (!str) + return; + m_impl = StringImpl::create(str, length); + } String(const UChar*); // Specifically for null terminated UTF-16 - String(const char*); - String(const char*, unsigned length); String(StringImpl* i) : m_impl(i) { } String(PassRefPtr<StringImpl> i) : m_impl(i) { } String(RefPtr<StringImpl> i) : m_impl(i) { } @@ -103,11 +118,28 @@ public: static String adopt(StringBuffer& buffer) { return StringImpl::adopt(buffer); } static String adopt(Vector<UChar>& vector) { return StringImpl::adopt(vector); } - unsigned length() const; - const UChar* characters() const; + ALWAYS_INLINE unsigned length() const + { + if (!m_impl) + return 0; + return m_impl->length(); + } + + const UChar* characters() const + { + if (!m_impl) + return 0; + return m_impl->characters(); + } + const UChar* charactersWithNullTermination(); - UChar operator[](unsigned i) const; // if i >= length(), returns 0 + UChar operator[](unsigned 
i) const // if i >= length(), returns 0 + { + if (!m_impl || i >= m_impl->length()) + return 0; + return m_impl->characters()[i]; + } UChar32 characterStartingAt(unsigned) const; // Ditto. bool contains(UChar c) const { return find(c) != -1; } @@ -215,7 +247,7 @@ public: String threadsafeCopy() const; bool isNull() const { return !m_impl; } - bool isEmpty() const; + ALWAYS_INLINE bool isEmpty() const { return !m_impl || !m_impl->length(); } StringImpl* impl() const { return m_impl.get(); } diff --git a/JavaScriptCore/wtf/url/src/RawURLBuffer.h b/JavaScriptCore/wtf/url/src/RawURLBuffer.h new file mode 100644 index 0000000..9bb2e8e --- /dev/null +++ b/JavaScriptCore/wtf/url/src/RawURLBuffer.h @@ -0,0 +1,71 @@ +// Copyright 2010, Google Inc. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef RawURLBuffer_h +#define RawURLBuffer_h + +#include "URLBuffer.h" +#include <stdlib.h> + +namespace WTF { + +// Simple implementation of the URLBuffer using new[]. This class +// also supports a static buffer so if it is allocated on the stack, most +// URLs can be canonicalized with no heap allocations. +template<typename CHAR, int inlineCapacity = 1024> +class RawURLBuffer : public URLBuffer<CHAR> { +public: + RawURLBuffer() : URLBuffer<CHAR>() + { + this->m_buffer = m_inlineBuffer; + this->m_capacity = inlineCapacity; + } + + virtual ~RawURLBuffer() + { + if (this->m_buffer != m_inlineBuffer) + delete[] this->m_buffer; + } + + virtual void resize(int size) + { + CHAR* newBuffer = new CHAR[size]; + memcpy(newBuffer, this->m_buffer, sizeof(CHAR) * (this->m_length < size ? this->m_length : size)); + if (this->m_buffer != m_inlineBuffer) + delete[] this->m_buffer; + this->m_buffer = newBuffer; + this->m_capacity = size; + } + +protected: + CHAR m_inlineBuffer[inlineCapacity]; +}; + +} // namespace WTF + +#endif // RawURLBuffer_h diff --git a/JavaScriptCore/wtf/url/src/URLBuffer.h b/JavaScriptCore/wtf/url/src/URLBuffer.h new file mode 100644 index 0000000..e07402e --- /dev/null +++ b/JavaScriptCore/wtf/url/src/URLBuffer.h @@ -0,0 +1,136 @@ +// Copyright 2010, Google Inc. All rights reserved. 
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//    * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//    * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//    * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef URLBuffer_h
#define URLBuffer_h

namespace WTF {

// Base class for the canonicalizer output, this maintains a buffer and
// supports simple resizing and append operations on it.
//
// It is VERY IMPORTANT that no virtual function calls be made on the common
// code path. We only have two virtual function calls, the destructor and a
// resize function that is called when the existing buffer is not big enough.
// The derived class is then in charge of setting up our buffer which we will
// manage.
template<typename CHAR>
class URLBuffer {
public:
    URLBuffer() : m_buffer(0), m_capacity(0), m_length(0) { }
    virtual ~URLBuffer() { }

    // Implemented to resize the buffer. This function should update the buffer
    // pointer to point to the new buffer, and any old data up to |m_length| in
    // the buffer must be copied over.
    //
    // The new size must be larger than m_capacity.
    virtual void resize(int) = 0;

    // Returns the character stored at |offset|. The return type is CHAR (not
    // char) so that buffers of wider characters are not truncated on read.
    // No bounds checking is performed.
    inline CHAR at(int offset) const { return m_buffer[offset]; }

    // Overwrites the character stored at |offset|. No bounds checking is
    // performed.
    inline void set(int offset, CHAR ch)
    {
        // FIXME: Add ASSERT(offset < length());
        m_buffer[offset] = ch;
    }

    // Returns the current capacity of the buffer. The length() is the number of
    // characters that have been declared to be written, but the capacity() is
    // the number that can be written without reallocation. If the caller must
    // write many characters at once, it can make sure there is enough capacity,
    // write the data, then use setLength() to declare the new length().
    int capacity() const { return m_capacity; }
    int length() const { return m_length; }

    // The output will NOT be 0-terminated. Call length() to get the length.
    const CHAR* data() const { return m_buffer; }
    CHAR* data() { return m_buffer; }

    // Shortens the URL to the new length. Used for "backing up" when processing
    // relative paths. This can also be used if an external function writes a lot
    // of data to the buffer (when using the "Raw" version below) beyond the end,
    // to declare the new length.
    void setLength(int length)
    {
        // FIXME: Add ASSERT(length < capacity());
        m_length = length;
    }

    // This is the most performance critical function, since it is called for
    // every character. If the buffer is full and cannot be grown, the
    // character is dropped.
    void append(CHAR ch)
    {
        // In VC2005, putting this common case first speeds up execution
        // dramatically because this branch is predicted as taken.
        if (m_length < m_capacity) {
            m_buffer[m_length] = ch;
            ++m_length;
            return;
        }

        if (!grow(1))
            return;

        m_buffer[m_length] = ch;
        ++m_length;
    }

    // Appends |strLength| characters starting at |str|. Nothing is appended
    // if |strLength| is not positive or if the buffer cannot be grown enough
    // to hold all of them.
    void append(const CHAR* str, int strLength)
    {
        // A negative count would otherwise move m_length backwards.
        if (strLength <= 0)
            return;

        if (m_length + strLength > m_capacity) {
            if (!grow(m_length + strLength - m_capacity))
                return;
        }
        for (int i = 0; i < strLength; i++)
            m_buffer[m_length + i] = str[i];
        m_length += strLength;
    }

protected:
    // Doubles the capacity (starting from at least |minimumCapacity|) until
    // at least |minimumAdditionalCapacity| more characters fit, then calls
    // resize(). Returns false, without resizing, once the capacity would
    // reach 2^30; returns true otherwise.
    bool grow(int minimumAdditionalCapacity)
    {
        static const int minimumCapacity = 16;
        int newCapacity = m_capacity ? m_capacity : minimumCapacity;
        do {
            if (newCapacity >= (1 << 30)) // Prevent overflow below.
                return false;
            newCapacity *= 2;
            // newCapacity >= m_capacity here, so the subtraction cannot
            // overflow, unlike m_capacity + minimumAdditionalCapacity.
        } while (newCapacity - m_capacity < minimumAdditionalCapacity);
        resize(newCapacity);
        return true;
    }

    CHAR* m_buffer;
    int m_capacity;
    int m_length; // Used characters in the buffer.
};

} // namespace WTF

#endif // URLBuffer_h

// ---- Patch residue: start of the URLComponent.h hunk of this diff listing ----
// diff --git a/JavaScriptCore/wtf/url/src/URLComponent.h b/JavaScriptCore/wtf/url/src/URLComponent.h
// index ca7e6f3..1b7976e 100644
// --- a/JavaScriptCore/wtf/url/src/URLComponent.h
// +++ b/JavaScriptCore/wtf/url/src/URLComponent.h
// @@ -37,13 +37,19 @@ public:
//      URLComponent() : m_begin(0), m_length(-1) { }
//      URLComponent(int begin, int length) : m_begin(begin), m_length(length) { }
//
// +    // Helper that returns a component created with the given begin and ending
// +    // points. The ending point is non-inclusive.
// +    static inline URLComponent fromRange(int begin, int end)
// +    {
// +        return URLComponent(begin, end - begin);
// +    }
// +
//      // Returns true if this component is valid, meaning the length is given. Even
//      // valid components may be empty to record the fact that they exist.
//      bool isValid() const { return m_length != -1; }
//
// -    // Returns true if the given component is specified on false, the component
// -    // is either empty or invalid.
- bool isNonempty() const { return m_length > 0; } + bool isNonEmpty() const { return m_length > 0; } + bool isEmptyOrInvalid() const { return m_length <= 0; } void reset() { diff --git a/JavaScriptCore/wtf/url/src/URLParser.h b/JavaScriptCore/wtf/url/src/URLParser.h new file mode 100644 index 0000000..4d5ca51 --- /dev/null +++ b/JavaScriptCore/wtf/url/src/URLParser.h @@ -0,0 +1,575 @@ +/* Based on nsURLParsers.cc from Mozilla + * ------------------------------------- + * Copyright (C) 1998 Netscape Communications Corporation. + * + * Other contributors: + * Darin Fisher (original author) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Alternatively, the contents of this file may be used under the terms + * of either the Mozilla Public License Version 1.1, found at + * http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public + * License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html + * (the "GPL"), in which case the provisions of the MPL or the GPL are + * applicable instead of those above. 
If you wish to allow use of your + * version of this file only under the terms of one of those two + * licenses (the MPL or the GPL) and not to allow others to use your + * version of this file under the LGPL, indicate your decision by + * deletingthe provisions above and replace them with the notice and + * other provisions required by the MPL or the GPL, as the case may be. + * If you do not delete the provisions above, a recipient may use your + * version of this file under any of the LGPL, the MPL or the GPL. + */ + +#ifndef URLParser_h +#define URLParser_h + +#include "URLComponent.h" +#include "URLSegments.h" + +namespace WTF { + +template<typename CHAR> +class URLParser { +public: + enum SpecialPort { + UnspecifiedPort = -1, + InvalidPort = -2, + }; + + // This handles everything that may be an authority terminator, including + // backslash. For special backslash handling see parseAfterScheme. + static bool isPossibleAuthorityTerminator(CHAR ch) + { + return isURLSlash(ch) || ch == '?' || ch == '#' || ch == ';'; + } + + // Given an already-identified auth section, breaks it into its constituent + // parts. The port number will be parsed and the resulting integer will be + // filled into the given *port variable, or -1 if there is no port number + // or it is invalid. + static void parseAuthority(const CHAR* spec, const URLComponent& auth, URLComponent& username, URLComponent& password, URLComponent& host, URLComponent& port) + { + // FIXME: add ASSERT(auth.isValid()); // We should always get an authority. + if (!auth.length()) { + username.reset(); + password.reset(); + host.reset(); + port.reset(); + return; + } + + // Search backwards for @, which is the separator between the user info + // and the server info. RFC 3986 forbids @ from occuring in auth, but + // someone might include it in a password unescaped. 
+ int i = auth.begin() + auth.length() - 1; + while (i > auth.begin() && spec[i] != '@') + --i; + + if (spec[i] == '@') { + // Found user info: <user-info>@<server-info> + parseUserInfo(spec, URLComponent(auth.begin(), i - auth.begin()), username, password); + parseServerInfo(spec, URLComponent::fromRange(i + 1, auth.begin() + auth.length()), host, port); + } else { + // No user info, everything is server info. + username.reset(); + password.reset(); + parseServerInfo(spec, auth, host, port); + } + } + + static bool extractScheme(const CHAR* spec, int specLength, URLComponent& scheme) + { + // Skip leading whitespace and control characters. + int begin = 0; + while (begin < specLength && shouldTrimFromURL(spec[begin])) + begin++; + if (begin == specLength) + return false; // Input is empty or all whitespace. + + // Find the first colon character. + for (int i = begin; i < specLength; i++) { + if (spec[i] == ':') { + scheme = URLComponent::fromRange(begin, i); + return true; + } + } + return false; // No colon found: no scheme + } + + // Fills in all members of the URLSegments structure (except for the + // scheme) for standard URLs. + // + // |spec| is the full spec being parsed, of length |specLength|. + // |afterScheme| is the character immediately following the scheme (after + // the colon) where we'll begin parsing. + static void parseAfterScheme(const CHAR* spec, int specLength, int afterScheme, URLSegments& parsed) + { + int numberOfSlashes = consecutiveSlashes(spec, afterScheme, specLength); + int afterSlashes = afterScheme + numberOfSlashes; + + // First split into two main parts, the authority (username, password, + // host, and port) and the full path (path, query, and reference). + URLComponent authority; + URLComponent fullPath; + + // Found "//<some data>", looks like an authority section. Treat + // everything from there to the next slash (or end of spec) to be the + // authority. 
Note that we ignore the number of slashes and treat it as + // the authority. + int authEnd = nextAuthorityTerminator(spec, afterSlashes, specLength); + authority = URLComponent(afterSlashes, authEnd - afterSlashes); + + if (authEnd == specLength) // No beginning of path found. + fullPath = URLComponent(); + else // Everything starting from the slash to the end is the path. + fullPath = URLComponent(authEnd, specLength - authEnd); + + // Now parse those two sub-parts. + parseAuthority(spec, authority, parsed.username, parsed.password, parsed.host, parsed.port); + parsePath(spec, fullPath, parsed.path, parsed.query, parsed.fragment); + } + + // The main parsing function for standard URLs. Standard URLs have a scheme, + // host, path, etc. + static void parseStandardURL(const CHAR* spec, int specLength, URLSegments& parsed) + { + // FIXME: add ASSERT(specLength >= 0); + + // Strip leading & trailing spaces and control characters. + int begin = 0; + trimURL(spec, begin, specLength); + + int afterScheme; + if (extractScheme(spec, specLength, parsed.scheme)) + afterScheme = parsed.scheme.end() + 1; // Skip past the colon. + else { + // Say there's no scheme when there is a colon. We could also say + // that everything is the scheme. Both would produce an invalid + // URL, but this way seems less wrong in more cases. + parsed.scheme.reset(); + afterScheme = begin; + } + parseAfterScheme(spec, specLength, afterScheme, parsed); + } + + static void parsePath(const CHAR* spec, const URLComponent& path, URLComponent& filepath, URLComponent& query, URLComponent& fragment) + { + // path = [/]<segment1>/<segment2>/<...>/<segmentN>;<param>?<query>#<fragment> + + // Special case when there is no path. + if (!path.isValid()) { + filepath.reset(); + query.reset(); + fragment.reset(); + return; + } + // FIXME: add ASSERT(path.length() > 0); // We should never have 0 length paths. + + // Search for first occurrence of either ? or #. 
+ int pathEnd = path.begin() + path.length(); + + int querySeparator = -1; // Index of the '?' + int refSeparator = -1; // Index of the '#' + for (int i = path.begin(); i < pathEnd; i++) { + switch (spec[i]) { + case '?': + if (querySeparator < 0) + querySeparator = i; + break; + case '#': + refSeparator = i; + i = pathEnd; // Break out of the loop. + break; + default: + break; + } + } + + // Markers pointing to the character after each of these corresponding + // components. The code below works from the end back to the beginning, + // and will update these indices as it finds components that exist. + int fileEnd, queryEnd; + + // Fragment: from the # to the end of the path. + if (refSeparator >= 0) { + fileEnd = refSeparator; + queryEnd = refSeparator; + fragment = URLComponent::fromRange(refSeparator + 1, pathEnd); + } else { + fileEnd = pathEnd; + queryEnd = pathEnd; + fragment.reset(); + } + + // Query fragment: everything from the ? to the next boundary (either + // the end of the path or the fragment fragment). + if (querySeparator >= 0) { + fileEnd = querySeparator; + query = URLComponent::fromRange(querySeparator + 1, queryEnd); + } else + query.reset(); + + // File path: treat an empty file path as no file path. + if (fileEnd != path.begin()) + filepath = URLComponent::fromRange(path.begin(), fileEnd); + else + filepath.reset(); + } + + // Initializes a path URL which is merely a scheme followed by a path. + // Examples include "about:foo" and "javascript:alert('bar');" + static void parsePathURL(const CHAR* spec, int specLength, URLSegments& parsed) + { + // Get the non-path and non-scheme parts of the URL out of the way, we + // never use them. + parsed.username.reset(); + parsed.password.reset(); + parsed.host.reset(); + parsed.port.reset(); + parsed.query.reset(); + parsed.fragment.reset(); + + // Strip leading & trailing spaces and control characters. + // FIXME: Perhaps this is unnecessary? 
+ int begin = 0; + trimURL(spec, begin, specLength); + + // Handle empty specs or ones that contain only whitespace or control + // chars. + if (begin == specLength) { + parsed.scheme.reset(); + parsed.path.reset(); + return; + } + + // Extract the scheme, with the path being everything following. We also + // handle the case where there is no scheme. + if (extractScheme(&spec[begin], specLength - begin, parsed.scheme)) { + // Offset the results since we gave extractScheme a substring. + parsed.scheme.setBegin(parsed.scheme.begin() + begin); + + // For compatibility with the standard URL parser, we treat no path + // as -1, rather than having a length of 0 (we normally wouldn't + // care so much for these non-standard URLs). + if (parsed.scheme.end() == specLength - 1) + parsed.path.reset(); + else + parsed.path = URLComponent::fromRange(parsed.scheme.end() + 1, specLength); + } else { + // No scheme found, just path. + parsed.scheme.reset(); + parsed.path = URLComponent::fromRange(begin, specLength); + } + } + + static void parseMailtoURL(const CHAR* spec, int specLength, URLSegments& parsed) + { + // FIXME: add ASSERT(specLength >= 0); + + // Get the non-path and non-scheme parts of the URL out of the way, we + // never use them. + parsed.username.reset(); + parsed.password.reset(); + parsed.host.reset(); + parsed.port.reset(); + parsed.fragment.reset(); + parsed.query.reset(); // May use this; reset for convenience. + + // Strip leading & trailing spaces and control characters. + int begin = 0; + trimURL(spec, begin, specLength); + + // Handle empty specs or ones that contain only whitespace or control + // chars. + if (begin == specLength) { + parsed.scheme.reset(); + parsed.path.reset(); + return; + } + + int pathBegin = -1; + int pathEnd = -1; + + // Extract the scheme, with the path being everything following. We also + // handle the case where there is no scheme. 
+ if (extractScheme(&spec[begin], specLength - begin, parsed.scheme)) { + // Offset the results since we gave extractScheme a substring. + parsed.scheme.setBegin(parsed.scheme.begin() + begin); + + if (parsed.scheme.end() != specLength - 1) { + pathBegin = parsed.scheme.end() + 1; + pathEnd = specLength; + } + } else { + // No scheme found, just path. + parsed.scheme.reset(); + pathBegin = begin; + pathEnd = specLength; + } + + // Split [pathBegin, pathEnd) into a path + query. + for (int i = pathBegin; i < pathEnd; ++i) { + if (spec[i] == '?') { + parsed.query = URLComponent::fromRange(i + 1, pathEnd); + pathEnd = i; + break; + } + } + + // For compatibility with the standard URL parser, treat no path as + // -1, rather than having a length of 0 + if (pathBegin == pathEnd) + parsed.path.reset(); + else + parsed.path = URLComponent::fromRange(pathBegin, pathEnd); + } + + static int parsePort(const CHAR* spec, const URLComponent& component) + { + // Easy success case when there is no port. + const int maxDigits = 5; + if (component.isEmptyOrInvalid()) + return UnspecifiedPort; + + URLComponent nonZeroDigits(component.end(), 0); + for (int i = 0; i < component.length(); ++i) { + if (spec[component.begin() + i] != '0') { + nonZeroDigits = URLComponent::fromRange(component.begin() + i, component.end()); + break; + } + } + if (!nonZeroDigits.length()) + return 0; // All digits were 0. + + if (nonZeroDigits.length() > maxDigits) + return InvalidPort; + + int port = 0; + for (int i = 0; i < nonZeroDigits.length(); ++i) { + CHAR ch = spec[nonZeroDigits.begin() + i]; + if (!isPortDigit(ch)) + return InvalidPort; + port *= 10; + port += static_cast<char>(ch) - '0'; + } + if (port > 65535) + return InvalidPort; + return port; + } + + static void extractFileName(const CHAR* spec, const URLComponent& path, URLComponent& fileName) + { + // Handle empty paths: they have no file names. 
+ if (path.isEmptyOrInvalid()) { + fileName.reset(); + return; + } + + // Search backwards for a parameter, which is a normally unused field + // in a URL delimited by a semicolon. We parse the parameter as part of + // the path, but here, we don't want to count it. The last semicolon is + // the parameter. + int fileEnd = path.end(); + for (int i = path.end() - 1; i > path.begin(); --i) { + if (spec[i] == ';') { + fileEnd = i; + break; + } + } + + // Now search backwards from the filename end to the previous slash + // to find the beginning of the filename. + for (int i = fileEnd - 1; i >= path.begin(); --i) { + if (isURLSlash(spec[i])) { + // File name is everything following this character to the end + fileName = URLComponent::fromRange(i + 1, fileEnd); + return; + } + } + + // No slash found, this means the input was degenerate (generally paths + // will start with a slash). Let's call everything the file name. + fileName = URLComponent::fromRange(path.begin(), fileEnd); + } + + static bool extractQueryKeyValue(const CHAR* spec, URLComponent& query, URLComponent& key, URLComponent& value) + { + if (query.isEmptyOrInvalid()) + return false; + + int start = query.begin(); + int current = start; + int end = query.end(); + + // We assume the beginning of the input is the beginning of the "key" + // and we skip to the end of it. + key.setBegin(current); + while (current < end && spec[current] != '&' && spec[current] != '=') + ++current; + key.setLength(current - key.begin()); + + // Skip the separator after the key (if any). + if (current < end && spec[current] == '=') + ++current; + + // Find the value part. 
+ value.setBegin(current); + while (current < end && spec[current] != '&') + ++current; + value.setLength(current - value.begin()); + + // Finally skip the next separator if any + if (current < end && spec[current] == '&') + ++current; + + // Save the new query + query = URLComponent::fromRange(current, end); + return true; + } + +// FIXME: This should be protected or private. +public: + // We treat slashes and backslashes the same for IE compatibility. + static inline bool isURLSlash(CHAR ch) + { + return ch == '/' || ch == '\\'; + } + + // Returns true if we should trim this character from the URL because it is + // a space or a control character. + static inline bool shouldTrimFromURL(CHAR ch) + { + return ch <= ' '; + } + + // Given an already-initialized begin index and end index (the index after + // the last CHAR in spec), this shrinks the range to eliminate + // "should-be-trimmed" characters. + static inline void trimURL(const CHAR* spec, int& begin, int& end) + { + // Strip leading whitespace and control characters. + while (begin < end && shouldTrimFromURL(spec[begin])) + ++begin; + + // Strip trailing whitespace and control characters. We need the >i + // test for when the input string is all blanks; we don't want to back + // past the input. + while (end > begin && shouldTrimFromURL(spec[end - 1])) + --end; + } + + // Counts the number of consecutive slashes starting at the given offset + // in the given string of the given length. + static inline int consecutiveSlashes(const CHAR *string, int beginOffset, int stringLength) + { + int count = 0; + while (beginOffset + count < stringLength && isURLSlash(string[beginOffset + count])) + ++count; + return count; + } + +private: + // URLParser cannot be constructed. + URLParser(); + + // Returns true if the given character is a valid digit to use in a port. 
+ static inline bool isPortDigit(CHAR ch) + { + return ch >= '0' && ch <= '9'; + } + + // Returns the offset of the next authority terminator in the input starting + // from startOffset. If no terminator is found, the return value will be equal + // to specLength. + static int nextAuthorityTerminator(const CHAR* spec, int startOffset, int specLength) + { + for (int i = startOffset; i < specLength; i++) { + if (isPossibleAuthorityTerminator(spec[i])) + return i; + } + return specLength; // Not found. + } + + static void parseUserInfo(const CHAR* spec, const URLComponent& user, URLComponent& username, URLComponent& password) + { + // Find the first colon in the user section, which separates the + // username and password. + int colonOffset = 0; + while (colonOffset < user.length() && spec[user.begin() + colonOffset] != ':') + ++colonOffset; + + if (colonOffset < user.length()) { + // Found separator: <username>:<password> + username = URLComponent(user.begin(), colonOffset); + password = URLComponent::fromRange(user.begin() + colonOffset + 1, user.begin() + user.length()); + } else { + // No separator, treat everything as the username + username = user; + password = URLComponent(); + } + } + + static void parseServerInfo(const CHAR* spec, const URLComponent& serverInfo, URLComponent& host, URLComponent& port) + { + if (!serverInfo.length()) { + // No server info, host name is empty. + host.reset(); + port.reset(); + return; + } + + // If the host starts with a left-bracket, assume the entire host is an + // IPv6 literal. Otherwise, assume none of the host is an IPv6 literal. + // This assumption will be overridden if we find a right-bracket. + // + // Our IPv6 address canonicalization code requires both brackets to + // exist, but the ability to locate an incomplete address can still be + // useful. + int ipv6Terminator = spec[serverInfo.begin()] == '[' ? serverInfo.end() : -1; + int colon = -1; + + // Find the last right-bracket, and the last colon. 
+ for (int i = serverInfo.begin(); i < serverInfo.end(); i++) { + switch (spec[i]) { + case ']': + ipv6Terminator = i; + break; + case ':': + colon = i; + break; + default: + break; + } + } + + if (colon > ipv6Terminator) { + // Found a port number: <hostname>:<port> + host = URLComponent::fromRange(serverInfo.begin(), colon); + if (!host.length()) + host.reset(); + port = URLComponent::fromRange(colon + 1, serverInfo.end()); + } else { + // No port: <hostname> + host = serverInfo; + port.reset(); + } + } +}; + +} // namespace WTF + +#endif // URLParser_h |