diff options
author | Steve Block <steveblock@google.com> | 2011-05-13 06:44:40 -0700 |
---|---|---|
committer | Android (Google) Code Review <android-gerrit@google.com> | 2011-05-13 06:44:40 -0700 |
commit | 08014c20784f3db5df3a89b73cce46037b77eb59 (patch) | |
tree | 47749210d31e19e6e2f64036fa8fae2ad693476f /Source/JavaScriptCore/wtf/url | |
parent | 860220379e56aeb66424861ad602b07ee22b4055 (diff) | |
parent | 4c3661f7918f8b3f139f824efb7855bedccb4c94 (diff) | |
download | external_webkit-08014c20784f3db5df3a89b73cce46037b77eb59.zip external_webkit-08014c20784f3db5df3a89b73cce46037b77eb59.tar.gz external_webkit-08014c20784f3db5df3a89b73cce46037b77eb59.tar.bz2 |
Merge changes Ide388898,Ic49f367c,I1158a808,Iacb6ca5d,I2100dd3a,I5c1abe54,Ib0ef9902,I31dbc523,I570314b3
* changes:
Merge WebKit at r75315: Update WebKit version
Merge WebKit at r75315: Add FrameLoaderClient PageCache stubs
Merge WebKit at r75315: Stub out AXObjectCache::remove()
Merge WebKit at r75315: Fix ImageBuffer
Merge WebKit at r75315: Fix PluginData::initPlugins()
Merge WebKit at r75315: Fix conflicts
Merge WebKit at r75315: Fix Makefiles
Merge WebKit at r75315: Move Android-specific WebCore files to Source
Merge WebKit at r75315: Initial merge by git.
Diffstat (limited to 'Source/JavaScriptCore/wtf/url')
-rw-r--r-- | Source/JavaScriptCore/wtf/url/api/ParsedURL.cpp | 90 | ||||
-rw-r--r-- | Source/JavaScriptCore/wtf/url/api/ParsedURL.h | 62 | ||||
-rw-r--r-- | Source/JavaScriptCore/wtf/url/api/URLString.h | 55 | ||||
-rw-r--r-- | Source/JavaScriptCore/wtf/url/src/RawURLBuffer.h | 71 | ||||
-rw-r--r-- | Source/JavaScriptCore/wtf/url/src/URLBuffer.h | 136 | ||||
-rw-r--r-- | Source/JavaScriptCore/wtf/url/src/URLCharacterTypes.cpp | 173 | ||||
-rw-r--r-- | Source/JavaScriptCore/wtf/url/src/URLCharacterTypes.h | 61 | ||||
-rw-r--r-- | Source/JavaScriptCore/wtf/url/src/URLComponent.h | 77 | ||||
-rw-r--r-- | Source/JavaScriptCore/wtf/url/src/URLEscape.cpp | 39 | ||||
-rw-r--r-- | Source/JavaScriptCore/wtf/url/src/URLEscape.h | 49 | ||||
-rw-r--r-- | Source/JavaScriptCore/wtf/url/src/URLParser.h | 575 | ||||
-rw-r--r-- | Source/JavaScriptCore/wtf/url/src/URLQueryCanonicalizer.h | 107 | ||||
-rw-r--r-- | Source/JavaScriptCore/wtf/url/src/URLSegments.cpp | 110 | ||||
-rw-r--r-- | Source/JavaScriptCore/wtf/url/src/URLSegments.h | 105 | ||||
-rw-r--r-- | Source/JavaScriptCore/wtf/url/wtfurl.gyp | 58 |
15 files changed, 1768 insertions, 0 deletions
diff --git a/Source/JavaScriptCore/wtf/url/api/ParsedURL.cpp b/Source/JavaScriptCore/wtf/url/api/ParsedURL.cpp new file mode 100644 index 0000000..abe0061 --- /dev/null +++ b/Source/JavaScriptCore/wtf/url/api/ParsedURL.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2010 Google, Inc. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "ParsedURL.h" + +#include "URLComponent.h" +#include "URLParser.h" + +namespace WTF { + +ParsedURL::ParsedURL(const URLString& spec) + : m_spec(spec) +{ + // FIXME: Handle non-standard URLs. 
+ if (spec.string().isEmpty()) + return; + URLParser<UChar>::parseStandardURL(spec.string().characters(), spec.string().length(), m_segments); +} + +String ParsedURL::scheme() const +{ + return segment(m_segments.scheme); +} + +String ParsedURL::username() const +{ + return segment(m_segments.username); +} + +String ParsedURL::password() const +{ + return segment(m_segments.password); +} + +String ParsedURL::host() const +{ + return segment(m_segments.host); +} + +String ParsedURL::port() const +{ + return segment(m_segments.port); +} + +String ParsedURL::path() const +{ + return segment(m_segments.path); +} + +String ParsedURL::query() const +{ + return segment(m_segments.query); +} + +String ParsedURL::fragment() const +{ + return segment(m_segments.fragment); +} + +String ParsedURL::segment(const URLComponent& component) const +{ + if (!component.isValid()) + return String(); + return m_spec.string().substring(component.begin(), component.length()); +} + +} diff --git a/Source/JavaScriptCore/wtf/url/api/ParsedURL.h b/Source/JavaScriptCore/wtf/url/api/ParsedURL.h new file mode 100644 index 0000000..ebc19b7 --- /dev/null +++ b/Source/JavaScriptCore/wtf/url/api/ParsedURL.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2010 Google, Inc. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ParsedURL_h +#define ParsedURL_h + +#include "URLSegments.h" +#include "URLString.h" + +namespace WTF { + +class URLComponent; + +class ParsedURL { +public: + explicit ParsedURL(const URLString&); + + // FIXME: Add a method for parsing non-canonicalized URLs. + + String scheme() const; + String username() const; + String password() const; + String host() const; + String port() const; + String path() const; + String query() const; + String fragment() const; + + URLString spec() { return m_spec; } + +private: + inline String segment(const URLComponent&) const; + + URLString m_spec; + URLSegments m_segments; +}; + +} + +#endif diff --git a/Source/JavaScriptCore/wtf/url/api/URLString.h b/Source/JavaScriptCore/wtf/url/api/URLString.h new file mode 100644 index 0000000..7395d49 --- /dev/null +++ b/Source/JavaScriptCore/wtf/url/api/URLString.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2010 Google, Inc. All Rights Reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef URLString_h +#define URLString_h + +#include "WTFString.h" + +namespace WTF { + +// URLString represents a string that's a canonicalized URL. +class URLString { +public: + URLString() { } + + const String& string() const { return m_string;} + +private: + friend class ParsedURL; + + // URLString can only be constructed by a ParsedURL. + explicit URLString(const String& string) + : m_string(string) + { + } + + String m_string; +}; + +} + +#endif + diff --git a/Source/JavaScriptCore/wtf/url/src/RawURLBuffer.h b/Source/JavaScriptCore/wtf/url/src/RawURLBuffer.h new file mode 100644 index 0000000..9bb2e8e --- /dev/null +++ b/Source/JavaScriptCore/wtf/url/src/RawURLBuffer.h @@ -0,0 +1,71 @@ +// Copyright 2010, Google Inc. All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef RawURLBuffer_h +#define RawURLBuffer_h + +#include "URLBuffer.h" +#include <stdlib.h> + +namespace WTF { + +// Simple implementation of the URLBuffer using new[]. This class +// also supports a static buffer so if it is allocated on the stack, most +// URLs can be canonicalized with no heap allocations. 
+template<typename CHAR, int inlineCapacity = 1024> +class RawURLBuffer : public URLBuffer<CHAR> { +public: + RawURLBuffer() : URLBuffer<CHAR>() + { + this->m_buffer = m_inlineBuffer; + this->m_capacity = inlineCapacity; + } + + virtual ~RawURLBuffer() + { + if (this->m_buffer != m_inlineBuffer) + delete[] this->m_buffer; + } + + virtual void resize(int size) + { + CHAR* newBuffer = new CHAR[size]; + memcpy(newBuffer, this->m_buffer, sizeof(CHAR) * (this->m_length < size ? this->m_length : size)); + if (this->m_buffer != m_inlineBuffer) + delete[] this->m_buffer; + this->m_buffer = newBuffer; + this->m_capacity = size; + } + +protected: + CHAR m_inlineBuffer[inlineCapacity]; +}; + +} // namespace WTF + +#endif // RawURLBuffer_h diff --git a/Source/JavaScriptCore/wtf/url/src/URLBuffer.h b/Source/JavaScriptCore/wtf/url/src/URLBuffer.h new file mode 100644 index 0000000..e07402e --- /dev/null +++ b/Source/JavaScriptCore/wtf/url/src/URLBuffer.h @@ -0,0 +1,136 @@ +// Copyright 2010, Google Inc. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
// IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef URLBuffer_h
#define URLBuffer_h

namespace WTF {

// Base class for the canonicalizer output, this maintains a buffer and
// supports simple resizing and append operations on it.
//
// It is VERY IMPORTANT that no virtual function calls be made on the common
// code path. We only have two virtual function calls, the destructor and a
// resize function that is called when the existing buffer is not big enough.
// The derived class is then in charge of setting up our buffer which we will
// manage.
template<typename CHAR>
class URLBuffer {
public:
    URLBuffer() : m_buffer(0), m_capacity(0), m_length(0) { }
    virtual ~URLBuffer() { }

    // Implemented to resize the buffer. This function should update the buffer
    // pointer to point to the new buffer, and any old data up to |m_length| in
    // the buffer must be copied over.
    //
    // The new size must be larger than m_capacity.
    virtual void resize(int) = 0;

    // Returns the character stored at |offset|. The return type is CHAR (not
    // plain char) so that characters wider than one byte are returned intact
    // instead of being truncated.
    inline CHAR at(int offset) const { return m_buffer[offset]; }

    // Overwrites the character stored at |offset|. No bounds check is made.
    inline void set(int offset, CHAR ch)
    {
        // FIXME: Add ASSERT(offset < length());
        m_buffer[offset] = ch;
    }

    // Returns the current capacity of the buffer. The length() is the number of
    // characters that have been declared to be written, but the capacity() is
    // the number that can be written without reallocation. If the caller must
    // write many characters at once, it can make sure there is enough capacity,
    // write the data, then use setLength() to declare the new length().
    int capacity() const { return m_capacity; }
    int length() const { return m_length; }

    // The output will NOT be 0-terminated. Call length() to get the length.
    const CHAR* data() const { return m_buffer; }
    CHAR* data() { return m_buffer; }

    // Shortens the URL to the new length. Used for "backing up" when processing
    // relative paths. This can also be used if an external function writes a lot
    // of data to the buffer (when using the "Raw" version below) beyond the end,
    // to declare the new length.
    void setLength(int length)
    {
        // FIXME: Add ASSERT(length < capacity());
        m_length = length;
    }

    // This is the most performance critical function, since it is called for
    // every character. If the buffer is full and cannot be grown, the
    // character is silently dropped.
    void append(CHAR ch)
    {
        // In VC2005, putting this common case first speeds up execution
        // dramatically because this branch is predicted as taken.
        if (m_length < m_capacity) {
            m_buffer[m_length] = ch;
            ++m_length;
            return;
        }

        if (!grow(1))
            return;

        m_buffer[m_length] = ch;
        ++m_length;
    }

    // Appends |strLength| characters starting at |str|. If the buffer cannot
    // be grown far enough, nothing is appended.
    void append(const CHAR* str, int strLength)
    {
        // Compare against the free space instead of summing the two lengths,
        // so the check itself cannot overflow int for very large requests.
        const int available = m_capacity - m_length;
        if (strLength > available) {
            if (!grow(strLength - available))
                return;
        }
        for (int i = 0; i < strLength; i++)
            m_buffer[m_length + i] = str[i];
        m_length += strLength;
    }

protected:
    // Doubles the capacity (starting from at least |minimumCapacity|) until
    // it exceeds the current capacity by |minimumAdditionalCapacity| or more,
    // then calls resize(). Returns false, without resizing, once the capacity
    // would have to be doubled at or beyond 1 << 30.
    bool grow(int minimumAdditionalCapacity)
    {
        static const int minimumCapacity = 16;
        int newCapacity = m_capacity ? m_capacity : minimumCapacity;
        do {
            if (newCapacity >= (1 << 30)) // Prevent overflow below.
                return false;
            newCapacity *= 2;
            // newCapacity never drops below m_capacity, so this subtraction
            // cannot overflow, unlike m_capacity + minimumAdditionalCapacity.
        } while (newCapacity - m_capacity < minimumAdditionalCapacity);
        resize(newCapacity);
        return true;
    }

    CHAR* m_buffer;
    int m_capacity;
    int m_length; // Used characters in the buffer.
};

} // namespace WTF

#endif // URLBuffer_h
// diff --git a/Source/JavaScriptCore/wtf/url/src/URLCharacterTypes.cpp b/Source/JavaScriptCore/wtf/url/src/URLCharacterTypes.cpp
// new file mode 100644
// index 0000000..ee2014e
// --- /dev/null
// +++ b/Source/JavaScriptCore/wtf/url/src/URLCharacterTypes.cpp
// @@ -0,0 +1,173 @@
// Copyright 2010, Google Inc. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ +#include "config.h" +#include "URLCharacterTypes.h" + +namespace WTF { + +const unsigned char URLCharacterTypes::characterTypeTable[0x100] = { + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, // 0x00 - 0x0f + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, // 0x10 - 0x1f + InvalidCharacter, // 0x20 ' ' (escape spaces in queries) + QueryCharacter | UserInfoCharacter, // 0x21 ! + InvalidCharacter, // 0x22 " + InvalidCharacter, // 0x23 # (invalid in query since it marks the ref) + QueryCharacter | UserInfoCharacter, // 0x24 $ + QueryCharacter | UserInfoCharacter, // 0x25 % + QueryCharacter | UserInfoCharacter, // 0x26 & + QueryCharacter | UserInfoCharacter, // 0x27 ' + QueryCharacter | UserInfoCharacter, // 0x28 ( + QueryCharacter | UserInfoCharacter, // 0x29 ) + QueryCharacter | UserInfoCharacter, // 0x2a * + QueryCharacter | UserInfoCharacter, // 0x2b + + QueryCharacter | UserInfoCharacter, // 0x2c , + QueryCharacter | UserInfoCharacter, // 0x2d - + QueryCharacter | UserInfoCharacter | IPv4Character, // 0x2e . 
+ QueryCharacter, // 0x2f / + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter | DecimalCharacter | OctalCharacter, // 0x30 0 + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter | DecimalCharacter | OctalCharacter, // 0x31 1 + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter | DecimalCharacter | OctalCharacter, // 0x32 2 + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter | DecimalCharacter | OctalCharacter, // 0x33 3 + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter | DecimalCharacter | OctalCharacter, // 0x34 4 + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter | DecimalCharacter | OctalCharacter, // 0x35 5 + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter | DecimalCharacter | OctalCharacter, // 0x36 6 + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter | DecimalCharacter | OctalCharacter, // 0x37 7 + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter | DecimalCharacter, // 0x38 8 + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter | DecimalCharacter, // 0x39 9 + QueryCharacter, // 0x3a : + QueryCharacter, // 0x3b ; + InvalidCharacter, // 0x3c < + QueryCharacter, // 0x3d = + InvalidCharacter, // 0x3e > + QueryCharacter, // 0x3f ? 
+ QueryCharacter, // 0x40 @ + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter, // 0x41 A + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter, // 0x42 B + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter, // 0x43 C + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter, // 0x44 D + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter, // 0x45 E + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter, // 0x46 F + QueryCharacter | UserInfoCharacter, // 0x47 G + QueryCharacter | UserInfoCharacter, // 0x48 H + QueryCharacter | UserInfoCharacter, // 0x49 I + QueryCharacter | UserInfoCharacter, // 0x4a J + QueryCharacter | UserInfoCharacter, // 0x4b K + QueryCharacter | UserInfoCharacter, // 0x4c L + QueryCharacter | UserInfoCharacter, // 0x4d M + QueryCharacter | UserInfoCharacter, // 0x4e N + QueryCharacter | UserInfoCharacter, // 0x4f O + QueryCharacter | UserInfoCharacter, // 0x50 P + QueryCharacter | UserInfoCharacter, // 0x51 Q + QueryCharacter | UserInfoCharacter, // 0x52 R + QueryCharacter | UserInfoCharacter, // 0x53 S + QueryCharacter | UserInfoCharacter, // 0x54 T + QueryCharacter | UserInfoCharacter, // 0x55 U + QueryCharacter | UserInfoCharacter, // 0x56 V + QueryCharacter | UserInfoCharacter, // 0x57 W + QueryCharacter | UserInfoCharacter | IPv4Character, // 0x58 X + QueryCharacter | UserInfoCharacter, // 0x59 Y + QueryCharacter | UserInfoCharacter, // 0x5a Z + QueryCharacter, // 0x5b [ + QueryCharacter, // 0x5c '\' + QueryCharacter, // 0x5d ] + QueryCharacter, // 0x5e ^ + QueryCharacter | UserInfoCharacter, // 0x5f _ + QueryCharacter, // 0x60 ` + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter, // 0x61 a + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter, // 0x62 b + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter, // 0x63 c + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter, // 0x64 d + 
QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter, // 0x65 e + QueryCharacter | UserInfoCharacter | IPv4Character | HexCharacter, // 0x66 f + QueryCharacter | UserInfoCharacter, // 0x67 g + QueryCharacter | UserInfoCharacter, // 0x68 h + QueryCharacter | UserInfoCharacter, // 0x69 i + QueryCharacter | UserInfoCharacter, // 0x6a j + QueryCharacter | UserInfoCharacter, // 0x6b k + QueryCharacter | UserInfoCharacter, // 0x6c l + QueryCharacter | UserInfoCharacter, // 0x6d m + QueryCharacter | UserInfoCharacter, // 0x6e n + QueryCharacter | UserInfoCharacter, // 0x6f o + QueryCharacter | UserInfoCharacter, // 0x70 p + QueryCharacter | UserInfoCharacter, // 0x71 q + QueryCharacter | UserInfoCharacter, // 0x72 r + QueryCharacter | UserInfoCharacter, // 0x73 s + QueryCharacter | UserInfoCharacter, // 0x74 t + QueryCharacter | UserInfoCharacter, // 0x75 u + QueryCharacter | UserInfoCharacter, // 0x76 v + QueryCharacter | UserInfoCharacter, // 0x77 w + QueryCharacter | UserInfoCharacter | IPv4Character, // 0x78 x + QueryCharacter | UserInfoCharacter, // 0x79 y + QueryCharacter | UserInfoCharacter, // 0x7a z + QueryCharacter, // 0x7b { + QueryCharacter, // 0x7c | + QueryCharacter, // 0x7d } + QueryCharacter | UserInfoCharacter, // 0x7e ~ + InvalidCharacter, // 0x7f + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, // 0x80 - 0x8f + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, // 0x90 - 0x9f + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + 
InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, // 0xa0 - 0xaf + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, // 0xb0 - 0xbf + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, // 0xc0 - 0xcf + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, // 0xd0 - 0xdf + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, // 0xe0 - 0xef + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, + InvalidCharacter, InvalidCharacter, InvalidCharacter, InvalidCharacter, // 0xf0 - 0xff +}; + +} diff --git a/Source/JavaScriptCore/wtf/url/src/URLCharacterTypes.h b/Source/JavaScriptCore/wtf/url/src/URLCharacterTypes.h new file mode 100644 index 0000000..194f6b0 --- /dev/null +++ 
b/Source/JavaScriptCore/wtf/url/src/URLCharacterTypes.h @@ -0,0 +1,61 @@ +// Copyright 2010, Google Inc. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

#ifndef URLCharacterTypes_h
#define URLCharacterTypes_h

namespace WTF {

// Table-driven character classification used by the URL code. Each public
// predicate tests one class bit of the character's characterTypeTable entry.
class URLCharacterTypes {
public:
    static inline bool isQueryChar(unsigned char c)
    {
        return isCharOfType(c, QueryCharacter);
    }

    static inline bool isIPv4Char(unsigned char c)
    {
        return isCharOfType(c, IPv4Character);
    }

    static inline bool isHexChar(unsigned char c)
    {
        return isCharOfType(c, HexCharacter);
    }

private:
    // Bit flags that are OR-ed together in the table entries.
    enum CharTypes {
        InvalidCharacter = 0x00,
        QueryCharacter = 0x01,
        UserInfoCharacter = 0x02,
        IPv4Character = 0x04,
        HexCharacter = 0x08,
        DecimalCharacter = 0x10,
        OctalCharacter = 0x20,
    };

    // One entry per 8-bit character value.
    static const unsigned char characterTypeTable[0x100];

    // True when the table entry for |c| has the |type| bit set.
    static inline bool isCharOfType(unsigned char c, CharTypes type)
    {
        return (characterTypeTable[c] & type) != 0;
    }
};

}

#endif
// diff --git a/Source/JavaScriptCore/wtf/url/src/URLComponent.h b/Source/JavaScriptCore/wtf/url/src/URLComponent.h
// new file mode 100644
// index 0000000..1b7976e
// --- /dev/null
// +++ b/Source/JavaScriptCore/wtf/url/src/URLComponent.h
// @@ -0,0 +1,77 @@
// Copyright 2010, Google Inc. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef URLComponent_h
#define URLComponent_h

namespace WTF {

// A (begin, length) pair describing a substring during URL parsing. A length
// of -1 marks the component as unspecified.
class URLComponent {
public:
    // The default component is unspecified: begin 0, length -1.
    URLComponent() : m_begin(0), m_length(-1) { }
    URLComponent(int begin, int length) : m_begin(begin), m_length(length) { }

    // Builds a component from a half-open range: |begin| is included, |end|
    // is not.
    static inline URLComponent fromRange(int begin, int end)
    {
        const int length = end - begin;
        return URLComponent(begin, length);
    }

    // A component is valid once a length has been given. A valid component
    // may still be empty, which records that it exists but has no content.
    bool isValid() const { return m_length != -1; }

    bool isNonEmpty() const { return m_length > 0; }
    bool isEmptyOrInvalid() const { return !isNonEmpty(); }

    // Puts the component back into the unspecified state.
    void reset() { *this = URLComponent(); }

    // Two components are equal when both begin and length match.
    bool operator==(const URLComponent& other) const
    {
        if (m_begin != other.m_begin)
            return false;
        return m_length == other.m_length;
    }

    int begin() const { return m_begin; }
    void setBegin(int begin) { m_begin = begin; }

    int length() const { return m_length; }
    void setLength(int length) { m_length = length; }

    // One past the last position covered by the component.
    int end() const { return m_begin + m_length; }

private:
    int m_begin; // Byte offset in the string of this component.
    int m_length; // Will be -1 if the component is unspecified.
};

} // namespace WTF

#endif // URLComponent_h
// diff --git a/Source/JavaScriptCore/wtf/url/src/URLEscape.cpp b/Source/JavaScriptCore/wtf/url/src/URLEscape.cpp
// new file mode 100644
// index 0000000..2987343
// --- /dev/null
// +++ b/Source/JavaScriptCore/wtf/url/src/URLEscape.cpp
// @@ -0,0 +1,39 @@
// Copyright 2010, Google Inc. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "config.h" +#include "URLEscape.h" + +namespace WTF { + +const char hexCharacterTable[16] = { + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', +}; + +} diff --git a/Source/JavaScriptCore/wtf/url/src/URLEscape.h b/Source/JavaScriptCore/wtf/url/src/URLEscape.h new file mode 100644 index 0000000..cc2b77f --- /dev/null +++ b/Source/JavaScriptCore/wtf/url/src/URLEscape.h @@ -0,0 +1,49 @@ +// Copyright 2010, Google Inc. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +#ifndef URLEscape_h +#define URLEscape_h + +#include "URLBuffer.h" + +namespace WTF { + +extern const char hexCharacterTable[16]; + +template<typename InChar, typename OutChar> +inline void appendURLEscapedCharacter(InChar ch, URLBuffer<OutChar>& buffer) +{ + buffer.append('%'); + buffer.append(hexCharacterTable[ch >> 4]); + buffer.append(hexCharacterTable[ch & 0xf]); +} + +} + +#endif diff --git a/Source/JavaScriptCore/wtf/url/src/URLParser.h b/Source/JavaScriptCore/wtf/url/src/URLParser.h new file mode 100644 index 0000000..4d5ca51 --- /dev/null +++ b/Source/JavaScriptCore/wtf/url/src/URLParser.h @@ -0,0 +1,575 @@ +/* Based on nsURLParsers.cc from Mozilla + * ------------------------------------- + * Copyright (C) 1998 Netscape Communications Corporation. 
+ * + * Other contributors: + * Darin Fisher (original author) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Alternatively, the contents of this file may be used under the terms + * of either the Mozilla Public License Version 1.1, found at + * http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public + * License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html + * (the "GPL"), in which case the provisions of the MPL or the GPL are + * applicable instead of those above. If you wish to allow use of your + * version of this file only under the terms of one of those two + * licenses (the MPL or the GPL) and not to allow others to use your + * version of this file under the LGPL, indicate your decision by + * deletingthe provisions above and replace them with the notice and + * other provisions required by the MPL or the GPL, as the case may be. + * If you do not delete the provisions above, a recipient may use your + * version of this file under any of the LGPL, the MPL or the GPL. 
+ */ + +#ifndef URLParser_h +#define URLParser_h + +#include "URLComponent.h" +#include "URLSegments.h" + +namespace WTF { + +template<typename CHAR> +class URLParser { +public: + enum SpecialPort { + UnspecifiedPort = -1, + InvalidPort = -2, + }; + + // This handles everything that may be an authority terminator, including + // backslash. For special backslash handling see parseAfterScheme. + static bool isPossibleAuthorityTerminator(CHAR ch) + { + return isURLSlash(ch) || ch == '?' || ch == '#' || ch == ';'; + } + + // Given an already-identified auth section, breaks it into its constituent + // parts. The port is returned as a component (the text following the port + // colon), not as a parsed integer; use parsePort to convert it. Parts that + // are absent are reset to invalid components. + static void parseAuthority(const CHAR* spec, const URLComponent& auth, URLComponent& username, URLComponent& password, URLComponent& host, URLComponent& port) + { + // FIXME: add ASSERT(auth.isValid()); // We should always get an authority. + if (!auth.length()) { + username.reset(); + password.reset(); + host.reset(); + port.reset(); + return; + } + + // Search backwards for @, which is the separator between the user info + // and the server info. RFC 3986 forbids @ from occurring in auth, but + // someone might include it in a password unescaped. + int i = auth.begin() + auth.length() - 1; + while (i > auth.begin() && spec[i] != '@') + --i; + + if (spec[i] == '@') { + // Found user info: <user-info>@<server-info> + parseUserInfo(spec, URLComponent(auth.begin(), i - auth.begin()), username, password); + parseServerInfo(spec, URLComponent::fromRange(i + 1, auth.begin() + auth.length()), host, port); + } else { + // No user info, everything is server info. + username.reset(); + password.reset(); + parseServerInfo(spec, auth, host, port); + } + } + + static bool extractScheme(const CHAR* spec, int specLength, URLComponent& scheme) + { + // Skip leading whitespace and control characters.
+ int begin = 0; + while (begin < specLength && shouldTrimFromURL(spec[begin])) + begin++; + if (begin == specLength) + return false; // Input is empty or all whitespace. + + // Find the first colon character. + for (int i = begin; i < specLength; i++) { + if (spec[i] == ':') { + scheme = URLComponent::fromRange(begin, i); + return true; + } + } + return false; // No colon found: no scheme + } + + // Fills in all members of the URLSegments structure (except for the + // scheme) for standard URLs. + // + // |spec| is the full spec being parsed, of length |specLength|. + // |afterScheme| is the character immediately following the scheme (after + // the colon) where we'll begin parsing. + static void parseAfterScheme(const CHAR* spec, int specLength, int afterScheme, URLSegments& parsed) + { + int numberOfSlashes = consecutiveSlashes(spec, afterScheme, specLength); + int afterSlashes = afterScheme + numberOfSlashes; + + // First split into two main parts, the authority (username, password, + // host, and port) and the full path (path, query, and reference). + URLComponent authority; + URLComponent fullPath; + + // Found "//<some data>", looks like an authority section. Treat + // everything from there to the next slash (or end of spec) to be the + // authority. Note that we ignore the number of slashes and treat it as + // the authority. + int authEnd = nextAuthorityTerminator(spec, afterSlashes, specLength); + authority = URLComponent(afterSlashes, authEnd - afterSlashes); + + if (authEnd == specLength) // No beginning of path found. + fullPath = URLComponent(); + else // Everything starting from the slash to the end is the path. + fullPath = URLComponent(authEnd, specLength - authEnd); + + // Now parse those two sub-parts. + parseAuthority(spec, authority, parsed.username, parsed.password, parsed.host, parsed.port); + parsePath(spec, fullPath, parsed.path, parsed.query, parsed.fragment); + } + + // The main parsing function for standard URLs. 
Standard URLs have a scheme, + // host, path, etc. + static void parseStandardURL(const CHAR* spec, int specLength, URLSegments& parsed) + { + // FIXME: add ASSERT(specLength >= 0); + + // Strip leading & trailing spaces and control characters. + int begin = 0; + trimURL(spec, begin, specLength); + + int afterScheme; + if (extractScheme(spec, specLength, parsed.scheme)) + afterScheme = parsed.scheme.end() + 1; // Skip past the colon. + else { + // Say there's no scheme when there is no colon. We could also say + // that everything is the scheme. Both would produce an invalid + // URL, but this way seems less wrong in more cases. + parsed.scheme.reset(); + afterScheme = begin; + } + parseAfterScheme(spec, specLength, afterScheme, parsed); + } + + static void parsePath(const CHAR* spec, const URLComponent& path, URLComponent& filepath, URLComponent& query, URLComponent& fragment) + { + // path = [/]<segment1>/<segment2>/<...>/<segmentN>;<param>?<query>#<fragment> + + // Special case when there is no path. + if (!path.isValid()) { + filepath.reset(); + query.reset(); + fragment.reset(); + return; + } + // FIXME: add ASSERT(path.length() > 0); // We should never have 0 length paths. + + // Search for first occurrence of either ? or #. + int pathEnd = path.begin() + path.length(); + + int querySeparator = -1; // Index of the '?' + int refSeparator = -1; // Index of the '#' + for (int i = path.begin(); i < pathEnd; i++) { + switch (spec[i]) { + case '?': + if (querySeparator < 0) + querySeparator = i; + break; + case '#': + refSeparator = i; + i = pathEnd; // Break out of the loop. + break; + default: + break; + } + } + + // Markers pointing to the character after each of these corresponding + // components. The code below works from the end back to the beginning, + // and will update these indices as it finds components that exist. + int fileEnd, queryEnd; + + // Fragment: from the # to the end of the path.
+ if (refSeparator >= 0) { + fileEnd = refSeparator; + queryEnd = refSeparator; + fragment = URLComponent::fromRange(refSeparator + 1, pathEnd); + } else { + fileEnd = pathEnd; + queryEnd = pathEnd; + fragment.reset(); + } + + // Query: everything from the ? to the next boundary (either + // the end of the path or the start of the fragment). + if (querySeparator >= 0) { + fileEnd = querySeparator; + query = URLComponent::fromRange(querySeparator + 1, queryEnd); + } else + query.reset(); + + // File path: treat an empty file path as no file path. + if (fileEnd != path.begin()) + filepath = URLComponent::fromRange(path.begin(), fileEnd); + else + filepath.reset(); + } + + // Initializes a path URL which is merely a scheme followed by a path. + // Examples include "about:foo" and "javascript:alert('bar');" + static void parsePathURL(const CHAR* spec, int specLength, URLSegments& parsed) + { + // Get the non-path and non-scheme parts of the URL out of the way, we + // never use them. + parsed.username.reset(); + parsed.password.reset(); + parsed.host.reset(); + parsed.port.reset(); + parsed.query.reset(); + parsed.fragment.reset(); + + // Strip leading & trailing spaces and control characters. + // FIXME: Perhaps this is unnecessary? + int begin = 0; + trimURL(spec, begin, specLength); + + // Handle empty specs or ones that contain only whitespace or control + // chars. + if (begin == specLength) { + parsed.scheme.reset(); + parsed.path.reset(); + return; + } + + // Extract the scheme, with the path being everything following. We also + // handle the case where there is no scheme. + if (extractScheme(&spec[begin], specLength - begin, parsed.scheme)) { + // Offset the results since we gave extractScheme a substring. + parsed.scheme.setBegin(parsed.scheme.begin() + begin); + + // For compatibility with the standard URL parser, we treat no path + // as -1, rather than having a length of 0 (we normally wouldn't + // care so much for these non-standard URLs).
+ if (parsed.scheme.end() == specLength - 1) + parsed.path.reset(); + else + parsed.path = URLComponent::fromRange(parsed.scheme.end() + 1, specLength); + } else { + // No scheme found, just path. + parsed.scheme.reset(); + parsed.path = URLComponent::fromRange(begin, specLength); + } + } + + static void parseMailtoURL(const CHAR* spec, int specLength, URLSegments& parsed) + { + // FIXME: add ASSERT(specLength >= 0); + + // Get the non-path and non-scheme parts of the URL out of the way, we + // never use them. + parsed.username.reset(); + parsed.password.reset(); + parsed.host.reset(); + parsed.port.reset(); + parsed.fragment.reset(); + parsed.query.reset(); // May use this; reset for convenience. + + // Strip leading & trailing spaces and control characters. + int begin = 0; + trimURL(spec, begin, specLength); + + // Handle empty specs or ones that contain only whitespace or control + // chars. + if (begin == specLength) { + parsed.scheme.reset(); + parsed.path.reset(); + return; + } + + int pathBegin = -1; + int pathEnd = -1; + + // Extract the scheme, with the path being everything following. We also + // handle the case where there is no scheme. + if (extractScheme(&spec[begin], specLength - begin, parsed.scheme)) { + // Offset the results since we gave extractScheme a substring. + parsed.scheme.setBegin(parsed.scheme.begin() + begin); + + if (parsed.scheme.end() != specLength - 1) { + pathBegin = parsed.scheme.end() + 1; + pathEnd = specLength; + } + } else { + // No scheme found, just path. + parsed.scheme.reset(); + pathBegin = begin; + pathEnd = specLength; + } + + // Split [pathBegin, pathEnd) into a path + query. 
+ for (int i = pathBegin; i < pathEnd; ++i) { + if (spec[i] == '?') { + parsed.query = URLComponent::fromRange(i + 1, pathEnd); + pathEnd = i; + break; + } + } + + // For compatibility with the standard URL parser, treat no path as + // -1, rather than having a length of 0 + if (pathBegin == pathEnd) + parsed.path.reset(); + else + parsed.path = URLComponent::fromRange(pathBegin, pathEnd); + } + + static int parsePort(const CHAR* spec, const URLComponent& component) + { + // Easy success case when there is no port. + const int maxDigits = 5; + if (component.isEmptyOrInvalid()) + return UnspecifiedPort; + + URLComponent nonZeroDigits(component.end(), 0); + for (int i = 0; i < component.length(); ++i) { + if (spec[component.begin() + i] != '0') { + nonZeroDigits = URLComponent::fromRange(component.begin() + i, component.end()); + break; + } + } + if (!nonZeroDigits.length()) + return 0; // All digits were 0. + + if (nonZeroDigits.length() > maxDigits) + return InvalidPort; + + int port = 0; + for (int i = 0; i < nonZeroDigits.length(); ++i) { + CHAR ch = spec[nonZeroDigits.begin() + i]; + if (!isPortDigit(ch)) + return InvalidPort; + port *= 10; + port += static_cast<char>(ch) - '0'; + } + if (port > 65535) + return InvalidPort; + return port; + } + + static void extractFileName(const CHAR* spec, const URLComponent& path, URLComponent& fileName) + { + // Handle empty paths: they have no file names. + if (path.isEmptyOrInvalid()) { + fileName.reset(); + return; + } + + // Search backwards for a parameter, which is a normally unused field + // in a URL delimited by a semicolon. We parse the parameter as part of + // the path, but here, we don't want to count it. The last semicolon is + // the parameter. + int fileEnd = path.end(); + for (int i = path.end() - 1; i > path.begin(); --i) { + if (spec[i] == ';') { + fileEnd = i; + break; + } + } + + // Now search backwards from the filename end to the previous slash + // to find the beginning of the filename. 
+ for (int i = fileEnd - 1; i >= path.begin(); --i) { + if (isURLSlash(spec[i])) { + // File name is everything following this character to the end + fileName = URLComponent::fromRange(i + 1, fileEnd); + return; + } + } + + // No slash found, this means the input was degenerate (generally paths + // will start with a slash). Let's call everything the file name. + fileName = URLComponent::fromRange(path.begin(), fileEnd); + } + + static bool extractQueryKeyValue(const CHAR* spec, URLComponent& query, URLComponent& key, URLComponent& value) + { + if (query.isEmptyOrInvalid()) + return false; + + int start = query.begin(); + int current = start; + int end = query.end(); + + // We assume the beginning of the input is the beginning of the "key" + // and we skip to the end of it. + key.setBegin(current); + while (current < end && spec[current] != '&' && spec[current] != '=') + ++current; + key.setLength(current - key.begin()); + + // Skip the separator after the key (if any). + if (current < end && spec[current] == '=') + ++current; + + // Find the value part. + value.setBegin(current); + while (current < end && spec[current] != '&') + ++current; + value.setLength(current - value.begin()); + + // Finally skip the next separator if any + if (current < end && spec[current] == '&') + ++current; + + // Save the new query + query = URLComponent::fromRange(current, end); + return true; + } + +// FIXME: This should be protected or private. +public: + // We treat slashes and backslashes the same for IE compatibility. + static inline bool isURLSlash(CHAR ch) + { + return ch == '/' || ch == '\\'; + } + + // Returns true if we should trim this character from the URL because it is + // a space or a control character. + static inline bool shouldTrimFromURL(CHAR ch) + { + return ch <= ' '; + } + + // Given an already-initialized begin index and end index (the index after + // the last CHAR in spec), this shrinks the range to eliminate + // "should-be-trimmed" characters. 
+ static inline void trimURL(const CHAR* spec, int& begin, int& end) + { + // Strip leading whitespace and control characters. + while (begin < end && shouldTrimFromURL(spec[begin])) + ++begin; + + // Strip trailing whitespace and control characters. We need the + // end > begin test for when the input string is all blanks; we don't + // want to back up past the start of the input. + while (end > begin && shouldTrimFromURL(spec[end - 1])) + --end; + } + + // Counts the number of consecutive slashes starting at the given offset + // in the given string of the given length. + static inline int consecutiveSlashes(const CHAR *string, int beginOffset, int stringLength) + { + int count = 0; + while (beginOffset + count < stringLength && isURLSlash(string[beginOffset + count])) + ++count; + return count; + } + +private: + // URLParser cannot be constructed. + URLParser(); + + // Returns true if the given character is a valid digit to use in a port. + static inline bool isPortDigit(CHAR ch) + { + return ch >= '0' && ch <= '9'; + } + + // Returns the offset of the next authority terminator in the input starting + // from startOffset. If no terminator is found, the return value will be equal + // to specLength. + static int nextAuthorityTerminator(const CHAR* spec, int startOffset, int specLength) + { + for (int i = startOffset; i < specLength; i++) { + if (isPossibleAuthorityTerminator(spec[i])) + return i; + } + return specLength; // Not found. + } + + static void parseUserInfo(const CHAR* spec, const URLComponent& user, URLComponent& username, URLComponent& password) + { + // Find the first colon in the user section, which separates the + // username and password.
+ int colonOffset = 0; + while (colonOffset < user.length() && spec[user.begin() + colonOffset] != ':') + ++colonOffset; + + if (colonOffset < user.length()) { + // Found separator: <username>:<password> + username = URLComponent(user.begin(), colonOffset); + password = URLComponent::fromRange(user.begin() + colonOffset + 1, user.begin() + user.length()); + } else { + // No separator, treat everything as the username + username = user; + password = URLComponent(); + } + } + + static void parseServerInfo(const CHAR* spec, const URLComponent& serverInfo, URLComponent& host, URLComponent& port) + { + if (!serverInfo.length()) { + // No server info, host name is empty. + host.reset(); + port.reset(); + return; + } + + // If the host starts with a left-bracket, assume the entire host is an + // IPv6 literal. Otherwise, assume none of the host is an IPv6 literal. + // This assumption will be overridden if we find a right-bracket. + // + // Our IPv6 address canonicalization code requires both brackets to + // exist, but the ability to locate an incomplete address can still be + // useful. + int ipv6Terminator = spec[serverInfo.begin()] == '[' ? serverInfo.end() : -1; + int colon = -1; + + // Find the last right-bracket, and the last colon. 
+ for (int i = serverInfo.begin(); i < serverInfo.end(); i++) { + switch (spec[i]) { + case ']': + ipv6Terminator = i; + break; + case ':': + colon = i; + break; + default: + break; + } + } + + if (colon > ipv6Terminator) { + // Found a port number: <hostname>:<port> + host = URLComponent::fromRange(serverInfo.begin(), colon); + if (!host.length()) + host.reset(); + port = URLComponent::fromRange(colon + 1, serverInfo.end()); + } else { + // No port: <hostname> + host = serverInfo; + port.reset(); + } + } +}; + +} // namespace WTF + +#endif // URLParser_h diff --git a/Source/JavaScriptCore/wtf/url/src/URLQueryCanonicalizer.h b/Source/JavaScriptCore/wtf/url/src/URLQueryCanonicalizer.h new file mode 100644 index 0000000..7740200 --- /dev/null +++ b/Source/JavaScriptCore/wtf/url/src/URLQueryCanonicalizer.h @@ -0,0 +1,107 @@ +// Copyright 2010, Google Inc. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +#ifndef URLQueryCanonicalizer_h +#define URLQueryCanonicalizer_h + +#include "RawURLBuffer.h" +#include "URLBuffer.h" +#include "URLCharacterTypes.h" +#include "URLComponent.h" +#include "URLEscape.h" + +namespace WTF { + +template<typename InChar, typename OutChar, void convertCharset(const InChar*, int length, URLBuffer<char>&)> +class URLQueryCanonicalizer { +public: + static void canonicalize(const InChar* spec, const URLComponent& query, URLBuffer<OutChar>& buffer, URLComponent& resultQuery) + { + if (query.length() < 0) { + resultQuery = URLComponent(); + return; + } + + buffer->append('?'); + resultQuery.setBegin(buffer->length()); + convertToQueryEncoding(spec, query, buffer); + resultQuery.setLength(buffer->length() - resultQuery.begin()); + } + +private: + static bool isAllASCII(const InChar* spec, const URLComponent& query) + { + int end = query.end(); + for (int i = query.begin(); i < end; ++i) { + if (static_cast<unsigned>(spec[i]) >= 0x80) + return false; + } + return true; + } + +#ifndef NDEBUG + static bool isRaw8Bit(const InChar* source, int length) + { + for (int i = source; i < length; ++i) { + if (source[i] & 0xFF != source[i]) + return false; + } + return true; + } +#endif + + static void appendRaw8BitQueryString(const InChar* source, int length, URLBuffer<OutChar>* buffer) + { + ASSERT(isRaw8Bit(source, length)); + for (int i = 0; i < length; ++i) { + if (!URLCharacterTypes::isQueryChar(source[i])) + 
appendURLEscapedCharacter(static_cast<unsigned char>(source[i]), buffer); + else + buffer->append(static_cast<char>(source[i])); + } + } + + static void convertToQueryEncoding(const InChar* spec, const URLComponent& query, URLBuffer<OutChar>& buffer) + { + if (isAllASCII(spec, query)) { + appendRaw8BitQueryString(&spec[query.begin()], query.length(), buffer); + return; + } + + RawURLBuffer<char, 1024> convertedQuery; + convertCharset(spec, query, convertedQuery); + appendRaw8BitQueryString(convertedQuery.data(), convertedQuery.length(), buffer); + } +}; + +} + +#endif + + diff --git a/Source/JavaScriptCore/wtf/url/src/URLSegments.cpp b/Source/JavaScriptCore/wtf/url/src/URLSegments.cpp new file mode 100644 index 0000000..bb9542f --- /dev/null +++ b/Source/JavaScriptCore/wtf/url/src/URLSegments.cpp @@ -0,0 +1,110 @@ +/* Based on nsURLParsers.cc from Mozilla + * ------------------------------------- + * Copyright (C) 1998 Netscape Communications Corporation. + * + * Other contributors: + * Darin Fisher (original author) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * Alternatively, the contents of this file may be used under the terms + * of either the Mozilla Public License Version 1.1, found at + * http://www.mozilla.org/MPL/ (the "MPL") or the GNU General Public + * License Version 2.0, found at http://www.fsf.org/copyleft/gpl.html + * (the "GPL"), in which case the provisions of the MPL or the GPL are + * applicable instead of those above. If you wish to allow use of your + * version of this file only under the terms of one of those two + * licenses (the MPL or the GPL) and not to allow others to use your + * version of this file under the LGPL, indicate your decision by + * deletingthe provisions above and replace them with the notice and + * other provisions required by the MPL or the GPL, as the case may be. + * If you do not delete the provisions above, a recipient may use your + * version of this file under any of the LGPL, the MPL or the GPL. + */ + +#include "config.h" +#include "URLSegments.h" + +namespace WTF { + +int URLSegments::length() const +{ + if (fragment.isValid()) + return fragment.end(); + return charactersBefore(Fragment, false); +} + +int URLSegments::charactersBefore(ComponentType type, bool includeDelimiter) const +{ + if (type == Scheme) + return scheme.begin(); + + int current = 0; + if (scheme.isValid()) + current = scheme.end() + 1; // Advance over the ':' at the end of the scheme. + + if (username.isValid()) { + if (type <= Username) + return username.begin(); + current = username.end() + 1; // Advance over the '@' or ':' at the end. + } + + if (password.isValid()) { + if (type <= Password) + return password.begin(); + current = password.end() + 1; // Advance over the '@' at the end. 
+ } + + if (host.isValid()) { + if (type <= Host) + return host.begin(); + current = host.end(); + } + + if (port.isValid()) { + if (type < Port || (type == Port && includeDelimiter)) + return port.begin() - 1; // Back over delimiter. + if (type == Port) + return port.begin(); // Don't want delimiter counted. + current = port.end(); + } + + if (path.isValid()) { + if (type <= Path) + return path.begin(); + current = path.end(); + } + + if (query.isValid()) { + if (type < Query || (type == Query && includeDelimiter)) + return query.begin() - 1; // Back over delimiter. + if (type == Query) + return query.begin(); // Don't want delimiter counted. + current = query.end(); + } + + if (fragment.isValid()) { + if (type == Fragment && !includeDelimiter) + return fragment.begin(); // Don't want delimiter counted. + + // When there is a fragment and we get here, either the delimiter was requested or + // the component we wanted was before this and not found; both resolve to the delimiter. + return fragment.begin() - 1; // Back over delimiter. + } + + return current; +} + +} // namespace WTF diff --git a/Source/JavaScriptCore/wtf/url/src/URLSegments.h b/Source/JavaScriptCore/wtf/url/src/URLSegments.h new file mode 100644 index 0000000..436c7fe --- /dev/null +++ b/Source/JavaScriptCore/wtf/url/src/URLSegments.h @@ -0,0 +1,105 @@ +// Copyright 2007, Google Inc. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc.
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef URLSegments_h +#define URLSegments_h + +#include "URLComponent.h" + +namespace WTF { + +// A structure that holds the identified parts of an input URL. This structure +// does NOT store the URL itself. The caller will have to store the URL text +// and its corresponding URLSegments structure separately. +class URLSegments { +public: + // Identifies different components. + enum ComponentType { + Scheme, + Username, + Password, + Host, + Port, + Path, + Query, + Fragment, + }; + + URLSegments() { } + + // Returns the length of the URL (the end of the last component). + // + // Note that for some invalid, non-canonical URLs, this may not be the length + // of the string. For example "http://": the parsed structure will only + // contain an entry for the four-character scheme, and it doesn't know about + // the "://". For all other last-components, it will return the real length. 
+ int length() const; + + // Returns the number of characters before the given component if it exists, + // or where the component would be if it did exist. This will return the + // string length if the component would be appended to the end. + // + // Note that this can get a little funny for the port, query, and fragment + // components which have a delimiter that is not counted as part of the + // component. The |includeDelimiter| flag controls if you want this counted + // as part of the component or not when the component exists. + // + // This example shows the difference between the two flags for two of these + // delimited components that are present (the port and query) and one that + // isn't (the fragment). The components that this flag affects are marked + // with a *. + // 0 1 2 + // 012345678901234567890 + // Example input: http://foo:80/?query + // includeDelimiter=true, ...=false ("<-" indicates different) + // Scheme: 0 0 + // Username: 5 5 + // Password: 5 5 + // Host: 7 7 + // *Port: 10 11 <- + // Path: 13 13 + // *Query: 14 15 <- + // *Fragment: 20 20 + // + int charactersBefore(ComponentType, bool includeDelimiter) const; + + // Each component excludes the related delimiters and has a length of -1 + // if that component is absent but 0 if the component exists but is empty. + URLComponent scheme; + URLComponent username; + URLComponent password; + URLComponent host; + URLComponent port; + URLComponent path; + URLComponent query; + URLComponent fragment; +}; + +} // namespace WTF + +#endif // URLSegments_h diff --git a/Source/JavaScriptCore/wtf/url/wtfurl.gyp b/Source/JavaScriptCore/wtf/url/wtfurl.gyp new file mode 100644 index 0000000..f254ae4 --- /dev/null +++ b/Source/JavaScriptCore/wtf/url/wtfurl.gyp @@ -0,0 +1,58 @@ +# Copyright (C) 2009 Google Inc. All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +{ + 'variables': { + 'chromium_code': 1, + }, + 'targets': [ + { + 'target_name': 'wtfurl', + 'type': '<(library)', + 'msvs_guid': 'EF5E94AB-B646-4E5B-A058-52EF07B8351C', + 'dependencies': [ + ], + 'sources': [ + 'src/URLComponent.h', + 'src/URLSegments.cpp', + 'src/URLSegments.h', + ], + 'direct_dependent_settings': { + 'include_dirs': [ + 'src', + ], + }, + }, + ], +} + +# Local Variables: +# tab-width:2 +# indent-tabs-mode:nil +# End: +# vim: set expandtab tabstop=2 shiftwidth=2: |