diff options
Diffstat (limited to 'WebKitTools/Scripts/webkitpy/thirdparty')
46 files changed, 7468 insertions, 0 deletions
diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/BeautifulSoup.py b/WebKitTools/Scripts/webkitpy/thirdparty/BeautifulSoup.py new file mode 100644 index 0000000..34204e7 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/BeautifulSoup.py @@ -0,0 +1,2000 @@ +"""Beautiful Soup +Elixir and Tonic +"The Screen-Scraper's Friend" +http://www.crummy.com/software/BeautifulSoup/ + +Beautiful Soup parses a (possibly invalid) XML or HTML document into a +tree representation. It provides methods and Pythonic idioms that make +it easy to navigate, search, and modify the tree. + +A well-formed XML/HTML document yields a well-formed data +structure. An ill-formed XML/HTML document yields a correspondingly +ill-formed data structure. If your document is only locally +well-formed, you can use this library to find and process the +well-formed part of it. + +Beautiful Soup works with Python 2.2 and up. It has no external +dependencies, but you'll have more success at converting data to UTF-8 +if you also install these three packages: + +* chardet, for auto-detecting character encodings + http://chardet.feedparser.org/ +* cjkcodecs and iconv_codec, which add more encodings to the ones supported + by stock Python. + http://cjkpython.i18n.org/ + +Beautiful Soup defines classes for two main parsing strategies: + + * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific + language that kind of looks like XML. + + * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid + or invalid. This class has web browser-like heuristics for + obtaining a sensible parse tree in the face of common HTML errors. + +Beautiful Soup also defines a class (UnicodeDammit) for autodetecting +the encoding of an HTML or XML document, and converting it to +Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser. + +For more than you ever wanted to know about Beautiful Soup, see the +documentation: +http://www.crummy.com/software/BeautifulSoup/documentation.html + +Here, have some legalese: + +Copyright (c) 2004-2009, Leonard Richardson + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the the Beautiful Soup Consortium and All + Night Kosher Bakery nor the names of its contributors may be + used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. + +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "3.1.0.1" +__copyright__ = "Copyright (c) 2004-2009 Leonard Richardson" +__license__ = "New-style BSD" + +import codecs +import markupbase +import types +import re +from HTMLParser import HTMLParser, HTMLParseError +try: + from htmlentitydefs import name2codepoint +except ImportError: + name2codepoint = {} +try: + set +except NameError: + from sets import Set as set + +#These hacks make Beautiful Soup able to parse XML with namespaces +markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match + +DEFAULT_OUTPUT_ENCODING = "utf-8" + +# First, the classes that represent markup elements. + +def sob(unicode, encoding): + """Returns either the given Unicode string or its encoding.""" + if encoding is None: + return unicode + else: + return unicode.encode(encoding) + +class PageElement: + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def setup(self, parent=None, previous=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = None + self.previousSibling = None + self.nextSibling = None + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def replaceWith(self, replaceWith): + oldParent = self.parent + myIndex = self.parent.contents.index(self) + if hasattr(replaceWith, 'parent') and replaceWith.parent == self.parent: + # We're replacing this element with one of its siblings. + index = self.parent.contents.index(replaceWith) + if index and index < myIndex: + # Furthermore, it comes before this element. That + # means that when we extract it, the index of this + # element will change. + myIndex = myIndex - 1 + self.extract() + oldParent.insert(myIndex, replaceWith) + + def extract(self): + """Destructively rips this element out of the tree.""" + if self.parent: + try: + self.parent.contents.remove(self) + except ValueError: + pass + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. + lastChild = self._lastRecursiveChild() + nextElement = lastChild.next + + if self.previous: + self.previous.next = nextElement + if nextElement: + nextElement.previous = self.previous + self.previous = None + lastChild.next = None + + self.parent = None + if self.previousSibling: + self.previousSibling.nextSibling = self.nextSibling + if self.nextSibling: + self.nextSibling.previousSibling = self.previousSibling + self.previousSibling = self.nextSibling = None + return self + + def _lastRecursiveChild(self): + "Finds the last element beneath this object to be parsed." + lastChild = self + while hasattr(lastChild, 'contents') and lastChild.contents: + lastChild = lastChild.contents[-1] + return lastChild + + def insert(self, position, newChild): + if (isinstance(newChild, basestring) + or isinstance(newChild, unicode)) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + + position = min(position, len(self.contents)) + if hasattr(newChild, 'parent') and newChild.parent != None: + # We're 'inserting' an element that's already one + # of this object's children. + if newChild.parent == self: + index = self.find(newChild) + if index and index < position: + # Furthermore we're moving it further down the + # list of this object's children. That means that + # when we extract this element, our target index + # will jump down one. + position = position - 1 + newChild.extract() + + newChild.parent = self + previousChild = None + if position == 0: + newChild.previousSibling = None + newChild.previous = self + else: + previousChild = self.contents[position-1] + newChild.previousSibling = previousChild + newChild.previousSibling.nextSibling = newChild + newChild.previous = previousChild._lastRecursiveChild() + if newChild.previous: + newChild.previous.next = newChild + + newChildsLastElement = newChild._lastRecursiveChild() + + if position >= len(self.contents): + newChild.nextSibling = None + + parent = self + parentsNextSibling = None + while not parentsNextSibling: + parentsNextSibling = parent.nextSibling + parent = parent.parent + if not parent: # This is the last element in the document. + break + if parentsNextSibling: + newChildsLastElement.next = parentsNextSibling + else: + newChildsLastElement.next = None + else: + nextChild = self.contents[position] + newChild.nextSibling = nextChild + if newChild.nextSibling: + newChild.nextSibling.previousSibling = newChild + newChildsLastElement.next = nextChild + + if newChildsLastElement.next: + newChildsLastElement.next.previous = newChildsLastElement + self.contents.insert(position, newChild) + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.insert(len(self.contents), tag) + + def findNext(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._findOne(self.findAllNext, name, attrs, text, **kwargs) + + def findAllNext(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.nextGenerator, + **kwargs) + + def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears after this Tag in the document.""" + return self._findOne(self.findNextSiblings, name, attrs, text, + **kwargs) + + def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.nextSiblingGenerator, **kwargs) + fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + + def findPrevious(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) + + def findAllPrevious(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.previousGenerator, + **kwargs) + fetchPrevious = findAllPrevious # Compatibility with pre-3.x + + def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._findOne(self.findPreviousSiblings, name, attrs, text, + **kwargs) + + def findPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.previousSiblingGenerator, **kwargs) + fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x + + def findParent(self, name=None, attrs={}, **kwargs): + """Returns the closest parent of this Tag that matches the given + criteria.""" + # NOTE: We can't use _findOne because findParents takes a different + # set of arguments. + r = None + l = self.findParents(name, attrs, 1) + if l: + r = l[0] + return r + + def findParents(self, name=None, attrs={}, limit=None, **kwargs): + """Returns the parents of this Tag that match the given + criteria.""" + + return self._findAll(name, attrs, None, limit, self.parentGenerator, + **kwargs) + fetchParents = findParents # Compatibility with pre-3.x + + #These methods do the real heavy lifting. + + def _findOne(self, method, name, attrs, text, **kwargs): + r = None + l = method(name, attrs, text, 1, **kwargs) + if l: + r = l[0] + return r + + def _findAll(self, name, attrs, text, limit, generator, **kwargs): + "Iterates over a generator looking for things that match." + + if isinstance(name, SoupStrainer): + strainer = name + else: + # Build a SoupStrainer + strainer = SoupStrainer(name, attrs, text, **kwargs) + results = ResultSet(strainer) + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + if i: + found = strainer.search(i) + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #These Generators can be used to navigate starting from both + #NavigableStrings and Tags. + def nextGenerator(self): + i = self + while i: + i = i.next + yield i + + def nextSiblingGenerator(self): + i = self + while i: + i = i.nextSibling + yield i + + def previousGenerator(self): + i = self + while i: + i = i.previous + yield i + + def previousSiblingGenerator(self): + i = self + while i: + i = i.previousSibling + yield i + + def parentGenerator(self): + i = self + while i: + i = i.parent + yield i + + # Utility methods + def substituteEncoding(self, str, encoding=None): + encoding = encoding or "utf-8" + return str.replace("%SOUP-ENCODING%", encoding) + + def toEncoding(self, s, encoding=None): + """Encodes an object to a string in some encoding, or to Unicode. + .""" + if isinstance(s, unicode): + if encoding: + s = s.encode(encoding) + elif isinstance(s, str): + if encoding: + s = s.encode(encoding) + else: + s = unicode(s) + else: + if encoding: + s = self.toEncoding(str(s), encoding) + else: + s = unicode(s) + return s + +class NavigableString(unicode, PageElement): + + def __new__(cls, value): + """Create a new NavigableString. + + When unpickling a NavigableString, this method is called with + the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be + passed in to the superclass's __new__ or the superclass won't know + how to handle non-ASCII characters. + """ + if isinstance(value, unicode): + return unicode.__new__(cls, value) + return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + + def __getnewargs__(self): + return (unicode(self),) + + def __getattr__(self, attr): + """text.string gives you text. This is for backwards + compatibility for Navigable*String, but for CData* it lets you + get the string without the CData wrapper.""" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + + def encode(self, encoding=DEFAULT_OUTPUT_ENCODING): + return self.decode().encode(encoding) + + def decodeGivenEventualEncoding(self, eventualEncoding): + return self + +class CData(NavigableString): + + def decodeGivenEventualEncoding(self, eventualEncoding): + return u'<![CDATA[' + self + u']]>' + +class ProcessingInstruction(NavigableString): + + def decodeGivenEventualEncoding(self, eventualEncoding): + output = self + if u'%SOUP-ENCODING%' in output: + output = self.substituteEncoding(output, eventualEncoding) + return u'<?' + output + u'?>' + +class Comment(NavigableString): + def decodeGivenEventualEncoding(self, eventualEncoding): + return u'<!--' + self + u'-->' + +class Declaration(NavigableString): + def decodeGivenEventualEncoding(self, eventualEncoding): + return u'<!' + self + u'>' + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def _invert(h): + "Cheap function to invert a hash." + i = {} + for k,v in h.items(): + i[v] = k + return i + + XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", + "quot" : '"', + "amp" : "&", + "lt" : "<", + "gt" : ">" } + + XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) + + def _convertEntities(self, match): + """Used in a call to re.sub to replace HTML, XML, and numeric + entities with the appropriate Unicode characters. If HTML + entities are being converted, any unrecognized entities are + escaped.""" + x = match.group(1) + if self.convertHTMLEntities and x in name2codepoint: + return unichr(name2codepoint[x]) + elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: + if self.convertXMLEntities: + return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] + else: + return u'&%s;' % x + elif len(x) > 0 and x[0] == '#': + # Handle numeric entities + if len(x) > 1 and x[1] == 'x': + return unichr(int(x[2:], 16)) + else: + return unichr(int(x[1:])) + + elif self.escapeUnrecognizedEntities: + return u'&%s;' % x + else: + return u'&%s;' % x + + def __init__(self, parser, name, attrs=None, parent=None, + previous=None): + "Basic constructor." + + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected + self.parserClass = parser.__class__ + self.isSelfClosing = parser.isSelfClosingTag(name) + self.name = name + if attrs == None: + attrs = [] + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + self.containsSubstitutions = False + self.convertHTMLEntities = parser.convertHTMLEntities + self.convertXMLEntities = parser.convertXMLEntities + self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities + + def convert(kval): + "Converts HTML, XML and numeric entities in the attribute value." + k, val = kval + if val is None: + return kval + return (k, re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", + self._convertEntities, val)) + self.attrs = map(convert, self.attrs) + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def has_key(self, key): + return self._getAttrMap().has_key(key) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." + return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.findAll, args, kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. Should this be fixed?""" + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + """Renders this tag as a string.""" + return self.decode(eventualEncoding=encoding) + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + + ")") + + def _sub_entity(self, x): + """Used with a regular expression to substitute the + appropriate XML entity for an XML special character.""" + return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" + + def __unicode__(self): + return self.decode() + + def __str__(self): + return self.encode() + + def encode(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + return self.decode(prettyPrint, indentLevel, encoding).encode(encoding) + + def decode(self, prettyPrint=False, indentLevel=0, + eventualEncoding=DEFAULT_OUTPUT_ENCODING): + """Returns a string or Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding.""" + + attrs = [] + if self.attrs: + for key, val in self.attrs: + fmt = '%s="%s"' + if isString(val): + if (self.containsSubstitutions + and eventualEncoding is not None + and '%SOUP-ENCODING%' in val): + val = self.substituteEncoding(val, eventualEncoding) + + # The attribute value either: + # + # * Contains no embedded double quotes or single quotes. + # No problem: we enclose it in double quotes. + # * Contains embedded single quotes. No problem: + # double quotes work here too. + # * Contains embedded double quotes. No problem: + # we enclose it in single quotes. + # * Embeds both single _and_ double quotes. This + # can't happen naturally, but it can happen if + # you modify an attribute value after parsing + # the document. Now we have a bit of a + # problem. We solve it by enclosing the + # attribute in single quotes, and escaping any + # embedded single quotes to XML entities. + if '"' in val: + fmt = "%s='%s'" + if "'" in val: + # TODO: replace with apos when + # appropriate. + val = val.replace("'", "&squot;") + + # Now we're okay w/r/t quotes. But the attribute + # value might also contain angle brackets, or + # ampersands that aren't part of entities. We need + # to escape those to XML entities too. + val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val) + if val is None: + # Handle boolean attributes. + decoded = key + else: + decoded = fmt % (key, val) + attrs.append(decoded) + close = '' + closeTag = '' + if self.isSelfClosing: + close = ' /' + else: + closeTag = '</%s>' % self.name + + indentTag, indentContents = 0, 0 + if prettyPrint: + indentTag = indentLevel + space = (' ' * (indentTag-1)) + indentContents = indentTag + 1 + contents = self.decodeContents(prettyPrint, indentContents, + eventualEncoding) + if self.hidden: + s = contents + else: + s = [] + attributeString = '' + if attrs: + attributeString = ' ' + ' '.join(attrs) + if prettyPrint: + s.append(space) + s.append('<%s%s%s>' % (self.name, attributeString, close)) + if prettyPrint: + s.append("\n") + s.append(contents) + if prettyPrint and contents and contents[-1] != "\n": + s.append("\n") + if prettyPrint and closeTag: + s.append(space) + s.append(closeTag) + if prettyPrint and closeTag and self.nextSibling: + s.append("\n") + s = ''.join(s) + return s + + def decompose(self): + """Recursively destroys the contents of this tree.""" + contents = [i for i in self.contents] + for i in contents: + if isinstance(i, Tag): + i.decompose() + else: + i.extract() + self.extract() + + def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING): + return self.encode(encoding, True) + + def encodeContents(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + return self.decodeContents(prettyPrint, indentLevel).encode(encoding) + + def decodeContents(self, prettyPrint=False, indentLevel=0, + eventualEncoding=DEFAULT_OUTPUT_ENCODING): + """Renders the contents of this tag as a string in the given + encoding. If encoding is None, returns a Unicode string..""" + s=[] + for c in self: + text = None + if isinstance(c, NavigableString): + text = c.decodeGivenEventualEncoding(eventualEncoding) + elif isinstance(c, Tag): + s.append(c.decode(prettyPrint, indentLevel, eventualEncoding)) + if text and prettyPrint: + text = text.strip() + if text: + if prettyPrint: + s.append(" " * (indentLevel-1)) + s.append(text) + if prettyPrint: + s.append("\n") + return ''.join(s) + + #Soup methods + + def find(self, name=None, attrs={}, recursive=True, text=None, + **kwargs): + """Return only the first child of this Tag matching the given + criteria.""" + r = None + l = self.findAll(name, attrs, recursive, text, 1, **kwargs) + if l: + r = l[0] + return r + findChild = find + + def findAll(self, name=None, attrs={}, recursive=True, text=None, + limit=None, **kwargs): + """Extracts a list of Tag objects that match the given + criteria. You can specify the name of the Tag and any + attributes you want the Tag to have. + + The value of a key-value pair in the 'attrs' map can be a + string, a list of strings, a regular expression object, or a + callable that takes a string and returns whether or not the + string matches for some custom definition of 'matches'. The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._findAll(name, attrs, text, limit, generator, **kwargs) + findChildren = findAll + + # Pre-3.x compatibility methods. Will go away in 4.0. + first = find + fetch = findAll + + def fetchText(self, text=None, recursive=True, limit=None): + return self.findAll(text=text, recursive=recursive, limit=limit) + + def firstText(self, text=None, recursive=True): + return self.find(text=text, recursive=recursive) + + # 3.x compatibility methods. Will go away in 4.0. + def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + if encoding is None: + return self.decodeContents(prettyPrint, indentLevel, encoding) + else: + return self.encodeContents(encoding, prettyPrint, indentLevel) + + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def recursiveChildGenerator(self): + if not len(self.contents): + raise StopIteration + stopNode = self._lastRecursiveChild().next + current = self.contents[0] + while current is not stopNode: + yield current + current = current.next + + def childGenerator(self): + if not len(self.contents): + raise StopIteration + current = self.contents[0] + while current: + yield current + current = current.nextSibling + raise StopIteration + +# Next, a couple classes to represent queries and their results. +class SoupStrainer: + """Encapsulates a number of ways of matching a markup element (tag or + text).""" + + def __init__(self, name=None, attrs={}, text=None, **kwargs): + self.name = name + if isString(attrs): + kwargs['class'] = attrs + attrs = None + if kwargs: + if attrs: + attrs = attrs.copy() + attrs.update(kwargs) + else: + attrs = kwargs + self.attrs = attrs + self.text = text + + def __str__(self): + if self.text: + return self.text + else: + return "%s|%s" % (self.name, self.attrs) + + def searchTag(self, markupName=None, markupAttrs={}): + found = None + markup = None + if isinstance(markupName, Tag): + markup = markupName + markupAttrs = markup + callFunctionWithTagData = callable(self.name) \ + and not isinstance(markupName, Tag) + + if (not self.name) \ + or callFunctionWithTagData \ + or (markup and self._matches(markup, self.name)) \ + or (not markup and self._matches(markupName, self.name)): + if callFunctionWithTagData: + match = self.name(markupName, markupAttrs) + else: + match = True + markupAttrMap = None + for attr, matchAgainst in self.attrs.items(): + if not markupAttrMap: + if hasattr(markupAttrs, 'get'): + markupAttrMap = markupAttrs + else: + markupAttrMap = {} + for k,v in markupAttrs: + markupAttrMap[k] = v + attrValue = markupAttrMap.get(attr) + if not self._matches(attrValue, matchAgainst): + match = False + break + if match: + if markup: + found = markup + else: + found = markupName + return found + + def search(self, markup): + #print 'looking for %s in %s' % (self, markup) + found = None + # If given a list of items, scan it for a text element that + # matches. + if isList(markup) and not isinstance(markup, Tag): + for element in markup: + if isinstance(element, NavigableString) \ + and self.search(element): + found = element + break + # If it's a Tag, make sure its name or attributes match. + # Don't bother with Tags if we're searching for text. + elif isinstance(markup, Tag): + if not self.text: + found = self.searchTag(markup) + # If it's text, make sure the text matches. + elif isinstance(markup, NavigableString) or \ + isString(markup): + if self._matches(markup, self.text): + found = markup + else: + raise Exception, "I don't know how to match against a %s" \ + % markup.__class__ + return found + + def _matches(self, markup, matchAgainst): + #print "Matching %s against %s" % (markup, matchAgainst) + result = False + if matchAgainst == True and type(matchAgainst) == types.BooleanType: + result = markup != None + elif callable(matchAgainst): + result = matchAgainst(markup) + else: + #Custom match methods take the tag as an argument, but all + #other ways of matching match the tag name as a string. + if isinstance(markup, Tag): + markup = markup.name + if markup is not None and not isString(markup): + markup = unicode(markup) + #Now we know that chunk is either a string, or None. + if hasattr(matchAgainst, 'match'): + # It's a regexp object. + result = markup and matchAgainst.search(markup) + elif (isList(matchAgainst) + and (markup is not None or not isString(matchAgainst))): + result = markup in matchAgainst + elif hasattr(matchAgainst, 'items'): + result = markup.has_key(matchAgainst) + elif matchAgainst and isString(markup): + if isinstance(markup, unicode): + matchAgainst = unicode(matchAgainst) + else: + matchAgainst = str(matchAgainst) + + if not result: + result = matchAgainst == markup + return result + +class ResultSet(list): + """A ResultSet is just a list that keeps track of the SoupStrainer + that created it.""" + def __init__(self, source): + list.__init__([]) + self.source = source + +# Now, some helper functions. + +def isList(l): + """Convenience method that works with all 2.x versions of Python + to determine whether or not something is listlike.""" + return ((hasattr(l, '__iter__') and not isString(l)) + or (type(l) in (types.ListType, types.TupleType))) + +def isString(s): + """Convenience method that works with all 2.x versions of Python + to determine whether or not something is stringlike.""" + try: + return isinstance(s, unicode) or isinstance(s, basestring) + except NameError: + return isinstance(s, str) + +def buildTagMap(default, *args): + """Turns a list of maps, lists, or scalars into a single map. + Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and + NESTING_RESET_TAGS maps out of lists and partial maps.""" + built = {} + for portion in args: + if hasattr(portion, 'items'): + #It's a map. Merge it. + for k,v in portion.items(): + built[k] = v + elif isList(portion) and not isString(portion): + #It's a list. Map each item to the default. + for k in portion: + built[k] = default + else: + #It's a scalar. Map it to the default. + built[portion] = default + return built + +# Now, the parser classes. + +class HTMLParserBuilder(HTMLParser): + + def __init__(self, soup): + HTMLParser.__init__(self) + self.soup = soup + + # We inherit feed() and reset(). + + def handle_starttag(self, name, attrs): + if name == 'meta': + self.soup.extractCharsetFromMeta(attrs) + else: + self.soup.unknown_starttag(name, attrs) + + def handle_endtag(self, name): + self.soup.unknown_endtag(name) + + def handle_data(self, content): + self.soup.handle_data(content) + + def _toStringSubclass(self, text, subclass): + """Adds a certain piece of text to the tree as a NavigableString + subclass.""" + self.soup.endData() + self.handle_data(text) + self.soup.endData(subclass) + + def handle_pi(self, text): + """Handle a processing instruction as a ProcessingInstruction + object, possibly one with a %SOUP-ENCODING% slot into which an + encoding will be plugged later.""" + if text[:3] == "xml": + text = u"xml version='1.0' encoding='%SOUP-ENCODING%'" + self._toStringSubclass(text, ProcessingInstruction) + + def handle_comment(self, text): + "Handle comments as Comment objects." + self._toStringSubclass(text, Comment) + + def handle_charref(self, ref): + "Handle character references as data." + if self.soup.convertEntities: + data = unichr(int(ref)) + else: + data = '&#%s;' % ref + self.handle_data(data) + + def handle_entityref(self, ref): + """Handle entity references as data, possibly converting known + HTML and/or XML entity references to the corresponding Unicode + characters.""" + data = None + if self.soup.convertHTMLEntities: + try: + data = unichr(name2codepoint[ref]) + except KeyError: + pass + + if not data and self.soup.convertXMLEntities: + data = self.soup.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) + + if not data and self.soup.convertHTMLEntities and \ + not self.soup.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): + # TODO: We've got a problem here. We're told this is + # an entity reference, but it's not an XML entity + # reference or an HTML entity reference. Nonetheless, + # the logical thing to do is to pass it through as an + # unrecognized entity reference. + # + # Except: when the input is "&carol;" this function + # will be called with input "carol". When the input is + # "AT&T", this function will be called with input + # "T". We have no way of knowing whether a semicolon + # was present originally, so we don't know whether + # this is an unknown entity or just a misplaced + # ampersand. + # + # The more common case is a misplaced ampersand, so I + # escape the ampersand and omit the trailing semicolon. + data = "&%s" % ref + if not data: + # This case is different from the one above, because we + # haven't already gone through a supposedly comprehensive + # mapping of entities to Unicode characters. We might not + # have gone through any mapping at all. So the chances are + # very high that this is a real entity, and not a + # misplaced ampersand. + data = "&%s;" % ref + self.handle_data(data) + + def handle_decl(self, data): + "Handle DOCTYPEs and the like as Declaration objects." + self._toStringSubclass(data, Declaration) + + def parse_declaration(self, i): + """Treat a bogus SGML declaration as raw data. Treat a CDATA + declaration as a CData object.""" + j = None + if self.rawdata[i:i+9] == '<![CDATA[': + k = self.rawdata.find(']]>', i) + if k == -1: + k = len(self.rawdata) + data = self.rawdata[i+9:k] + j = k+3 + self._toStringSubclass(data, CData) + else: + try: + j = HTMLParser.parse_declaration(self, i) + except HTMLParseError: + toHandle = self.rawdata[i:] + self.handle_data(toHandle) + j = i + len(toHandle) + return j + + +class BeautifulStoneSoup(Tag): + + """This class contains the basic parser and search code. It defines + a parser that knows nothing about tag behavior except for the + following: + + You can't close a tag without closing all the tags it encloses. + That is, "<foo><bar></foo>" actually means + "<foo><bar></bar></foo>". + + [Another possible explanation is "<foo><bar /></foo>", but since + this class defines no SELF_CLOSING_TAGS, it will never use that + explanation.] + + This class is useful for parsing XML or made-up markup languages, + or when BeautifulSoup makes an assumption counter to what you were + expecting.""" + + SELF_CLOSING_TAGS = {} + NESTABLE_TAGS = {} + RESET_NESTING_TAGS = {} + QUOTE_TAGS = {} + PRESERVE_WHITESPACE_TAGS = [] + + MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'), + lambda x: x.group(1) + ' />'), + (re.compile('<!\s+([^<>]*)>'), + lambda x: '<!' + x.group(1) + '>') + ] + + ROOT_TAG_NAME = u'[document]' + + HTML_ENTITIES = "html" + XML_ENTITIES = "xml" + XHTML_ENTITIES = "xhtml" + # TODO: This only exists for backwards-compatibility + ALL_ENTITIES = XHTML_ENTITIES + + # Used when determining whether a text node is all whitespace and + # can be replaced with a single space. A text node that contains + # fancy Unicode spaces (usually non-breaking) should be left + # alone. + STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, } + + def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None, + markupMassage=True, smartQuotesTo=XML_ENTITIES, + convertEntities=None, selfClosingTags=None, isHTML=False, + builder=HTMLParserBuilder): + """The Soup object is initialized as the 'root tag', and the + provided markup (which can be a string or a file-like object) + is fed into the underlying parser. + + HTMLParser will process most bad HTML, and the BeautifulSoup + class has some tricks for dealing with some HTML that kills + HTMLParser, but Beautiful Soup can nonetheless choke or lose data + if your data uses self-closing tags or declarations + incorrectly. + + By default, Beautiful Soup uses regexes to sanitize input, + avoiding the vast majority of these problems. If the problems + don't apply to you, pass in False for markupMassage, and + you'll get better performance. + + The default parser massage techniques fix the two most common + instances of invalid HTML that choke HTMLParser: + + <br/> (No space between name of closing tag and tag close) + <! --Comment--> (Extraneous whitespace in declaration) + + You can pass in a custom list of (RE object, replace method) + tuples to get Beautiful Soup to scrub your input the way you + want.""" + + self.parseOnlyThese = parseOnlyThese + self.fromEncoding = fromEncoding + self.smartQuotesTo = smartQuotesTo + self.convertEntities = convertEntities + # Set the rules for how we'll deal with the entities we + # encounter + if self.convertEntities: + # It doesn't make sense to convert encoded characters to + # entities even while you're converting entities to Unicode. + # Just convert it all to Unicode. + self.smartQuotesTo = None + if convertEntities == self.HTML_ENTITIES: + self.convertXMLEntities = False + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = True + elif convertEntities == self.XHTML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = True + self.escapeUnrecognizedEntities = False + elif convertEntities == self.XML_ENTITIES: + self.convertXMLEntities = True + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + else: + self.convertXMLEntities = False + self.convertHTMLEntities = False + self.escapeUnrecognizedEntities = False + + self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags) + self.builder = builder(self) + self.reset() + + if hasattr(markup, 'read'): # It's a file-type object. + markup = markup.read() + self.markup = markup + self.markupMassage = markupMassage + try: + self._feed(isHTML=isHTML) + except StopParsing: + pass + self.markup = None # The markup can now be GCed. + self.builder = None # So can the builder. + + def _feed(self, inDocumentEncoding=None, isHTML=False): + # Convert the document to Unicode. + markup = self.markup + if isinstance(markup, unicode): + if not hasattr(self, 'originalEncoding'): + self.originalEncoding = None + else: + dammit = UnicodeDammit\ + (markup, [self.fromEncoding, inDocumentEncoding], + smartQuotesTo=self.smartQuotesTo, isHTML=isHTML) + markup = dammit.unicode + self.originalEncoding = dammit.originalEncoding + self.declaredHTMLEncoding = dammit.declaredHTMLEncoding + if markup: + if self.markupMassage: + if not isList(self.markupMassage): + self.markupMassage = self.MARKUP_MASSAGE + for fix, m in self.markupMassage: + markup = fix.sub(m, markup) + # TODO: We get rid of markupMassage so that the + # soup object can be deepcopied later on. Some + # Python installations can't copy regexes. If anyone + # was relying on the existence of markupMassage, this + # might cause problems. + del(self.markupMassage) + self.builder.reset() + + self.builder.feed(markup) + # Close out any unfinished strings and close all the open tags. + self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def isSelfClosingTag(self, name): + """Returns true iff the given string is the name of a + self-closing tag according to this parser.""" + return self.SELF_CLOSING_TAGS.has_key(name) \ + or self.instanceSelfClosingTags.has_key(name) + + def reset(self): + Tag.__init__(self, self, self.ROOT_TAG_NAME) + self.hidden = 1 + self.builder.reset() + self.currentData = [] + self.currentTag = None + self.tagStack = [] + self.quoteStack = [] + self.pushTag(self) + + def popTag(self): + tag = self.tagStack.pop() + # Tags with just one string-owning child get the child as a + # 'string' property, so that soup.tag.string is shorthand for + # soup.tag.contents[0] + if len(self.currentTag.contents) == 1 and \ + isinstance(self.currentTag.contents[0], NavigableString): + self.currentTag.string = self.currentTag.contents[0] + + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.contents.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + + def endData(self, containerClass=NavigableString): + if self.currentData: + currentData = u''.join(self.currentData) + if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and + not set([tag.name for tag in self.tagStack]).intersection( + self.PRESERVE_WHITESPACE_TAGS)): + if '\n' in currentData: + currentData = '\n' + else: + currentData = ' ' + self.currentData = [] + if self.parseOnlyThese and len(self.tagStack) <= 1 and \ + (not self.parseOnlyThese.text or \ + not self.parseOnlyThese.search(currentData)): + return + o = containerClass(currentData) + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) + + + def _popToTag(self, name, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. If inclusivePop is false, pops the tag + stack up to but *not* including the most recent instqance of + the given tag.""" + #print "Popping to %s" % name + if name == self.ROOT_TAG_NAME: + return + + numPops = 0 + mostRecentTag = None + for i in range(len(self.tagStack)-1, 0, -1): + if name == self.tagStack[i].name: + numPops = len(self.tagStack)-i + break + if not inclusivePop: + numPops = numPops - 1 + + for i in range(0, numPops): + mostRecentTag = self.popTag() + return mostRecentTag + + def _smartPop(self, name): + + """We need to pop up to the previous tag of this type, unless + one of this tag's nesting reset triggers comes between this + tag and the previous tag of this type, OR unless this tag is a + generic nesting trigger and another generic nesting trigger + comes between this tag and the previous tag of this type. + + Examples: + <p>Foo<b>Bar *<p>* should pop to 'p', not 'b'. + <p>Foo<table>Bar *<p>* should pop to 'table', not 'p'. + <p>Foo<table><tr>Bar *<p>* should pop to 'tr', not 'p'. + + <li><ul><li> *<li>* should pop to 'ul', not the first 'li'. + <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr' + <td><tr><td> *<td>* should pop to 'tr', not the first 'td' + """ + + nestingResetTriggers = self.NESTABLE_TAGS.get(name) + isNestable = nestingResetTriggers != None + isResetNesting = self.RESET_NESTING_TAGS.has_key(name) + popTo = None + inclusive = True + for i in range(len(self.tagStack)-1, 0, -1): + p = self.tagStack[i] + if (not p or p.name == name) and not isNestable: + #Non-nestable tags get popped to the top or to their + #last occurance. + popTo = name + break + if (nestingResetTriggers != None + and p.name in nestingResetTriggers) \ + or (nestingResetTriggers == None and isResetNesting + and self.RESET_NESTING_TAGS.has_key(p.name)): + + #If we encounter one of the nesting reset triggers + #peculiar to this tag, or we encounter another tag + #that causes nesting to reset, pop up to but not + #including that tag. + popTo = p.name + inclusive = False + break + p = p.parent + if popTo: + self._popToTag(popTo, inclusive) + + def unknown_starttag(self, name, attrs, selfClosing=0): + #print "Start tag %s: %s" % (name, attrs) + if self.quoteStack: + #This is not a real tag. + #print "<%s> is not real!" % name + attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs)) + self.handle_data('<%s%s>' % (name, attrs)) + return + self.endData() + + if not self.isSelfClosingTag(name) and not selfClosing: + self._smartPop(name) + + if self.parseOnlyThese and len(self.tagStack) <= 1 \ + and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)): + return + + tag = Tag(self, name, attrs, self.currentTag, self.previous) + if self.previous: + self.previous.next = tag + self.previous = tag + self.pushTag(tag) + if selfClosing or self.isSelfClosingTag(name): + self.popTag() + if name in self.QUOTE_TAGS: + #print "Beginning quote (%s)" % name + self.quoteStack.append(name) + self.literal = 1 + return tag + + def unknown_endtag(self, name): + #print "End tag %s" % name + if self.quoteStack and self.quoteStack[-1] != name: + #This is not a real end tag. + #print "</%s> is not real!" % name + self.handle_data('</%s>' % name) + return + self.endData() + self._popToTag(name) + if self.quoteStack and self.quoteStack[-1] == name: + self.quoteStack.pop() + self.literal = (len(self.quoteStack) > 0) + + def handle_data(self, data): + self.currentData.append(data) + + def extractCharsetFromMeta(self, attrs): + self.unknown_starttag('meta', attrs) + + +class BeautifulSoup(BeautifulStoneSoup): + + """This parser knows the following facts about HTML: + + * Some tags have no closing tag and should be interpreted as being + closed as soon as they are encountered. + + * The text inside some tags (ie. 'script') may contain tags which + are not really part of the document and which should be parsed + as text, not tags. If you want to parse the text as tags, you can + always fetch it and parse it explicitly. + + * Tag nesting rules: + + Most tags can't be nested at all. For instance, the occurance of + a <p> tag should implicitly close the previous <p> tag. + + <p>Para1<p>Para2 + should be transformed into: + <p>Para1</p><p>Para2 + + Some tags can be nested arbitrarily. For instance, the occurance + of a <blockquote> tag should _not_ implicitly close the previous + <blockquote> tag. + + Alice said: <blockquote>Bob said: <blockquote>Blah + should NOT be transformed into: + Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah + + Some tags can be nested, but the nesting is reset by the + interposition of other tags. For instance, a <tr> tag should + implicitly close the previous <tr> tag within the same <table>, + but not close a <tr> tag in another table. + + <table><tr>Blah<tr>Blah + should be transformed into: + <table><tr>Blah</tr><tr>Blah + but, + <tr>Blah<table><tr>Blah + should NOT be transformed into + <tr>Blah<table></tr><tr>Blah + + Differing assumptions about tag nesting rules are a major source + of problems with the BeautifulSoup class. If BeautifulSoup is not + treating as nestable a tag your page author treats as nestable, + try ICantBelieveItsBeautifulSoup, MinimalSoup, or + BeautifulStoneSoup before writing your own subclass.""" + + def __init__(self, *args, **kwargs): + if not kwargs.has_key('smartQuotesTo'): + kwargs['smartQuotesTo'] = self.HTML_ENTITIES + kwargs['isHTML'] = True + BeautifulStoneSoup.__init__(self, *args, **kwargs) + + SELF_CLOSING_TAGS = buildTagMap(None, + ['br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base']) + + PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) + + QUOTE_TAGS = {'script' : None, 'textarea' : None} + + #According to the HTML standard, each of these inline tags can + #contain another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_INLINE_TAGS = ['span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center'] + + #According to the HTML standard, these block tags can contain + #another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_BLOCK_TAGS = ['blockquote', 'div', 'fieldset', 'ins', 'del'] + + #Lists can contain other lists, but there are restrictions. + NESTABLE_LIST_TAGS = { 'ol' : [], + 'ul' : [], + 'li' : ['ul', 'ol'], + 'dl' : [], + 'dd' : ['dl'], + 'dt' : ['dl'] } + + #Tables can contain other tables, but there are restrictions. + NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + 'thead' : ['table'], + 'tbody' : ['table'], + 'tfoot' : ['table'], + } + + NON_NESTABLE_BLOCK_TAGS = ['address', 'form', 'p', 'pre'] + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. + RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + + # Used to detect the charset in a META tag; see start_meta + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + + def extractCharsetFromMeta(self, attrs): + """Beautiful Soup can detect a charset included in a META tag, + try to convert the document to that charset, and re-parse the + document from the beginning.""" + httpEquiv = None + contentType = None + contentTypeIndex = None + tagNeedsEncodingSubstitution = False + + for i in range(0, len(attrs)): + key, value = attrs[i] + key = key.lower() + if key == 'http-equiv': + httpEquiv = value + elif key == 'content': + contentType = value + contentTypeIndex = i + + if httpEquiv and contentType: # It's an interesting meta tag. + match = self.CHARSET_RE.search(contentType) + if match: + if (self.declaredHTMLEncoding is not None or + self.originalEncoding == self.fromEncoding): + # An HTML encoding was sniffed while converting + # the document to Unicode, or an HTML encoding was + # sniffed during a previous pass through the + # document, or an encoding was specified + # explicitly and it worked. Rewrite the meta tag. + def rewrite(match): + return match.group(1) + "%SOUP-ENCODING%" + newAttr = self.CHARSET_RE.sub(rewrite, contentType) + attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], + newAttr) + tagNeedsEncodingSubstitution = True + else: + # This is our first pass through the document. + # Go through it again with the encoding information. + newCharset = match.group(3) + if newCharset and newCharset != self.originalEncoding: + self.declaredHTMLEncoding = newCharset + self._feed(self.declaredHTMLEncoding) + raise StopParsing + pass + tag = self.unknown_starttag("meta", attrs) + if tag and tagNeedsEncodingSubstitution: + tag.containsSubstitutions = True + + +class StopParsing(Exception): + pass + +class ICantBelieveItsBeautifulSoup(BeautifulSoup): + + """The BeautifulSoup class is oriented towards skipping over + common HTML errors like unclosed tags. However, sometimes it makes + errors of its own. For instance, consider this fragment: + + <b>Foo<b>Bar</b></b> + + This is perfectly valid (if bizarre) HTML. However, the + BeautifulSoup class will implicitly close the first b tag when it + encounters the second 'b'. It will think the author wrote + "<b>Foo<b>Bar", and didn't close the first 'b' tag, because + there's no real-world reason to bold something that's already + bold. When it encounters '</b></b>' it will close two more 'b' + tags, for a grand total of three tags closed instead of two. This + can throw off the rest of your document structure. The same is + true of a number of other tags, listed below. + + It's much more common for someone to forget to close a 'b' tag + than to actually use nested 'b' tags, and the BeautifulSoup class + handles the common case. This class handles the not-co-common + case: where you can't believe someone wrote what they did, but + it's valid HTML and BeautifulSoup screwed up by assuming it + wouldn't be.""" + + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ + ['em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', + 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', + 'big'] + + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ['noscript'] + + NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) + +class MinimalSoup(BeautifulSoup): + """The MinimalSoup class is for parsing HTML that contains + pathologically bad markup. It makes no assumptions about tag + nesting, but it does know which tags are self-closing, that + <script> tags contain Javascript and should not be parsed, that + META tags may contain encoding information, and so on. + + This also makes it better for subclassing than BeautifulStoneSoup + or BeautifulSoup.""" + + RESET_NESTING_TAGS = buildTagMap('noscript') + NESTABLE_TAGS = {} + +class BeautifulSOAP(BeautifulStoneSoup): + """This class will push a tag with only a single string child into + the tag's parent as an attribute. The attribute's name is the tag + name, and the value is the string child. An example should give + the flavor of the change: + + <foo><bar>baz</bar></foo> + => + <foo bar="baz"><bar>baz</bar></foo> + + You can then access fooTag['bar'] instead of fooTag.barTag.string. + + This is, of course, useful for scraping structures that tend to + use subelements instead of attributes, such as SOAP messages. Note + that it modifies its input, so don't print the modified version + out. + + I'm not sure how many people really want to use this class; let me + know if you do. Mainly I like the name.""" + + def popTag(self): + if len(self.tagStack) > 1: + tag = self.tagStack[-1] + parent = self.tagStack[-2] + parent._getAttrMap() + if (isinstance(tag, Tag) and len(tag.contents) == 1 and + isinstance(tag.contents[0], NavigableString) and + not parent.attrMap.has_key(tag.name)): + parent[tag.name] = tag.contents[0] + BeautifulStoneSoup.popTag(self) + +#Enterprise class names! It has come to our attention that some people +#think the names of the Beautiful Soup parser classes are too silly +#and "unprofessional" for use in enterprise screen-scraping. We feel +#your pain! For such-minded folk, the Beautiful Soup Consortium And +#All-Night Kosher Bakery recommends renaming this file to +#"RobustParser.py" (or, in cases of extreme enterprisiness, +#"RobustParserBeanInterface.class") and using the following +#enterprise-friendly class aliases: +class RobustXMLParser(BeautifulStoneSoup): + pass +class RobustHTMLParser(BeautifulSoup): + pass +class RobustWackAssHTMLParser(ICantBelieveItsBeautifulSoup): + pass +class RobustInsanelyWackAssHTMLParser(MinimalSoup): + pass +class SimplifyingSOAPParser(BeautifulSOAP): + pass + +###################################################### +# +# Bonus library: Unicode, Dammit +# +# This class forces XML data into a standard format (usually to UTF-8 +# or Unicode). It is heavily based on code from Mark Pilgrim's +# Universal Feed Parser. It does not rewrite the XML or HTML to +# reflect a new encoding: that happens in BeautifulStoneSoup.handle_pi +# (XML) and BeautifulSoup.start_meta (HTML). + +# Autodetects character encodings. +# Download from http://chardet.feedparser.org/ +try: + import chardet +# import chardet.constants +# chardet.constants._debug = 1 +except ImportError: + chardet = None + +# cjkcodecs and iconv_codec make Python know about more character encodings. +# Both are available from http://cjkpython.i18n.org/ +# They're built in if you use Python 2.4. +try: + import cjkcodecs.aliases +except ImportError: + pass +try: + import iconv_codec +except ImportError: + pass + +class UnicodeDammit: + """A class for detecting the encoding of a *ML document and + converting it to a Unicode string. If the source encoding is + windows-1252, can replace MS smart quotes with their HTML or XML + equivalents.""" + + # This dictionary maps commonly seen values for "charset" in HTML + # meta tags to the corresponding Python codec names. It only covers + # values that aren't in Python's aliases and can't be determined + # by the heuristics in find_codec. + CHARSET_ALIASES = { "macintosh" : "mac-roman", + "x-sjis" : "shift-jis" } + + def __init__(self, markup, overrideEncodings=[], + smartQuotesTo='xml', isHTML=False): + self.declaredHTMLEncoding = None + self.markup, documentEncoding, sniffedEncoding = \ + self._detectEncoding(markup, isHTML) + self.smartQuotesTo = smartQuotesTo + self.triedEncodings = [] + if markup == '' or isinstance(markup, unicode): + self.originalEncoding = None + self.unicode = unicode(markup) + return + + u = None + for proposedEncoding in overrideEncodings: + u = self._convertFrom(proposedEncoding) + if u: break + if not u: + for proposedEncoding in (documentEncoding, sniffedEncoding): + u = self._convertFrom(proposedEncoding) + if u: break + + # If no luck and we have auto-detection library, try that: + if not u and chardet and not isinstance(self.markup, unicode): + u = self._convertFrom(chardet.detect(self.markup)['encoding']) + + # As a last resort, try utf-8 and windows-1252: + if not u: + for proposed_encoding in ("utf-8", "windows-1252"): + u = self._convertFrom(proposed_encoding) + if u: break + + self.unicode = u + if not u: self.originalEncoding = None + + def _subMSChar(self, match): + """Changes a MS smart quote character to an XML or HTML + entity.""" + orig = match.group(1) + sub = self.MS_CHARS.get(orig) + if type(sub) == types.TupleType: + if self.smartQuotesTo == 'xml': + sub = '&#x'.encode() + sub[1].encode() + ';'.encode() + else: + sub = '&'.encode() + sub[0].encode() + ';'.encode() + else: + sub = sub.encode() + return sub + + def _convertFrom(self, proposed): + proposed = self.find_codec(proposed) + if not proposed or proposed in self.triedEncodings: + return None + self.triedEncodings.append(proposed) + markup = self.markup + + # Convert smart quotes to HTML if coming from an encoding + # that might have them. + if self.smartQuotesTo and proposed.lower() in("windows-1252", + "iso-8859-1", + "iso-8859-2"): + smart_quotes_re = "([\x80-\x9f])" + smart_quotes_compiled = re.compile(smart_quotes_re) + markup = smart_quotes_compiled.sub(self._subMSChar, markup) + + try: + # print "Trying to convert document to %s" % proposed + u = self._toUnicode(markup, proposed) + self.markup = u + self.originalEncoding = proposed + except Exception, e: + # print "That didn't work!" + # print e + return None + #print "Correct encoding: %s" % proposed + return self.markup + + def _toUnicode(self, data, encoding): + '''Given a string and its encoding, decodes the string into Unicode. + %encoding is a string recognized by encodings.aliases''' + + # strip Byte Order Mark (if present) + if (len(data) >= 4) and (data[:2] == '\xfe\xff') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16be' + data = data[2:] + elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \ + and (data[2:4] != '\x00\x00'): + encoding = 'utf-16le' + data = data[2:] + elif data[:3] == '\xef\xbb\xbf': + encoding = 'utf-8' + data = data[3:] + elif data[:4] == '\x00\x00\xfe\xff': + encoding = 'utf-32be' + data = data[4:] + elif data[:4] == '\xff\xfe\x00\x00': + encoding = 'utf-32le' + data = data[4:] + newdata = unicode(data, encoding) + return newdata + + def _detectEncoding(self, xml_data, isHTML=False): + """Given a document, tries to detect its XML encoding.""" + xml_encoding = sniffed_xml_encoding = None + try: + if xml_data[:4] == '\x4c\x6f\xa7\x94': + # EBCDIC + xml_data = self._ebcdic_to_ascii(xml_data) + elif xml_data[:4] == '\x00\x3c\x00\x3f': + # UTF-16BE + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data, 'utf-16be').encode('utf-8') + elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \ + and (xml_data[2:4] != '\x00\x00'): + # UTF-16BE with BOM + sniffed_xml_encoding = 'utf-16be' + xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8') + elif xml_data[:4] == '\x3c\x00\x3f\x00': + # UTF-16LE + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data, 'utf-16le').encode('utf-8') + elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \ + (xml_data[2:4] != '\x00\x00'): + # UTF-16LE with BOM + sniffed_xml_encoding = 'utf-16le' + xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8') + elif xml_data[:4] == '\x00\x00\x00\x3c': + # UTF-32BE + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data, 'utf-32be').encode('utf-8') + elif xml_data[:4] == '\x3c\x00\x00\x00': + # UTF-32LE + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data, 'utf-32le').encode('utf-8') + elif xml_data[:4] == '\x00\x00\xfe\xff': + # UTF-32BE with BOM + sniffed_xml_encoding = 'utf-32be' + xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8') + elif xml_data[:4] == '\xff\xfe\x00\x00': + # UTF-32LE with BOM + sniffed_xml_encoding = 'utf-32le' + xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8') + elif xml_data[:3] == '\xef\xbb\xbf': + # UTF-8 with BOM + sniffed_xml_encoding = 'utf-8' + xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8') + else: + sniffed_xml_encoding = 'ascii' + pass + except: + xml_encoding_match = None + xml_encoding_re = '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode() + xml_encoding_match = re.compile(xml_encoding_re).match(xml_data) + if not xml_encoding_match and isHTML: + meta_re = '<\s*meta[^>]+charset=([^>]*?)[;\'">]'.encode() + regexp = re.compile(meta_re, re.I) + xml_encoding_match = regexp.search(xml_data) + if xml_encoding_match is not None: + xml_encoding = xml_encoding_match.groups()[0].decode( + 'ascii').lower() + if isHTML: + self.declaredHTMLEncoding = xml_encoding + if sniffed_xml_encoding and \ + (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode', + 'iso-10646-ucs-4', 'ucs-4', 'csucs4', + 'utf-16', 'utf-32', 'utf_16', 'utf_32', + 'utf16', 'u16')): + xml_encoding = sniffed_xml_encoding + return xml_data, xml_encoding, sniffed_xml_encoding + + + def find_codec(self, charset): + return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \ + or (charset and self._codec(charset.replace("-", ""))) \ + or (charset and self._codec(charset.replace("-", "_"))) \ + or charset + + def _codec(self, charset): + if not charset: return charset + codec = None + try: + codecs.lookup(charset) + codec = charset + except (LookupError, ValueError): + pass + return codec + + EBCDIC_TO_ASCII_MAP = None + def _ebcdic_to_ascii(self, s): + c = self.__class__ + if not c.EBCDIC_TO_ASCII_MAP: + emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15, + 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31, + 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7, + 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26, + 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33, + 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94, + 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63, + 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34, + 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200, + 201,202,106,107,108,109,110,111,112,113,114,203,204,205, + 206,207,208,209,126,115,116,117,118,119,120,121,122,210, + 211,212,213,214,215,216,217,218,219,220,221,222,223,224, + 225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72, + 73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81, + 82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89, + 90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57, + 250,251,252,253,254,255) + import string + c.EBCDIC_TO_ASCII_MAP = string.maketrans( \ + ''.join(map(chr, range(256))), ''.join(map(chr, emap))) + return s.translate(c.EBCDIC_TO_ASCII_MAP) + + MS_CHARS = { '\x80' : ('euro', '20AC'), + '\x81' : ' ', + '\x82' : ('sbquo', '201A'), + '\x83' : ('fnof', '192'), + '\x84' : ('bdquo', '201E'), + '\x85' : ('hellip', '2026'), + '\x86' : ('dagger', '2020'), + '\x87' : ('Dagger', '2021'), + '\x88' : ('circ', '2C6'), + '\x89' : ('permil', '2030'), + '\x8A' : ('Scaron', '160'), + '\x8B' : ('lsaquo', '2039'), + '\x8C' : ('OElig', '152'), + '\x8D' : '?', + '\x8E' : ('#x17D', '17D'), + '\x8F' : '?', + '\x90' : '?', + '\x91' : ('lsquo', '2018'), + '\x92' : ('rsquo', '2019'), + '\x93' : ('ldquo', '201C'), + '\x94' : ('rdquo', '201D'), + '\x95' : ('bull', '2022'), + '\x96' : ('ndash', '2013'), + '\x97' : ('mdash', '2014'), + '\x98' : ('tilde', '2DC'), + '\x99' : ('trade', '2122'), + '\x9a' : ('scaron', '161'), + '\x9b' : ('rsaquo', '203A'), + '\x9c' : ('oelig', '153'), + '\x9d' : '?', + '\x9e' : ('#x17E', '17E'), + '\x9f' : ('Yuml', ''),} + +####################################################################### + + +#By default, act as an HTML pretty-printer. +if __name__ == '__main__': + import sys + soup = BeautifulSoup(sys.stdin) + print soup.prettify() diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/__init__.py new file mode 100644 index 0000000..f1e5334 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/__init__.py @@ -0,0 +1,97 @@ +# Copyright (C) 2010 Chris Jerdonek (cjerdonek@webkit.org) +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This module is required for Python to treat this directory as a package. + +"""Autoinstalls third-party code required by WebKit.""" + +import os + +from webkitpy.common.system.autoinstall import AutoInstaller + +# Putting the autoinstall code into webkitpy/thirdparty/__init__.py +# ensures that no autoinstalling occurs until a caller imports from +# webkitpy.thirdparty. This is useful if the caller wants to configure +# logging prior to executing autoinstall code. + +# FIXME: Ideally, a package should be autoinstalled only if the caller +# attempts to import from that individual package. This would +# make autoinstalling lazier than it is currently. This can +# perhaps be done using Python's import hooks as the original +# autoinstall implementation did. + +# We put auto-installed third-party modules in this directory-- +# +# webkitpy/thirdparty/autoinstalled +thirdparty_dir = os.path.dirname(__file__) +autoinstalled_dir = os.path.join(thirdparty_dir, "autoinstalled") + +# We need to download ClientForm since the mechanize package that we download +# below requires it. The mechanize package uses ClientForm, for example, +# in _html.py. Since mechanize imports ClientForm in the following way, +# +# > import sgmllib, ClientForm +# +# the search path needs to include ClientForm. We put ClientForm in +# its own directory so that we can include it in the search path without +# including other modules as a side effect. +clientform_dir = os.path.join(autoinstalled_dir, "clientform") +installer = AutoInstaller(append_to_search_path=True, + target_dir=clientform_dir) +installer.install(url="http://pypi.python.org/packages/source/C/ClientForm/ClientForm-0.2.10.zip", + url_subpath="ClientForm.py") + +# The remaining packages do not need to be in the search path, so we create +# a new AutoInstaller instance that does not append to the search path. +installer = AutoInstaller(target_dir=autoinstalled_dir) + +installer.install(url="http://pypi.python.org/packages/source/m/mechanize/mechanize-0.1.11.zip", + url_subpath="mechanize") +installer.install(url="http://pypi.python.org/packages/source/p/pep8/pep8-0.5.0.tar.gz#md5=512a818af9979290cd619cce8e9c2e2b", + url_subpath="pep8-0.5.0/pep8.py") + + +rietveld_dir = os.path.join(autoinstalled_dir, "rietveld") +installer = AutoInstaller(target_dir=rietveld_dir) +installer.install(url="http://webkit-rietveld.googlecode.com/svn/trunk/static/upload.py", + target_name="upload.py") + + +# Since irclib and ircbot are two top-level packages, we need to import +# them separately. We group them into an irc package for better +# organization purposes. +irc_dir = os.path.join(autoinstalled_dir, "irc") +installer = AutoInstaller(target_dir=irc_dir) +installer.install(url="http://iweb.dl.sourceforge.net/project/python-irclib/python-irclib/0.4.8/python-irclib-0.4.8.zip", + url_subpath="irclib.py") +installer.install(url="http://iweb.dl.sourceforge.net/project/python-irclib/python-irclib/0.4.8/python-irclib-0.4.8.zip", + url_subpath="ircbot.py") + +readme_path = os.path.join(autoinstalled_dir, "README") +if not os.path.exists(readme_path): + file = open(readme_path, "w") + try: + file.write("This directory is auto-generated by WebKit and is " + "safe to delete.\nIt contains needed third-party Python " + "packages automatically downloaded from the web.") + finally: + file.close() diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/mock.py b/WebKitTools/Scripts/webkitpy/thirdparty/mock.py new file mode 100644 index 0000000..015c19e --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/mock.py @@ -0,0 +1,309 @@ +# mock.py +# Test tools for mocking and patching. +# Copyright (C) 2007-2009 Michael Foord +# E-mail: fuzzyman AT voidspace DOT org DOT uk + +# mock 0.6.0 +# http://www.voidspace.org.uk/python/mock/ + +# Released subject to the BSD License +# Please see http://www.voidspace.org.uk/python/license.shtml + +# 2009-11-25: Licence downloaded from above URL. +# BEGIN DOWNLOADED LICENSE +# +# Copyright (c) 2003-2009, Michael Foord +# All rights reserved. +# E-mail : fuzzyman AT voidspace DOT org DOT uk +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials provided +# with the distribution. +# +# * Neither the name of Michael Foord nor the name of Voidspace +# may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# END DOWNLOADED LICENSE + +# Scripts maintained at http://www.voidspace.org.uk/python/index.shtml +# Comments, suggestions and bug reports welcome. + + +__all__ = ( + 'Mock', + 'patch', + 'patch_object', + 'sentinel', + 'DEFAULT' +) + +__version__ = '0.6.0' + +class SentinelObject(object): + def __init__(self, name): + self.name = name + + def __repr__(self): + return '<SentinelObject "%s">' % self.name + + +class Sentinel(object): + def __init__(self): + self._sentinels = {} + + def __getattr__(self, name): + return self._sentinels.setdefault(name, SentinelObject(name)) + + +sentinel = Sentinel() + +DEFAULT = sentinel.DEFAULT + +class OldStyleClass: + pass +ClassType = type(OldStyleClass) + +def _is_magic(name): + return '__%s__' % name[2:-2] == name + +def _copy(value): + if type(value) in (dict, list, tuple, set): + return type(value)(value) + return value + + +class Mock(object): + + def __init__(self, spec=None, side_effect=None, return_value=DEFAULT, + name=None, parent=None, wraps=None): + self._parent = parent + self._name = name + if spec is not None and not isinstance(spec, list): + spec = [member for member in dir(spec) if not _is_magic(member)] + + self._methods = spec + self._children = {} + self._return_value = return_value + self.side_effect = side_effect + self._wraps = wraps + + self.reset_mock() + + + def reset_mock(self): + self.called = False + self.call_args = None + self.call_count = 0 + self.call_args_list = [] + self.method_calls = [] + for child in self._children.itervalues(): + child.reset_mock() + if isinstance(self._return_value, Mock): + self._return_value.reset_mock() + + + def __get_return_value(self): + if self._return_value is DEFAULT: + self._return_value = Mock() + return self._return_value + + def __set_return_value(self, value): + self._return_value = value + + return_value = property(__get_return_value, __set_return_value) + + + def __call__(self, *args, **kwargs): + self.called = True + self.call_count += 1 + self.call_args = (args, kwargs) + self.call_args_list.append((args, kwargs)) + + parent = self._parent + name = self._name + while parent is not None: + parent.method_calls.append((name, args, kwargs)) + if parent._parent is None: + break + name = parent._name + '.' + name + parent = parent._parent + + ret_val = DEFAULT + if self.side_effect is not None: + if (isinstance(self.side_effect, Exception) or + isinstance(self.side_effect, (type, ClassType)) and + issubclass(self.side_effect, Exception)): + raise self.side_effect + + ret_val = self.side_effect(*args, **kwargs) + if ret_val is DEFAULT: + ret_val = self.return_value + + if self._wraps is not None and self._return_value is DEFAULT: + return self._wraps(*args, **kwargs) + if ret_val is DEFAULT: + ret_val = self.return_value + return ret_val + + + def __getattr__(self, name): + if self._methods is not None: + if name not in self._methods: + raise AttributeError("Mock object has no attribute '%s'" % name) + elif _is_magic(name): + raise AttributeError(name) + + if name not in self._children: + wraps = None + if self._wraps is not None: + wraps = getattr(self._wraps, name) + self._children[name] = Mock(parent=self, name=name, wraps=wraps) + + return self._children[name] + + + def assert_called_with(self, *args, **kwargs): + assert self.call_args == (args, kwargs), 'Expected: %s\nCalled with: %s' % ((args, kwargs), self.call_args) + + +def _dot_lookup(thing, comp, import_path): + try: + return getattr(thing, comp) + except AttributeError: + __import__(import_path) + return getattr(thing, comp) + + +def _importer(target): + components = target.split('.') + import_path = components.pop(0) + thing = __import__(import_path) + + for comp in components: + import_path += ".%s" % comp + thing = _dot_lookup(thing, comp, import_path) + return thing + + +class _patch(object): + def __init__(self, target, attribute, new, spec, create): + self.target = target + self.attribute = attribute + self.new = new + self.spec = spec + self.create = create + self.has_local = False + + + def __call__(self, func): + if hasattr(func, 'patchings'): + func.patchings.append(self) + return func + + def patched(*args, **keywargs): + # don't use a with here (backwards compatability with 2.5) + extra_args = [] + for patching in patched.patchings: + arg = patching.__enter__() + if patching.new is DEFAULT: + extra_args.append(arg) + args += tuple(extra_args) + try: + return func(*args, **keywargs) + finally: + for patching in getattr(patched, 'patchings', []): + patching.__exit__() + + patched.patchings = [self] + patched.__name__ = func.__name__ + patched.compat_co_firstlineno = getattr(func, "compat_co_firstlineno", + func.func_code.co_firstlineno) + return patched + + + def get_original(self): + target = self.target + name = self.attribute + create = self.create + + original = DEFAULT + if _has_local_attr(target, name): + try: + original = target.__dict__[name] + except AttributeError: + # for instances of classes with slots, they have no __dict__ + original = getattr(target, name) + elif not create and not hasattr(target, name): + raise AttributeError("%s does not have the attribute %r" % (target, name)) + return original + + + def __enter__(self): + new, spec, = self.new, self.spec + original = self.get_original() + if new is DEFAULT: + # XXXX what if original is DEFAULT - shouldn't use it as a spec + inherit = False + if spec == True: + # set spec to the object we are replacing + spec = original + if isinstance(spec, (type, ClassType)): + inherit = True + new = Mock(spec=spec) + if inherit: + new.return_value = Mock(spec=spec) + self.temp_original = original + setattr(self.target, self.attribute, new) + return new + + + def __exit__(self, *_): + if self.temp_original is not DEFAULT: + setattr(self.target, self.attribute, self.temp_original) + else: + delattr(self.target, self.attribute) + del self.temp_original + + +def patch_object(target, attribute, new=DEFAULT, spec=None, create=False): + return _patch(target, attribute, new, spec, create) + + +def patch(target, new=DEFAULT, spec=None, create=False): + try: + target, attribute = target.rsplit('.', 1) + except (TypeError, ValueError): + raise TypeError("Need a valid target to patch. You supplied: %r" % (target,)) + target = _importer(target) + return _patch(target, attribute, new, spec, create) + + + +def _has_local_attr(obj, name): + try: + return name in vars(obj) + except TypeError: + # objects without a __dict__ + return hasattr(obj, name) diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/COPYING b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/COPYING new file mode 100644 index 0000000..ab9d52d --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/COPYING @@ -0,0 +1,28 @@ +Copyright 2009, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/MANIFEST.in b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/MANIFEST.in new file mode 100644 index 0000000..1925688 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/MANIFEST.in @@ -0,0 +1,6 @@ +include COPYING +include MANIFEST.in +include README +recursive-include example *.py +recursive-include mod_pywebsocket *.py +recursive-include test *.py diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/README b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/README new file mode 100644 index 0000000..1f9f05f --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/README @@ -0,0 +1,6 @@ +Install this package by: +$ python setup.py build +$ sudo python setup.py install + +Then read document by: +$ pydoc mod_pywebsocket diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/README.webkit b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/README.webkit new file mode 100644 index 0000000..83e3cee --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/README.webkit @@ -0,0 +1,14 @@ +This directory contains a copy of the pywebsocket Python module obtained +from the following location: + +http://code.google.com/p/pywebsocket/ + +This directory currently contains the following version: +0.4.9.2 + +The following modifications have been made to this local version: +minor updates in WebSocketRequestHandler.is_cgi + +More information on these local modifications can be found here: + +http://trac.webkit.org/browser/trunk/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/example/echo_client.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/example/echo_client.py new file mode 100644 index 0000000..2b976e1 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/example/echo_client.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Web Socket Echo client. + +This is an example Web Socket client that talks with echo_wsh.py. +This may be useful for checking mod_pywebsocket installation. + +Note: +This code is far from robust, e.g., we cut corners in handshake. +""" + + +import codecs +from optparse import OptionParser +import socket +import sys + + +_TIMEOUT_SEC = 10 + +_DEFAULT_PORT = 80 +_DEFAULT_SECURE_PORT = 443 +_UNDEFINED_PORT = -1 + +_UPGRADE_HEADER = 'Upgrade: WebSocket\r\n' +_CONNECTION_HEADER = 'Connection: Upgrade\r\n' +_EXPECTED_RESPONSE = ( + 'HTTP/1.1 101 Web Socket Protocol Handshake\r\n' + + _UPGRADE_HEADER + + _CONNECTION_HEADER) + +_GOODBYE_MESSAGE = 'Goodbye' + + +def _method_line(resource): + return 'GET %s HTTP/1.1\r\n' % resource + + +def _origin_header(origin): + return 'Origin: %s\r\n' % origin + + +class _TLSSocket(object): + """Wrapper for a TLS connection.""" + + def __init__(self, raw_socket): + self._ssl = socket.ssl(raw_socket) + + def send(self, bytes): + return self._ssl.write(bytes) + + def recv(self, size=-1): + return self._ssl.read(size) + + def close(self): + # Nothing to do. + pass + + +class EchoClient(object): + """Web Socket echo client.""" + + def __init__(self, options): + self._options = options + self._socket = None + + def run(self): + """Run the client. + + Shake hands and then repeat sending message and receiving its echo. + """ + self._socket = socket.socket() + self._socket.settimeout(self._options.socket_timeout) + try: + self._socket.connect((self._options.server_host, + self._options.server_port)) + if self._options.use_tls: + self._socket = _TLSSocket(self._socket) + self._handshake() + for line in self._options.message.split(',') + [_GOODBYE_MESSAGE]: + frame = '\x00' + line.encode('utf-8') + '\xff' + self._socket.send(frame) + if self._options.verbose: + print 'Send: %s' % line + received = self._socket.recv(len(frame)) + if received != frame: + raise Exception('Incorrect echo: %r' % received) + if self._options.verbose: + print 'Recv: %s' % received[1:-1].decode('utf-8', + 'replace') + finally: + self._socket.close() + + def _handshake(self): + self._socket.send(_method_line(self._options.resource)) + self._socket.send(_UPGRADE_HEADER) + self._socket.send(_CONNECTION_HEADER) + self._socket.send(self._format_host_header()) + self._socket.send(_origin_header(self._options.origin)) + self._socket.send('\r\n') + + for expected_char in _EXPECTED_RESPONSE: + received = self._socket.recv(1)[0] + if expected_char != received: + raise Exception('Handshake failure') + # We cut corners and skip other headers. + self._skip_headers() + + def _skip_headers(self): + terminator = '\r\n\r\n' + pos = 0 + while pos < len(terminator): + received = self._socket.recv(1)[0] + if received == terminator[pos]: + pos += 1 + elif received == terminator[0]: + pos = 1 + else: + pos = 0 + + def _format_host_header(self): + host = 'Host: ' + self._options.server_host + if ((not self._options.use_tls and + self._options.server_port != _DEFAULT_PORT) or + (self._options.use_tls and + self._options.server_port != _DEFAULT_SECURE_PORT)): + host += ':' + str(self._options.server_port) + host += '\r\n' + return host + + +def main(): + sys.stdout = codecs.getwriter('utf-8')(sys.stdout) + + parser = OptionParser() + parser.add_option('-s', '--server-host', '--server_host', + dest='server_host', type='string', + default='localhost', help='server host') + parser.add_option('-p', '--server-port', '--server_port', + dest='server_port', type='int', + default=_UNDEFINED_PORT, help='server port') + parser.add_option('-o', '--origin', dest='origin', type='string', + default='http://localhost/', help='origin') + parser.add_option('-r', '--resource', dest='resource', type='string', + default='/echo', help='resource path') + parser.add_option('-m', '--message', dest='message', type='string', + help=('comma-separated messages to send excluding "%s" ' + 'that is always sent at the end' % + _GOODBYE_MESSAGE)) + parser.add_option('-q', '--quiet', dest='verbose', action='store_false', + default=True, help='suppress messages') + parser.add_option('-t', '--tls', dest='use_tls', action='store_true', + default=False, help='use TLS (wss://)') + parser.add_option('-k', '--socket-timeout', '--socket_timeout', + dest='socket_timeout', type='int', default=_TIMEOUT_SEC, + help='Timeout(sec) for sockets') + + (options, unused_args) = parser.parse_args() + + # Default port number depends on whether TLS is used. + if options.server_port == _UNDEFINED_PORT: + if options.use_tls: + options.server_port = _DEFAULT_SECURE_PORT + else: + options.server_port = _DEFAULT_PORT + + # optparse doesn't seem to handle non-ascii default values. + # Set default message here. + if not options.message: + options.message = u'Hello,\u65e5\u672c' # "Japan" in Japanese + + EchoClient(options).run() + + +if __name__ == '__main__': + main() + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/example/echo_wsh.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/example/echo_wsh.py new file mode 100644 index 0000000..50cad31 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/example/echo_wsh.py @@ -0,0 +1,49 @@ +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +from mod_pywebsocket import msgutil + + +_GOODBYE_MESSAGE = 'Goodbye' + + +def web_socket_do_extra_handshake(request): + pass # Always accept. + + +def web_socket_transfer_data(request): + while True: + line = msgutil.receive_message(request) + msgutil.send_message(request, line) + if line == _GOODBYE_MESSAGE: + return + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/example/handler_map.txt b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/example/handler_map.txt new file mode 100644 index 0000000..21c4c09 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/example/handler_map.txt @@ -0,0 +1,11 @@ +# websocket handler map file, used by standalone.py -m option. +# A line starting with '#' is a comment line. +# Each line consists of 'alias_resource_path' and 'existing_resource_path' +# separated by spaces. +# Aliasing is processed from the top to the bottom of the line, and +# 'existing_resource_path' must exist before it is aliased. +# For example, +# / /echo +# means that a request to '/' will be handled by handlers for '/echo'. +/ /echo + diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/__init__.py new file mode 100644 index 0000000..05e80e8 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/__init__.py @@ -0,0 +1,105 @@ +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Web Socket extension for Apache HTTP Server. + +mod_pywebsocket is a Web Socket extension for Apache HTTP Server +intended for testing or experimental purposes. mod_python is required. + +Installation: + +0. Prepare an Apache HTTP Server for which mod_python is enabled. + +1. Specify the following Apache HTTP Server directives to suit your + configuration. + + If mod_pywebsocket is not in the Python path, specify the following. + <websock_lib> is the directory where mod_pywebsocket is installed. + + PythonPath "sys.path+['<websock_lib>']" + + Always specify the following. <websock_handlers> is the directory where + user-written Web Socket handlers are placed. + + PythonOption mod_pywebsocket.handler_root <websock_handlers> + PythonHeaderParserHandler mod_pywebsocket.headerparserhandler + + To limit the search for Web Socket handlers to a directory <scan_dir> + under <websock_handlers>, configure as follows: + + PythonOption mod_pywebsocket.handler_scan <scan_dir> + + <scan_dir> is useful in saving scan time when <websock_handlers> + contains many non-Web Socket handler files. + + Example snippet of httpd.conf: + (mod_pywebsocket is in /websock_lib, Web Socket handlers are in + /websock_handlers, port is 80 for ws, 443 for wss.) + + <IfModule python_module> + PythonPath "sys.path+['/websock_lib']" + PythonOption mod_pywebsocket.handler_root /websock_handlers + PythonHeaderParserHandler mod_pywebsocket.headerparserhandler + </IfModule> + +Writing Web Socket handlers: + +When a Web Socket request comes in, the resource name +specified in the handshake is considered as if it is a file path under +<websock_handlers> and the handler defined in +<websock_handlers>/<resource_name>_wsh.py is invoked. + +For example, if the resource name is /example/chat, the handler defined in +<websock_handlers>/example/chat_wsh.py is invoked. + +A Web Socket handler is composed of the following two functions: + + web_socket_do_extra_handshake(request) + web_socket_transfer_data(request) + +where: + request: mod_python request. + +web_socket_do_extra_handshake is called during the handshake after the +headers are successfully parsed and Web Socket properties (ws_location, +ws_origin, ws_protocol, and ws_resource) are added to request. A handler +can reject the request by raising an exception. + +web_socket_transfer_data is called after the handshake completed +successfully. A handler can receive/send messages from/to the client +using request. mod_pywebsocket.msgutil module provides utilities +for data transfer. + +A Web Socket handler must be thread-safe if the server (Apache or +standalone.py) is configured to use threads. +""" + + +# vi:sts=4 sw=4 et tw=72 diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/dispatch.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/dispatch.py new file mode 100644 index 0000000..c52e9eb --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/dispatch.py @@ -0,0 +1,231 @@ +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Dispatch Web Socket request. +""" + + +import os +import re + +import util + + +_SOURCE_PATH_PATTERN = re.compile(r'(?i)_wsh\.py$') +_SOURCE_SUFFIX = '_wsh.py' +_DO_EXTRA_HANDSHAKE_HANDLER_NAME = 'web_socket_do_extra_handshake' +_TRANSFER_DATA_HANDLER_NAME = 'web_socket_transfer_data' + + +class DispatchError(Exception): + """Exception in dispatching Web Socket request.""" + + pass + + +def _normalize_path(path): + """Normalize path. + + Args: + path: the path to normalize. + + Path is converted to the absolute path. + The input path can use either '\\' or '/' as the separator. + The normalized path always uses '/' regardless of the platform. + """ + + path = path.replace('\\', os.path.sep) + path = os.path.realpath(path) + path = path.replace('\\', '/') + return path + + +def _path_to_resource_converter(base_dir): + base_dir = _normalize_path(base_dir) + base_len = len(base_dir) + suffix_len = len(_SOURCE_SUFFIX) + def converter(path): + if not path.endswith(_SOURCE_SUFFIX): + return None + path = _normalize_path(path) + if not path.startswith(base_dir): + return None + return path[base_len:-suffix_len] + return converter + + +def _source_file_paths(directory): + """Yield Web Socket Handler source file names in the given directory.""" + + for root, unused_dirs, files in os.walk(directory): + for base in files: + path = os.path.join(root, base) + if _SOURCE_PATH_PATTERN.search(path): + yield path + + +def _source(source_str): + """Source a handler definition string.""" + + global_dic = {} + try: + exec source_str in global_dic + except Exception: + raise DispatchError('Error in sourcing handler:' + + util.get_stack_trace()) + return (_extract_handler(global_dic, _DO_EXTRA_HANDSHAKE_HANDLER_NAME), + _extract_handler(global_dic, _TRANSFER_DATA_HANDLER_NAME)) + + +def _extract_handler(dic, name): + if name not in dic: + raise DispatchError('%s is not defined.' % name) + handler = dic[name] + if not callable(handler): + raise DispatchError('%s is not callable.' % name) + return handler + + +class Dispatcher(object): + """Dispatches Web Socket requests. + + This class maintains a map from resource name to handlers. + """ + + def __init__(self, root_dir, scan_dir=None): + """Construct an instance. + + Args: + root_dir: The directory where handler definition files are + placed. + scan_dir: The directory where handler definition files are + searched. scan_dir must be a directory under root_dir, + including root_dir itself. If scan_dir is None, root_dir + is used as scan_dir. scan_dir can be useful in saving + scan time when root_dir contains many subdirectories. + """ + + self._handlers = {} + self._source_warnings = [] + if scan_dir is None: + scan_dir = root_dir + if not os.path.realpath(scan_dir).startswith( + os.path.realpath(root_dir)): + raise DispatchError('scan_dir:%s must be a directory under ' + 'root_dir:%s.' % (scan_dir, root_dir)) + self._source_files_in_dir(root_dir, scan_dir) + + def add_resource_path_alias(self, + alias_resource_path, existing_resource_path): + """Add resource path alias. + + Once added, request to alias_resource_path would be handled by + handler registered for existing_resource_path. + + Args: + alias_resource_path: alias resource path + existing_resource_path: existing resource path + """ + try: + handler = self._handlers[existing_resource_path] + self._handlers[alias_resource_path] = handler + except KeyError: + raise DispatchError('No handler for: %r' % existing_resource_path) + + def source_warnings(self): + """Return warnings in sourcing handlers.""" + + return self._source_warnings + + def do_extra_handshake(self, request): + """Do extra checking in Web Socket handshake. + + Select a handler based on request.uri and call its + web_socket_do_extra_handshake function. + + Args: + request: mod_python request. + """ + + do_extra_handshake_, unused_transfer_data = self._handler(request) + try: + do_extra_handshake_(request) + except Exception, e: + util.prepend_message_to_exception( + '%s raised exception for %s: ' % ( + _DO_EXTRA_HANDSHAKE_HANDLER_NAME, + request.ws_resource), + e) + raise + + def transfer_data(self, request): + """Let a handler transfer_data with a Web Socket client. + + Select a handler based on request.ws_resource and call its + web_socket_transfer_data function. + + Args: + request: mod_python request. + """ + + unused_do_extra_handshake, transfer_data_ = self._handler(request) + try: + transfer_data_(request) + except Exception, e: + util.prepend_message_to_exception( + '%s raised exception for %s: ' % ( + _TRANSFER_DATA_HANDLER_NAME, request.ws_resource), + e) + raise + + def _handler(self, request): + try: + ws_resource_path = request.ws_resource.split('?', 1)[0] + return self._handlers[ws_resource_path] + except KeyError: + raise DispatchError('No handler for: %r' % request.ws_resource) + + def _source_files_in_dir(self, root_dir, scan_dir): + """Source all the handler source files in the scan_dir directory. + + The resource path is determined relative to root_dir. + """ + + to_resource = _path_to_resource_converter(root_dir) + for path in _source_file_paths(scan_dir): + try: + handlers = _source(open(path).read()) + except DispatchError, e: + self._source_warnings.append('%s: %s' % (path, e)) + continue + self._handlers[to_resource(path)] = handlers + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/handshake.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/handshake.py new file mode 100644 index 0000000..b86278e --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/handshake.py @@ -0,0 +1,220 @@ +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Web Socket handshaking. + +Note: request.connection.write/read are used in this module, even though +mod_python document says that they should be used only in connection handlers. +Unfortunately, we have no other options. For example, request.write/read are +not suitable because they don't allow direct raw bytes writing/reading. +""" + + +import re + +import util + + +_DEFAULT_WEB_SOCKET_PORT = 80 +_DEFAULT_WEB_SOCKET_SECURE_PORT = 443 +_WEB_SOCKET_SCHEME = 'ws' +_WEB_SOCKET_SECURE_SCHEME = 'wss' + +_MANDATORY_HEADERS = [ + # key, expected value or None + ['Upgrade', 'WebSocket'], + ['Connection', 'Upgrade'], + ['Host', None], + ['Origin', None], +] + +_FIRST_FIVE_LINES = map(re.compile, [ + r'^GET /[\S]* HTTP/1.1\r\n$', + r'^Upgrade: WebSocket\r\n$', + r'^Connection: Upgrade\r\n$', + r'^Host: [\S]+\r\n$', + r'^Origin: [\S]+\r\n$', +]) + +_SIXTH_AND_LATER = re.compile( + r'^' + r'(WebSocket-Protocol: [\x20-\x7e]+\r\n)?' + r'(Cookie: [^\r]*\r\n)*' + r'(Cookie2: [^\r]*\r\n)?' + r'(Cookie: [^\r]*\r\n)*' + r'\r\n') + + +def _default_port(is_secure): + if is_secure: + return _DEFAULT_WEB_SOCKET_SECURE_PORT + else: + return _DEFAULT_WEB_SOCKET_PORT + + +class HandshakeError(Exception): + """Exception in Web Socket Handshake.""" + + pass + + +def _validate_protocol(protocol): + """Validate WebSocket-Protocol string.""" + + if not protocol: + raise HandshakeError('Invalid WebSocket-Protocol: empty') + for c in protocol: + if not 0x20 <= ord(c) <= 0x7e: + raise HandshakeError('Illegal character in protocol: %r' % c) + + +class Handshaker(object): + """This class performs Web Socket handshake.""" + + def __init__(self, request, dispatcher, strict=False): + """Construct an instance. + + Args: + request: mod_python request. + dispatcher: Dispatcher (dispatch.Dispatcher). + strict: Strictly check handshake request. Default: False. + If True, request.connection must provide get_memorized_lines + method. + + Handshaker will add attributes such as ws_resource in performing + handshake. + """ + + self._request = request + self._dispatcher = dispatcher + self._strict = strict + + def do_handshake(self): + """Perform Web Socket Handshake.""" + + self._check_header_lines() + self._set_resource() + self._set_origin() + self._set_location() + self._set_protocol() + self._dispatcher.do_extra_handshake(self._request) + self._send_handshake() + + def _set_resource(self): + self._request.ws_resource = self._request.uri + + def _set_origin(self): + self._request.ws_origin = self._request.headers_in['Origin'] + + def _set_location(self): + location_parts = [] + if self._request.is_https(): + location_parts.append(_WEB_SOCKET_SECURE_SCHEME) + else: + location_parts.append(_WEB_SOCKET_SCHEME) + location_parts.append('://') + host, port = self._parse_host_header() + connection_port = self._request.connection.local_addr[1] + if port != connection_port: + raise HandshakeError('Header/connection port mismatch: %d/%d' % + (port, connection_port)) + location_parts.append(host) + if (port != _default_port(self._request.is_https())): + location_parts.append(':') + location_parts.append(str(port)) + location_parts.append(self._request.uri) + self._request.ws_location = ''.join(location_parts) + + def _parse_host_header(self): + fields = self._request.headers_in['Host'].split(':', 1) + if len(fields) == 1: + return fields[0], _default_port(self._request.is_https()) + try: + return fields[0], int(fields[1]) + except ValueError, e: + raise HandshakeError('Invalid port number format: %r' % e) + + def _set_protocol(self): + protocol = self._request.headers_in.get('WebSocket-Protocol') + if protocol is not None: + _validate_protocol(protocol) + self._request.ws_protocol = protocol + + def _send_handshake(self): + self._request.connection.write( + 'HTTP/1.1 101 Web Socket Protocol Handshake\r\n') + self._request.connection.write('Upgrade: WebSocket\r\n') + self._request.connection.write('Connection: Upgrade\r\n') + self._request.connection.write('WebSocket-Origin: ') + self._request.connection.write(self._request.ws_origin) + self._request.connection.write('\r\n') + self._request.connection.write('WebSocket-Location: ') + self._request.connection.write(self._request.ws_location) + self._request.connection.write('\r\n') + if self._request.ws_protocol: + self._request.connection.write('WebSocket-Protocol: ') + self._request.connection.write(self._request.ws_protocol) + self._request.connection.write('\r\n') + self._request.connection.write('\r\n') + + def _check_header_lines(self): + for key, expected_value in _MANDATORY_HEADERS: + actual_value = self._request.headers_in.get(key) + if not actual_value: + raise HandshakeError('Header %s is not defined' % key) + if expected_value: + if actual_value != expected_value: + raise HandshakeError('Illegal value for header %s: %s' % + (key, actual_value)) + if self._strict: + try: + lines = self._request.connection.get_memorized_lines() + except AttributeError, e: + util.prepend_message_to_exception( + 'Strict handshake is specified but the connection ' + 'doesn\'t provide get_memorized_lines()', e) + raise + self._check_first_lines(lines) + + def _check_first_lines(self, lines): + if len(lines) < len(_FIRST_FIVE_LINES): + raise HandshakeError('Too few header lines: %d' % len(lines)) + for line, regexp in zip(lines, _FIRST_FIVE_LINES): + if not regexp.search(line): + raise HandshakeError('Unexpected header: %r doesn\'t match %r' + % (line, regexp.pattern)) + sixth_and_later = ''.join(lines[5:]) + if not _SIXTH_AND_LATER.search(sixth_and_later): + raise HandshakeError('Unexpected header: %r doesn\'t match %r' + % (sixth_and_later, + _SIXTH_AND_LATER.pattern)) + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/headerparserhandler.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/headerparserhandler.py new file mode 100644 index 0000000..124b9f1 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/headerparserhandler.py @@ -0,0 +1,99 @@ +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""PythonHeaderParserHandler for mod_pywebsocket. + +Apache HTTP Server and mod_python must be configured such that this +function is called to handle Web Socket request. +""" + + +from mod_python import apache + +import dispatch +import handshake +import util + + +# PythonOption to specify the handler root directory. +_PYOPT_HANDLER_ROOT = 'mod_pywebsocket.handler_root' + +# PythonOption to specify the handler scan directory. +# This must be a directory under the root directory. +# The default is the root directory. +_PYOPT_HANDLER_SCAN = 'mod_pywebsocket.handler_scan' + + +def _create_dispatcher(): + _HANDLER_ROOT = apache.main_server.get_options().get( + _PYOPT_HANDLER_ROOT, None) + if not _HANDLER_ROOT: + raise Exception('PythonOption %s is not defined' % _PYOPT_HANDLER_ROOT, + apache.APLOG_ERR) + _HANDLER_SCAN = apache.main_server.get_options().get( + _PYOPT_HANDLER_SCAN, _HANDLER_ROOT) + dispatcher = dispatch.Dispatcher(_HANDLER_ROOT, _HANDLER_SCAN) + for warning in dispatcher.source_warnings(): + apache.log_error('mod_pywebsocket: %s' % warning, apache.APLOG_WARNING) + return dispatcher + + +# Initialize +_dispatcher = _create_dispatcher() + + +def headerparserhandler(request): + """Handle request. + + Args: + request: mod_python request. + + This function is named headerparserhandler because it is the default name + for a PythonHeaderParserHandler. + """ + + try: + handshaker = handshake.Handshaker(request, _dispatcher) + handshaker.do_handshake() + request.log_error('mod_pywebsocket: resource: %r' % request.ws_resource, + apache.APLOG_DEBUG) + _dispatcher.transfer_data(request) + except handshake.HandshakeError, e: + # Handshake for ws/wss failed. + # But the request can be valid http/https request. + request.log_error('mod_pywebsocket: %s' % e, apache.APLOG_INFO) + return apache.DECLINED + except dispatch.DispatchError, e: + request.log_error('mod_pywebsocket: %s' % e, apache.APLOG_WARNING) + return apache.DECLINED + return apache.DONE # Return DONE such that no other handlers are invoked. + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/memorizingfile.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/memorizingfile.py new file mode 100644 index 0000000..2f8a54e --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/memorizingfile.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python +# +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Memorizing file. + +A memorizing file wraps a file and memorizes lines read by readline. +""" + + +import sys + + +class MemorizingFile(object): + """MemorizingFile wraps a file and memorizes lines read by readline. + + Note that data read by other methods are not memorized. This behavior + is good enough for memorizing lines SimpleHTTPServer reads before + the control reaches WebSocketRequestHandler. + """ + def __init__(self, file_, max_memorized_lines=sys.maxint): + """Construct an instance. + + Args: + file_: the file object to wrap. + max_memorized_lines: the maximum number of lines to memorize. + Only the first max_memorized_lines are memorized. + Default: sys.maxint. + """ + self._file = file_ + self._memorized_lines = [] + self._max_memorized_lines = max_memorized_lines + + def __getattribute__(self, name): + if name in ('_file', '_memorized_lines', '_max_memorized_lines', + 'readline', 'get_memorized_lines'): + return object.__getattribute__(self, name) + return self._file.__getattribute__(name) + + def readline(self): + """Override file.readline and memorize the line read.""" + + line = self._file.readline() + if line and len(self._memorized_lines) < self._max_memorized_lines: + self._memorized_lines.append(line) + return line + + def get_memorized_lines(self): + """Get lines memorized so far.""" + return self._memorized_lines + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/msgutil.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/msgutil.py new file mode 100644 index 0000000..90ae715 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/msgutil.py @@ -0,0 +1,250 @@ +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Message related utilities. + +Note: request.connection.write/read are used in this module, even though +mod_python document says that they should be used only in connection handlers. +Unfortunately, we have no other options. For example, request.write/read are +not suitable because they don't allow direct raw bytes writing/reading. +""" + + +import Queue +import threading +import util + + +class MsgUtilException(Exception): + pass + + +def _read(request, length): + bytes = request.connection.read(length) + if not bytes: + raise MsgUtilException( + 'Failed to receive message from %r' % + (request.connection.remote_addr,)) + return bytes + + +def _write(request, bytes): + try: + request.connection.write(bytes) + except Exception, e: + util.prepend_message_to_exception( + 'Failed to send message to %r: ' % + (request.connection.remote_addr,), + e) + raise + + +def send_message(request, message): + """Send message. + + Args: + request: mod_python request. + message: unicode string to send. + """ + + _write(request, '\x00' + message.encode('utf-8') + '\xff') + + +def receive_message(request): + """Receive a Web Socket frame and return its payload as unicode string. + + Args: + request: mod_python request. + """ + + while True: + # Read 1 byte. + # mp_conn.read will block if no bytes are available. + # Timeout is controlled by TimeOut directive of Apache. + frame_type_str = _read(request, 1) + frame_type = ord(frame_type_str[0]) + if (frame_type & 0x80) == 0x80: + # The payload length is specified in the frame. + # Read and discard. + length = _payload_length(request) + _receive_bytes(request, length) + else: + # The payload is delimited with \xff. + bytes = _read_until(request, '\xff') + # The Web Socket protocol section 4.4 specifies that invalid + # characters must be replaced with U+fffd REPLACEMENT CHARACTER. + message = bytes.decode('utf-8', 'replace') + if frame_type == 0x00: + return message + # Discard data of other types. + + +def _payload_length(request): + length = 0 + while True: + b_str = _read(request, 1) + b = ord(b_str[0]) + length = length * 128 + (b & 0x7f) + if (b & 0x80) == 0: + break + return length + + +def _receive_bytes(request, length): + bytes = [] + while length > 0: + new_bytes = _read(request, length) + bytes.append(new_bytes) + length -= len(new_bytes) + return ''.join(bytes) + + +def _read_until(request, delim_char): + bytes = [] + while True: + ch = _read(request, 1) + if ch == delim_char: + break + bytes.append(ch) + return ''.join(bytes) + + +class MessageReceiver(threading.Thread): + """This class receives messages from the client. + + This class provides three ways to receive messages: blocking, non-blocking, + and via callback. Callback has the highest precedence. + + Note: This class should not be used with the standalone server for wss + because pyOpenSSL used by the server raises a fatal error if the socket + is accessed from multiple threads. + """ + def __init__(self, request, onmessage=None): + """Construct an instance. + + Args: + request: mod_python request. + onmessage: a function to be called when a message is received. + May be None. If not None, the function is called on + another thread. In that case, MessageReceiver.receive + and MessageReceiver.receive_nowait are useless because + they will never return any messages. + """ + threading.Thread.__init__(self) + self._request = request + self._queue = Queue.Queue() + self._onmessage = onmessage + self._stop_requested = False + self.setDaemon(True) + self.start() + + def run(self): + while not self._stop_requested: + message = receive_message(self._request) + if self._onmessage: + self._onmessage(message) + else: + self._queue.put(message) + + def receive(self): + """ Receive a message from the channel, blocking. + + Returns: + message as a unicode string. + """ + return self._queue.get() + + def receive_nowait(self): + """ Receive a message from the channel, non-blocking. + + Returns: + message as a unicode string if available. None otherwise. + """ + try: + message = self._queue.get_nowait() + except Queue.Empty: + message = None + return message + + def stop(self): + """Request to stop this instance. + + The instance will be stopped after receiving the next message. + This method may not be very useful, but there is no clean way + in Python to forcefully stop a running thread. + """ + self._stop_requested = True + + +class MessageSender(threading.Thread): + """This class sends messages to the client. + + This class provides both synchronous and asynchronous ways to send + messages. + + Note: This class should not be used with the standalone server for wss + because pyOpenSSL used by the server raises a fatal error if the socket + is accessed from multiple threads. + """ + def __init__(self, request): + """Construct an instance. + + Args: + request: mod_python request. + """ + threading.Thread.__init__(self) + self._request = request + self._queue = Queue.Queue() + self.setDaemon(True) + self.start() + + def run(self): + while True: + message, condition = self._queue.get() + condition.acquire() + send_message(self._request, message) + condition.notify() + condition.release() + + def send(self, message): + """Send a message, blocking.""" + + condition = threading.Condition() + condition.acquire() + self._queue.put((message, condition)) + condition.wait() + + def send_nowait(self, message): + """Send a message, non-blocking.""" + + self._queue.put((message, threading.Condition())) + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/standalone.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/standalone.py new file mode 100644 index 0000000..f411910 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/standalone.py @@ -0,0 +1,453 @@ +#!/usr/bin/env python +# +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Standalone Web Socket server. + +Use this server to run mod_pywebsocket without Apache HTTP Server. + +Usage: + python standalone.py [-p <ws_port>] [-w <websock_handlers>] + [-s <scan_dir>] + [-d <document_root>] + [-m <websock_handlers_map_file>] + ... for other options, see _main below ... + +<ws_port> is the port number to use for ws:// connection. + +<document_root> is the path to the root directory of HTML files. + +<websock_handlers> is the path to the root directory of Web Socket handlers. +See __init__.py for details of <websock_handlers> and how to write Web Socket +handlers. If this path is relative, <document_root> is used as the base. + +<scan_dir> is a path under the root directory. If specified, only the handlers +under scan_dir are scanned. This is useful in saving scan time. + +Note: +This server is derived from SocketServer.ThreadingMixIn. Hence a thread is +used for each request. +""" + +import BaseHTTPServer +import CGIHTTPServer +import SimpleHTTPServer +import SocketServer +import logging +import logging.handlers +import optparse +import os +import re +import socket +import sys + +_HAS_OPEN_SSL = False +try: + import OpenSSL.SSL + _HAS_OPEN_SSL = True +except ImportError: + pass + +import dispatch +import handshake +import memorizingfile +import util + + +_LOG_LEVELS = { + 'debug': logging.DEBUG, + 'info': logging.INFO, + 'warn': logging.WARN, + 'error': logging.ERROR, + 'critical': logging.CRITICAL}; + +_DEFAULT_LOG_MAX_BYTES = 1024 * 256 +_DEFAULT_LOG_BACKUP_COUNT = 5 + +_DEFAULT_REQUEST_QUEUE_SIZE = 128 + +# 1024 is practically large enough to contain WebSocket handshake lines. +_MAX_MEMORIZED_LINES = 1024 + +def _print_warnings_if_any(dispatcher): + warnings = dispatcher.source_warnings() + if warnings: + for warning in warnings: + logging.warning('mod_pywebsocket: %s' % warning) + + +class _StandaloneConnection(object): + """Mimic mod_python mp_conn.""" + + def __init__(self, request_handler): + """Construct an instance. + + Args: + request_handler: A WebSocketRequestHandler instance. + """ + self._request_handler = request_handler + + def get_local_addr(self): + """Getter to mimic mp_conn.local_addr.""" + return (self._request_handler.server.server_name, + self._request_handler.server.server_port) + local_addr = property(get_local_addr) + + def get_remote_addr(self): + """Getter to mimic mp_conn.remote_addr. + + Setting the property in __init__ won't work because the request + handler is not initialized yet there.""" + return self._request_handler.client_address + remote_addr = property(get_remote_addr) + + def write(self, data): + """Mimic mp_conn.write().""" + return self._request_handler.wfile.write(data) + + def read(self, length): + """Mimic mp_conn.read().""" + return self._request_handler.rfile.read(length) + + def get_memorized_lines(self): + """Get memorized lines.""" + return self._request_handler.rfile.get_memorized_lines() + + +class _StandaloneRequest(object): + """Mimic mod_python request.""" + + def __init__(self, request_handler, use_tls): + """Construct an instance. + + Args: + request_handler: A WebSocketRequestHandler instance. + """ + self._request_handler = request_handler + self.connection = _StandaloneConnection(request_handler) + self._use_tls = use_tls + + def get_uri(self): + """Getter to mimic request.uri.""" + return self._request_handler.path + uri = property(get_uri) + + def get_headers_in(self): + """Getter to mimic request.headers_in.""" + return self._request_handler.headers + headers_in = property(get_headers_in) + + def is_https(self): + """Mimic request.is_https().""" + return self._use_tls + + +class WebSocketServer(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer): + """HTTPServer specialized for Web Socket.""" + + SocketServer.ThreadingMixIn.daemon_threads = True + + def __init__(self, server_address, RequestHandlerClass): + """Override SocketServer.BaseServer.__init__.""" + + SocketServer.BaseServer.__init__( + self, server_address, RequestHandlerClass) + self.socket = self._create_socket() + self.server_bind() + self.server_activate() + + def _create_socket(self): + socket_ = socket.socket(self.address_family, self.socket_type) + if WebSocketServer.options.use_tls: + ctx = OpenSSL.SSL.Context(OpenSSL.SSL.SSLv23_METHOD) + ctx.use_privatekey_file(WebSocketServer.options.private_key) + ctx.use_certificate_file(WebSocketServer.options.certificate) + socket_ = OpenSSL.SSL.Connection(ctx, socket_) + return socket_ + + def handle_error(self, rquest, client_address): + """Override SocketServer.handle_error.""" + + logging.error( + ('Exception in processing request from: %r' % (client_address,)) + + '\n' + util.get_stack_trace()) + # Note: client_address is a tuple. To match it against %r, we need the + # trailing comma. + + +class WebSocketRequestHandler(CGIHTTPServer.CGIHTTPRequestHandler): + """CGIHTTPRequestHandler specialized for Web Socket.""" + + def setup(self): + """Override SocketServer.StreamRequestHandler.setup.""" + + self.connection = self.request + self.rfile = memorizingfile.MemorizingFile( + socket._fileobject(self.request, 'rb', self.rbufsize), + max_memorized_lines=_MAX_MEMORIZED_LINES) + self.wfile = socket._fileobject(self.request, 'wb', self.wbufsize) + + def __init__(self, *args, **keywords): + self._request = _StandaloneRequest( + self, WebSocketRequestHandler.options.use_tls) + self._dispatcher = WebSocketRequestHandler.options.dispatcher + self._print_warnings_if_any() + self._handshaker = handshake.Handshaker( + self._request, self._dispatcher, + WebSocketRequestHandler.options.strict) + CGIHTTPServer.CGIHTTPRequestHandler.__init__( + self, *args, **keywords) + + def _print_warnings_if_any(self): + warnings = self._dispatcher.source_warnings() + if warnings: + for warning in warnings: + logging.warning('mod_pywebsocket: %s' % warning) + + def parse_request(self): + """Override BaseHTTPServer.BaseHTTPRequestHandler.parse_request. + + Return True to continue processing for HTTP(S), False otherwise. + """ + result = CGIHTTPServer.CGIHTTPRequestHandler.parse_request(self) + if result: + try: + self._handshaker.do_handshake() + self._dispatcher.transfer_data(self._request) + return False + except handshake.HandshakeError, e: + # Handshake for ws(s) failed. Assume http(s). + logging.info('mod_pywebsocket: %s' % e) + return True + except dispatch.DispatchError, e: + logging.warning('mod_pywebsocket: %s' % e) + return False + except Exception, e: + logging.warning('mod_pywebsocket: %s' % e) + logging.info('mod_pywebsocket: %s' % util.get_stack_trace()) + return False + return result + + def log_request(self, code='-', size='-'): + """Override BaseHTTPServer.log_request.""" + + logging.info('"%s" %s %s', + self.requestline, str(code), str(size)) + + def log_error(self, *args): + """Override BaseHTTPServer.log_error.""" + + # Despite the name, this method is for warnings than for errors. + # For example, HTTP status code is logged by this method. + logging.warn('%s - %s' % (self.address_string(), (args[0] % args[1:]))) + + def is_cgi(self): + """Test whether self.path corresponds to a CGI script. + + Add extra check that self.path doesn't contains .. + Also check if the file is a executable file or not. + If the file is not executable, it is handled as static file or dir + rather than a CGI script. + """ + if CGIHTTPServer.CGIHTTPRequestHandler.is_cgi(self): + if '..' in self.path: + return False + # strip query parameter from request path + resource_name = self.path.split('?', 2)[0] + # convert resource_name into real path name in filesystem. + scriptfile = self.translate_path(resource_name) + if not os.path.isfile(scriptfile): + return False + if not self.is_executable(scriptfile): + return False + return True + return False + + +def _configure_logging(options): + logger = logging.getLogger() + logger.setLevel(_LOG_LEVELS[options.log_level]) + if options.log_file: + handler = logging.handlers.RotatingFileHandler( + options.log_file, 'a', options.log_max, options.log_count) + else: + handler = logging.StreamHandler() + formatter = logging.Formatter( + "[%(asctime)s] [%(levelname)s] %(name)s: %(message)s") + handler.setFormatter(formatter) + logger.addHandler(handler) + +def _alias_handlers(dispatcher, websock_handlers_map_file): + """Set aliases specified in websock_handler_map_file in dispatcher. + + Args: + dispatcher: dispatch.Dispatcher instance + websock_handler_map_file: alias map file + """ + fp = open(websock_handlers_map_file) + try: + for line in fp: + if line[0] == '#' or line.isspace(): + continue + m = re.match('(\S+)\s+(\S+)', line) + if not m: + logging.warning('Wrong format in map file:' + line) + continue + try: + dispatcher.add_resource_path_alias( + m.group(1), m.group(2)) + except dispatch.DispatchError, e: + logging.error(str(e)) + finally: + fp.close() + + + +def _main(): + parser = optparse.OptionParser() + parser.add_option('-H', '--server-host', '--server_host', + dest='server_host', + default='', + help='server hostname to listen to') + parser.add_option('-p', '--port', dest='port', type='int', + default=handshake._DEFAULT_WEB_SOCKET_PORT, + help='port to listen to') + parser.add_option('-w', '--websock-handlers', '--websock_handlers', + dest='websock_handlers', + default='.', + help='Web Socket handlers root directory.') + parser.add_option('-m', '--websock-handlers-map-file', + '--websock_handlers_map_file', + dest='websock_handlers_map_file', + default=None, + help=('Web Socket handlers map file. ' + 'Each line consists of alias_resource_path and ' + 'existing_resource_path, separated by spaces.')) + parser.add_option('-s', '--scan-dir', '--scan_dir', dest='scan_dir', + default=None, + help=('Web Socket handlers scan directory. ' + 'Must be a directory under websock_handlers.')) + parser.add_option('-d', '--document-root', '--document_root', + dest='document_root', default='.', + help='Document root directory.') + parser.add_option('-x', '--cgi-paths', '--cgi_paths', dest='cgi_paths', + default=None, + help=('CGI paths relative to document_root.' + 'Comma-separated. (e.g -x /cgi,/htbin) ' + 'Files under document_root/cgi_path are handled ' + 'as CGI programs. Must be executable.')) + parser.add_option('-t', '--tls', dest='use_tls', action='store_true', + default=False, help='use TLS (wss://)') + parser.add_option('-k', '--private-key', '--private_key', + dest='private_key', + default='', help='TLS private key file.') + parser.add_option('-c', '--certificate', dest='certificate', + default='', help='TLS certificate file.') + parser.add_option('-l', '--log-file', '--log_file', dest='log_file', + default='', help='Log file.') + parser.add_option('--log-level', '--log_level', type='choice', + dest='log_level', default='warn', + choices=['debug', 'info', 'warn', 'error', 'critical'], + help='Log level.') + parser.add_option('--log-max', '--log_max', dest='log_max', type='int', + default=_DEFAULT_LOG_MAX_BYTES, + help='Log maximum bytes') + parser.add_option('--log-count', '--log_count', dest='log_count', + type='int', default=_DEFAULT_LOG_BACKUP_COUNT, + help='Log backup count') + parser.add_option('--strict', dest='strict', action='store_true', + default=False, help='Strictly check handshake request') + parser.add_option('-q', '--queue', dest='request_queue_size', type='int', + default=_DEFAULT_REQUEST_QUEUE_SIZE, + help='request queue size') + options = parser.parse_args()[0] + + os.chdir(options.document_root) + + _configure_logging(options) + + SocketServer.TCPServer.request_queue_size = options.request_queue_size + CGIHTTPServer.CGIHTTPRequestHandler.cgi_directories = [] + + if options.cgi_paths: + CGIHTTPServer.CGIHTTPRequestHandler.cgi_directories = \ + options.cgi_paths.split(',') + if sys.platform in ('cygwin', 'win32'): + cygwin_path = None + # For Win32 Python, it is expected that CYGWIN_PATH + # is set to a directory of cygwin binaries. + # For example, websocket_server.py in Chromium sets CYGWIN_PATH to + # full path of third_party/cygwin/bin. + if 'CYGWIN_PATH' in os.environ: + cygwin_path = os.environ['CYGWIN_PATH'] + util.wrap_popen3_for_win(cygwin_path) + def __check_script(scriptpath): + return util.get_script_interp(scriptpath, cygwin_path) + CGIHTTPServer.executable = __check_script + + if options.use_tls: + if not _HAS_OPEN_SSL: + logging.critical('To use TLS, install pyOpenSSL.') + sys.exit(1) + if not options.private_key or not options.certificate: + logging.critical( + 'To use TLS, specify private_key and certificate.') + sys.exit(1) + + if not options.scan_dir: + options.scan_dir = options.websock_handlers + + try: + # Share a Dispatcher among request handlers to save time for + # instantiation. Dispatcher can be shared because it is thread-safe. + options.dispatcher = dispatch.Dispatcher(options.websock_handlers, + options.scan_dir) + if options.websock_handlers_map_file: + _alias_handlers(options.dispatcher, + options.websock_handlers_map_file) + _print_warnings_if_any(options.dispatcher) + + WebSocketRequestHandler.options = options + WebSocketServer.options = options + + server = WebSocketServer((options.server_host, options.port), + WebSocketRequestHandler) + server.serve_forever() + except Exception, e: + logging.critical(str(e)) + sys.exit(1) + + +if __name__ == '__main__': + _main() + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/util.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/util.py new file mode 100644 index 0000000..8ec9dca --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/mod_pywebsocket/util.py @@ -0,0 +1,121 @@ +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Web Sockets utilities. +""" + + +import StringIO +import os +import re +import traceback + + +def get_stack_trace(): + """Get the current stack trace as string. + + This is needed to support Python 2.3. + TODO: Remove this when we only support Python 2.4 and above. + Use traceback.format_exc instead. + """ + + out = StringIO.StringIO() + traceback.print_exc(file=out) + return out.getvalue() + + +def prepend_message_to_exception(message, exc): + """Prepend message to the exception.""" + + exc.args = (message + str(exc),) + return + + +def __translate_interp(interp, cygwin_path): + """Translate interp program path for Win32 python to run cygwin program + (e.g. perl). Note that it doesn't support path that contains space, + which is typically true for Unix, where #!-script is written. + For Win32 python, cygwin_path is a directory of cygwin binaries. + + Args: + interp: interp command line + cygwin_path: directory name of cygwin binary, or None + Returns: + translated interp command line. + """ + if not cygwin_path: + return interp + m = re.match("^[^ ]*/([^ ]+)( .*)?", interp) + if m: + cmd = os.path.join(cygwin_path, m.group(1)) + return cmd + m.group(2) + return interp + + +def get_script_interp(script_path, cygwin_path=None): + """Gets #!-interpreter command line from the script. + + It also fixes command path. When Cygwin Python is used, e.g. in WebKit, + it could run "/usr/bin/perl -wT hello.pl". + When Win32 Python is used, e.g. in Chromium, it couldn't. So, fix + "/usr/bin/perl" to "<cygwin_path>\perl.exe". + + Args: + script_path: pathname of the script + cygwin_path: directory name of cygwin binary, or None + Returns: + #!-interpreter command line, or None if it is not #!-script. + """ + fp = open(script_path) + line = fp.readline() + fp.close() + m = re.match("^#!(.*)", line) + if m: + return __translate_interp(m.group(1), cygwin_path) + return None + +def wrap_popen3_for_win(cygwin_path): + """Wrap popen3 to support #!-script on Windows. + + Args: + cygwin_path: path for cygwin binary if command path is needed to be + translated. None if no translation required. + """ + __orig_popen3 = os.popen3 + def __wrap_popen3(cmd, mode='t', bufsize=-1): + cmdline = cmd.split(' ') + interp = get_script_interp(cmdline[0], cygwin_path) + if interp: + cmd = interp + " " + cmd + return __orig_popen3(cmd, mode, bufsize) + os.popen3 = __wrap_popen3 + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/setup.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/setup.py new file mode 100644 index 0000000..a34a83b --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/setup.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python +# +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Set up script for mod_pywebsocket. +""" + + +from distutils.core import setup +import sys + + +_PACKAGE_NAME = 'mod_pywebsocket' + +if sys.version < '2.3': + print >>sys.stderr, '%s requires Python 2.3 or later.' % _PACKAGE_NAME + sys.exit(1) + +setup(author='Yuzo Fujishima', + author_email='yuzo@chromium.org', + description='Web Socket extension for Apache HTTP Server.', + long_description=( + 'mod_pywebsocket is an Apache HTTP Server extension for ' + 'Web Socket (http://tools.ietf.org/html/' + 'draft-hixie-thewebsocketprotocol). ' + 'See mod_pywebsocket/__init__.py for more detail.'), + license='See COPYING', + name=_PACKAGE_NAME, + packages=[_PACKAGE_NAME], + url='http://code.google.com/p/pywebsocket/', + version='0.4.9.2', + ) + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/config.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/config.py new file mode 100644 index 0000000..5aaab8c --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/config.py @@ -0,0 +1,45 @@ +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Configuration for testing. + +Test files should import this module before mod_pywebsocket. +""" + + +import os +import sys + + +# Add the parent directory to sys.path to enable importing mod_pywebsocket. +sys.path += [os.path.join(os.path.split(__file__)[0], '..')] + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/mock.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/mock.py new file mode 100644 index 0000000..3b85d64 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/mock.py @@ -0,0 +1,205 @@ +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Mocks for testing. +""" + + +import Queue +import threading + + +class _MockConnBase(object): + """Base class of mocks for mod_python.apache.mp_conn. + + This enables tests to check what is written to a (mock) mp_conn. + """ + + def __init__(self): + self._write_data = [] + + def write(self, data): + """Override mod_python.apache.mp_conn.write.""" + + self._write_data.append(data) + + def written_data(self): + """Get bytes written to this mock.""" + + return ''.join(self._write_data) + + +class MockConn(_MockConnBase): + """Mock for mod_python.apache.mp_conn. + + This enables tests to specify what should be read from a (mock) mp_conn as + well as to check what is written to it. + """ + + def __init__(self, read_data): + """Constructs an instance. + + Args: + read_data: bytes that should be returned when read* methods are + called. + """ + + _MockConnBase.__init__(self) + self._read_data = read_data + self._read_pos = 0 + + def readline(self): + """Override mod_python.apache.mp_conn.readline.""" + + if self._read_pos >= len(self._read_data): + return '' + end_index = self._read_data.find('\n', self._read_pos) + 1 + if not end_index: + end_index = len(self._read_data) + return self._read_up_to(end_index) + + def read(self, length): + """Override mod_python.apache.mp_conn.read.""" + + if self._read_pos >= len(self._read_data): + return '' + end_index = min(len(self._read_data), self._read_pos + length) + return self._read_up_to(end_index) + + def _read_up_to(self, end_index): + line = self._read_data[self._read_pos:end_index] + self._read_pos = end_index + return line + + +class MockBlockingConn(_MockConnBase): + """Blocking mock for mod_python.apache.mp_conn. + + This enables tests to specify what should be read from a (mock) mp_conn as + well as to check what is written to it. + Callers of read* methods will block if there is no bytes available. + """ + + def __init__(self): + _MockConnBase.__init__(self) + self._queue = Queue.Queue() + + def readline(self): + """Override mod_python.apache.mp_conn.readline.""" + line = '' + while True: + c = self._queue.get() + line += c + if c == '\n': + return line + + def read(self, length): + """Override mod_python.apache.mp_conn.read.""" + + data = '' + for unused in range(length): + data += self._queue.get() + return data + + def put_bytes(self, bytes): + """Put bytes to be read from this mock. + + Args: + bytes: bytes to be read. + """ + + for byte in bytes: + self._queue.put(byte) + + +class MockTable(dict): + """Mock table. + + This mimics mod_python mp_table. Note that only the methods used by + tests are overridden. + """ + + def __init__(self, copy_from={}): + if isinstance(copy_from, dict): + copy_from = copy_from.items() + for key, value in copy_from: + self.__setitem__(key, value) + + def __getitem__(self, key): + return super(MockTable, self).__getitem__(key.lower()) + + def __setitem__(self, key, value): + super(MockTable, self).__setitem__(key.lower(), value) + + def get(self, key, def_value=None): + return super(MockTable, self).get(key.lower(), def_value) + + +class MockRequest(object): + """Mock request. + + This mimics mod_python request. + """ + + def __init__(self, uri=None, headers_in={}, connection=None, + is_https=False): + """Construct an instance. + + Arguments: + uri: URI of the request. + headers_in: Request headers. + connection: Connection used for the request. + is_https: Whether this request is over SSL. + + See the document of mod_python Request for details. + """ + self.uri = uri + self.connection = connection + self.headers_in = MockTable(headers_in) + # self.is_https_ needs to be accessible from tests. To avoid name + # conflict with self.is_https(), it is named as such. + self.is_https_ = is_https + + def is_https(self): + """Return whether this request is over SSL.""" + return self.is_https_ + + +class MockDispatcher(object): + """Mock for dispatch.Dispatcher.""" + + def do_extra_handshake(self, conn_context): + pass + + def transfer_data(self, conn_context): + pass + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/run_all.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/run_all.py new file mode 100644 index 0000000..3885618 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/run_all.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Run all tests in the same directory. +""" + + +import os +import re +import unittest + + +_TEST_MODULE_PATTERN = re.compile(r'^(test_.+)\.py$') + + +def _list_test_modules(directory): + module_names = [] + for filename in os.listdir(directory): + match = _TEST_MODULE_PATTERN.search(filename) + if match: + module_names.append(match.group(1)) + return module_names + + +def _suite(): + loader = unittest.TestLoader() + return loader.loadTestsFromNames( + _list_test_modules(os.path.join(os.path.split(__file__)[0], '.'))) + + +if __name__ == '__main__': + unittest.main(defaultTest='_suite') + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/test_dispatch.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/test_dispatch.py new file mode 100644 index 0000000..5403228 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/test_dispatch.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python +# +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Tests for dispatch module.""" + + + +import os +import unittest + +import config # This must be imported before mod_pywebsocket. +from mod_pywebsocket import dispatch + +import mock + + +_TEST_HANDLERS_DIR = os.path.join( + os.path.split(__file__)[0], 'testdata', 'handlers') + +_TEST_HANDLERS_SUB_DIR = os.path.join(_TEST_HANDLERS_DIR, 'sub') + +class DispatcherTest(unittest.TestCase): + def test_normalize_path(self): + self.assertEqual(os.path.abspath('/a/b').replace('\\', '/'), + dispatch._normalize_path('/a/b')) + self.assertEqual(os.path.abspath('/a/b').replace('\\', '/'), + dispatch._normalize_path('\\a\\b')) + self.assertEqual(os.path.abspath('/a/b').replace('\\', '/'), + dispatch._normalize_path('/a/c/../b')) + self.assertEqual(os.path.abspath('abc').replace('\\', '/'), + dispatch._normalize_path('abc')) + + def test_converter(self): + converter = dispatch._path_to_resource_converter('/a/b') + self.assertEqual('/h', converter('/a/b/h_wsh.py')) + self.assertEqual('/c/h', converter('/a/b/c/h_wsh.py')) + self.assertEqual(None, converter('/a/b/h.py')) + self.assertEqual(None, converter('a/b/h_wsh.py')) + + converter = dispatch._path_to_resource_converter('a/b') + self.assertEqual('/h', converter('a/b/h_wsh.py')) + + converter = dispatch._path_to_resource_converter('/a/b///') + self.assertEqual('/h', converter('/a/b/h_wsh.py')) + self.assertEqual('/h', converter('/a/b/../b/h_wsh.py')) + + converter = dispatch._path_to_resource_converter('/a/../a/b/../b/') + self.assertEqual('/h', converter('/a/b/h_wsh.py')) + + converter = dispatch._path_to_resource_converter(r'\a\b') + self.assertEqual('/h', converter(r'\a\b\h_wsh.py')) + self.assertEqual('/h', converter(r'/a/b/h_wsh.py')) + + def test_source_file_paths(self): + paths = list(dispatch._source_file_paths(_TEST_HANDLERS_DIR)) + paths.sort() + self.assertEqual(7, len(paths)) + expected_paths = [ + os.path.join(_TEST_HANDLERS_DIR, 'blank_wsh.py'), + os.path.join(_TEST_HANDLERS_DIR, 'origin_check_wsh.py'), + os.path.join(_TEST_HANDLERS_DIR, 'sub', + 'exception_in_transfer_wsh.py'), + os.path.join(_TEST_HANDLERS_DIR, 'sub', 'non_callable_wsh.py'), + os.path.join(_TEST_HANDLERS_DIR, 'sub', 'plain_wsh.py'), + os.path.join(_TEST_HANDLERS_DIR, 'sub', + 'wrong_handshake_sig_wsh.py'), + os.path.join(_TEST_HANDLERS_DIR, 'sub', + 'wrong_transfer_sig_wsh.py'), + ] + for expected, actual in zip(expected_paths, paths): + self.assertEqual(expected, actual) + + def test_source(self): + self.assertRaises(dispatch.DispatchError, dispatch._source, '') + self.assertRaises(dispatch.DispatchError, dispatch._source, 'def') + self.assertRaises(dispatch.DispatchError, dispatch._source, '1/0') + self.failUnless(dispatch._source( + 'def web_socket_do_extra_handshake(request):pass\n' + 'def web_socket_transfer_data(request):pass\n')) + + def test_source_warnings(self): + dispatcher = dispatch.Dispatcher(_TEST_HANDLERS_DIR, None) + warnings = dispatcher.source_warnings() + warnings.sort() + expected_warnings = [ + (os.path.join(_TEST_HANDLERS_DIR, 'blank_wsh.py') + + ': web_socket_do_extra_handshake is not defined.'), + (os.path.join(_TEST_HANDLERS_DIR, 'sub', + 'non_callable_wsh.py') + + ': web_socket_do_extra_handshake is not callable.'), + (os.path.join(_TEST_HANDLERS_DIR, 'sub', + 'wrong_handshake_sig_wsh.py') + + ': web_socket_do_extra_handshake is not defined.'), + (os.path.join(_TEST_HANDLERS_DIR, 'sub', + 'wrong_transfer_sig_wsh.py') + + ': web_socket_transfer_data is not defined.'), + ] + self.assertEquals(4, len(warnings)) + for expected, actual in zip(expected_warnings, warnings): + self.assertEquals(expected, actual) + + def test_do_extra_handshake(self): + dispatcher = dispatch.Dispatcher(_TEST_HANDLERS_DIR, None) + request = mock.MockRequest() + request.ws_resource = '/origin_check' + request.ws_origin = 'http://example.com' + dispatcher.do_extra_handshake(request) # Must not raise exception. + + request.ws_origin = 'http://bad.example.com' + self.assertRaises(Exception, dispatcher.do_extra_handshake, request) + + def test_transfer_data(self): + dispatcher = dispatch.Dispatcher(_TEST_HANDLERS_DIR, None) + request = mock.MockRequest(connection=mock.MockConn('')) + request.ws_resource = '/origin_check' + request.ws_protocol = 'p1' + + dispatcher.transfer_data(request) + self.assertEqual('origin_check_wsh.py is called for /origin_check, p1', + request.connection.written_data()) + + request = mock.MockRequest(connection=mock.MockConn('')) + request.ws_resource = '/sub/plain' + request.ws_protocol = None + dispatcher.transfer_data(request) + self.assertEqual('sub/plain_wsh.py is called for /sub/plain, None', + request.connection.written_data()) + + request = mock.MockRequest(connection=mock.MockConn('')) + request.ws_resource = '/sub/plain?' + request.ws_protocol = None + dispatcher.transfer_data(request) + self.assertEqual('sub/plain_wsh.py is called for /sub/plain?, None', + request.connection.written_data()) + + request = mock.MockRequest(connection=mock.MockConn('')) + request.ws_resource = '/sub/plain?q=v' + request.ws_protocol = None + dispatcher.transfer_data(request) + self.assertEqual('sub/plain_wsh.py is called for /sub/plain?q=v, None', + request.connection.written_data()) + + def test_transfer_data_no_handler(self): + dispatcher = dispatch.Dispatcher(_TEST_HANDLERS_DIR, None) + for resource in ['/blank', '/sub/non_callable', + '/sub/no_wsh_at_the_end', '/does/not/exist']: + request = mock.MockRequest(connection=mock.MockConn('')) + request.ws_resource = resource + request.ws_protocol = 'p2' + try: + dispatcher.transfer_data(request) + self.fail() + except dispatch.DispatchError, e: + self.failUnless(str(e).find('No handler') != -1) + except Exception: + self.fail() + + def test_transfer_data_handler_exception(self): + dispatcher = dispatch.Dispatcher(_TEST_HANDLERS_DIR, None) + request = mock.MockRequest(connection=mock.MockConn('')) + request.ws_resource = '/sub/exception_in_transfer' + request.ws_protocol = 'p3' + try: + dispatcher.transfer_data(request) + self.fail() + except Exception, e: + self.failUnless(str(e).find('Intentional') != -1) + + def test_scan_dir(self): + disp = dispatch.Dispatcher(_TEST_HANDLERS_DIR, None) + self.assertEqual(3, len(disp._handlers)) + self.failUnless(disp._handlers.has_key('/origin_check')) + self.failUnless(disp._handlers.has_key('/sub/exception_in_transfer')) + self.failUnless(disp._handlers.has_key('/sub/plain')) + + def test_scan_sub_dir(self): + disp = dispatch.Dispatcher(_TEST_HANDLERS_DIR, _TEST_HANDLERS_SUB_DIR) + self.assertEqual(2, len(disp._handlers)) + self.failIf(disp._handlers.has_key('/origin_check')) + self.failUnless(disp._handlers.has_key('/sub/exception_in_transfer')) + self.failUnless(disp._handlers.has_key('/sub/plain')) + + def test_scan_sub_dir_as_root(self): + disp = dispatch.Dispatcher(_TEST_HANDLERS_SUB_DIR, + _TEST_HANDLERS_SUB_DIR) + self.assertEqual(2, len(disp._handlers)) + self.failIf(disp._handlers.has_key('/origin_check')) + self.failIf(disp._handlers.has_key('/sub/exception_in_transfer')) + self.failIf(disp._handlers.has_key('/sub/plain')) + self.failUnless(disp._handlers.has_key('/exception_in_transfer')) + self.failUnless(disp._handlers.has_key('/plain')) + + def test_scan_dir_must_under_root(self): + dispatch.Dispatcher('a/b', 'a/b/c') # OK + dispatch.Dispatcher('a/b///', 'a/b') # OK + self.assertRaises(dispatch.DispatchError, + dispatch.Dispatcher, 'a/b/c', 'a/b') + + def test_resource_path_alias(self): + disp = dispatch.Dispatcher(_TEST_HANDLERS_DIR, None) + disp.add_resource_path_alias('/', '/origin_check') + self.assertEqual(4, len(disp._handlers)) + self.failUnless(disp._handlers.has_key('/origin_check')) + self.failUnless(disp._handlers.has_key('/sub/exception_in_transfer')) + self.failUnless(disp._handlers.has_key('/sub/plain')) + self.failUnless(disp._handlers.has_key('/')) + self.assertRaises(dispatch.DispatchError, + disp.add_resource_path_alias, '/alias', '/not-exist') + + +if __name__ == '__main__': + unittest.main() + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/test_handshake.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/test_handshake.py new file mode 100644 index 0000000..8bf07be --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/test_handshake.py @@ -0,0 +1,513 @@ +#!/usr/bin/env python +# +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Tests for handshake module.""" + + +import unittest + +import config # This must be imported before mod_pywebsocket. +from mod_pywebsocket import handshake + +import mock + + +_GOOD_REQUEST = ( + 80, + '/demo', + { + 'Upgrade':'WebSocket', + 'Connection':'Upgrade', + 'Host':'example.com', + 'Origin':'http://example.com', + 'WebSocket-Protocol':'sample', + } +) + +_GOOD_RESPONSE_DEFAULT_PORT = ( + 'HTTP/1.1 101 Web Socket Protocol Handshake\r\n' + 'Upgrade: WebSocket\r\n' + 'Connection: Upgrade\r\n' + 'WebSocket-Origin: http://example.com\r\n' + 'WebSocket-Location: ws://example.com/demo\r\n' + 'WebSocket-Protocol: sample\r\n' + '\r\n') + +_GOOD_RESPONSE_SECURE = ( + 'HTTP/1.1 101 Web Socket Protocol Handshake\r\n' + 'Upgrade: WebSocket\r\n' + 'Connection: Upgrade\r\n' + 'WebSocket-Origin: http://example.com\r\n' + 'WebSocket-Location: wss://example.com/demo\r\n' + 'WebSocket-Protocol: sample\r\n' + '\r\n') + +_GOOD_REQUEST_NONDEFAULT_PORT = ( + 8081, + '/demo', + { + 'Upgrade':'WebSocket', + 'Connection':'Upgrade', + 'Host':'example.com:8081', + 'Origin':'http://example.com', + 'WebSocket-Protocol':'sample', + } +) + +_GOOD_RESPONSE_NONDEFAULT_PORT = ( + 'HTTP/1.1 101 Web Socket Protocol Handshake\r\n' + 'Upgrade: WebSocket\r\n' + 'Connection: Upgrade\r\n' + 'WebSocket-Origin: http://example.com\r\n' + 'WebSocket-Location: ws://example.com:8081/demo\r\n' + 'WebSocket-Protocol: sample\r\n' + '\r\n') + +_GOOD_RESPONSE_SECURE_NONDEF = ( + 'HTTP/1.1 101 Web Socket Protocol Handshake\r\n' + 'Upgrade: WebSocket\r\n' + 'Connection: Upgrade\r\n' + 'WebSocket-Origin: http://example.com\r\n' + 'WebSocket-Location: wss://example.com:8081/demo\r\n' + 'WebSocket-Protocol: sample\r\n' + '\r\n') + +_GOOD_REQUEST_NO_PROTOCOL = ( + 80, + '/demo', + { + 'Upgrade':'WebSocket', + 'Connection':'Upgrade', + 'Host':'example.com', + 'Origin':'http://example.com', + } +) + +_GOOD_RESPONSE_NO_PROTOCOL = ( + 'HTTP/1.1 101 Web Socket Protocol Handshake\r\n' + 'Upgrade: WebSocket\r\n' + 'Connection: Upgrade\r\n' + 'WebSocket-Origin: http://example.com\r\n' + 'WebSocket-Location: ws://example.com/demo\r\n' + '\r\n') + +_GOOD_REQUEST_WITH_OPTIONAL_HEADERS = ( + 80, + '/demo', + { + 'Upgrade':'WebSocket', + 'Connection':'Upgrade', + 'Host':'example.com', + 'Origin':'http://example.com', + 'WebSocket-Protocol':'sample', + 'AKey':'AValue', + 'EmptyValue':'', + } +) + +_BAD_REQUESTS = ( + ( # HTTP request + 80, + '/demo', + { + 'Host':'www.google.com', + 'User-Agent':'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5;' + ' en-US; rv:1.9.1.3) Gecko/20090824 Firefox/3.5.3' + ' GTB6 GTBA', + 'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,' + '*/*;q=0.8', + 'Accept-Language':'en-us,en;q=0.5', + 'Accept-Encoding':'gzip,deflate', + 'Accept-Charset':'ISO-8859-1,utf-8;q=0.7,*;q=0.7', + 'Keep-Alive':'300', + 'Connection':'keep-alive', + } + ), + ( # Missing Upgrade + 80, + '/demo', + { + 'Connection':'Upgrade', + 'Host':'example.com', + 'Origin':'http://example.com', + 'WebSocket-Protocol':'sample', + } + ), + ( # Wrong Upgrade + 80, + '/demo', + { + 'Upgrade':'NonWebSocket', + 'Connection':'Upgrade', + 'Host':'example.com', + 'Origin':'http://example.com', + 'WebSocket-Protocol':'sample', + } + ), + ( # Empty WebSocket-Protocol + 80, + '/demo', + { + 'Upgrade':'WebSocket', + 'Connection':'Upgrade', + 'Host':'example.com', + 'Origin':'http://example.com', + 'WebSocket-Protocol':'', + } + ), + ( # Wrong port number format + 80, + '/demo', + { + 'Upgrade':'WebSocket', + 'Connection':'Upgrade', + 'Host':'example.com:0x50', + 'Origin':'http://example.com', + 'WebSocket-Protocol':'sample', + } + ), + ( # Header/connection port mismatch + 8080, + '/demo', + { + 'Upgrade':'WebSocket', + 'Connection':'Upgrade', + 'Host':'example.com', + 'Origin':'http://example.com', + 'WebSocket-Protocol':'sample', + } + ), + ( # Illegal WebSocket-Protocol + 80, + '/demo', + { + 'Upgrade':'WebSocket', + 'Connection':'Upgrade', + 'Host':'example.com', + 'Origin':'http://example.com', + 'WebSocket-Protocol':'illegal\x09protocol', + } + ), +) + +_STRICTLY_GOOD_REQUESTS = ( + ( + 'GET /demo HTTP/1.1\r\n', + 'Upgrade: WebSocket\r\n', + 'Connection: Upgrade\r\n', + 'Host: example.com\r\n', + 'Origin: http://example.com\r\n', + '\r\n', + ), + ( # WebSocket-Protocol + 'GET /demo HTTP/1.1\r\n', + 'Upgrade: WebSocket\r\n', + 'Connection: Upgrade\r\n', + 'Host: example.com\r\n', + 'Origin: http://example.com\r\n', + 'WebSocket-Protocol: sample\r\n', + '\r\n', + ), + ( # WebSocket-Protocol and Cookie + 'GET /demo HTTP/1.1\r\n', + 'Upgrade: WebSocket\r\n', + 'Connection: Upgrade\r\n', + 'Host: example.com\r\n', + 'Origin: http://example.com\r\n', + 'WebSocket-Protocol: sample\r\n', + 'Cookie: xyz\r\n' + '\r\n', + ), + ( # Cookie + 'GET /demo HTTP/1.1\r\n', + 'Upgrade: WebSocket\r\n', + 'Connection: Upgrade\r\n', + 'Host: example.com\r\n', + 'Origin: http://example.com\r\n', + 'Cookie: abc/xyz\r\n' + 'Cookie2: $Version=1\r\n' + 'Cookie: abc\r\n' + '\r\n', + ), + ( + 'GET / HTTP/1.1\r\n', + 'Upgrade: WebSocket\r\n', + 'Connection: Upgrade\r\n', + 'Host: example.com\r\n', + 'Origin: http://example.com\r\n', + '\r\n', + ), +) + +_NOT_STRICTLY_GOOD_REQUESTS = ( + ( # Extra space after GET + 'GET /demo HTTP/1.1\r\n', + 'Upgrade: WebSocket\r\n', + 'Connection: Upgrade\r\n', + 'Host: example.com\r\n', + 'Origin: http://example.com\r\n', + '\r\n', + ), + ( # Resource name doesn't stat with '/' + 'GET demo HTTP/1.1\r\n', + 'Upgrade: WebSocket\r\n', + 'Connection: Upgrade\r\n', + 'Host: example.com\r\n', + 'Origin: http://example.com\r\n', + '\r\n', + ), + ( # No space after : + 'GET /demo HTTP/1.1\r\n', + 'Upgrade:WebSocket\r\n', + 'Connection: Upgrade\r\n', + 'Host: example.com\r\n', + 'Origin: http://example.com\r\n', + '\r\n', + ), + ( # Lower case Upgrade header + 'GET /demo HTTP/1.1\r\n', + 'upgrade: WebSocket\r\n', + 'Connection: Upgrade\r\n', + 'Host: example.com\r\n', + 'Origin: http://example.com\r\n', + '\r\n', + ), + ( # Connection comes before Upgrade + 'GET /demo HTTP/1.1\r\n', + 'Connection: Upgrade\r\n', + 'Upgrade: WebSocket\r\n', + 'Host: example.com\r\n', + 'Origin: http://example.com\r\n', + '\r\n', + ), + ( # Origin comes before Host + 'GET /demo HTTP/1.1\r\n', + 'Upgrade: WebSocket\r\n', + 'Connection: Upgrade\r\n', + 'Origin: http://example.com\r\n', + 'Host: example.com\r\n', + '\r\n', + ), + ( # Host continued to the next line + 'GET /demo HTTP/1.1\r\n', + 'Upgrade: WebSocket\r\n', + 'Connection: Upgrade\r\n', + 'Host: example\r\n', + ' .com\r\n', + 'Origin: http://example.com\r\n', + '\r\n', + ), + ( # Cookie comes before WebSocket-Protocol + 'GET /demo HTTP/1.1\r\n', + 'Upgrade: WebSocket\r\n', + 'Connection: Upgrade\r\n', + 'Host: example.com\r\n', + 'Origin: http://example.com\r\n', + 'Cookie: xyz\r\n' + 'WebSocket-Protocol: sample\r\n', + '\r\n', + ), + ( # Unknown header + 'GET /demo HTTP/1.1\r\n', + 'Upgrade: WebSocket\r\n', + 'Connection: Upgrade\r\n', + 'Host: example.com\r\n', + 'Origin: http://example.com\r\n', + 'Content-Type: text/html\r\n' + '\r\n', + ), + ( # Cookie with continuation lines + 'GET /demo HTTP/1.1\r\n', + 'Upgrade: WebSocket\r\n', + 'Connection: Upgrade\r\n', + 'Host: example.com\r\n', + 'Origin: http://example.com\r\n', + 'Cookie: xyz\r\n', + ' abc\r\n', + ' defg\r\n', + '\r\n', + ), + ( # Wrong-case cookie + 'GET /demo HTTP/1.1\r\n', + 'Upgrade: WebSocket\r\n', + 'Connection: Upgrade\r\n', + 'Host: example.com\r\n', + 'Origin: http://example.com\r\n', + 'cookie: abc/xyz\r\n' + '\r\n', + ), + ( # Cookie, no space after colon + 'GET /demo HTTP/1.1\r\n', + 'Upgrade: WebSocket\r\n', + 'Connection: Upgrade\r\n', + 'Host: example.com\r\n', + 'Origin: http://example.com\r\n', + 'Cookie:abc/xyz\r\n' + '\r\n', + ), +) + + +def _create_request(request_def): + conn = mock.MockConn('') + conn.local_addr = ('0.0.0.0', request_def[0]) + return mock.MockRequest( + uri=request_def[1], + headers_in=request_def[2], + connection=conn) + + +def _create_get_memorized_lines(lines): + def get_memorized_lines(): + return lines + return get_memorized_lines + + +def _create_requests_with_lines(request_lines_set): + requests = [] + for lines in request_lines_set: + request = _create_request(_GOOD_REQUEST) + request.connection.get_memorized_lines = _create_get_memorized_lines( + lines) + requests.append(request) + return requests + + +class HandshakerTest(unittest.TestCase): + def test_validate_protocol(self): + handshake._validate_protocol('sample') # should succeed. + handshake._validate_protocol('Sample') # should succeed. + handshake._validate_protocol('sample\x20protocol') # should succeed. + handshake._validate_protocol('sample\x7eprotocol') # should succeed. + self.assertRaises(handshake.HandshakeError, + handshake._validate_protocol, + '') + self.assertRaises(handshake.HandshakeError, + handshake._validate_protocol, + 'sample\x19protocol') + self.assertRaises(handshake.HandshakeError, + handshake._validate_protocol, + 'sample\x7fprotocol') + self.assertRaises(handshake.HandshakeError, + handshake._validate_protocol, + # "Japan" in Japanese + u'\u65e5\u672c') + + def test_good_request_default_port(self): + request = _create_request(_GOOD_REQUEST) + handshaker = handshake.Handshaker(request, + mock.MockDispatcher()) + handshaker.do_handshake() + self.assertEqual(_GOOD_RESPONSE_DEFAULT_PORT, + request.connection.written_data()) + self.assertEqual('/demo', request.ws_resource) + self.assertEqual('http://example.com', request.ws_origin) + self.assertEqual('ws://example.com/demo', request.ws_location) + self.assertEqual('sample', request.ws_protocol) + + def test_good_request_secure_default_port(self): + request = _create_request(_GOOD_REQUEST) + request.connection.local_addr = ('0.0.0.0', 443) + request.is_https_ = True + handshaker = handshake.Handshaker(request, + mock.MockDispatcher()) + handshaker.do_handshake() + self.assertEqual(_GOOD_RESPONSE_SECURE, + request.connection.written_data()) + self.assertEqual('sample', request.ws_protocol) + + def test_good_request_nondefault_port(self): + request = _create_request(_GOOD_REQUEST_NONDEFAULT_PORT) + handshaker = handshake.Handshaker(request, + mock.MockDispatcher()) + handshaker.do_handshake() + self.assertEqual(_GOOD_RESPONSE_NONDEFAULT_PORT, + request.connection.written_data()) + self.assertEqual('sample', request.ws_protocol) + + def test_good_request_secure_non_default_port(self): + request = _create_request(_GOOD_REQUEST_NONDEFAULT_PORT) + request.is_https_ = True + handshaker = handshake.Handshaker(request, + mock.MockDispatcher()) + handshaker.do_handshake() + self.assertEqual(_GOOD_RESPONSE_SECURE_NONDEF, + request.connection.written_data()) + self.assertEqual('sample', request.ws_protocol) + + def test_good_request_default_no_protocol(self): + request = _create_request(_GOOD_REQUEST_NO_PROTOCOL) + handshaker = handshake.Handshaker(request, + mock.MockDispatcher()) + handshaker.do_handshake() + self.assertEqual(_GOOD_RESPONSE_NO_PROTOCOL, + request.connection.written_data()) + self.assertEqual(None, request.ws_protocol) + + def test_good_request_optional_headers(self): + request = _create_request(_GOOD_REQUEST_WITH_OPTIONAL_HEADERS) + handshaker = handshake.Handshaker(request, + mock.MockDispatcher()) + handshaker.do_handshake() + self.assertEqual('AValue', + request.headers_in['AKey']) + self.assertEqual('', + request.headers_in['EmptyValue']) + + def test_bad_requests(self): + for request in map(_create_request, _BAD_REQUESTS): + handshaker = handshake.Handshaker(request, + mock.MockDispatcher()) + self.assertRaises(handshake.HandshakeError, handshaker.do_handshake) + + def test_strictly_good_requests(self): + for request in _create_requests_with_lines(_STRICTLY_GOOD_REQUESTS): + strict_handshaker = handshake.Handshaker(request, + mock.MockDispatcher(), + True) + strict_handshaker.do_handshake() + + def test_not_strictly_good_requests(self): + for request in _create_requests_with_lines(_NOT_STRICTLY_GOOD_REQUESTS): + strict_handshaker = handshake.Handshaker(request, + mock.MockDispatcher(), + True) + self.assertRaises(handshake.HandshakeError, + strict_handshaker.do_handshake) + + + +if __name__ == '__main__': + unittest.main() + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/test_memorizingfile.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/test_memorizingfile.py new file mode 100644 index 0000000..2de77ba --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/test_memorizingfile.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python +# +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Tests for memorizingfile module.""" + + +import StringIO +import unittest + +import config # This must be imported before mod_pywebsocket. +from mod_pywebsocket import memorizingfile + + +class UtilTest(unittest.TestCase): + def check(self, memorizing_file, num_read, expected_list): + for unused in range(num_read): + memorizing_file.readline() + actual_list = memorizing_file.get_memorized_lines() + self.assertEqual(len(expected_list), len(actual_list)) + for expected, actual in zip(expected_list, actual_list): + self.assertEqual(expected, actual) + + def test_get_memorized_lines(self): + memorizing_file = memorizingfile.MemorizingFile(StringIO.StringIO( + 'Hello\nWorld\nWelcome')) + self.check(memorizing_file, 3, ['Hello\n', 'World\n', 'Welcome']) + + def test_get_memorized_lines_limit_memorized_lines(self): + memorizing_file = memorizingfile.MemorizingFile(StringIO.StringIO( + 'Hello\nWorld\nWelcome'), 2) + self.check(memorizing_file, 3, ['Hello\n', 'World\n']) + + def test_get_memorized_lines_empty_file(self): + memorizing_file = memorizingfile.MemorizingFile(StringIO.StringIO( + '')) + self.check(memorizing_file, 10, []) + + +if __name__ == '__main__': + unittest.main() + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/test_mock.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/test_mock.py new file mode 100644 index 0000000..8b137d1 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/test_mock.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python +# +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Tests for mock module.""" + + +import Queue +import threading +import unittest + +import mock + + +class MockConnTest(unittest.TestCase): + def setUp(self): + self._conn = mock.MockConn('ABC\r\nDEFG\r\n\r\nHIJK') + + def test_readline(self): + self.assertEqual('ABC\r\n', self._conn.readline()) + self.assertEqual('DEFG\r\n', self._conn.readline()) + self.assertEqual('\r\n', self._conn.readline()) + self.assertEqual('HIJK', self._conn.readline()) + self.assertEqual('', self._conn.readline()) + + def test_read(self): + self.assertEqual('ABC\r\nD', self._conn.read(6)) + self.assertEqual('EFG\r\n\r\nHI', self._conn.read(9)) + self.assertEqual('JK', self._conn.read(10)) + self.assertEqual('', self._conn.read(10)) + + def test_read_and_readline(self): + self.assertEqual('ABC\r\nD', self._conn.read(6)) + self.assertEqual('EFG\r\n', self._conn.readline()) + self.assertEqual('\r\nHIJK', self._conn.read(9)) + self.assertEqual('', self._conn.readline()) + + def test_write(self): + self._conn.write('Hello\r\n') + self._conn.write('World\r\n') + self.assertEqual('Hello\r\nWorld\r\n', self._conn.written_data()) + + +class MockBlockingConnTest(unittest.TestCase): + def test_read(self): + class LineReader(threading.Thread): + def __init__(self, conn, queue): + threading.Thread.__init__(self) + self._queue = queue + self._conn = conn + self.setDaemon(True) + self.start() + def run(self): + while True: + data = self._conn.readline() + self._queue.put(data) + conn = mock.MockBlockingConn() + queue = Queue.Queue() + reader = LineReader(conn, queue) + self.failUnless(queue.empty()) + conn.put_bytes('Foo bar\r\n') + read = queue.get() + self.assertEqual('Foo bar\r\n', read) + + +class MockTableTest(unittest.TestCase): + def test_create_from_dict(self): + table = mock.MockTable({'Key':'Value'}) + self.assertEqual('Value', table.get('KEY')) + self.assertEqual('Value', table['key']) + + def test_create_from_list(self): + table = mock.MockTable([('Key', 'Value')]) + self.assertEqual('Value', table.get('KEY')) + self.assertEqual('Value', table['key']) + + def test_create_from_tuple(self): + table = mock.MockTable((('Key', 'Value'),)) + self.assertEqual('Value', table.get('KEY')) + self.assertEqual('Value', table['key']) + + def test_set_and_get(self): + table = mock.MockTable() + self.assertEqual(None, table.get('Key')) + table['Key'] = 'Value' + self.assertEqual('Value', table.get('Key')) + self.assertEqual('Value', table.get('key')) + self.assertEqual('Value', table.get('KEY')) + self.assertEqual('Value', table['Key']) + self.assertEqual('Value', table['key']) + self.assertEqual('Value', table['KEY']) + + +if __name__ == '__main__': + unittest.main() + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/test_msgutil.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/test_msgutil.py new file mode 100644 index 0000000..16b88e0 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/test_msgutil.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python +# +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Tests for msgutil module.""" + + +import Queue +import unittest + +import config # This must be imported before mod_pywebsocket. +from mod_pywebsocket import msgutil + +import mock + + +def _create_request(read_data): + return mock.MockRequest(connection=mock.MockConn(read_data)) + +def _create_blocking_request(): + return mock.MockRequest(connection=mock.MockBlockingConn()) + +class MessageTest(unittest.TestCase): + def test_send_message(self): + request = _create_request('') + msgutil.send_message(request, 'Hello') + self.assertEqual('\x00Hello\xff', request.connection.written_data()) + + def test_send_message_unicode(self): + request = _create_request('') + msgutil.send_message(request, u'\u65e5') + # U+65e5 is encoded as e6,97,a5 in UTF-8 + self.assertEqual('\x00\xe6\x97\xa5\xff', + request.connection.written_data()) + + def test_receive_message(self): + request = _create_request('\x00Hello\xff\x00World!\xff') + self.assertEqual('Hello', msgutil.receive_message(request)) + self.assertEqual('World!', msgutil.receive_message(request)) + + def test_receive_message_unicode(self): + request = _create_request('\x00\xe6\x9c\xac\xff') + # U+672c is encoded as e6,9c,ac in UTF-8 + self.assertEqual(u'\u672c', msgutil.receive_message(request)) + + def test_receive_message_erroneous_unicode(self): + # \x80 and \x81 are invalid as UTF-8. + request = _create_request('\x00\x80\x81\xff') + # Invalid characters should be replaced with + # U+fffd REPLACEMENT CHARACTER + self.assertEqual(u'\ufffd\ufffd', msgutil.receive_message(request)) + + def test_receive_message_discard(self): + request = _create_request('\x80\x06IGNORE\x00Hello\xff' + '\x01DISREGARD\xff\x00World!\xff') + self.assertEqual('Hello', msgutil.receive_message(request)) + self.assertEqual('World!', msgutil.receive_message(request)) + + def test_payload_length(self): + for length, bytes in ((0, '\x00'), (0x7f, '\x7f'), (0x80, '\x81\x00'), + (0x1234, '\x80\xa4\x34')): + self.assertEqual(length, + msgutil._payload_length(_create_request(bytes))) + + def test_receive_bytes(self): + request = _create_request('abcdefg') + self.assertEqual('abc', msgutil._receive_bytes(request, 3)) + self.assertEqual('defg', msgutil._receive_bytes(request, 4)) + + def test_read_until(self): + request = _create_request('abcXdefgX') + self.assertEqual('abc', msgutil._read_until(request, 'X')) + self.assertEqual('defg', msgutil._read_until(request, 'X')) + + +class MessageReceiverTest(unittest.TestCase): + def test_queue(self): + request = _create_blocking_request() + receiver = msgutil.MessageReceiver(request) + + self.assertEqual(None, receiver.receive_nowait()) + + request.connection.put_bytes('\x00Hello!\xff') + self.assertEqual('Hello!', receiver.receive()) + + def test_onmessage(self): + onmessage_queue = Queue.Queue() + def onmessage_handler(message): + onmessage_queue.put(message) + + request = _create_blocking_request() + receiver = msgutil.MessageReceiver(request, onmessage_handler) + + request.connection.put_bytes('\x00Hello!\xff') + self.assertEqual('Hello!', onmessage_queue.get()) + + +class MessageSenderTest(unittest.TestCase): + def test_send(self): + request = _create_blocking_request() + sender = msgutil.MessageSender(request) + + sender.send('World') + self.assertEqual('\x00World\xff', request.connection.written_data()) + + def test_send_nowait(self): + # Use a queue to check the bytes written by MessageSender. + # request.connection.written_data() cannot be used here because + # MessageSender runs in a separate thread. + send_queue = Queue.Queue() + def write(bytes): + send_queue.put(bytes) + request = _create_blocking_request() + request.connection.write = write + + sender = msgutil.MessageSender(request) + + sender.send_nowait('Hello') + sender.send_nowait('World') + self.assertEqual('\x00Hello\xff', send_queue.get()) + self.assertEqual('\x00World\xff', send_queue.get()) + + +if __name__ == '__main__': + unittest.main() + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/test_util.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/test_util.py new file mode 100644 index 0000000..61f0db5 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/test_util.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python +# +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Tests for util module.""" + + +import os +import sys +import unittest + +import config # This must be imported before mod_pywebsocket. +from mod_pywebsocket import util + +_TEST_DATA_DIR = os.path.join(os.path.split(__file__)[0], 'testdata') + +class UtilTest(unittest.TestCase): + def test_get_stack_trace(self): + self.assertEqual('None\n', util.get_stack_trace()) + try: + a = 1 / 0 # Intentionally raise exception. + except Exception: + trace = util.get_stack_trace() + self.failUnless(trace.startswith('Traceback')) + self.failUnless(trace.find('ZeroDivisionError') != -1) + + def test_prepend_message_to_exception(self): + exc = Exception('World') + self.assertEqual('World', str(exc)) + util.prepend_message_to_exception('Hello ', exc) + self.assertEqual('Hello World', str(exc)) + + def test_get_script_interp(self): + cygwin_path = 'c:\\cygwin\\bin' + cygwin_perl = os.path.join(cygwin_path, 'perl') + self.assertEqual(None, util.get_script_interp( + os.path.join(_TEST_DATA_DIR, 'README'))) + self.assertEqual(None, util.get_script_interp( + os.path.join(_TEST_DATA_DIR, 'README'), cygwin_path)) + self.assertEqual('/usr/bin/perl -wT', util.get_script_interp( + os.path.join(_TEST_DATA_DIR, 'hello.pl'))) + self.assertEqual(cygwin_perl + ' -wT', util.get_script_interp( + os.path.join(_TEST_DATA_DIR, 'hello.pl'), cygwin_path)) + + +if __name__ == '__main__': + unittest.main() + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/README b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/README new file mode 100644 index 0000000..c001aa5 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/README @@ -0,0 +1 @@ +Test data directory diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/blank_wsh.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/blank_wsh.py new file mode 100644 index 0000000..7f87c6a --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/blank_wsh.py @@ -0,0 +1,31 @@ +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +# intentionally left blank diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/origin_check_wsh.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/origin_check_wsh.py new file mode 100644 index 0000000..2c139fa --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/origin_check_wsh.py @@ -0,0 +1,42 @@ +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +def web_socket_do_extra_handshake(request): + if request.ws_origin == 'http://example.com': + return + raise ValueError('Unacceptable origin: %r' % request.ws_origin) + + +def web_socket_transfer_data(request): + request.connection.write('origin_check_wsh.py is called for %s, %s' % + (request.ws_resource, request.ws_protocol)) + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/sub/exception_in_transfer_wsh.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/sub/exception_in_transfer_wsh.py new file mode 100644 index 0000000..b982d02 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/sub/exception_in_transfer_wsh.py @@ -0,0 +1,44 @@ +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Exception in web_socket_transfer_data(). +""" + + +def web_socket_do_extra_handshake(request): + pass + + +def web_socket_transfer_data(request): + raise Exception('Intentional Exception for %s, %s' % + (request.ws_resource, request.ws_protocol)) + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/sub/no_wsh_at_the_end.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/sub/no_wsh_at_the_end.py new file mode 100644 index 0000000..17e7be1 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/sub/no_wsh_at_the_end.py @@ -0,0 +1,45 @@ +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Correct signatures, wrong file name. +""" + + +def web_socket_do_extra_handshake(request): + pass + + +def web_socket_transfer_data(request): + request.connection.write( + 'sub/no_wsh_at_the_end.py is called for %s, %s' % + (request.ws_resource, request.ws_protocol)) + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/sub/non_callable_wsh.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/sub/non_callable_wsh.py new file mode 100644 index 0000000..26352eb --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/sub/non_callable_wsh.py @@ -0,0 +1,39 @@ +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Non-callable handlers. +""" + + +web_socket_do_extra_handshake = True +web_socket_transfer_data = 1 + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/sub/plain_wsh.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/sub/plain_wsh.py new file mode 100644 index 0000000..db3ff69 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/sub/plain_wsh.py @@ -0,0 +1,40 @@ +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +def web_socket_do_extra_handshake(request): + pass + + +def web_socket_transfer_data(request): + request.connection.write('sub/plain_wsh.py is called for %s, %s' % + (request.ws_resource, request.ws_protocol)) + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/sub/wrong_handshake_sig_wsh.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/sub/wrong_handshake_sig_wsh.py new file mode 100644 index 0000000..6bf659b --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/sub/wrong_handshake_sig_wsh.py @@ -0,0 +1,45 @@ +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Wrong web_socket_do_extra_handshake signature. +""" + + +def no_web_socket_do_extra_handshake(request): + pass + + +def web_socket_transfer_data(request): + request.connection.write( + 'sub/wrong_handshake_sig_wsh.py is called for %s, %s' % + (request.ws_resource, request.ws_protocol)) + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/sub/wrong_transfer_sig_wsh.py b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/sub/wrong_transfer_sig_wsh.py new file mode 100644 index 0000000..e0e2e55 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/handlers/sub/wrong_transfer_sig_wsh.py @@ -0,0 +1,45 @@ +# Copyright 2009, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +"""Wrong web_socket_transfer_data() signature. +""" + + +def web_socket_do_extra_handshake(request): + pass + + +def no_web_socket_transfer_data(request): + request.connection.write( + 'sub/wrong_transfer_sig_wsh.py is called for %s, %s' % + (request.ws_resource, request.ws_protocol)) + + +# vi:sts=4 sw=4 et diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/hello.pl b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/hello.pl new file mode 100644 index 0000000..9dd01b4 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/pywebsocket/test/testdata/hello.pl @@ -0,0 +1,2 @@ +#!/usr/bin/perl -wT +print "Hello\n"; diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/LICENSE.txt b/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/LICENSE.txt new file mode 100644 index 0000000..ad95f29 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/LICENSE.txt @@ -0,0 +1,19 @@ +Copyright (c) 2006 Bob Ippolito + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/README.txt b/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/README.txt new file mode 100644 index 0000000..7f726ce --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/README.txt @@ -0,0 +1,11 @@ +URL: http://undefined.org/python/#simplejson +Version: 1.7.3 +License: MIT +License File: LICENSE.txt + +Description: +simplejson is a JSON encoder and decoder for Python. + + +Local Modifications: +Removed unit tests from current distribution. diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/__init__.py b/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/__init__.py new file mode 100644 index 0000000..38d6229 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/__init__.py @@ -0,0 +1,287 @@ +r""" +A simple, fast, extensible JSON encoder and decoder + +JSON (JavaScript Object Notation) <http://json.org> is a subset of +JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data +interchange format. + +simplejson exposes an API familiar to uses of the standard library +marshal and pickle modules. + +Encoding basic Python object hierarchies:: + + >>> import simplejson + >>> simplejson.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) + '["foo", {"bar": ["baz", null, 1.0, 2]}]' + >>> print simplejson.dumps("\"foo\bar") + "\"foo\bar" + >>> print simplejson.dumps(u'\u1234') + "\u1234" + >>> print simplejson.dumps('\\') + "\\" + >>> print simplejson.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) + {"a": 0, "b": 0, "c": 0} + >>> from StringIO import StringIO + >>> io = StringIO() + >>> simplejson.dump(['streaming API'], io) + >>> io.getvalue() + '["streaming API"]' + +Compact encoding:: + + >>> import simplejson + >>> simplejson.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) + '[1,2,3,{"4":5,"6":7}]' + +Pretty printing:: + + >>> import simplejson + >>> print simplejson.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) + { + "4": 5, + "6": 7 + } + +Decoding JSON:: + + >>> import simplejson + >>> simplejson.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') + [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] + >>> simplejson.loads('"\\"foo\\bar"') + u'"foo\x08ar' + >>> from StringIO import StringIO + >>> io = StringIO('["streaming API"]') + >>> simplejson.load(io) + [u'streaming API'] + +Specializing JSON object decoding:: + + >>> import simplejson + >>> def as_complex(dct): + ... if '__complex__' in dct: + ... return complex(dct['real'], dct['imag']) + ... return dct + ... + >>> simplejson.loads('{"__complex__": true, "real": 1, "imag": 2}', + ... object_hook=as_complex) + (1+2j) + +Extending JSONEncoder:: + + >>> import simplejson + >>> class ComplexEncoder(simplejson.JSONEncoder): + ... def default(self, obj): + ... if isinstance(obj, complex): + ... return [obj.real, obj.imag] + ... return simplejson.JSONEncoder.default(self, obj) + ... + >>> dumps(2 + 1j, cls=ComplexEncoder) + '[2.0, 1.0]' + >>> ComplexEncoder().encode(2 + 1j) + '[2.0, 1.0]' + >>> list(ComplexEncoder().iterencode(2 + 1j)) + ['[', '2.0', ', ', '1.0', ']'] + + +Note that the JSON produced by this module's default settings +is a subset of YAML, so it may be used as a serializer for that as well. +""" +__version__ = '1.7.3' +__all__ = [ + 'dump', 'dumps', 'load', 'loads', + 'JSONDecoder', 'JSONEncoder', +] + +from decoder import JSONDecoder +from encoder import JSONEncoder + +_default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8' +) + +def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', **kw): + """ + Serialize ``obj`` as a JSON formatted stream to ``fp`` (a + ``.write()``-supporting file-like object). + + If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is ``False``, then the some chunks written to ``fp`` + may be ``unicode`` instances, subject to normal Python ``str`` to + ``unicode`` coercion rules. Unless ``fp.write()`` explicitly + understands ``unicode`` (as in ``codecs.getwriter()``) this is likely + to cause an error. + + If ``check_circular`` is ``False``, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) + in strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and object + members will be pretty-printed with that indent level. An indent level + of 0 will only insert newlines. ``None`` is the most compact representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. + """ + # cached encoder + if (skipkeys is False and ensure_ascii is True and + check_circular is True and allow_nan is True and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and not kw): + iterable = _default_encoder.iterencode(obj) + else: + if cls is None: + cls = JSONEncoder + iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, **kw).iterencode(obj) + # could accelerate with writelines in some versions of Python, at + # a debuggability cost + for chunk in iterable: + fp.write(chunk) + + +def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, + allow_nan=True, cls=None, indent=None, separators=None, + encoding='utf-8', **kw): + """ + Serialize ``obj`` to a JSON formatted ``str``. + + If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types + (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) + will be skipped instead of raising a ``TypeError``. + + If ``ensure_ascii`` is ``False``, then the return value will be a + ``unicode`` instance subject to normal Python ``str`` to ``unicode`` + coercion rules instead of being escaped to an ASCII ``str``. + + If ``check_circular`` is ``False``, then the circular reference check + for container types will be skipped and a circular reference will + result in an ``OverflowError`` (or worse). + + If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to + serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in + strict compliance of the JSON specification, instead of using the + JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). + + If ``indent`` is a non-negative integer, then JSON array elements and + object members will be pretty-printed with that indent level. An indent + level of 0 will only insert newlines. ``None`` is the most compact + representation. + + If ``separators`` is an ``(item_separator, dict_separator)`` tuple + then it will be used instead of the default ``(', ', ': ')`` separators. + ``(',', ':')`` is the most compact JSON representation. + + ``encoding`` is the character encoding for str instances, default is UTF-8. + + To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the + ``.default()`` method to serialize additional types), specify it with + the ``cls`` kwarg. + """ + # cached encoder + if (skipkeys is False and ensure_ascii is True and + check_circular is True and allow_nan is True and + cls is None and indent is None and separators is None and + encoding == 'utf-8' and not kw): + return _default_encoder.encode(obj) + if cls is None: + cls = JSONEncoder + return cls( + skipkeys=skipkeys, ensure_ascii=ensure_ascii, + check_circular=check_circular, allow_nan=allow_nan, indent=indent, + separators=separators, encoding=encoding, + **kw).encode(obj) + +_default_decoder = JSONDecoder(encoding=None, object_hook=None) + +def load(fp, encoding=None, cls=None, object_hook=None, **kw): + """ + Deserialize ``fp`` (a ``.read()``-supporting file-like object containing + a JSON document) to a Python object. + + If the contents of ``fp`` is encoded with an ASCII based encoding other + than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must + be specified. Encodings that are not ASCII based (such as UCS-2) are + not allowed, and should be wrapped with + ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` + object and passed to ``loads()`` + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. + """ + return loads(fp.read(), + encoding=encoding, cls=cls, object_hook=object_hook, **kw) + +def loads(s, encoding=None, cls=None, object_hook=None, **kw): + """ + Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON + document) to a Python object. + + If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding + other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name + must be specified. Encodings that are not ASCII based (such as UCS-2) + are not allowed and should be decoded to ``unicode`` first. + + ``object_hook`` is an optional function that will be called with the + result of any object literal decode (a ``dict``). The return value of + ``object_hook`` will be used instead of the ``dict``. This feature + can be used to implement custom decoders (e.g. JSON-RPC class hinting). + + To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` + kwarg. + """ + if cls is None and encoding is None and object_hook is None and not kw: + return _default_decoder.decode(s) + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + return cls(encoding=encoding, **kw).decode(s) + +def read(s): + """ + json-py API compatibility hook. Use loads(s) instead. + """ + import warnings + warnings.warn("simplejson.loads(s) should be used instead of read(s)", + DeprecationWarning) + return loads(s) + +def write(obj): + """ + json-py API compatibility hook. Use dumps(s) instead. + """ + import warnings + warnings.warn("simplejson.dumps(s) should be used instead of write(s)", + DeprecationWarning) + return dumps(obj) + + diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/_speedups.c b/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/_speedups.c new file mode 100644 index 0000000..8f290bb --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/_speedups.c @@ -0,0 +1,215 @@ +#include "Python.h" +#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +typedef int Py_ssize_t; +#define PY_SSIZE_T_MAX INT_MAX +#define PY_SSIZE_T_MIN INT_MIN +#endif + +static Py_ssize_t +ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); +static PyObject * +ascii_escape_unicode(PyObject *pystr); +static PyObject * +ascii_escape_str(PyObject *pystr); +static PyObject * +py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr); +void init_speedups(void); + +#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '/' && c != '"') + +#define MIN_EXPANSION 6 +#ifdef Py_UNICODE_WIDE +#define MAX_EXPANSION (2 * MIN_EXPANSION) +#else +#define MAX_EXPANSION MIN_EXPANSION +#endif + +static Py_ssize_t +ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) { + Py_UNICODE x; + output[chars++] = '\\'; + switch (c) { + case '/': output[chars++] = (char)c; break; + case '\\': output[chars++] = (char)c; break; + case '"': output[chars++] = (char)c; break; + case '\b': output[chars++] = 'b'; break; + case '\f': output[chars++] = 'f'; break; + case '\n': output[chars++] = 'n'; break; + case '\r': output[chars++] = 'r'; break; + case '\t': output[chars++] = 't'; break; + default: +#ifdef Py_UNICODE_WIDE + if (c >= 0x10000) { + /* UTF-16 surrogate pair */ + Py_UNICODE v = c - 0x10000; + c = 0xd800 | ((v >> 10) & 0x3ff); + output[chars++] = 'u'; + x = (c & 0xf000) >> 12; + output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); + x = (c & 0x0f00) >> 8; + output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); + x = (c & 0x00f0) >> 4; + output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); + x = (c & 0x000f); + output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); + c = 0xdc00 | (v & 0x3ff); + output[chars++] = '\\'; + } +#endif + output[chars++] = 'u'; + x = (c & 0xf000) >> 12; + output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); + x = (c & 0x0f00) >> 8; + output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); + x = (c & 0x00f0) >> 4; + output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); + x = (c & 0x000f); + output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); + } + return chars; +} + +static PyObject * +ascii_escape_unicode(PyObject *pystr) { + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + Py_UNICODE *input_unicode; + + input_chars = PyUnicode_GET_SIZE(pystr); + input_unicode = PyUnicode_AS_UNICODE(pystr); + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + chars = 0; + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + Py_UNICODE c = input_unicode[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } else { + chars = ascii_escape_char(c, output, chars); + } + if (output_size - chars < (1 + MAX_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + output_size *= 2; + /* This is an upper bound */ + if (output_size > 2 + (input_chars * MAX_EXPANSION)) { + output_size = 2 + (input_chars * MAX_EXPANSION); + } + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +static PyObject * +ascii_escape_str(PyObject *pystr) { + Py_ssize_t i; + Py_ssize_t input_chars; + Py_ssize_t output_size; + Py_ssize_t chars; + PyObject *rval; + char *output; + char *input_str; + + input_chars = PyString_GET_SIZE(pystr); + input_str = PyString_AS_STRING(pystr); + /* One char input can be up to 6 chars output, estimate 4 of these */ + output_size = 2 + (MIN_EXPANSION * 4) + input_chars; + rval = PyString_FromStringAndSize(NULL, output_size); + if (rval == NULL) { + return NULL; + } + output = PyString_AS_STRING(rval); + chars = 0; + output[chars++] = '"'; + for (i = 0; i < input_chars; i++) { + Py_UNICODE c = (Py_UNICODE)input_str[i]; + if (S_CHAR(c)) { + output[chars++] = (char)c; + } else if (c > 0x7F) { + /* We hit a non-ASCII character, bail to unicode mode */ + PyObject *uni; + Py_DECREF(rval); + uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); + if (uni == NULL) { + return NULL; + } + rval = ascii_escape_unicode(uni); + Py_DECREF(uni); + return rval; + } else { + chars = ascii_escape_char(c, output, chars); + } + /* An ASCII char can't possibly expand to a surrogate! */ + if (output_size - chars < (1 + MIN_EXPANSION)) { + /* There's more than four, so let's resize by a lot */ + output_size *= 2; + if (output_size > 2 + (input_chars * MIN_EXPANSION)) { + output_size = 2 + (input_chars * MIN_EXPANSION); + } + if (_PyString_Resize(&rval, output_size) == -1) { + return NULL; + } + output = PyString_AS_STRING(rval); + } + } + output[chars++] = '"'; + if (_PyString_Resize(&rval, chars) == -1) { + return NULL; + } + return rval; +} + +PyDoc_STRVAR(pydoc_encode_basestring_ascii, + "encode_basestring_ascii(basestring) -> str\n" + "\n" + "..." +); + +static PyObject * +py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr) { + /* METH_O */ + if (PyString_Check(pystr)) { + return ascii_escape_str(pystr); + } else if (PyUnicode_Check(pystr)) { + return ascii_escape_unicode(pystr); + } + PyErr_SetString(PyExc_TypeError, "first argument must be a string"); + return NULL; +} + +#define DEFN(n, k) \ + { \ + #n, \ + (PyCFunction)py_ ##n, \ + k, \ + pydoc_ ##n \ + } +static PyMethodDef speedups_methods[] = { + DEFN(encode_basestring_ascii, METH_O), + {} +}; +#undef DEFN + +void +init_speedups(void) +{ + PyObject *m; + m = Py_InitModule4("_speedups", speedups_methods, NULL, NULL, PYTHON_API_VERSION); +} diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/decoder.py b/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/decoder.py new file mode 100644 index 0000000..b887b58 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/decoder.py @@ -0,0 +1,273 @@ +""" +Implementation of JSONDecoder +""" +import re + +from .scanner import Scanner, pattern + +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL + +def _floatconstants(): + import struct + import sys + _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') + if sys.byteorder != 'big': + _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] + nan, inf = struct.unpack('dd', _BYTES) + return nan, inf, -inf + +NaN, PosInf, NegInf = _floatconstants() + +def linecol(doc, pos): + lineno = doc.count('\n', 0, pos) + 1 + if lineno == 1: + colno = pos + else: + colno = pos - doc.rindex('\n', 0, pos) + return lineno, colno + +def errmsg(msg, doc, pos, end=None): + lineno, colno = linecol(doc, pos) + if end is None: + return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos) + endlineno, endcolno = linecol(doc, end) + return '%s: line %d column %d - line %d column %d (char %d - %d)' % ( + msg, lineno, colno, endlineno, endcolno, pos, end) + +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, + 'true': True, + 'false': False, + 'null': None, +} + +def JSONConstant(match, context, c=_CONSTANTS): + return c[match.group(0)], None +pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant) + +def JSONNumber(match, context): + match = JSONNumber.regex.match(match.string, *match.span()) + integer, frac, exp = match.groups() + if frac or exp: + res = float(integer + (frac or '') + (exp or '')) + else: + res = int(integer) + return res, None +pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber) + +STRINGCHUNK = re.compile(r'(.*?)(["\\])', FLAGS) +BACKSLASH = { + '"': u'"', '\\': u'\\', '/': u'/', + 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', +} + +DEFAULT_ENCODING = "utf-8" + +def scanstring(s, end, encoding=None, _b=BACKSLASH, _m=STRINGCHUNK.match): + if encoding is None: + encoding = DEFAULT_ENCODING + chunks = [] + _append = chunks.append + begin = end - 1 + while 1: + chunk = _m(s, end) + if chunk is None: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + end = chunk.end() + content, terminator = chunk.groups() + if content: + if not isinstance(content, unicode): + content = unicode(content, encoding) + _append(content) + if terminator == '"': + break + try: + esc = s[end] + except IndexError: + raise ValueError( + errmsg("Unterminated string starting at", s, begin)) + if esc != 'u': + try: + m = _b[esc] + except KeyError: + raise ValueError( + errmsg("Invalid \\escape: %r" % (esc,), s, end)) + end += 1 + else: + esc = s[end + 1:end + 5] + try: + m = unichr(int(esc, 16)) + if len(esc) != 4 or not esc.isalnum(): + raise ValueError + except ValueError: + raise ValueError(errmsg("Invalid \\uXXXX escape", s, end)) + end += 5 + _append(m) + return u''.join(chunks), end + +def JSONString(match, context): + encoding = getattr(context, 'encoding', None) + return scanstring(match.string, match.end(), encoding) +pattern(r'"')(JSONString) + +WHITESPACE = re.compile(r'\s*', FLAGS) + +def JSONObject(match, context, _w=WHITESPACE.match): + pairs = {} + s = match.string + end = _w(s, match.end()).end() + nextchar = s[end:end + 1] + # trivial empty object + if nextchar == '}': + return pairs, end + 1 + if nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end)) + end += 1 + encoding = getattr(context, 'encoding', None) + iterscan = JSONScanner.iterscan + while True: + key, end = scanstring(s, end, encoding) + end = _w(s, end).end() + if s[end:end + 1] != ':': + raise ValueError(errmsg("Expecting : delimiter", s, end)) + end = _w(s, end + 1).end() + try: + value, end = iterscan(s, idx=end, context=context).next() + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + pairs[key] = value + end = _w(s, end).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == '}': + break + if nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) + end = _w(s, end).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar != '"': + raise ValueError(errmsg("Expecting property name", s, end - 1)) + object_hook = getattr(context, 'object_hook', None) + if object_hook is not None: + pairs = object_hook(pairs) + return pairs, end +pattern(r'{')(JSONObject) + +def JSONArray(match, context, _w=WHITESPACE.match): + values = [] + s = match.string + end = _w(s, match.end()).end() + # look-ahead for trivial empty array + nextchar = s[end:end + 1] + if nextchar == ']': + return values, end + 1 + iterscan = JSONScanner.iterscan + while True: + try: + value, end = iterscan(s, idx=end, context=context).next() + except StopIteration: + raise ValueError(errmsg("Expecting object", s, end)) + values.append(value) + end = _w(s, end).end() + nextchar = s[end:end + 1] + end += 1 + if nextchar == ']': + break + if nextchar != ',': + raise ValueError(errmsg("Expecting , delimiter", s, end)) + end = _w(s, end).end() + return values, end +pattern(r'\[')(JSONArray) + +ANYTHING = [ + JSONObject, + JSONArray, + JSONString, + JSONConstant, + JSONNumber, +] + +JSONScanner = Scanner(ANYTHING) + +class JSONDecoder(object): + """ + Simple JSON <http://json.org> decoder + + Performs the following translations in decoding: + + +---------------+-------------------+ + | JSON | Python | + +===============+===================+ + | object | dict | + +---------------+-------------------+ + | array | list | + +---------------+-------------------+ + | string | unicode | + +---------------+-------------------+ + | number (int) | int, long | + +---------------+-------------------+ + | number (real) | float | + +---------------+-------------------+ + | true | True | + +---------------+-------------------+ + | false | False | + +---------------+-------------------+ + | null | None | + +---------------+-------------------+ + + It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as + their corresponding ``float`` values, which is outside the JSON spec. + """ + + _scanner = Scanner(ANYTHING) + __all__ = ['__init__', 'decode', 'raw_decode'] + + def __init__(self, encoding=None, object_hook=None): + """ + ``encoding`` determines the encoding used to interpret any ``str`` + objects decoded by this instance (utf-8 by default). It has no + effect when decoding ``unicode`` objects. + + Note that currently only encodings that are a superset of ASCII work, + strings of other encodings should be passed in as ``unicode``. + + ``object_hook``, if specified, will be called with the result + of every JSON object decoded and its return value will be used in + place of the given ``dict``. This can be used to provide custom + deserializations (e.g. to support JSON-RPC class hinting). + """ + self.encoding = encoding + self.object_hook = object_hook + + def decode(self, s, _w=WHITESPACE.match): + """ + Return the Python representation of ``s`` (a ``str`` or ``unicode`` + instance containing a JSON document) + """ + obj, end = self.raw_decode(s, idx=_w(s, 0).end()) + end = _w(s, end).end() + if end != len(s): + raise ValueError(errmsg("Extra data", s, end, len(s))) + return obj + + def raw_decode(self, s, **kw): + """ + Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning + with a JSON document) and return a 2-tuple of the Python + representation and the index in ``s`` where the document ended. + + This can be used to decode a JSON document from a string that may + have extraneous data at the end. + """ + kw.setdefault('context', self) + try: + obj, end = self._scanner.iterscan(s, **kw).next() + except StopIteration: + raise ValueError("No JSON object could be decoded") + return obj, end + +__all__ = ['JSONDecoder'] diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/encoder.py b/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/encoder.py new file mode 100644 index 0000000..d29919a --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/encoder.py @@ -0,0 +1,371 @@ +""" +Implementation of JSONEncoder +""" +import re +try: + from simplejson import _speedups +except ImportError: + _speedups = None + +ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]') +ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])') +ESCAPE_DCT = { + # escape all forward slashes to prevent </script> attack + '/': '\\/', + '\\': '\\\\', + '"': '\\"', + '\b': '\\b', + '\f': '\\f', + '\n': '\\n', + '\r': '\\r', + '\t': '\\t', +} +for i in range(0x20): + ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) + +# assume this produces an infinity on all machines (probably not guaranteed) +INFINITY = float('1e66666') + +def floatstr(o, allow_nan=True): + # Check for specials. Note that this type of test is processor- and/or + # platform-specific, so do tests which don't depend on the internals. + + if o != o: + text = 'NaN' + elif o == INFINITY: + text = 'Infinity' + elif o == -INFINITY: + text = '-Infinity' + else: + return repr(o) + + if not allow_nan: + raise ValueError("Out of range float values are not JSON compliant: %r" + % (o,)) + + return text + + +def encode_basestring(s): + """ + Return a JSON representation of a Python string + """ + def replace(match): + return ESCAPE_DCT[match.group(0)] + return '"' + ESCAPE.sub(replace, s) + '"' + +def encode_basestring_ascii(s): + def replace(match): + s = match.group(0) + try: + return ESCAPE_DCT[s] + except KeyError: + n = ord(s) + if n < 0x10000: + return '\\u%04x' % (n,) + else: + # surrogate pair + n -= 0x10000 + s1 = 0xd800 | ((n >> 10) & 0x3ff) + s2 = 0xdc00 | (n & 0x3ff) + return '\\u%04x\\u%04x' % (s1, s2) + return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' + +try: + encode_basestring_ascii = _speedups.encode_basestring_ascii + _need_utf8 = True +except AttributeError: + _need_utf8 = False + +class JSONEncoder(object): + """ + Extensible JSON <http://json.org> encoder for Python data structures. + + Supports the following objects and types by default: + + +-------------------+---------------+ + | Python | JSON | + +===================+===============+ + | dict | object | + +-------------------+---------------+ + | list, tuple | array | + +-------------------+---------------+ + | str, unicode | string | + +-------------------+---------------+ + | int, long, float | number | + +-------------------+---------------+ + | True | true | + +-------------------+---------------+ + | False | false | + +-------------------+---------------+ + | None | null | + +-------------------+---------------+ + + To extend this to recognize other objects, subclass and implement a + ``.default()`` method with another method that returns a serializable + object for ``o`` if possible, otherwise it should call the superclass + implementation (to raise ``TypeError``). + """ + __all__ = ['__init__', 'default', 'encode', 'iterencode'] + item_separator = ', ' + key_separator = ': ' + def __init__(self, skipkeys=False, ensure_ascii=True, + check_circular=True, allow_nan=True, sort_keys=False, + indent=None, separators=None, encoding='utf-8'): + """ + Constructor for JSONEncoder, with sensible defaults. + + If skipkeys is False, then it is a TypeError to attempt + encoding of keys that are not str, int, long, float or None. If + skipkeys is True, such items are simply skipped. + + If ensure_ascii is True, the output is guaranteed to be str + objects with all incoming unicode characters escaped. If + ensure_ascii is false, the output will be unicode object. + + If check_circular is True, then lists, dicts, and custom encoded + objects will be checked for circular references during encoding to + prevent an infinite recursion (which would cause an OverflowError). + Otherwise, no such check takes place. + + If allow_nan is True, then NaN, Infinity, and -Infinity will be + encoded as such. This behavior is not JSON specification compliant, + but is consistent with most JavaScript based encoders and decoders. + Otherwise, it will be a ValueError to encode such floats. + + If sort_keys is True, then the output of dictionaries will be + sorted by key; this is useful for regression tests to ensure + that JSON serializations can be compared on a day-to-day basis. + + If indent is a non-negative integer, then JSON array + elements and object members will be pretty-printed with that + indent level. An indent level of 0 will only insert newlines. + None is the most compact representation. + + If specified, separators should be a (item_separator, key_separator) + tuple. The default is (', ', ': '). To get the most compact JSON + representation you should specify (',', ':') to eliminate whitespace. + + If encoding is not None, then all input strings will be + transformed into unicode using that encoding prior to JSON-encoding. + The default is UTF-8. + """ + + self.skipkeys = skipkeys + self.ensure_ascii = ensure_ascii + self.check_circular = check_circular + self.allow_nan = allow_nan + self.sort_keys = sort_keys + self.indent = indent + self.current_indent_level = 0 + if separators is not None: + self.item_separator, self.key_separator = separators + self.encoding = encoding + + def _newline_indent(self): + return '\n' + (' ' * (self.indent * self.current_indent_level)) + + def _iterencode_list(self, lst, markers=None): + if not lst: + yield '[]' + return + if markers is not None: + markerid = id(lst) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = lst + yield '[' + if self.indent is not None: + self.current_indent_level += 1 + newline_indent = self._newline_indent() + separator = self.item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + separator = self.item_separator + first = True + for value in lst: + if first: + first = False + else: + yield separator + for chunk in self._iterencode(value, markers): + yield chunk + if newline_indent is not None: + self.current_indent_level -= 1 + yield self._newline_indent() + yield ']' + if markers is not None: + del markers[markerid] + + def _iterencode_dict(self, dct, markers=None): + if not dct: + yield '{}' + return + if markers is not None: + markerid = id(dct) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = dct + yield '{' + key_separator = self.key_separator + if self.indent is not None: + self.current_indent_level += 1 + newline_indent = self._newline_indent() + item_separator = self.item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + item_separator = self.item_separator + first = True + if self.ensure_ascii: + encoder = encode_basestring_ascii + else: + encoder = encode_basestring + allow_nan = self.allow_nan + if self.sort_keys: + keys = dct.keys() + keys.sort() + items = [(k, dct[k]) for k in keys] + else: + items = dct.iteritems() + _encoding = self.encoding + _do_decode = (_encoding is not None + and not (_need_utf8 and _encoding == 'utf-8')) + for key, value in items: + if isinstance(key, str): + if _do_decode: + key = key.decode(_encoding) + elif isinstance(key, basestring): + pass + # JavaScript is weakly typed for these, so it makes sense to + # also allow them. Many encoders seem to do something like this. + elif isinstance(key, float): + key = floatstr(key, allow_nan) + elif isinstance(key, (int, long)): + key = str(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif self.skipkeys: + continue + else: + raise TypeError("key %r is not a string" % (key,)) + if first: + first = False + else: + yield item_separator + yield encoder(key) + yield key_separator + for chunk in self._iterencode(value, markers): + yield chunk + if newline_indent is not None: + self.current_indent_level -= 1 + yield self._newline_indent() + yield '}' + if markers is not None: + del markers[markerid] + + def _iterencode(self, o, markers=None): + if isinstance(o, basestring): + if self.ensure_ascii: + encoder = encode_basestring_ascii + else: + encoder = encode_basestring + _encoding = self.encoding + if (_encoding is not None and isinstance(o, str) + and not (_need_utf8 and _encoding == 'utf-8')): + o = o.decode(_encoding) + yield encoder(o) + elif o is None: + yield 'null' + elif o is True: + yield 'true' + elif o is False: + yield 'false' + elif isinstance(o, (int, long)): + yield str(o) + elif isinstance(o, float): + yield floatstr(o, self.allow_nan) + elif isinstance(o, (list, tuple)): + for chunk in self._iterencode_list(o, markers): + yield chunk + elif isinstance(o, dict): + for chunk in self._iterencode_dict(o, markers): + yield chunk + else: + if markers is not None: + markerid = id(o) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = o + for chunk in self._iterencode_default(o, markers): + yield chunk + if markers is not None: + del markers[markerid] + + def _iterencode_default(self, o, markers=None): + newobj = self.default(o) + return self._iterencode(newobj, markers) + + def default(self, o): + """ + Implement this method in a subclass such that it returns + a serializable object for ``o``, or calls the base implementation + (to raise a ``TypeError``). + + For example, to support arbitrary iterators, you could + implement default like this:: + + def default(self, o): + try: + iterable = iter(o) + except TypeError: + pass + else: + return list(iterable) + return JSONEncoder.default(self, o) + """ + raise TypeError("%r is not JSON serializable" % (o,)) + + def encode(self, o): + """ + Return a JSON string representation of a Python data structure. + + >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) + '{"foo":["bar", "baz"]}' + """ + # This is for extremely simple cases and benchmarks... + if isinstance(o, basestring): + if isinstance(o, str): + _encoding = self.encoding + if (_encoding is not None + and not (_encoding == 'utf-8' and _need_utf8)): + o = o.decode(_encoding) + return encode_basestring_ascii(o) + # This doesn't pass the iterator directly to ''.join() because it + # sucks at reporting exceptions. It's going to do this internally + # anyway because it uses PySequence_Fast or similar. + chunks = list(self.iterencode(o)) + return ''.join(chunks) + + def iterencode(self, o): + """ + Encode the given object and yield each string + representation as available. + + For example:: + + for chunk in JSONEncoder().iterencode(bigobject): + mysocket.write(chunk) + """ + if self.check_circular: + markers = {} + else: + markers = None + return self._iterencode(o, markers) + +__all__ = ['JSONEncoder'] diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/jsonfilter.py b/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/jsonfilter.py new file mode 100644 index 0000000..01ca21d --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/jsonfilter.py @@ -0,0 +1,40 @@ +import simplejson +import cgi + +class JSONFilter(object): + def __init__(self, app, mime_type='text/x-json'): + self.app = app + self.mime_type = mime_type + + def __call__(self, environ, start_response): + # Read JSON POST input to jsonfilter.json if matching mime type + response = {'status': '200 OK', 'headers': []} + def json_start_response(status, headers): + response['status'] = status + response['headers'].extend(headers) + environ['jsonfilter.mime_type'] = self.mime_type + if environ.get('REQUEST_METHOD', '') == 'POST': + if environ.get('CONTENT_TYPE', '') == self.mime_type: + args = [_ for _ in [environ.get('CONTENT_LENGTH')] if _] + data = environ['wsgi.input'].read(*map(int, args)) + environ['jsonfilter.json'] = simplejson.loads(data) + res = simplejson.dumps(self.app(environ, json_start_response)) + jsonp = cgi.parse_qs(environ.get('QUERY_STRING', '')).get('jsonp') + if jsonp: + content_type = 'text/javascript' + res = ''.join(jsonp + ['(', res, ')']) + elif 'Opera' in environ.get('HTTP_USER_AGENT', ''): + # Opera has bunk XMLHttpRequest support for most mime types + content_type = 'text/plain' + else: + content_type = self.mime_type + headers = [ + ('Content-type', content_type), + ('Content-length', len(res)), + ] + headers.extend(response['headers']) + start_response(response['status'], headers) + return [res] + +def factory(app, global_conf, **kw): + return JSONFilter(app, **kw) diff --git a/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/scanner.py b/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/scanner.py new file mode 100644 index 0000000..64f4999 --- /dev/null +++ b/WebKitTools/Scripts/webkitpy/thirdparty/simplejson/scanner.py @@ -0,0 +1,63 @@ +""" +Iterator based sre token scanner +""" +import sre_parse, sre_compile, sre_constants +from sre_constants import BRANCH, SUBPATTERN +from re import VERBOSE, MULTILINE, DOTALL +import re + +__all__ = ['Scanner', 'pattern'] + +FLAGS = (VERBOSE | MULTILINE | DOTALL) +class Scanner(object): + def __init__(self, lexicon, flags=FLAGS): + self.actions = [None] + # combine phrases into a compound pattern + s = sre_parse.Pattern() + s.flags = flags + p = [] + for idx, token in enumerate(lexicon): + phrase = token.pattern + try: + subpattern = sre_parse.SubPattern(s, + [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))]) + except sre_constants.error: + raise + p.append(subpattern) + self.actions.append(token) + + p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) + self.scanner = sre_compile.compile(p) + + + def iterscan(self, string, idx=0, context=None): + """ + Yield match, end_idx for each match + """ + match = self.scanner.scanner(string, idx).match + actions = self.actions + lastend = idx + end = len(string) + while True: + m = match() + if m is None: + break + matchbegin, matchend = m.span() + if lastend == matchend: + break + action = actions[m.lastindex] + if action is not None: + rval, next_pos = action(m, context) + if next_pos is not None and next_pos != matchend: + # "fast forward" the scanner + matchend = next_pos + match = self.scanner.scanner(string, matchend).match + yield rval, matchend + lastend = matchend + +def pattern(pattern, flags=FLAGS): + def decorator(fn): + fn.pattern = pattern + fn.regex = re.compile(pattern, flags) + return fn + return decorator |